@principles/core 1.161.0 → 1.162.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/runtime-v2/__tests__/adversarial-loop.test.js +24 -78
- package/dist/runtime-v2/__tests__/adversarial-loop.test.js.map +1 -1
- package/dist/runtime-v2/__tests__/architecture-regression.test.js +8 -5
- package/dist/runtime-v2/__tests__/architecture-regression.test.js.map +1 -1
- package/dist/runtime-v2/__tests__/artificer-runner-vslice.test.js +32 -80
- package/dist/runtime-v2/__tests__/artificer-runner-vslice.test.js.map +1 -1
- package/dist/runtime-v2/__tests__/full-chain-real-llm.test.js +2 -2
- package/dist/runtime-v2/__tests__/full-chain-real-llm.test.js.map +1 -1
- package/dist/runtime-v2/__tests__/principle-compiler-core.test.js +7 -7
- package/dist/runtime-v2/__tests__/principle-compiler-core.test.js.map +1 -1
- package/dist/runtime-v2/activation/__tests__/production-gate-deps.test.js +24 -1
- package/dist/runtime-v2/activation/__tests__/production-gate-deps.test.js.map +1 -1
- package/dist/runtime-v2/activation/production-gate-deps.d.ts.map +1 -1
- package/dist/runtime-v2/activation/production-gate-deps.js +18 -1
- package/dist/runtime-v2/activation/production-gate-deps.js.map +1 -1
- package/dist/runtime-v2/adapter/__tests__/artificer-l2-adapter.test.js +277 -475
- package/dist/runtime-v2/adapter/__tests__/artificer-l2-adapter.test.js.map +1 -1
- package/dist/runtime-v2/adapter/artificer-l2-adapter.d.ts +14 -34
- package/dist/runtime-v2/adapter/artificer-l2-adapter.d.ts.map +1 -1
- package/dist/runtime-v2/adapter/artificer-l2-adapter.js +182 -222
- package/dist/runtime-v2/adapter/artificer-l2-adapter.js.map +1 -1
- package/dist/runtime-v2/adapter/pi-ai-runtime-adapter.js +2 -2
- package/dist/runtime-v2/adapter/pi-ai-runtime-adapter.js.map +1 -1
- package/dist/runtime-v2/adversarial-loop.d.ts.map +1 -1
- package/dist/runtime-v2/adversarial-loop.js +5 -27
- package/dist/runtime-v2/adversarial-loop.js.map +1 -1
- package/dist/runtime-v2/golden-trace-replay-validator.d.ts +8 -0
- package/dist/runtime-v2/golden-trace-replay-validator.d.ts.map +1 -1
- package/dist/runtime-v2/golden-trace-replay-validator.js +3 -3
- package/dist/runtime-v2/golden-trace-replay-validator.js.map +1 -1
- package/dist/runtime-v2/golden-trace.d.ts +16 -1
- package/dist/runtime-v2/golden-trace.d.ts.map +1 -1
- package/dist/runtime-v2/golden-trace.js +13 -4
- package/dist/runtime-v2/golden-trace.js.map +1 -1
- package/dist/runtime-v2/index.d.ts +8 -5
- package/dist/runtime-v2/index.d.ts.map +1 -1
- package/dist/runtime-v2/index.js +11 -4
- package/dist/runtime-v2/index.js.map +1 -1
- package/dist/runtime-v2/internalization/__tests__/artificer-rule-output.test.d.ts +2 -0
- package/dist/runtime-v2/internalization/__tests__/artificer-rule-output.test.d.ts.map +1 -0
- package/dist/runtime-v2/internalization/__tests__/{artificer-output-v2.test.js → artificer-rule-output.test.js} +126 -127
- package/dist/runtime-v2/internalization/__tests__/artificer-rule-output.test.js.map +1 -0
- package/dist/runtime-v2/internalization/__tests__/rule-code-dialect.test.d.ts +2 -0
- package/dist/runtime-v2/internalization/__tests__/rule-code-dialect.test.d.ts.map +1 -0
- package/dist/runtime-v2/internalization/__tests__/rule-code-dialect.test.js +270 -0
- package/dist/runtime-v2/internalization/__tests__/rule-code-dialect.test.js.map +1 -0
- package/dist/runtime-v2/internalization/__tests__/rule-host-input-builder.test.d.ts +2 -0
- package/dist/runtime-v2/internalization/__tests__/rule-host-input-builder.test.d.ts.map +1 -0
- package/dist/runtime-v2/internalization/__tests__/rule-host-input-builder.test.js +180 -0
- package/dist/runtime-v2/internalization/__tests__/rule-host-input-builder.test.js.map +1 -0
- package/dist/runtime-v2/internalization/artificer-output.d.ts +33 -51
- package/dist/runtime-v2/internalization/artificer-output.d.ts.map +1 -1
- package/dist/runtime-v2/internalization/artificer-output.js +48 -87
- package/dist/runtime-v2/internalization/artificer-output.js.map +1 -1
- package/dist/runtime-v2/internalization/artificer-runner.d.ts +8 -8
- package/dist/runtime-v2/internalization/artificer-runner.d.ts.map +1 -1
- package/dist/runtime-v2/internalization/artificer-runner.js +5 -5
- package/dist/runtime-v2/internalization/artificer-runner.js.map +1 -1
- package/dist/runtime-v2/internalization/evaluator-runner.js +2 -2
- package/dist/runtime-v2/internalization/index.d.ts +9 -4
- package/dist/runtime-v2/internalization/index.d.ts.map +1 -1
- package/dist/runtime-v2/internalization/index.js +8 -3
- package/dist/runtime-v2/internalization/index.js.map +1 -1
- package/dist/runtime-v2/internalization/rule-code-validator.d.ts +16 -0
- package/dist/runtime-v2/internalization/rule-code-validator.d.ts.map +1 -1
- package/dist/runtime-v2/internalization/rule-code-validator.js +50 -1
- package/dist/runtime-v2/internalization/rule-code-validator.js.map +1 -1
- package/dist/runtime-v2/internalization/rule-host-input-builder.d.ts +62 -0
- package/dist/runtime-v2/internalization/rule-host-input-builder.d.ts.map +1 -0
- package/dist/runtime-v2/internalization/rule-host-input-builder.js +182 -0
- package/dist/runtime-v2/internalization/rule-host-input-builder.js.map +1 -0
- package/dist/runtime-v2/internalization/rule-host-validator.d.ts.map +1 -1
- package/dist/runtime-v2/internalization/rule-host-validator.js +22 -1
- package/dist/runtime-v2/internalization/rule-host-validator.js.map +1 -1
- package/dist/runtime-v2/internalization/template-generator.d.ts +7 -2
- package/dist/runtime-v2/internalization/template-generator.d.ts.map +1 -1
- package/dist/runtime-v2/internalization/template-generator.js +10 -5
- package/dist/runtime-v2/internalization/template-generator.js.map +1 -1
- package/dist/runtime-v2/tools/__tests__/artificer-l2-tool-contract.test.d.ts +2 -0
- package/dist/runtime-v2/tools/__tests__/artificer-l2-tool-contract.test.d.ts.map +1 -0
- package/dist/runtime-v2/tools/__tests__/artificer-l2-tool-contract.test.js +322 -0
- package/dist/runtime-v2/tools/__tests__/artificer-l2-tool-contract.test.js.map +1 -0
- package/dist/runtime-v2/tools/__tests__/artificer-output-typebox.test.d.ts +2 -0
- package/dist/runtime-v2/tools/__tests__/artificer-output-typebox.test.d.ts.map +1 -0
- package/dist/runtime-v2/tools/__tests__/artificer-output-typebox.test.js +149 -0
- package/dist/runtime-v2/tools/__tests__/artificer-output-typebox.test.js.map +1 -0
- package/dist/runtime-v2/tools/artificer-l2-tool-contract.d.ts +72 -0
- package/dist/runtime-v2/tools/artificer-l2-tool-contract.d.ts.map +1 -0
- package/dist/runtime-v2/tools/artificer-l2-tool-contract.js +275 -0
- package/dist/runtime-v2/tools/artificer-l2-tool-contract.js.map +1 -0
- package/dist/runtime-v2/tools/artificer-output-typebox.d.ts +78 -0
- package/dist/runtime-v2/tools/artificer-output-typebox.d.ts.map +1 -0
- package/dist/runtime-v2/tools/artificer-output-typebox.js +70 -0
- package/dist/runtime-v2/tools/artificer-output-typebox.js.map +1 -0
- package/dist/telemetry-event.d.ts +2 -2
- package/dist/telemetry-event.d.ts.map +1 -1
- package/dist/telemetry-event.js +5 -3
- package/dist/telemetry-event.js.map +1 -1
- package/package.json +1 -1
- package/dist/runtime-v2/internalization/__tests__/artificer-output-v2.test.d.ts +0 -2
- package/dist/runtime-v2/internalization/__tests__/artificer-output-v2.test.d.ts.map +0 -1
- package/dist/runtime-v2/internalization/__tests__/artificer-output-v2.test.js.map +0 -1
|
@@ -1,64 +1,73 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* ArtificerL2Adapter tests (
|
|
3
|
-
* PRD Decision 8, test module 7).
|
|
2
|
+
* ArtificerL2Adapter tests (PRI-439 Phase 4 — tool-using L2 agent).
|
|
4
3
|
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* Mocks runAgentLoop (no real LLM calls) to verify the adapter's orchestration:
|
|
5
|
+
* - submit_rulecode capture terminates the loop and stores the output
|
|
6
|
+
* - maxTurns cap forces stop when submit_rulecode is never called
|
|
7
|
+
* - beforeToolCall whitelist blocks non-allowlisted tools
|
|
8
|
+
* - shouldStopAfterTurn checks output capture + turn count
|
|
9
|
+
* - no V1/L1 fallback: exhaustion throws PDRuntimeError
|
|
10
|
+
* - timeout: abort signal triggers timed_out failure
|
|
11
|
+
* - telemetry events (artificer_l2_turn / artificer_l2_complete) are emitted
|
|
7
12
|
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
* multi-attempt logic in the adapter, not in succeedTask().
|
|
13
|
-
*
|
|
14
|
-
* Testability: LLM calls are mocked via an injected `generateCode` function.
|
|
15
|
-
* Sandbox replay uses real evaluateRefinerRuleHostGate with a controllable
|
|
16
|
-
* RefinerRuleHostGateDeps. No real LLM calls.
|
|
17
|
-
*
|
|
18
|
-
* Coverage (PRD test module 7):
|
|
19
|
-
* - happy path: 1st attempt passes replay → V2 output (1 LLM call)
|
|
20
|
-
* - fix path: 1st attempt fails → feedback injected → 2nd passes → V2 (2 LLM calls)
|
|
21
|
-
* - exhaustion: 3 attempts all fail → V1 degraded output (no code fields)
|
|
22
|
-
* - error types: forbidden_pattern / runtime_error / timeout / validation_failed
|
|
23
|
-
* - V1 backward compat: degraded V1 output is NOT detected as V2 by isArtificerOutputV2
|
|
24
|
-
*
|
|
25
|
-
* ERR checklist (EP-05 Loop State Freshness): each attempt reads fresh sandbox
|
|
26
|
-
* errors; the feedback injected into attempt N+1 is from attempt N's failure,
|
|
27
|
-
* never stale. (ERR-015/018/019)
|
|
13
|
+
* ERR checklist:
|
|
14
|
+
* - EP-05 Loop State Freshness: each startRun uses fresh outputCapture + turnCount
|
|
15
|
+
* - EP-03 Fail Loud: exhaustion throws PDRuntimeError with structured nextAction
|
|
16
|
+
* - EP-01 Trust Boundary: submit_rulecode validates via injected validator
|
|
28
17
|
*/
|
|
29
|
-
import { describe, it, expect } from 'vitest';
|
|
18
|
+
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
19
|
+
const hoisted = vi.hoisted(() => {
|
|
20
|
+
return {
|
|
21
|
+
lastLoopConfig: {},
|
|
22
|
+
mockReturn: [],
|
|
23
|
+
impl: null,
|
|
24
|
+
};
|
|
25
|
+
});
|
|
26
|
+
/* eslint-disable @typescript-eslint/max-params -- runAgentLoop mock mirrors the real 5-param signature */
|
|
27
|
+
vi.mock('@earendil-works/pi-agent-core', () => ({
|
|
28
|
+
runAgentLoop: vi.fn(async (prompts, context, config, emit, signal) => {
|
|
29
|
+
hoisted.lastLoopConfig = config;
|
|
30
|
+
if (typeof hoisted.impl === 'function') {
|
|
31
|
+
const fn = hoisted.impl;
|
|
32
|
+
return fn(prompts, context, config, emit, signal);
|
|
33
|
+
}
|
|
34
|
+
return hoisted.mockReturn.slice();
|
|
35
|
+
}),
|
|
36
|
+
}));
|
|
37
|
+
/* eslint-enable @typescript-eslint/max-params */
|
|
38
|
+
// Mock resolveL2Model's pi-ai dependencies (getModel/getProviders) — the adapter
|
|
39
|
+
// uses the custom baseUrl path so these stubs are never called for real.
|
|
40
|
+
vi.mock('@earendil-works/pi-ai', () => ({
|
|
41
|
+
completeSimple: vi.fn(),
|
|
42
|
+
getModel: vi.fn(() => ({ id: 'test', name: 'test', api: 'openai-completions', provider: 'test-provider' })),
|
|
43
|
+
getProviders: vi.fn(() => []),
|
|
44
|
+
}));
|
|
45
|
+
vi.mock('../../store/event-emitter.js', () => ({
|
|
46
|
+
storeEmitter: { emitTelemetry: vi.fn() },
|
|
47
|
+
}));
|
|
48
|
+
import { storeEmitter } from '../../store/event-emitter.js';
|
|
30
49
|
import { ArtificerL2Adapter } from '../artificer-l2-adapter.js';
|
|
31
|
-
import {
|
|
32
|
-
|
|
33
|
-
import { Value } from '@sinclair/typebox/value';
|
|
34
|
-
import { RunHandleSchema, RuntimeKindSchema } from '../../runtime-protocol.js';
|
|
50
|
+
import { DefaultArtificerValidator } from '../../internalization/artificer-output.js';
|
|
51
|
+
const emitTelemetryMock = storeEmitter.emitTelemetry;
|
|
35
52
|
const TASK_ID = 'task-artificer-l2-001';
|
|
36
|
-
/** A valid
|
|
37
|
-
function
|
|
53
|
+
/** A valid ArtificerRuleOutput the model might submit via submit_rulecode. */
|
|
54
|
+
function makeRuleOutput(overrides = {}) {
|
|
38
55
|
return {
|
|
39
56
|
taskId: TASK_ID,
|
|
40
|
-
sourceScribeArtifactId: 'pi-art-scribe-001
|
|
41
|
-
implementationPlan: {
|
|
42
|
-
summary: 'Block writes to system dirs',
|
|
43
|
-
targetSurface: 'edit gate',
|
|
44
|
-
changes: ['path prefix check'],
|
|
45
|
-
tests: ['golden trace replay'],
|
|
46
|
-
rolloutNotes: ['shadow first'],
|
|
47
|
-
confidence: 0.8,
|
|
48
|
-
},
|
|
49
|
-
sourceTrace: { scribeArtifactId: 'pi-art-scribe-001-run-001' },
|
|
50
|
-
risks: [],
|
|
51
|
-
generatedAt: '2026-06-17T00:00:00.000Z',
|
|
57
|
+
sourceScribeArtifactId: 'pi-art-scribe-001',
|
|
52
58
|
implementationCode: 'function evaluate(input, helpers) { return { decision: "allow", matched: false, reason: "ok" }; }',
|
|
53
59
|
goldenTraceCases: [
|
|
54
60
|
{ caseId: 'negative-1', kind: 'negative', toolName: 'edit', params: { path: '/etc/x' }, expectedDecision: 'block' },
|
|
55
61
|
{ caseId: 'positive-1', kind: 'positive', toolName: 'read', params: { path: '/tmp/y' }, expectedDecision: 'allow' },
|
|
56
62
|
],
|
|
57
63
|
affectedTools: ['edit'],
|
|
64
|
+
implementationSummary: 'Block writes to system dirs',
|
|
65
|
+
risks: [],
|
|
66
|
+
sourceTrace: { scribeArtifactId: 'pi-art-scribe-001' },
|
|
67
|
+
generatedAt: '2026-06-17T00:00:00.000Z',
|
|
58
68
|
...overrides,
|
|
59
69
|
};
|
|
60
70
|
}
|
|
61
|
-
/** Build a gateDeps whose sandbox always accepts (replay passes). */
|
|
62
71
|
function makeAlwaysPassGateDeps() {
|
|
63
72
|
const passingResult = {
|
|
64
73
|
success: true,
|
|
@@ -70,469 +79,262 @@ function makeAlwaysPassGateDeps() {
|
|
|
70
79
|
evaluateInSandbox: (_code, _trace, _opts) => passingResult,
|
|
71
80
|
};
|
|
72
81
|
}
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
evaluateInSandbox: (code, _trace, _opts) => {
|
|
83
|
-
calls.push({ code });
|
|
84
|
-
const result = failures[attempt] ?? { success: true, failedCases: [], executionTimeMs: 1, forbiddenPatternViolations: [] };
|
|
85
|
-
attempt += 1;
|
|
86
|
-
return result;
|
|
87
|
-
},
|
|
82
|
+
function makeStartRun(overrides = {}) {
|
|
83
|
+
return {
|
|
84
|
+
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
85
|
+
taskRef: { taskId: TASK_ID },
|
|
86
|
+
inputPayload: 'initial prompt',
|
|
87
|
+
contextItems: [],
|
|
88
|
+
outputSchemaRef: 'artificer-output-v2',
|
|
89
|
+
timeoutMs: 300_000,
|
|
90
|
+
...overrides,
|
|
88
91
|
};
|
|
89
|
-
return { deps, calls };
|
|
90
92
|
}
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
};
|
|
103
|
-
const FAILED_TIMEOUT = {
|
|
104
|
-
success: false,
|
|
105
|
-
failedCases: [{ caseId: 'negative-1', errorType: 'timeout', message: 'exceeded 1000ms' }],
|
|
106
|
-
executionTimeMs: 1001,
|
|
107
|
-
forbiddenPatternViolations: [],
|
|
108
|
-
};
|
|
109
|
-
const FAILED_VALIDATION = {
|
|
110
|
-
success: false,
|
|
111
|
-
failedCases: [{ caseId: 'negative-1', errorType: 'validation_failed', message: 'expected block got allow' }],
|
|
112
|
-
executionTimeMs: 1,
|
|
113
|
-
forbiddenPatternViolations: [],
|
|
114
|
-
};
|
|
115
|
-
describe('ArtificerL2Adapter (RuleHost MVP Activation, PRI-424)', () => {
|
|
116
|
-
it('retries when the model omits executable RuleCode from an otherwise valid V1 response', async () => {
|
|
117
|
-
const prompts = [];
|
|
118
|
-
const { implementationCode: _code, goldenTraceCases: _cases, affectedTools: _tools, ...v1 } = makeV2Output();
|
|
119
|
-
const generateCode = async (prompt) => {
|
|
120
|
-
prompts.push(prompt);
|
|
121
|
-
return prompts.length === 1 ? v1 : makeV2Output();
|
|
122
|
-
};
|
|
123
|
-
const adapter = new ArtificerL2Adapter({
|
|
124
|
-
generateCode,
|
|
125
|
-
gateDeps: makeAlwaysPassGateDeps(),
|
|
126
|
-
validator: new DefaultArtificerValidator(),
|
|
127
|
-
});
|
|
128
|
-
const handle = await adapter.startRun({
|
|
129
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
130
|
-
taskRef: { taskId: TASK_ID },
|
|
131
|
-
inputPayload: 'initial prompt',
|
|
132
|
-
contextItems: [],
|
|
133
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
134
|
-
timeoutMs: 300_000,
|
|
135
|
-
});
|
|
136
|
-
expect(prompts).toHaveLength(2);
|
|
137
|
-
expect(prompts[1]).toContain('implementationCode');
|
|
138
|
-
const output = await adapter.fetchOutput(handle.runId);
|
|
139
|
-
expect(isArtificerOutputV2(output?.payload)).toBe(true);
|
|
140
|
-
void _code;
|
|
141
|
-
void _cases;
|
|
142
|
-
void _tools;
|
|
93
|
+
function makeAdapter(overrides = {}) {
|
|
94
|
+
return new ArtificerL2Adapter({
|
|
95
|
+
provider: 'test-provider',
|
|
96
|
+
model: 'test-model',
|
|
97
|
+
apiKeyEnv: 'TEST_API_KEY',
|
|
98
|
+
baseUrl: 'http://localhost:1234/v1',
|
|
99
|
+
gateDeps: overrides.gateDeps ?? makeAlwaysPassGateDeps(),
|
|
100
|
+
validator: new DefaultArtificerValidator(),
|
|
101
|
+
maxTurns: overrides.maxTurns,
|
|
102
|
+
totalBudgetMs: overrides.totalBudgetMs ?? 60_000,
|
|
103
|
+
maxTokens: overrides.maxTokens,
|
|
143
104
|
});
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
105
|
+
}
|
|
106
|
+
beforeEach(() => {
|
|
107
|
+
vi.clearAllMocks();
|
|
108
|
+
hoisted.mockReturn = [];
|
|
109
|
+
hoisted.impl = null;
|
|
110
|
+
hoisted.lastLoopConfig = {};
|
|
111
|
+
process.env.TEST_API_KEY = 'test-key';
|
|
112
|
+
});
|
|
113
|
+
// ── submit_rulecode capture (primary extraction) ─────────────────────────────
|
|
114
|
+
describe('PRI-439 ArtificerL2Adapter — submit_rulecode capture', () => {
|
|
115
|
+
it('returns the captured output when submit_rulecode was called', async () => {
|
|
116
|
+
const adapter = makeAdapter();
|
|
117
|
+
hoisted.impl = async (_p, context) => {
|
|
118
|
+
const submit = context.tools?.find((t) => t.name === 'submit_rulecode');
|
|
119
|
+
if (submit) {
|
|
120
|
+
await submit.execute('call-1', makeRuleOutput());
|
|
121
|
+
}
|
|
122
|
+
return [];
|
|
150
123
|
};
|
|
151
|
-
const
|
|
152
|
-
generateCode,
|
|
153
|
-
gateDeps: makeAlwaysPassGateDeps(),
|
|
154
|
-
validator: new DefaultArtificerValidator(),
|
|
155
|
-
});
|
|
156
|
-
const handle = await adapter.startRun({
|
|
157
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
158
|
-
taskRef: { taskId: TASK_ID },
|
|
159
|
-
inputPayload: 'initial prompt',
|
|
160
|
-
contextItems: [],
|
|
161
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
162
|
-
timeoutMs: 300_000,
|
|
163
|
-
});
|
|
164
|
-
expect(generateCalls).toHaveLength(1);
|
|
124
|
+
const handle = await adapter.startRun(makeStartRun());
|
|
165
125
|
const output = await adapter.fetchOutput(handle.runId);
|
|
166
126
|
expect(output).not.toBeNull();
|
|
167
|
-
|
|
168
|
-
return;
|
|
169
|
-
expect(isArtificerOutputV2(output.payload)).toBe(true);
|
|
127
|
+
expect(output?.payload).toEqual(makeRuleOutput());
|
|
170
128
|
});
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
129
|
+
it('shouldStopAfterTurn returns true after output is captured', async () => {
|
|
130
|
+
const adapter = makeAdapter({ maxTurns: 8 });
|
|
131
|
+
hoisted.impl = async (_p, context) => {
|
|
132
|
+
const submit = context.tools?.find((t) => t.name === 'submit_rulecode');
|
|
133
|
+
if (submit) {
|
|
134
|
+
await submit.execute('call-1', makeRuleOutput());
|
|
135
|
+
}
|
|
136
|
+
return [];
|
|
177
137
|
};
|
|
178
|
-
|
|
179
|
-
const
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
validator: new DefaultArtificerValidator(),
|
|
183
|
-
});
|
|
184
|
-
const handle = await adapter.startRun({
|
|
185
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
186
|
-
taskRef: { taskId: TASK_ID },
|
|
187
|
-
inputPayload: 'initial prompt',
|
|
188
|
-
contextItems: [],
|
|
189
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
190
|
-
timeoutMs: 300_000,
|
|
191
|
-
});
|
|
192
|
-
expect(generateCalls).toHaveLength(2);
|
|
193
|
-
// 2nd prompt MUST contain the failure feedback from attempt 1 (EP-05 freshness).
|
|
194
|
-
expect(generateCalls[1]).toContain('TypeError: x is undefined');
|
|
195
|
-
const output = await adapter.fetchOutput(handle.runId);
|
|
196
|
-
expect(output).not.toBeNull();
|
|
197
|
-
if (!output)
|
|
138
|
+
await adapter.startRun(makeStartRun());
|
|
139
|
+
const stopFn = hoisted.lastLoopConfig.shouldStopAfterTurn;
|
|
140
|
+
expect(typeof stopFn).toBe('function');
|
|
141
|
+
if (!stopFn)
|
|
198
142
|
return;
|
|
199
|
-
|
|
143
|
+
// After submit_rulecode captured output, the next shouldStopAfterTurn call returns true.
|
|
144
|
+
expect(stopFn()).toBe(true);
|
|
200
145
|
});
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
216
|
-
taskRef: { taskId: TASK_ID },
|
|
217
|
-
inputPayload: 'initial prompt',
|
|
218
|
-
contextItems: [],
|
|
219
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
220
|
-
timeoutMs: 300_000,
|
|
221
|
-
});
|
|
222
|
-
expect(generateCalls).toHaveLength(3);
|
|
223
|
-
const output = await adapter.fetchOutput(handle.runId);
|
|
224
|
-
expect(output).not.toBeNull();
|
|
225
|
-
if (!output)
|
|
146
|
+
});
|
|
147
|
+
// ── maxTurns cap ─────────────────────────────────────────────────────────────
|
|
148
|
+
describe('PRI-439 ArtificerL2Adapter — maxTurns cap', () => {
|
|
149
|
+
it('shouldStopAfterTurn returns false below maxTurns and true at/above, WITHOUT submit_rulecode', async () => {
|
|
150
|
+
const adapter = makeAdapter({ maxTurns: 5 });
|
|
151
|
+
hoisted.mockReturn = [
|
|
152
|
+
{ role: 'assistant', content: 'thinking...' },
|
|
153
|
+
];
|
|
154
|
+
await adapter.startRun(makeStartRun()).catch(() => {
|
|
155
|
+
// startRun throws when no output is captured — that's expected here.
|
|
156
|
+
});
|
|
157
|
+
const stopFn = hoisted.lastLoopConfig.shouldStopAfterTurn;
|
|
158
|
+
if (!stopFn) {
|
|
159
|
+
expect.fail('shouldStopAfterTurn not wired');
|
|
226
160
|
return;
|
|
227
|
-
|
|
228
|
-
expect(
|
|
229
|
-
|
|
230
|
-
expect(
|
|
161
|
+
}
|
|
162
|
+
expect(stopFn()).toBe(false); // turn 1
|
|
163
|
+
expect(stopFn()).toBe(false); // turn 2
|
|
164
|
+
expect(stopFn()).toBe(false); // turn 3
|
|
165
|
+
expect(stopFn()).toBe(false); // turn 4
|
|
166
|
+
expect(stopFn()).toBe(true); // turn 5 (>= maxTurns)
|
|
167
|
+
expect(stopFn()).toBe(true); // turn 6 (still >= maxTurns)
|
|
231
168
|
});
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
const adapter =
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
});
|
|
241
|
-
const
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
inputPayload: 'initial prompt',
|
|
245
|
-
contextItems: [],
|
|
246
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
247
|
-
timeoutMs: 300_000,
|
|
248
|
-
});
|
|
249
|
-
const output = await adapter.fetchOutput(handle.runId);
|
|
250
|
-
expect(output).not.toBeNull();
|
|
251
|
-
if (!output)
|
|
169
|
+
});
|
|
170
|
+
// ── beforeToolCall whitelist ─────────────────────────────────────────────────
|
|
171
|
+
describe('PRI-439 ArtificerL2Adapter — beforeToolCall whitelist', () => {
|
|
172
|
+
it('blocks unknown tools', async () => {
|
|
173
|
+
const adapter = makeAdapter();
|
|
174
|
+
hoisted.mockReturn = [];
|
|
175
|
+
await adapter.startRun(makeStartRun()).catch(() => {
|
|
176
|
+
// startRun throws when no output is captured — expected.
|
|
177
|
+
});
|
|
178
|
+
const beforeFn = hoisted.lastLoopConfig.beforeToolCall;
|
|
179
|
+
expect(typeof beforeFn).toBe('function');
|
|
180
|
+
if (!beforeFn)
|
|
252
181
|
return;
|
|
253
|
-
const result = await
|
|
254
|
-
expect(result
|
|
182
|
+
const result = await beforeFn({ toolCall: { name: 'unknown_tool' } });
|
|
183
|
+
expect(result).toEqual({ block: true, reason: expect.stringContaining('unknown_tool') });
|
|
255
184
|
});
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
taskRef: { taskId: TASK_ID },
|
|
272
|
-
inputPayload: 'initial prompt',
|
|
273
|
-
contextItems: [],
|
|
274
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
275
|
-
timeoutMs: 300_000,
|
|
276
|
-
});
|
|
277
|
-
expect(generateCalls).toHaveLength(2);
|
|
278
|
-
expect(generateCalls[1]).toContain('require');
|
|
279
|
-
});
|
|
280
|
-
it('handles timeout failure and injects it as feedback', async () => {
|
|
281
|
-
const generateCalls = [];
|
|
282
|
-
const generateCode = async (prompt) => {
|
|
283
|
-
generateCalls.push(prompt);
|
|
284
|
-
return makeV2Output();
|
|
285
|
-
};
|
|
286
|
-
const { deps } = makeFailNTimesGateDeps([FAILED_TIMEOUT]);
|
|
287
|
-
const adapter = new ArtificerL2Adapter({
|
|
288
|
-
generateCode,
|
|
289
|
-
gateDeps: deps,
|
|
290
|
-
validator: new DefaultArtificerValidator(),
|
|
291
|
-
});
|
|
292
|
-
await adapter.startRun({
|
|
293
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
294
|
-
taskRef: { taskId: TASK_ID },
|
|
295
|
-
inputPayload: 'initial prompt',
|
|
296
|
-
contextItems: [],
|
|
297
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
298
|
-
timeoutMs: 300_000,
|
|
299
|
-
});
|
|
300
|
-
expect(generateCalls[1]).toContain('timeout');
|
|
185
|
+
it('allows whitelisted tools', async () => {
|
|
186
|
+
const adapter = makeAdapter();
|
|
187
|
+
hoisted.mockReturn = [];
|
|
188
|
+
await adapter.startRun(makeStartRun()).catch(() => {
|
|
189
|
+
// startRun throws when no output is captured — expected.
|
|
190
|
+
});
|
|
191
|
+
const beforeFn = hoisted.lastLoopConfig.beforeToolCall;
|
|
192
|
+
if (!beforeFn) {
|
|
193
|
+
expect.fail('beforeToolCall not wired');
|
|
194
|
+
return;
|
|
195
|
+
}
|
|
196
|
+
for (const name of ['read_rulecode_spec', 'validate_rulecode', 'replay_rulecode', 'submit_rulecode']) {
|
|
197
|
+
const result = await beforeFn({ toolCall: { name } });
|
|
198
|
+
expect(result).toBeUndefined();
|
|
199
|
+
}
|
|
301
200
|
});
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
inputPayload: 'initial prompt',
|
|
318
|
-
contextItems: [],
|
|
319
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
320
|
-
timeoutMs: 300_000,
|
|
321
|
-
});
|
|
322
|
-
expect(generateCalls[1]).toContain('expected block got allow');
|
|
201
|
+
});
|
|
202
|
+
// ── exhaustion: no V1/L1 fallback ────────────────────────────────────────────
|
|
203
|
+
describe('PRI-439 ArtificerL2Adapter — exhaustion (no fallback)', () => {
|
|
204
|
+
it('throws PDRuntimeError when the loop ends without submit_rulecode', async () => {
|
|
205
|
+
const adapter = makeAdapter({ maxTurns: 3 });
|
|
206
|
+
hoisted.mockReturn = [
|
|
207
|
+
{ role: 'assistant', content: 'I cannot produce valid code.' },
|
|
208
|
+
];
|
|
209
|
+
await expect(adapter.startRun(makeStartRun())).rejects.toThrow(/without a submit_rulecode call/);
|
|
210
|
+
// No output stored for the failed run — fetchOutput returns null.
|
|
211
|
+
const runs = adapter.runs;
|
|
212
|
+
expect(runs.size).toBe(1);
|
|
213
|
+
for (const [, state] of runs) {
|
|
214
|
+
expect(state.output).toBeNull();
|
|
215
|
+
}
|
|
323
216
|
});
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
const { deps } = makeFailNTimesGateDeps([FAILED_RUNTIME, FAILED_TIMEOUT]);
|
|
333
|
-
const adapter = new ArtificerL2Adapter({
|
|
334
|
-
generateCode,
|
|
335
|
-
gateDeps: deps,
|
|
336
|
-
validator: new DefaultArtificerValidator(),
|
|
337
|
-
});
|
|
338
|
-
await adapter.startRun({
|
|
339
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
340
|
-
taskRef: { taskId: TASK_ID },
|
|
341
|
-
inputPayload: 'initial prompt',
|
|
342
|
-
contextItems: [],
|
|
343
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
344
|
-
timeoutMs: 300_000,
|
|
345
|
-
});
|
|
346
|
-
// attempt 2 prompt must mention attempt 1's runtime_error, NOT attempt 2's timeout
|
|
347
|
-
expect(generateCalls[1]).toContain('TypeError: x is undefined');
|
|
348
|
-
expect(generateCalls[1]).not.toContain('exceeded 1000ms');
|
|
349
|
-
// attempt 3 prompt must mention attempt 2's timeout, NOT attempt 1's runtime_error
|
|
350
|
-
expect(generateCalls[2]).toContain('exceeded 1000ms');
|
|
217
|
+
it('emits artificer_l2_complete telemetry with succeeded=false on exhaustion', async () => {
|
|
218
|
+
const adapter = makeAdapter({ maxTurns: 2 });
|
|
219
|
+
hoisted.mockReturn = [{ role: 'assistant', content: 'no code' }];
|
|
220
|
+
await expect(adapter.startRun(makeStartRun())).rejects.toThrow();
|
|
221
|
+
const completeCalls = emitTelemetryMock.mock.calls.filter((c) => c[0].eventType === 'artificer_l2_complete');
|
|
222
|
+
expect(completeCalls.length).toBe(1);
|
|
223
|
+
const payload = completeCalls[0][0].payload;
|
|
224
|
+
expect(payload.succeeded).toBe(false);
|
|
351
225
|
});
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
const
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
return { success: true, failedCases: [], executionTimeMs: 1, forbiddenPatternViolations: [] };
|
|
360
|
-
},
|
|
226
|
+
});
|
|
227
|
+
// ── loop error ───────────────────────────────────────────────────────────────
|
|
228
|
+
describe('PRI-439 ArtificerL2Adapter — loop error', () => {
|
|
229
|
+
it('throws PDRuntimeError when runAgentLoop throws', async () => {
|
|
230
|
+
const adapter = makeAdapter();
|
|
231
|
+
hoisted.impl = async () => {
|
|
232
|
+
throw new Error('LLM provider unavailable');
|
|
361
233
|
};
|
|
362
|
-
|
|
363
|
-
generateCode,
|
|
364
|
-
gateDeps: deps,
|
|
365
|
-
validator: new DefaultArtificerValidator(),
|
|
366
|
-
});
|
|
367
|
-
await adapter.startRun({
|
|
368
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
369
|
-
taskRef: { taskId: TASK_ID },
|
|
370
|
-
inputPayload: 'initial prompt',
|
|
371
|
-
contextItems: [],
|
|
372
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
373
|
-
timeoutMs: 300_000,
|
|
374
|
-
});
|
|
375
|
-
expect(capturedTrace).not.toBeNull();
|
|
376
|
-
expect(validateGoldenTrace(capturedTrace).valid).toBe(true);
|
|
234
|
+
await expect(adapter.startRun(makeStartRun())).rejects.toThrow(/agent loop threw/);
|
|
377
235
|
});
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
return bad;
|
|
236
|
+
});
|
|
237
|
+
// ── runtime metadata ─────────────────────────────────────────────────────────
|
|
238
|
+
describe('PRI-439 ArtificerL2Adapter — runtime metadata', () => {
|
|
239
|
+
it('pollRun returns succeeded status after startRun completes with output', async () => {
|
|
240
|
+
const adapter = makeAdapter();
|
|
241
|
+
hoisted.impl = async (_p, context) => {
|
|
242
|
+
const submit = context.tools?.find((t) => t.name === 'submit_rulecode');
|
|
243
|
+
if (submit) {
|
|
244
|
+
await submit.execute('call-1', makeRuleOutput());
|
|
388
245
|
}
|
|
389
|
-
return
|
|
246
|
+
return [];
|
|
390
247
|
};
|
|
391
|
-
const
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
validator: new DefaultArtificerValidator(),
|
|
395
|
-
});
|
|
396
|
-
const handle = await adapter.startRun({
|
|
397
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
398
|
-
taskRef: { taskId: TASK_ID },
|
|
399
|
-
inputPayload: 'initial prompt',
|
|
400
|
-
contextItems: [],
|
|
401
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
402
|
-
timeoutMs: 300_000,
|
|
403
|
-
});
|
|
404
|
-
const output = await adapter.fetchOutput(handle.runId);
|
|
405
|
-
expect(output).not.toBeNull();
|
|
406
|
-
if (!output)
|
|
407
|
-
return;
|
|
408
|
-
// 2nd attempt produces valid V2 → replay passes → V2 output
|
|
409
|
-
expect(isArtificerOutputV2(output.payload)).toBe(true);
|
|
248
|
+
const handle = await adapter.startRun(makeStartRun());
|
|
249
|
+
const status = await adapter.pollRun(handle.runId);
|
|
250
|
+
expect(status.status).toBe('succeeded');
|
|
410
251
|
});
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
// candidate, degradation is impossible (Runtime Contract Rule 1/3 — never
|
|
415
|
-
// emit an unvalidated object). The adapter throws PDRuntimeError instead,
|
|
416
|
-
// which BasePeerRunner.handlePostLeaseError catches → task fails.
|
|
417
|
-
const generateCode = async () => {
|
|
418
|
-
// Every attempt returns malformed V2 (missing affectedTools).
|
|
419
|
-
const bad = makeV2Output();
|
|
420
|
-
delete bad.affectedTools;
|
|
421
|
-
return bad;
|
|
422
|
-
};
|
|
423
|
-
const adapter = new ArtificerL2Adapter({
|
|
424
|
-
generateCode,
|
|
425
|
-
gateDeps: makeAlwaysPassGateDeps(),
|
|
426
|
-
validator: new DefaultArtificerValidator(),
|
|
427
|
-
});
|
|
428
|
-
await expect(adapter.startRun({
|
|
429
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
430
|
-
taskRef: { taskId: TASK_ID },
|
|
431
|
-
inputPayload: 'initial prompt',
|
|
432
|
-
contextItems: [],
|
|
433
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
434
|
-
timeoutMs: 300_000,
|
|
435
|
-
})).rejects.toThrow(/without a validated candidate/);
|
|
252
|
+
it('kind() returns pi-ai-l2', () => {
|
|
253
|
+
const adapter = makeAdapter();
|
|
254
|
+
expect(adapter.kind()).toBe('pi-ai-l2');
|
|
436
255
|
});
|
|
437
|
-
it('
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
const generateCode = async () => makeV2Output();
|
|
442
|
-
const { deps } = makeFailNTimesGateDeps([FAILED_RUNTIME, FAILED_RUNTIME, FAILED_RUNTIME]);
|
|
443
|
-
const adapter = new ArtificerL2Adapter({
|
|
444
|
-
generateCode,
|
|
445
|
-
gateDeps: deps,
|
|
446
|
-
validator: new DefaultArtificerValidator(),
|
|
447
|
-
});
|
|
448
|
-
const handle = await adapter.startRun({
|
|
449
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
450
|
-
taskRef: { taskId: TASK_ID },
|
|
451
|
-
inputPayload: 'initial prompt',
|
|
452
|
-
contextItems: [],
|
|
453
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
454
|
-
timeoutMs: 300_000,
|
|
455
|
-
});
|
|
456
|
-
const output = await adapter.fetchOutput(handle.runId);
|
|
457
|
-
expect(output).not.toBeNull();
|
|
458
|
-
if (!output)
|
|
459
|
-
return;
|
|
460
|
-
expect(isArtificerOutputV2(output.payload)).toBe(false);
|
|
256
|
+
it('getCapabilities reports supportsToolUse=true', async () => {
|
|
257
|
+
const adapter = makeAdapter();
|
|
258
|
+
const caps = await adapter.getCapabilities();
|
|
259
|
+
expect(caps.supportsToolUse).toBe(true);
|
|
461
260
|
});
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
const
|
|
465
|
-
const
|
|
466
|
-
|
|
467
|
-
gateDeps: makeAlwaysPassGateDeps(),
|
|
468
|
-
validator: new DefaultArtificerValidator(),
|
|
469
|
-
});
|
|
470
|
-
const handle = await adapter.startRun({
|
|
471
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
472
|
-
taskRef: { taskId: TASK_ID },
|
|
473
|
-
inputPayload: 'initial prompt',
|
|
474
|
-
contextItems: [],
|
|
475
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
476
|
-
timeoutMs: 300_000,
|
|
477
|
-
});
|
|
478
|
-
const status = await adapter.pollRun(handle.runId);
|
|
479
|
-
// RunStatus is an object { runId, status, ... }; status.status is the execution state.
|
|
480
|
-
expect(['succeeded', 'failed']).toContain(status.status);
|
|
261
|
+
it('healthCheck returns unhealthy when API key is missing', async () => {
|
|
262
|
+
delete process.env.TEST_API_KEY;
|
|
263
|
+
const adapter = makeAdapter();
|
|
264
|
+
const health = await adapter.healthCheck();
|
|
265
|
+
expect(health.healthy).toBe(false);
|
|
481
266
|
});
|
|
482
|
-
it('
|
|
483
|
-
const adapter =
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
validator: new DefaultArtificerValidator(),
|
|
487
|
-
});
|
|
488
|
-
expect(Value.Check(RuntimeKindSchema, adapter.kind())).toBe(true);
|
|
489
|
-
expect(adapter.kind()).toBe('pi-ai-l2');
|
|
267
|
+
it('healthCheck returns healthy when API key is present', async () => {
|
|
268
|
+
const adapter = makeAdapter();
|
|
269
|
+
const health = await adapter.healthCheck();
|
|
270
|
+
expect(health.healthy).toBe(true);
|
|
490
271
|
});
|
|
491
|
-
it('
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
272
|
+
it('startRun throws when API key is missing', async () => {
|
|
273
|
+
delete process.env.TEST_API_KEY;
|
|
274
|
+
const adapter = makeAdapter();
|
|
275
|
+
await expect(adapter.startRun(makeStartRun())).rejects.toThrow(/API key not found/);
|
|
276
|
+
});
|
|
277
|
+
});
|
|
278
|
+
// ── config defaults ──────────────────────────────────────────────────────────
|
|
279
|
+
describe('PRI-439 ArtificerL2Adapter — config defaults', () => {
|
|
280
|
+
it('wires maxTokens=8192 default into loopConfig', async () => {
|
|
281
|
+
const adapter = makeAdapter();
|
|
282
|
+
hoisted.mockReturn = [];
|
|
283
|
+
await adapter.startRun(makeStartRun()).catch(() => {
|
|
284
|
+
// expected — no output captured
|
|
285
|
+
});
|
|
286
|
+
expect(hoisted.lastLoopConfig.maxTokens).toBe(8192);
|
|
287
|
+
});
|
|
288
|
+
it('wires custom maxTokens when provided', async () => {
|
|
289
|
+
const adapter = makeAdapter({ maxTokens: 4096 });
|
|
290
|
+
hoisted.mockReturn = [];
|
|
291
|
+
await adapter.startRun(makeStartRun()).catch(() => {
|
|
292
|
+
// expected
|
|
504
293
|
});
|
|
505
|
-
expect(
|
|
294
|
+
expect(hoisted.lastLoopConfig.maxTokens).toBe(4096);
|
|
506
295
|
});
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
296
|
+
});
|
|
297
|
+
// ── telemetry ────────────────────────────────────────────────────────────────
|
|
298
|
+
describe('PRI-439 ArtificerL2Adapter — telemetry', () => {
|
|
299
|
+
it('emits artificer_l2_turn with phase=loop_started at start', async () => {
|
|
300
|
+
const adapter = makeAdapter();
|
|
301
|
+
hoisted.mockReturn = [];
|
|
302
|
+
await adapter.startRun(makeStartRun()).catch(() => {
|
|
303
|
+
// expected
|
|
304
|
+
});
|
|
305
|
+
const startCalls = emitTelemetryMock.mock.calls.filter((c) => {
|
|
306
|
+
const evt = c[0];
|
|
307
|
+
return evt.eventType === 'artificer_l2_turn' && evt.payload?.phase === 'loop_started';
|
|
308
|
+
});
|
|
309
|
+
expect(startCalls.length).toBe(1);
|
|
514
310
|
});
|
|
311
|
+
it('emits artificer_l2_complete with succeeded=true on success', async () => {
|
|
312
|
+
const adapter = makeAdapter();
|
|
313
|
+
hoisted.impl = async (_p, context) => {
|
|
314
|
+
const submit = context.tools?.find((t) => t.name === 'submit_rulecode');
|
|
315
|
+
if (submit) {
|
|
316
|
+
await submit.execute('call-1', makeRuleOutput());
|
|
317
|
+
}
|
|
318
|
+
return [];
|
|
319
|
+
};
|
|
320
|
+
await adapter.startRun(makeStartRun());
|
|
321
|
+
const completeCalls = emitTelemetryMock.mock.calls.filter((c) => c[0].eventType === 'artificer_l2_complete');
|
|
322
|
+
expect(completeCalls.length).toBe(1);
|
|
323
|
+
const payload = completeCalls[0][0].payload;
|
|
324
|
+
expect(payload.succeeded).toBe(true);
|
|
325
|
+
});
|
|
326
|
+
});
|
|
327
|
+
// ── input serialization ──────────────────────────────────────────────────────
|
|
328
|
+
describe('PRI-439 ArtificerL2Adapter — input serialization', () => {
|
|
515
329
|
it('bounds and safely serializes an unknown prompt payload', async () => {
|
|
516
330
|
const circular = { text: 'x'.repeat(60_000) };
|
|
517
331
|
circular.self = circular;
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
gateDeps: makeAlwaysPassGateDeps(),
|
|
525
|
-
validator: new DefaultArtificerValidator(),
|
|
526
|
-
});
|
|
527
|
-
await adapter.startRun({
|
|
528
|
-
agentSpec: { agentId: 'artificer', schemaVersion: 'v1' },
|
|
529
|
-
taskRef: { taskId: TASK_ID },
|
|
530
|
-
inputPayload: circular,
|
|
531
|
-
contextItems: [],
|
|
532
|
-
outputSchemaRef: 'artificer-output-v2',
|
|
533
|
-
timeoutMs: 30_000,
|
|
534
|
-
});
|
|
535
|
-
expect(receivedPrompt.length).toBeLessThanOrEqual(50_003);
|
|
332
|
+
const adapter = makeAdapter();
|
|
333
|
+
hoisted.mockReturn = [];
|
|
334
|
+
// The circular payload is safely stringified (safeStringifyPreview handles cycles).
|
|
335
|
+
// startRun still throws because no output is captured, but it should NOT throw
|
|
336
|
+
// a serialization error.
|
|
337
|
+
await expect(adapter.startRun(makeStartRun({ inputPayload: circular }))).rejects.toThrow(/without a submit_rulecode call/);
|
|
536
338
|
});
|
|
537
339
|
});
|
|
538
340
|
//# sourceMappingURL=artificer-l2-adapter.test.js.map
|