@cleocode/adapters 2026.4.92 → 2026.4.94

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/dist/index.js +40795 -18064
  2. package/dist/index.js.map +4 -4
  3. package/dist/providers/claude-sdk/index.d.ts +10 -4
  4. package/dist/providers/claude-sdk/index.d.ts.map +1 -1
  5. package/dist/providers/claude-sdk/spawn.d.ts +29 -28
  6. package/dist/providers/claude-sdk/spawn.d.ts.map +1 -1
  7. package/dist/providers/openai-sdk/adapter.d.ts +18 -17
  8. package/dist/providers/openai-sdk/adapter.d.ts.map +1 -1
  9. package/dist/providers/openai-sdk/guardrails.d.ts +71 -18
  10. package/dist/providers/openai-sdk/guardrails.d.ts.map +1 -1
  11. package/dist/providers/openai-sdk/handoff.d.ts +51 -21
  12. package/dist/providers/openai-sdk/handoff.d.ts.map +1 -1
  13. package/dist/providers/openai-sdk/index.d.ts +8 -5
  14. package/dist/providers/openai-sdk/index.d.ts.map +1 -1
  15. package/dist/providers/openai-sdk/install.d.ts +1 -1
  16. package/dist/providers/openai-sdk/spawn.d.ts +54 -21
  17. package/dist/providers/openai-sdk/spawn.d.ts.map +1 -1
  18. package/dist/providers/openai-sdk/tracing.d.ts +87 -21
  19. package/dist/providers/openai-sdk/tracing.d.ts.map +1 -1
  20. package/dist/providers/shared/sdk-result-mapper.d.ts +9 -7
  21. package/dist/providers/shared/sdk-result-mapper.d.ts.map +1 -1
  22. package/package.json +6 -5
  23. package/src/__tests__/harness-interop.test.ts +451 -0
  24. package/src/providers/claude-sdk/__tests__/spawn.test.ts +100 -265
  25. package/src/providers/claude-sdk/index.ts +10 -4
  26. package/src/providers/claude-sdk/spawn.ts +69 -106
  27. package/src/providers/openai-sdk/__tests__/openai-sdk-spawn.test.ts +134 -103
  28. package/src/providers/openai-sdk/adapter.ts +19 -18
  29. package/src/providers/openai-sdk/guardrails.ts +106 -25
  30. package/src/providers/openai-sdk/handoff.ts +73 -37
  31. package/src/providers/openai-sdk/index.ts +28 -4
  32. package/src/providers/openai-sdk/install.ts +1 -1
  33. package/src/providers/openai-sdk/manifest.json +4 -4
  34. package/src/providers/openai-sdk/spawn.ts +213 -48
  35. package/src/providers/openai-sdk/tracing.ts +105 -22
  36. package/src/providers/shared/sdk-result-mapper.ts +9 -7
@@ -0,0 +1,451 @@
1
+ /**
2
+ * T937 — Harness interop sandbox.
3
+ *
4
+ * Proves the Vercel-AI-SDK-backed provider adapters shipped in T933 can drive
5
+ * the T930 playbook runtime end-to-end, and that the runtime itself stays
6
+ * provider-agnostic (ADR-052 invariant).
7
+ *
8
+ * Test shape:
9
+ *
10
+ * 1. Three {@link AgentDispatcher} wrappers — `claude-sdk`, `openai-sdk`, and
11
+ * a zero-SDK "generic" dispatcher — each execute the `rcasd.cantbook`
12
+ * starter playbook end-to-end against an in-memory SQLite DB.
13
+ * 2. A meta-test grep-asserts that `packages/playbooks/src/runtime.ts` does
14
+ * not import any provider SDK, locking down the architectural boundary
15
+ * that the SDK consolidation decision (ADR-052) depends on.
16
+ *
17
+ * Vercel AI SDK modules (`@ai-sdk/anthropic`, `@ai-sdk/openai`, `ai`) are
18
+ * mocked at the module level so no real network calls or credentials are
19
+ * required. The `@cleocode/playbooks` runtime is imported from source via a
20
+ * test-only Vitest alias (see `vitest.config.ts`) — NOT mocked — so the
21
+ * dispatchers exercise real state-machine code.
22
+ *
23
+ * @task T937
24
+ * @see T930 — Playbook runtime state machine
25
+ * @see T933 — Vercel AI SDK migration for provider adapters
26
+ * @see T934 — rcasd.cantbook starter playbook
27
+ * @see ADR-052 — SDK consolidation decision
28
+ */
29
+
30
+ import { readFileSync } from 'node:fs';
31
+ import { createRequire } from 'node:module';
32
+ import { dirname, resolve } from 'node:path';
33
+ import type { DatabaseSync as _DatabaseSyncType } from 'node:sqlite';
34
+ import { fileURLToPath } from 'node:url';
35
+ import type { AdapterSpawnProvider, SpawnContext, SpawnResult } from '@cleocode/contracts';
36
+ import {
37
+ type AgentDispatcher,
38
+ type AgentDispatchInput,
39
+ type AgentDispatchResult,
40
+ executePlaybook,
41
+ parsePlaybook,
42
+ } from '@cleocode/playbooks';
43
+ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
44
+
45
+ // ---------------------------------------------------------------------------
46
+ // Hoisted mock state — shared across all Vitest factory hoists
47
+ // ---------------------------------------------------------------------------
48
+
49
// Created inside `vi.hoisted` so the `vi.mock` factories in this file can
// reference the call logs and the tunable mock behaviour.
const { anthropicCalls, openaiCalls, mockState } = vi.hoisted(() => {
  // One entry per mocked `generateText` call routed to the anthropic handle.
  const anthropicCalls: Array<{ model: unknown; prompt: string }> = [];
  // One entry per mocked `generateText` call routed to the openai handle.
  const openaiCalls: Array<{ model: unknown; system?: string; prompt: string }> = [];
  // Knobs the suite flips to shape what the mocked SDK returns or throws.
  const mockState = {
    anthropicText: 'mocked anthropic response',
    openaiText: 'mocked openai response',
    shouldThrow: false,
  };
  return { anthropicCalls, openaiCalls, mockState };
});
60
+
61
+ // ---------------------------------------------------------------------------
62
+ // Vercel AI SDK mocks — no network, no credentials
63
+ // ---------------------------------------------------------------------------
64
+
65
// `createAnthropic` yields a model factory whose handles are tagged
// `anthropic`, letting the `generateText` mock tell providers apart.
vi.mock('@ai-sdk/anthropic', () => {
  const createAnthropic = vi.fn(
    (_config: { apiKey: string }) =>
      (modelId: string) => ({ __cleoMockModel: 'anthropic', modelId }),
  );
  return { createAnthropic };
});

// Same shape as the anthropic mock, with handles tagged `openai`.
vi.mock('@ai-sdk/openai', () => {
  const createOpenAI = vi.fn(
    (_config: { apiKey: string }) =>
      (modelId: string) => ({ __cleoMockModel: 'openai', modelId }),
  );
  return { createOpenAI };
});

// `generateText` records the call in the matching log and answers with the
// text configured in `mockState`, or throws when `shouldThrow` is set.
vi.mock('ai', () => {
  type GenerateTextArgs = { model: unknown; system?: string; prompt: string };

  const generateText = vi.fn(async ({ model, system, prompt }: GenerateTextArgs) => {
    // Dispatch on the tag stamped onto the handle by the provider mocks above.
    const providerTag = (model as { __cleoMockModel?: string } | null)?.__cleoMockModel;
    switch (providerTag) {
      case 'anthropic':
        anthropicCalls.push({ model, prompt });
        if (mockState.shouldThrow) throw new Error('mock anthropic SDK error');
        return { text: mockState.anthropicText };
      case 'openai':
        openaiCalls.push({ model, system, prompt });
        if (mockState.shouldThrow) throw new Error('mock openai SDK error');
        return { text: mockState.openaiText };
      default:
        throw new Error('unexpected model handle passed to mock generateText');
    }
  });

  return { generateText };
});
96
+
97
// The real spawn providers call CANT enrichment; replace it with a stub that
// just prefixes the base prompt, so the sandbox does not need the cleo CLI.
vi.mock('../cant-context.js', () => {
  const buildCantEnrichedPrompt = vi.fn(
    async (args: { basePrompt: string }) => `[CANT] ${args.basePrompt}`,
  );
  return { buildCantEnrichedPrompt };
});

// Stub the conduit trace writer so the openai-sdk provider never spawns the
// `cleo` CLI from inside a test run.
vi.mock('../providers/shared/conduit-trace-writer.js', () => {
  const writeSpanToConduit = vi.fn(async () => ({ written: true }));
  const writeSpanBatchToConduit = vi.fn(async () => 0);
  return { writeSpanToConduit, writeSpanBatchToConduit };
});
111
+
112
+ // ---------------------------------------------------------------------------
113
+ // Real imports (after mocks so factories take effect)
114
+ // ---------------------------------------------------------------------------
115
+
116
+ import { ClaudeSDKSpawnProvider } from '../providers/claude-sdk/spawn.js';
117
+ import { OpenAiSdkSpawnProvider } from '../providers/openai-sdk/spawn.js';
118
+
119
+ // ---------------------------------------------------------------------------
120
+ // node:sqlite bootstrap (mirrors runtime.test.ts pattern)
121
+ // ---------------------------------------------------------------------------
122
+
123
// `node:sqlite` is loaded through a CommonJS-style `require` built from this
// module's URL; the type comes from the type-only import at the top of the file.
type DatabaseSync = _DatabaseSyncType;
type DatabaseSyncCtor = new (
  ...args: ConstructorParameters<typeof _DatabaseSyncType>
) => DatabaseSync;

const _require = createRequire(import.meta.url);
const sqliteModule = _require('node:sqlite') as { DatabaseSync: DatabaseSyncCtor };
const DatabaseSync = sqliteModule.DatabaseSync;
128
+
129
/** Directory containing this test file (ESM replacement for CommonJS `__dirname`). */
const __dirname = dirname(fileURLToPath(import.meta.url));

/** Resolve a path relative to this test file's directory. */
const fromTestDir = (relativePath: string): string => resolve(__dirname, relativePath);

/**
 * Absolute path to the T889 playbook-tables migration shipped with
 * `@cleocode/core`. Reading the SQL from disk avoids duplicating the schema
 * inside the test.
 */
const MIGRATION_SQL = fromTestDir(
  '../../../core/migrations/drizzle-tasks/20260417220000_t889-playbook-tables/migration.sql',
);

/** Absolute path to the rcasd.cantbook starter playbook (T934). */
const RCASD_CANTBOOK = fromTestDir('../../../playbooks/starter/rcasd.cantbook');

/** Absolute path to the runtime source that the suite asserts is SDK-free. */
const RUNTIME_SOURCE = fromTestDir('../../../playbooks/src/runtime.ts');
146
+
147
/**
 * Execute a Drizzle migration against a SQLite handle.
 *
 * The SQL text is cut at every `--> statement-breakpoint` marker. Each piece
 * is trimmed; empty pieces and pieces made up solely of blank lines and `--`
 * comment lines are skipped, and every remaining piece is passed to `db.exec`
 * in file order.
 *
 * @param db - Handle the statements are executed on.
 * @param sql - Full text of the migration file.
 */
function applyMigration(db: DatabaseSync, sql: string): void {
  for (const chunk of sql.split(/--> statement-breakpoint/)) {
    const statement = chunk.trim();
    if (statement.length === 0) continue;

    // A chunk is worth executing only if some line is neither blank nor a comment.
    const isExecutable = statement
      .split('\n')
      .map((line) => line.trim())
      .some((line) => line.length > 0 && !line.startsWith('--'));

    if (isExecutable) db.exec(statement);
  }
}
162
+
163
+ // ---------------------------------------------------------------------------
164
+ // Dispatcher factories — wrap each SpawnProvider in the AgentDispatcher shape
165
+ // ---------------------------------------------------------------------------
166
+
167
/**
 * Translate a {@link SpawnResult} into the {@link AgentDispatchResult} shape.
 *
 * A `completed` spawn becomes a `success` whose output holds three keys:
 * `<nodeId>_output` (the spawn output, or `''` when absent),
 * `<nodeId>_provider`, and `lastProvider`. Any other status becomes a
 * `failure` with an empty output and the spawn error, falling back to a
 * generic message that names the provider.
 *
 * @param result - Result returned by the provider's `spawn`.
 * @param providerId - Id written into the provider markers.
 * @param nodeId - Playbook node id used to prefix the per-node keys.
 * @returns The dispatch result described above.
 */
function spawnResultToDispatch(
  result: SpawnResult,
  providerId: string,
  nodeId: string,
): AgentDispatchResult {
  if (result.status !== 'completed') {
    const error = result.error ?? `spawn failed on ${providerId}`;
    return { status: 'failure', output: {}, error };
  }

  const text = result.output ?? '';
  return {
    status: 'success',
    output: {
      [`${nodeId}_output`]: text,
      [`${nodeId}_provider`]: providerId,
      lastProvider: providerId,
    },
  };
}
193
+
194
/**
 * Wrap an {@link AdapterSpawnProvider} in the {@link AgentDispatcher} shape.
 *
 * Every `dispatch` call is appended to `calls`, then turned into a
 * {@link SpawnContext} (worker tier, tracing disabled, agent name taken from
 * the input) and handed to `provider.spawn`. The spawn result is converted
 * with `spawnResultToDispatch`. The suite uses this factory for its two
 * provider-backed dispatchers.
 *
 * @param provider - Provider adapter whose `spawn` handles each dispatch.
 * @param providerId - Static id echoed into the dispatch output.
 * @returns An {@link AgentDispatcher} plus a `calls` trace for assertions.
 */
function mkProviderDispatcher(
  provider: AdapterSpawnProvider,
  providerId: string,
): AgentDispatcher & { calls: AgentDispatchInput[] } {
  const calls: AgentDispatchInput[] = [];

  const dispatch = async (input: AgentDispatchInput): Promise<AgentDispatchResult> => {
    calls.push(input);
    const { taskId, nodeId, agentId } = input;
    const spawnContext: SpawnContext = {
      taskId,
      prompt: `Execute node ${nodeId} for task ${taskId} (agent=${agentId}).`,
      options: { tier: 'worker', tracingDisabled: true, agentName: agentId },
    };
    const spawned = await provider.spawn(spawnContext);
    return spawnResultToDispatch(spawned, providerId, nodeId);
  };

  return { calls, dispatch };
}
222
+
223
/**
 * Build an {@link AgentDispatcher} that touches no provider SDK at all.
 *
 * Each `dispatch` call is appended to `calls` and answered immediately with a
 * `success` whose output is `generic:<nodeId>:ok`, tagged with the `generic`
 * provider markers. It stands in for a "Pi / generic harness": the playbook
 * runtime must be able to drive any dispatcher that satisfies the interface,
 * which is the invariant ADR-052 depends on.
 *
 * @returns An {@link AgentDispatcher} plus a `calls` trace for assertions.
 */
function mkGenericDispatcher(): AgentDispatcher & { calls: AgentDispatchInput[] } {
  const calls: AgentDispatchInput[] = [];

  const dispatch = async (input: AgentDispatchInput): Promise<AgentDispatchResult> => {
    calls.push(input);
    const { nodeId } = input;
    return {
      status: 'success',
      output: {
        [`${nodeId}_output`]: `generic:${nodeId}:ok`,
        [`${nodeId}_provider`]: 'generic',
        lastProvider: 'generic',
      },
    };
  };

  return { calls, dispatch };
}
246
+
247
+ // ---------------------------------------------------------------------------
248
+ // Fixture — parsed rcasd.cantbook (starter playbook from T934)
249
+ // ---------------------------------------------------------------------------
250
+
251
// Read and parse the starter playbook once at module load; every test reuses
// the same definition and source hash.
const rcasdSource = readFileSync(RCASD_CANTBOOK, 'utf8');
const { definition: rcasdDefinition, sourceHash: rcasdHash } = parsePlaybook(rcasdSource);
255
+
256
+ // ---------------------------------------------------------------------------
257
+ // Suite
258
+ // ---------------------------------------------------------------------------
259
+
260
describe('T937 — harness interop: playbook runtime across provider adapters', () => {
  /** rcasd.cantbook node ids in the order the happy path dispatches them. */
  const RCASD_NODE_ORDER = [
    'research',
    'consensus',
    'architecture',
    'specification',
    'decomposition',
  ];

  /** API-key env vars this suite overrides for the duration of each test. */
  const API_KEY_VARS = ['ANTHROPIC_API_KEY', 'OPENAI_API_KEY'] as const;

  let db: DatabaseSync;
  /** Values the API-key env vars held before the current test overrode them. */
  let savedEnv: Record<string, string | undefined> = {};

  /**
   * Open an in-memory SQLite DB with foreign keys on and the playbook-tables
   * migration applied. The handle is closed again if setup itself fails.
   */
  function openMigratedDb(): DatabaseSync {
    const handle = new DatabaseSync(':memory:');
    try {
      handle.exec('PRAGMA foreign_keys=ON');
      applyMigration(handle, readFileSync(MIGRATION_SQL, 'utf8'));
    } catch (err) {
      handle.close();
      throw err;
    }
    return handle;
  }

  /** Run the parsed rcasd playbook against `handle` using `dispatcher`. */
  function runRcasd(handle: DatabaseSync, dispatcher: AgentDispatcher, taskId: string) {
    return executePlaybook({
      db: handle,
      playbook: rcasdDefinition,
      playbookHash: rcasdHash,
      initialContext: { taskId, epicId: 'T910' },
      dispatcher,
      approvalSecret: 'harness-interop-test',
    });
  }

  beforeEach(() => {
    // Snapshot first so afterEach can restore whatever the environment had,
    // instead of unconditionally deleting keys the developer may have set.
    savedEnv = Object.fromEntries(API_KEY_VARS.map((name) => [name, process.env[name]]));

    db = openMigratedDb();

    anthropicCalls.length = 0;
    openaiCalls.length = 0;
    mockState.shouldThrow = false;
    mockState.anthropicText = 'anthropic node output';
    mockState.openaiText = 'openai node output';

    process.env.ANTHROPIC_API_KEY = 'sk-anthropic-test';
    process.env.OPENAI_API_KEY = 'sk-openai-test';
  });

  afterEach(() => {
    db.close();
    for (const name of API_KEY_VARS) {
      const previous = savedEnv[name];
      if (previous === undefined) {
        delete process.env[name];
      } else {
        process.env[name] = previous;
      }
    }
    vi.clearAllMocks();
  });

  // -------------------------------------------------------------------------
  // 1 · claude-sdk adapter drives rcasd.cantbook end-to-end
  // -------------------------------------------------------------------------
  it('claude-sdk adapter dispatches rcasd.cantbook to completion', async () => {
    const provider = new ClaudeSDKSpawnProvider();
    const dispatcher = mkProviderDispatcher(provider, 'claude-sdk');

    const result = await runRcasd(db, dispatcher, 'T937-claude');

    expect(result.terminalStatus).toBe('completed');
    // Every rcasd node must have been dispatched exactly once (happy path).
    expect(dispatcher.calls.map((c) => c.nodeId)).toEqual(RCASD_NODE_ORDER);
    // The last-merged provider marker confirms the dispatcher actually ran.
    expect(result.finalContext['lastProvider']).toBe('claude-sdk');
    // Per-node provider markers are merged into context.
    expect(result.finalContext['research_provider']).toBe('claude-sdk');
    expect(result.finalContext['decomposition_provider']).toBe('claude-sdk');
    // Each node triggered exactly one generateText call (no retries required).
    expect(anthropicCalls).toHaveLength(5);
    expect(openaiCalls).toHaveLength(0);
  });

  // -------------------------------------------------------------------------
  // 2 · openai-sdk adapter drives rcasd.cantbook end-to-end
  // -------------------------------------------------------------------------
  it('openai-sdk adapter dispatches rcasd.cantbook to completion', async () => {
    const provider = new OpenAiSdkSpawnProvider();
    const dispatcher = mkProviderDispatcher(provider, 'openai-sdk');

    const result = await runRcasd(db, dispatcher, 'T937-openai');

    expect(result.terminalStatus).toBe('completed');
    expect(dispatcher.calls.map((c) => c.nodeId)).toEqual(RCASD_NODE_ORDER);
    expect(result.finalContext['lastProvider']).toBe('openai-sdk');
    expect(result.finalContext['consensus_provider']).toBe('openai-sdk');
    expect(result.finalContext['specification_provider']).toBe('openai-sdk');
    // openai-sdk uses standalone agents (no handoffs here) so 1 call per node.
    expect(openaiCalls).toHaveLength(5);
    expect(anthropicCalls).toHaveLength(0);
  });

  // -------------------------------------------------------------------------
  // 3 · generic dispatcher proves the runtime is provider-agnostic
  // -------------------------------------------------------------------------
  it('generic (zero-SDK) dispatcher executes rcasd.cantbook end-to-end', async () => {
    const dispatcher = mkGenericDispatcher();

    const result = await runRcasd(db, dispatcher, 'T937-generic');

    expect(result.terminalStatus).toBe('completed');
    expect(dispatcher.calls.map((c) => c.nodeId)).toEqual(RCASD_NODE_ORDER);
    expect(result.finalContext['lastProvider']).toBe('generic');
    // Generic path MUST NOT invoke either SDK.
    expect(anthropicCalls).toHaveLength(0);
    expect(openaiCalls).toHaveLength(0);
  });

  // -------------------------------------------------------------------------
  // 4 · Cross-provider run matrix — single playbook parse, three adapters
  // -------------------------------------------------------------------------
  it('same parsed playbook executes identically across all three adapters', async () => {
    const providers: Array<[string, AgentDispatcher & { calls: AgentDispatchInput[] }]> = [
      ['claude-sdk', mkProviderDispatcher(new ClaudeSDKSpawnProvider(), 'claude-sdk')],
      ['openai-sdk', mkProviderDispatcher(new OpenAiSdkSpawnProvider(), 'openai-sdk')],
      ['generic', mkGenericDispatcher()],
    ];

    const results: Array<{ providerId: string; terminal: string; nodeCount: number }> = [];
    for (const [providerId, dispatcher] of providers) {
      // Fresh DB per provider so rows from one run cannot collide with the next.
      const freshDb = openMigratedDb();
      try {
        const result = await runRcasd(freshDb, dispatcher, `T937-${providerId}`);
        results.push({
          providerId,
          terminal: result.terminalStatus,
          nodeCount: dispatcher.calls.length,
        });
      } finally {
        // Close even when the run throws, so a failure does not leak the handle.
        freshDb.close();
      }
    }

    for (const r of results) {
      expect(r.terminal).toBe('completed');
      expect(r.nodeCount).toBe(5);
    }
  });

  // -------------------------------------------------------------------------
  // 5 · Architectural invariant — runtime code is SDK-free (ADR-052)
  // -------------------------------------------------------------------------
  it('zero provider-SDK imports leak into @cleocode/playbooks runtime', () => {
    const runtimeSource = readFileSync(RUNTIME_SOURCE, 'utf8');

    // Strip comments/docstrings before grepping so narrative mentions of the
    // SDKs in TSDoc don't create false positives. Keep it conservative — we
    // only strip /* ... */ blocks and lines that consist solely of a //
    // comment; trailing comments and string literals are left in place.
    const codeOnly = runtimeSource.replace(/\/\*[\s\S]*?\*\//g, '').replace(/^[ \t]*\/\/.*$/gm, '');

    // Banned needles — must not appear in non-comment source. String needles
    // are substring matches; RegExp needles are tested against the whole text.
    const banned: ReadonlyArray<{ needle: string | RegExp; hint: string }> = [
      { needle: '@ai-sdk/', hint: 'Vercel AI SDK provider packages' },
      { needle: '@anthropic-ai/claude-agent-sdk', hint: 'legacy Claude agent SDK' },
      { needle: '@anthropic-ai/sdk', hint: 'Anthropic raw SDK' },
      { needle: '@openai/agents', hint: 'legacy OpenAI agents SDK' },
      { needle: 'openai', hint: 'OpenAI client package' },
      { needle: "from 'ai'", hint: 'Vercel AI SDK root package' },
      { needle: 'from "ai"', hint: 'Vercel AI SDK root package (double-quoted)' },
      {
        // Side-effect `import 'ai'`, dynamic `import('ai')` and `require('ai')`
        // have no `from` clause, so the two needles above would miss them.
        needle: /\b(?:import|require)\s*\(?\s*['"]ai['"]/,
        hint: 'Vercel AI SDK root package (side-effect, dynamic, or require import)',
      },
    ];

    const leaks = banned.filter(({ needle }) =>
      typeof needle === 'string' ? codeOnly.includes(needle) : needle.test(codeOnly),
    );
    expect(
      leaks,
      `SDK leakage detected in runtime.ts — ADR-052 invariant violated: ${leaks
        .map((l) => `${l.needle} (${l.hint})`)
        .join(', ')}`,
    ).toEqual([]);
  });
});