backend-manager 5.6.4 → 5.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/CHANGELOG.md +35 -0
  2. package/CLAUDE.md +4 -3
  3. package/PROGRESS.md +34 -0
  4. package/docs/ai-library.md +62 -11
  5. package/docs/cdp-debugging.md +44 -0
  6. package/docs/cli-output.md +22 -10
  7. package/docs/mcp.md +166 -43
  8. package/package.json +1 -1
  9. package/plans/mcp2.md +247 -0
  10. package/src/cli/commands/mcp.js +8 -2
  11. package/src/cli/commands/serve.js +155 -29
  12. package/src/cli/commands/setup-tests/base-test.js +8 -0
  13. package/src/cli/commands/setup-tests/firebase-auth.js +26 -0
  14. package/src/cli/commands/setup-tests/firebase-cli.js +9 -13
  15. package/src/cli/commands/setup-tests/index.js +4 -0
  16. package/src/cli/commands/setup-tests/java-installed.js +26 -0
  17. package/src/cli/commands/setup.js +2 -1
  18. package/src/cli/commands/test.js +8 -0
  19. package/src/cli/index.js +14 -0
  20. package/src/cli/utils/ui.js +27 -5
  21. package/src/manager/index.js +8 -3
  22. package/src/manager/libraries/ai/index.js +45 -1
  23. package/src/manager/libraries/ai/providers/anthropic-format.js +234 -0
  24. package/src/manager/libraries/ai/providers/anthropic.js +28 -49
  25. package/src/manager/libraries/ai/providers/claude-code.js +21 -47
  26. package/src/manager/libraries/ai/providers/openai.js +154 -19
  27. package/src/manager/libraries/ai/providers/test.js +242 -0
  28. package/src/manager/libraries/email/data/disposable-domains.json +465 -0
  29. package/src/mcp/client.js +48 -13
  30. package/src/mcp/handler.js +222 -69
  31. package/src/mcp/index.js +48 -18
  32. package/src/mcp/tools.js +150 -0
  33. package/src/mcp/utils.js +108 -0
  34. package/src/test/fixtures/firebase-project/firebase.json +1 -1
  35. package/test/ai/tools-live.js +170 -0
  36. package/test/helpers/ai-test-provider.js +202 -0
  37. package/test/helpers/ai-tools-format.js +350 -0
  38. package/test/mcp/discovery.js +53 -0
  39. package/test/mcp/oauth.js +161 -0
  40. package/test/mcp/protocol.js +268 -0
  41. package/test/mcp/roles.js +168 -0
  42. package/test/mcp/utils.js +245 -0
  43. package/.claude/settings.local.json +0 -12
package/src/mcp/tools.js CHANGED
@@ -2,14 +2,17 @@
2
2
  * MCP Tool Definitions
3
3
  *
4
4
  * Each tool maps to a BEM route with method, path, and JSON Schema for inputs.
5
+ * annotations.readOnlyHint / destructiveHint control Claude Desktop's read/write categorization.
5
6
  */
6
7
  module.exports = [
7
8
  // --- Firestore ---
8
9
  {
9
10
  name: 'firestore_read',
10
11
  description: 'Read a Firestore document by path (e.g. "users/abc123")',
12
+ role: 'admin',
11
13
  method: 'GET',
12
14
  path: 'admin/firestore',
15
+ annotations: { title: 'Read a Firestore document', readOnlyHint: true },
13
16
  inputSchema: {
14
17
  type: 'object',
15
18
  properties: {
@@ -21,8 +24,10 @@ module.exports = [
21
24
  {
22
25
  name: 'firestore_write',
23
26
  description: 'Write/merge a Firestore document. Set merge=false to overwrite entirely.',
27
+ role: 'admin',
24
28
  method: 'POST',
25
29
  path: 'admin/firestore',
30
+ annotations: { title: 'Write a Firestore document', readOnlyHint: false, destructiveHint: false, idempotentHint: true },
26
31
  inputSchema: {
27
32
  type: 'object',
28
33
  properties: {
@@ -36,8 +41,10 @@ module.exports = [
36
41
  {
37
42
  name: 'firestore_query',
38
43
  description: 'Query a Firestore collection with where clauses, ordering, and limits. Each query in the array has: collection (string), where (array of {field, operator, value}), orderBy (array of {field, order}), limit (number).',
44
+ role: 'admin',
39
45
  method: 'POST',
40
46
  path: 'admin/firestore/query',
47
+ annotations: { title: 'Query a Firestore collection', readOnlyHint: true },
41
48
  inputSchema: {
42
49
  type: 'object',
43
50
  properties: {
@@ -87,8 +94,10 @@ module.exports = [
87
94
  {
88
95
  name: 'send_email',
89
96
  description: 'Send a transactional email via SendGrid. Recipients can be email strings, UIDs (auto-resolves from Firestore), or {email, name} objects.',
97
+ role: 'admin',
90
98
  method: 'POST',
91
99
  path: 'admin/email',
100
+ annotations: { title: 'Send a transactional email', readOnlyHint: false, destructiveHint: false, openWorldHint: true },
92
101
  inputSchema: {
93
102
  type: 'object',
94
103
  properties: {
@@ -111,8 +120,10 @@ module.exports = [
111
120
  {
112
121
  name: 'send_notification',
113
122
  description: 'Send a push notification via FCM to users or topics',
123
+ role: 'admin',
114
124
  method: 'POST',
115
125
  path: 'admin/notification',
126
+ annotations: { title: 'Send a push notification', readOnlyHint: false, destructiveHint: false, openWorldHint: true },
116
127
  inputSchema: {
117
128
  type: 'object',
118
129
  properties: {
@@ -144,8 +155,10 @@ module.exports = [
144
155
  {
145
156
  name: 'get_user',
146
157
  description: 'Get the currently authenticated user info. To look up a specific user, use firestore_read with path "users/{uid}" instead.',
158
+ role: 'user',
147
159
  method: 'GET',
148
160
  path: 'user',
161
+ annotations: { title: 'Get authenticated user info', readOnlyHint: true },
149
162
  inputSchema: {
150
163
  type: 'object',
151
164
  properties: {},
@@ -154,8 +167,10 @@ module.exports = [
154
167
  {
155
168
  name: 'get_subscription',
156
169
  description: 'Get subscription info for a user. Defaults to the authenticated user, or pass a uid to look up another user (admin only).',
170
+ role: 'user',
157
171
  method: 'GET',
158
172
  path: 'user/subscription',
173
+ annotations: { title: 'Get subscription info', readOnlyHint: true },
159
174
  inputSchema: {
160
175
  type: 'object',
161
176
  properties: {
@@ -166,8 +181,10 @@ module.exports = [
166
181
  {
167
182
  name: 'sync_users',
168
183
  description: 'Sync user data across systems (marketing contacts, etc). Processes users in batches.',
184
+ role: 'admin',
169
185
  method: 'POST',
170
186
  path: 'admin/users/sync',
187
+ annotations: { title: 'Sync users across systems', readOnlyHint: false, destructiveHint: false, openWorldHint: true },
171
188
  inputSchema: {
172
189
  type: 'object',
173
190
  properties: {},
@@ -178,8 +195,10 @@ module.exports = [
178
195
  {
179
196
  name: 'list_campaigns',
180
197
  description: 'List marketing campaigns with optional filters by date range, status, and type',
198
+ role: 'admin',
181
199
  method: 'GET',
182
200
  path: 'marketing/campaign',
201
+ annotations: { title: 'List marketing campaigns', readOnlyHint: true },
183
202
  inputSchema: {
184
203
  type: 'object',
185
204
  properties: {
@@ -195,8 +214,10 @@ module.exports = [
195
214
  {
196
215
  name: 'create_campaign',
197
216
  description: 'Create a marketing campaign (email or push notification). Can be immediate or scheduled.',
217
+ role: 'admin',
198
218
  method: 'POST',
199
219
  path: 'marketing/campaign',
220
+ annotations: { title: 'Create a marketing campaign', readOnlyHint: false, destructiveHint: false, openWorldHint: true },
200
221
  inputSchema: {
201
222
  type: 'object',
202
223
  properties: {
@@ -217,12 +238,92 @@ module.exports = [
217
238
  },
218
239
  },
219
240
 
241
+ {
242
+ name: 'update_campaign',
243
+ description: 'Update a pending marketing campaign. Only pending campaigns can be edited.',
244
+ role: 'admin',
245
+ method: 'PUT',
246
+ path: 'marketing/campaign',
247
+ annotations: { title: 'Update a campaign', readOnlyHint: false, destructiveHint: false },
248
+ inputSchema: {
249
+ type: 'object',
250
+ properties: {
251
+ id: { type: 'string', description: 'Campaign ID to update' },
252
+ name: { type: 'string', description: 'Campaign name' },
253
+ subject: { type: 'string', description: 'Email subject line' },
254
+ preheader: { type: 'string', description: 'Email preheader text' },
255
+ template: { type: 'string', description: 'Email template name' },
256
+ data: { type: 'object', description: 'Template data' },
257
+ segments: { type: 'array', items: { type: 'string' }, description: 'Target segment keys' },
258
+ excludeSegments: { type: 'array', items: { type: 'string' }, description: 'Exclude segment keys' },
259
+ all: { type: 'boolean', description: 'Send to all contacts' },
260
+ sendAt: { description: 'Reschedule time (ISO string or unix timestamp)' },
261
+ sender: { type: 'string', description: 'Sender preset name' },
262
+ },
263
+ required: ['id'],
264
+ },
265
+ },
266
+ {
267
+ name: 'delete_campaign',
268
+ description: 'Delete a pending marketing campaign. Only pending campaigns can be deleted.',
269
+ role: 'admin',
270
+ method: 'DELETE',
271
+ path: 'marketing/campaign',
272
+ annotations: { title: 'Delete a campaign', readOnlyHint: false, destructiveHint: true },
273
+ inputSchema: {
274
+ type: 'object',
275
+ properties: {
276
+ id: { type: 'string', description: 'Campaign ID to delete' },
277
+ },
278
+ required: ['id'],
279
+ },
280
+ },
281
+
282
+ // --- Marketing Contacts ---
283
+ {
284
+ name: 'create_contact',
285
+ description: 'Add a marketing contact to email providers (SendGrid/Beehiiv). Admin mode skips reCAPTCHA and allows tags.',
286
+ role: 'admin',
287
+ method: 'POST',
288
+ path: 'marketing/contact',
289
+ annotations: { title: 'Add a marketing contact', readOnlyHint: false, destructiveHint: false, openWorldHint: true },
290
+ inputSchema: {
291
+ type: 'object',
292
+ properties: {
293
+ email: { type: 'string', description: 'Contact email address' },
294
+ firstName: { type: 'string', description: 'First name' },
295
+ lastName: { type: 'string', description: 'Last name' },
296
+ source: { type: 'string', description: 'Contact source (e.g. "manual", "import")' },
297
+ tags: { type: 'array', items: { type: 'string' }, description: 'Contact tags' },
298
+ skipValidation: { type: 'boolean', description: 'Skip email validation (admin only)', default: false },
299
+ },
300
+ required: ['email'],
301
+ },
302
+ },
303
+ {
304
+ name: 'delete_contact',
305
+ description: 'Remove a marketing contact from email providers and revoke marketing consent.',
306
+ role: 'admin',
307
+ method: 'DELETE',
308
+ path: 'marketing/contact',
309
+ annotations: { title: 'Remove a marketing contact', readOnlyHint: false, destructiveHint: true, openWorldHint: true },
310
+ inputSchema: {
311
+ type: 'object',
312
+ properties: {
313
+ email: { type: 'string', description: 'Contact email to remove' },
314
+ },
315
+ required: ['email'],
316
+ },
317
+ },
318
+
220
319
  // --- Stats ---
221
320
  {
222
321
  name: 'get_stats',
223
322
  description: 'Get system statistics (user counts, subscription metrics, etc.)',
323
+ role: 'admin',
224
324
  method: 'GET',
225
325
  path: 'admin/stats',
326
+ annotations: { title: 'Get system statistics', readOnlyHint: true },
226
327
  inputSchema: {
227
328
  type: 'object',
228
329
  properties: {
@@ -235,8 +336,10 @@ module.exports = [
235
336
  {
236
337
  name: 'cancel_subscription',
237
338
  description: 'Cancel a subscription at the end of the current billing period. Requires the authenticated user to have an active subscription.',
339
+ role: 'admin',
238
340
  method: 'POST',
239
341
  path: 'payments/cancel',
342
+ annotations: { title: 'Cancel a subscription', readOnlyHint: false, destructiveHint: true },
240
343
  inputSchema: {
241
344
  type: 'object',
242
345
  properties: {
@@ -250,8 +353,10 @@ module.exports = [
250
353
  {
251
354
  name: 'refund_payment',
252
355
  description: 'Process a refund for a subscription. Immediately cancels and refunds the latest payment.',
356
+ role: 'admin',
253
357
  method: 'POST',
254
358
  path: 'payments/refund',
359
+ annotations: { title: 'Refund a payment', readOnlyHint: false, destructiveHint: true },
255
360
  inputSchema: {
256
361
  type: 'object',
257
362
  properties: {
@@ -263,12 +368,29 @@ module.exports = [
263
368
  },
264
369
  },
265
370
 
371
+ {
372
+ name: 'get_payment_portal',
373
+ description: 'Generate a Stripe Billing Portal link for the authenticated user to manage their subscription.',
374
+ role: 'admin',
375
+ method: 'POST',
376
+ path: 'payments/portal',
377
+ annotations: { title: 'Get payment portal link', readOnlyHint: true, openWorldHint: true },
378
+ inputSchema: {
379
+ type: 'object',
380
+ properties: {
381
+ returnUrl: { type: 'string', description: 'URL to redirect to after the portal session' },
382
+ },
383
+ },
384
+ },
385
+
266
386
  // --- Cron ---
267
387
  {
268
388
  name: 'run_cron',
269
389
  description: 'Manually trigger a cron job by ID (e.g. "daily", "reset-usage", "marketing-campaigns")',
390
+ role: 'admin',
270
391
  method: 'POST',
271
392
  path: 'admin/cron',
393
+ annotations: { title: 'Trigger a cron job', readOnlyHint: false, destructiveHint: false },
272
394
  inputSchema: {
273
395
  type: 'object',
274
396
  properties: {
@@ -282,8 +404,10 @@ module.exports = [
282
404
  {
283
405
  name: 'create_post',
284
406
  description: 'Create a blog post. Handles image downloading, GitHub upload, and body rewriting.',
407
+ role: 'admin',
285
408
  method: 'POST',
286
409
  path: 'admin/post',
410
+ annotations: { title: 'Create a blog post', readOnlyHint: false, destructiveHint: false, openWorldHint: true },
287
411
  inputSchema: {
288
412
  type: 'object',
289
413
  properties: {
@@ -297,13 +421,33 @@ module.exports = [
297
421
  required: ['title', 'body'],
298
422
  },
299
423
  },
424
+ {
425
+ name: 'update_post',
426
+ description: 'Update an existing blog post. Fetches the post by URL and uploads changes via GitHub.',
427
+ role: 'admin',
428
+ method: 'PUT',
429
+ path: 'admin/post',
430
+ annotations: { title: 'Update a blog post', readOnlyHint: false, destructiveHint: false, openWorldHint: true },
431
+ inputSchema: {
432
+ type: 'object',
433
+ properties: {
434
+ url: { type: 'string', description: 'Blog post URL to update' },
435
+ body: { type: 'string', description: 'Updated post content body' },
436
+ title: { type: 'string', description: 'Updated post title' },
437
+ postPath: { type: 'string', description: 'Path to the post (default: "guest")' },
438
+ },
439
+ required: ['url', 'body'],
440
+ },
441
+ },
300
442
 
301
443
  // --- Backup ---
302
444
  {
303
445
  name: 'create_backup',
304
446
  description: 'Create a Firestore data backup. Optionally filter with a deletion regex.',
447
+ role: 'admin',
305
448
  method: 'POST',
306
449
  path: 'admin/backup',
450
+ annotations: { title: 'Create a Firestore backup', readOnlyHint: false, destructiveHint: false },
307
451
  inputSchema: {
308
452
  type: 'object',
309
453
  properties: {
@@ -316,8 +460,10 @@ module.exports = [
316
460
  {
317
461
  name: 'run_hook',
318
462
  description: 'Execute a custom hook by path (e.g. "cron/daily/my-job")',
463
+ role: 'admin',
319
464
  method: 'POST',
320
465
  path: 'admin/hook',
466
+ annotations: { title: 'Run a custom hook', readOnlyHint: false, destructiveHint: false },
321
467
  inputSchema: {
322
468
  type: 'object',
323
469
  properties: {
@@ -331,8 +477,10 @@ module.exports = [
331
477
  {
332
478
  name: 'generate_uuid',
333
479
  description: 'Generate a UUID (v4 random or v5 namespace-based)',
480
+ role: 'admin',
334
481
  method: 'POST',
335
482
  path: 'general/uuid',
483
+ annotations: { title: 'Generate a UUID', readOnlyHint: true },
336
484
  inputSchema: {
337
485
  type: 'object',
338
486
  properties: {
@@ -348,8 +496,10 @@ module.exports = [
348
496
  {
349
497
  name: 'health_check',
350
498
  description: 'Check if the BEM server is running and responding',
499
+ role: 'public',
351
500
  method: 'GET',
352
501
  path: 'test/health',
502
+ annotations: { title: 'Check server health', readOnlyHint: true },
353
503
  inputSchema: {
354
504
  type: 'object',
355
505
  properties: {},
@@ -0,0 +1,108 @@
1
+ const path = require('path');
2
+
3
/**
 * Maps a caller role to the list of tool roles that caller may see.
 * Every role lists itself plus each less-privileged role.
 *
 * The table and each list are frozen: this constant is exported from the
 * module, and a mutation by any importer would silently change which tools
 * are visible to every other caller.
 */
const ROLE_HIERARCHY = Object.freeze({
  admin: Object.freeze(['admin', 'user', 'public']),
  user: Object.freeze(['user', 'public']),
  public: Object.freeze(['public']),
});
8
+
9
/**
 * Classify a Bearer token into a role without hitting the database.
 * Actual validation happens at the route level when a tool is called.
 *
 * - Token equal to the BACKEND_MANAGER_KEY environment variable → admin.
 * - Any other truthy token → user (the token itself is not verified here).
 * - No token → public.
 *
 * @param {string} [token] - Raw Bearer token; may be empty or missing.
 * @returns {{role: string, authType: string, token: *}} The classification.
 *   `token` is passed through unchanged, or '' for the public role.
 */
function resolveAuthInfo(token) {
  const configKey = process.env.BACKEND_MANAGER_KEY || '';

  if (token && configKey && matchesSecret(token, configKey)) {
    return { role: 'admin', authType: 'adminKey', token };
  }

  if (token) {
    return { role: 'user', authType: 'userToken', token };
  }

  return { role: 'public', authType: 'none', token: '' };
}

/**
 * Compare a candidate against a secret in constant time.
 *
 * Both values are hashed first so the buffers handed to timingSafeEqual
 * always have the same length (it throws otherwise) and so the comparison
 * time does not depend on how many leading characters match.
 *
 * @param {*} candidate - Value supplied by the caller.
 * @param {string} secret - The configured secret.
 * @returns {boolean} True only when candidate is a string identical to secret.
 */
function matchesSecret(candidate, secret) {
  // A non-string can never be strictly equal to the string secret
  if (typeof candidate !== 'string') {
    return false;
  }

  const crypto = require('crypto');

  // utf16le keeps every JS code unit, so distinct strings never hash alike
  // because of lossy encoding
  const digest = (value) => crypto
    .createHash('sha256')
    .update(Buffer.from(value, 'utf16le'))
    .digest();

  return crypto.timingSafeEqual(digest(candidate), digest(secret));
}
26
+
27
/**
 * Filter tools to only those visible for a given role.
 * admin → all, user → user + public, public → public only.
 *
 * A tool without a `role` is treated as admin-only. A role that is not a key
 * of ROLE_HIERARCHY is treated as public.
 *
 * @param {Array<object>} tools - Tool definitions, each optionally carrying `role`.
 * @param {string} role - Role of the caller.
 * @returns {Array<object>} The tools the role is allowed to see, in input order.
 */
function filterToolsByRole(tools, role) {
  // Own-property lookup only: a plain `ROLE_HIERARCHY[role]` would also find
  // inherited members (e.g. "constructor", "__proto__"), which are not arrays
  // and would make `.includes` throw instead of falling back to public.
  const isKnownRole = Object.prototype.hasOwnProperty.call(ROLE_HIERARCHY, role);
  const allowed = (isKnownRole && ROLE_HIERARCHY[role]) || ROLE_HIERARCHY.public;

  return tools.filter((tool) => allowed.includes(tool.role || 'admin'));
}
36
+
37
/**
 * Load consumer MCP tools from the `mcp.js` file inside `cwd`, if it exists.
 *
 * Returns an empty array when `cwd` is falsy, the file does not exist, the
 * module does not export an array, any entry fails validation, or loading
 * throws. Problems are reported through console.error rather than thrown.
 *
 * Each accepted tool object is updated in place: `role` defaults to 'admin'
 * and `_consumer` is set to true.
 *
 * @param {string} cwd - Directory expected to contain `mcp.js`.
 * @returns {Array<object>} The consumer tool definitions, or [].
 */
function loadConsumerTools(cwd) {
  if (!cwd) {
    return [];
  }

  // Resolve to an absolute path. With a relative `cwd`, a joined path such as
  // "functions/mcp.js" is treated by require() as a package name, while the
  // existence check below resolves it against the working directory, so the
  // two would disagree about which file is meant.
  const mcpPath = path.resolve(cwd, 'mcp.js');

  try {
    const jetpack = require('fs-jetpack');

    if (!jetpack.exists(mcpPath)) {
      return [];
    }

    const consumerTools = require(mcpPath);

    if (!Array.isArray(consumerTools)) {
      console.error(`[BEM MCP] Consumer mcp.js must export an array, got ${typeof consumerTools}`);
      return [];
    }

    for (const tool of consumerTools) {
      // Report non-object entries explicitly instead of letting a property
      // access on null/undefined surface as a generic load failure
      if (!tool || typeof tool !== 'object') {
        console.error(`[BEM MCP] Consumer tool must be an object, got ${tool === null ? 'null' : typeof tool}`);
        return [];
      }

      if (!tool.name || !tool.description) {
        console.error(`[BEM MCP] Consumer tool missing name or description:`, tool);
        return [];
      }

      if (!tool.path && !tool.handler) {
        console.error(`[BEM MCP] Consumer tool "${tool.name}" must have a path or handler`);
        return [];
      }

      // Consumer tools are admin-only unless they opt into a wider role
      tool.role = tool.role || 'admin';
      tool._consumer = true;
    }

    return consumerTools;
  } catch (error) {
    // The consumer module may throw anything, including null or a string
    const reason = (error && error.message) || String(error);

    console.error(`[BEM MCP] Failed to load consumer tools from ${mcpPath}:`, reason);
    return [];
  }
}
83
+
84
/**
 * Combine built-in and consumer tools into a single Map keyed by tool name.
 * Built-ins are registered first, so a consumer tool with the same name
 * replaces the built-in entry.
 *
 * @param {Iterable<object>} builtinTools - Tools shipped with the package.
 * @param {Iterable<object>} consumerTools - Tools supplied by the consumer.
 * @returns {Map<string, object>} Tool name → tool definition.
 */
function buildToolMap(builtinTools, consumerTools) {
  const entries = [...builtinTools, ...consumerTools].map((tool) => [tool.name, tool]);

  return new Map(entries);
}
101
+
102
+ module.exports = {
103
+ resolveAuthInfo,
104
+ filterToolsByRole,
105
+ loadConsumerTools,
106
+ buildToolMap,
107
+ ROLE_HIERARCHY,
108
+ };
@@ -8,7 +8,7 @@
8
8
  ],
9
9
  "rewrites": [
10
10
  {
11
- "source": "{/backend-manager,/backend-manager/**}",
11
+ "source": "{/backend-manager,/backend-manager/**,/.well-known/oauth-protected-resource,/.well-known/oauth-authorization-server,/authorize,/token}",
12
12
  "function": "bm_api"
13
13
  }
14
14
  ]
@@ -0,0 +1,170 @@
1
+ /**
2
+ * Test: live cross-provider tool loops (libraries/ai)
3
+ *
4
+ * EXTENDED MODE ONLY — drives a real 2-step tool loop (tool call → tool result
5
+ * → final answer) against the live Anthropic and OpenAI APIs to prove the
6
+ * normalized tools interface end-to-end. Costs real API credits; uses the
7
+ * cheapest models.
8
+ *
9
+ * Requires BACKEND_MANAGER_ANTHROPIC_API_KEY / BACKEND_MANAGER_OPENAI_API_KEY
10
+ * in the runner environment.
11
+ */
12
+ const Anthropic = require('../../src/manager/libraries/ai/providers/anthropic.js');
13
+ const OpenAI = require('../../src/manager/libraries/ai/providers/openai.js');
14
+
15
+ const WEATHER_TOOL = {
16
+ name: 'get_weather',
17
+ description: 'Get the current weather for a city. ALWAYS use this tool when asked about weather.',
18
+ parameters: {
19
+ type: 'object',
20
+ properties: {
21
+ city: { type: 'string', description: 'City name' },
22
+ },
23
+ required: ['city'],
24
+ },
25
+ };
26
+
27
+ const SYSTEM = 'You are a weather assistant. Use the get_weather tool to answer weather questions, then summarize the result in one sentence.';
28
+ const QUESTION = 'What is the weather in Paris right now?';
29
+ const TOOL_RESULT = '{"temperature":"21C","conditions":"sunny"}';
30
+
31
/**
 * Build the minimal assistant context needed to construct a provider
 * directly. The live tests skip Manager.AI() so provider behavior can be
 * pinned precisely.
 *
 * @returns {object} A stub with no-op `log`/`error`, an `errorify` that wraps
 *   a message in an Error, a `getUser` returning a fixed test uid, and a
 *   `request` carrying a loopback geolocation ip.
 */
function directAssistant() {
  const reporting = {
    log: () => {},
    error: () => {},
    errorify: (message) => new Error(message),
  };

  const caller = {
    getUser: () => ({ auth: { uid: 'bem-ai-live-test' } }),
    request: { geolocation: { ip: '127.0.0.1' } },
  };

  return Object.assign({}, reporting, caller);
}
42
+
43
/**
 * Decide whether a live test should be skipped.
 *
 * @param {string[]} keys - Environment variable names; any one being set is enough.
 * @returns {string|false} A reason string when the test must be skipped
 *   (extended mode is off, or none of `keys` is set), otherwise false.
 */
function skipReason(keys) {
  const extendedMode = Boolean(process.env.TEST_EXTENDED_MODE);

  if (!extendedMode) {
    return 'TEST_EXTENDED_MODE not set (live AI tool-loop test)';
  }

  const hasCredential = keys.some((key) => process.env[key]);

  // The message names the first key only
  return hasCredential
    ? false
    : `${keys[0]} not set in the runner environment`;
}
54
+
55
+ module.exports = {
56
+ description: 'Live AI tool loops (anthropic + openai)',
57
+ type: 'group',
58
+ tests: [
59
+ {
60
+ name: 'anthropic-two-step-tool-loop',
61
+ timeout: 120000,
62
+ skip: skipReason(['BACKEND_MANAGER_ANTHROPIC_API_KEY', 'ANTHROPIC_API_KEY']),
63
+
64
+ async run({ assert }) {
65
+ const provider = new Anthropic(directAssistant());
66
+
67
+ const base = [
68
+ { role: 'system', content: SYSTEM },
69
+ { role: 'user', content: QUESTION },
70
+ ];
71
+
72
+ // Step 1 — the model must call the tool
73
+ const first = await provider.request({
74
+ model: 'claude-haiku-4-5',
75
+ maxTokens: 1024,
76
+ messages: base,
77
+ tools: { list: [WEATHER_TOOL], choice: 'required' },
78
+ });
79
+
80
+ assert.equal(first.stopReason, 'tool_use', 'first turn stops for tool use');
81
+ assert.equal(first.toolCalls.length >= 1, true, 'at least one tool call');
82
+ assert.equal(first.toolCalls[0].name, 'get_weather', 'called the weather tool');
83
+ assert.equal(
84
+ String(first.toolCalls[0].arguments.city || '').toLowerCase().includes('paris'),
85
+ true,
86
+ 'extracted the city',
87
+ );
88
+
89
+ // Step 2 — replay the raw assistant blocks + tool result, get the answer
90
+ const second = await provider.request({
91
+ model: 'claude-haiku-4-5',
92
+ maxTokens: 1024,
93
+ messages: [
94
+ ...base,
95
+ { role: 'assistant', content: first.raw.content },
96
+ { role: 'tool', toolCallId: first.toolCalls[0].id, content: TOOL_RESULT },
97
+ ],
98
+ tools: { list: [WEATHER_TOOL] },
99
+ });
100
+
101
+ assert.equal(second.stopReason, 'end', 'second turn is final');
102
+ assert.equal(second.toolCalls.length, 0, 'no further tool calls');
103
+
104
+ const answer = String(second.content).toLowerCase();
105
+ assert.equal(
106
+ answer.includes('21') || answer.includes('sunny'),
107
+ true,
108
+ `final answer uses the tool result (got: ${String(second.content).slice(0, 200)})`,
109
+ );
110
+ assert.equal(second.tokens.total.count > 0, true, 'tokens accounted');
111
+ },
112
+ },
113
+
114
+ {
115
+ name: 'openai-two-step-tool-loop',
116
+ timeout: 120000,
117
+ skip: skipReason(['BACKEND_MANAGER_OPENAI_API_KEY', 'OPENAI_API_KEY']),
118
+
119
+ async run({ assert }) {
120
+ const provider = new OpenAI(directAssistant());
121
+
122
+ const base = [
123
+ { role: 'system', content: SYSTEM },
124
+ { role: 'user', content: QUESTION },
125
+ ];
126
+
127
+ // Step 1 — the model must call the tool
128
+ const first = await provider.request({
129
+ model: 'gpt-5-nano',
130
+ maxTokens: 2048,
131
+ moderate: false,
132
+ messages: base,
133
+ tools: { list: [WEATHER_TOOL], choice: 'required' },
134
+ });
135
+
136
+ assert.equal(first.stopReason, 'tool_use', 'first turn stops for tool use');
137
+ assert.equal(first.toolCalls.length >= 1, true, 'at least one tool call');
138
+ assert.equal(first.toolCalls[0].name, 'get_weather', 'called the weather tool');
139
+ assert.equal(
140
+ String(first.toolCalls[0].arguments.city || '').toLowerCase().includes('paris'),
141
+ true,
142
+ 'extracted the city',
143
+ );
144
+
145
+ // Step 2 — replay normalized toolCalls + tool result, get the answer
146
+ const second = await provider.request({
147
+ model: 'gpt-5-nano',
148
+ maxTokens: 2048,
149
+ moderate: false,
150
+ messages: [
151
+ ...base,
152
+ { role: 'assistant', toolCalls: first.toolCalls },
153
+ { role: 'tool', toolCallId: first.toolCalls[0].id, content: TOOL_RESULT },
154
+ ],
155
+ tools: { list: [WEATHER_TOOL] },
156
+ });
157
+
158
+ assert.equal(second.stopReason, 'end', 'second turn is final');
159
+ assert.equal(second.toolCalls.length, 0, 'no further tool calls');
160
+
161
+ const answer = String(second.content).toLowerCase();
162
+ assert.equal(
163
+ answer.includes('21') || answer.includes('sunny'),
164
+ true,
165
+ `final answer uses the tool result (got: ${String(second.content).slice(0, 200)})`,
166
+ );
167
+ },
168
+ },
169
+ ],
170
+ };