backend-manager 5.6.3 → 5.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/CHANGELOG.md +43 -0
  2. package/CLAUDE.md +4 -3
  3. package/PROGRESS.md +34 -0
  4. package/docs/ai-library.md +62 -11
  5. package/docs/cdp-debugging.md +44 -0
  6. package/docs/cli-output.md +22 -10
  7. package/docs/mcp.md +166 -43
  8. package/docs/test-framework.md +2 -2
  9. package/package.json +1 -1
  10. package/plans/mcp2.md +247 -0
  11. package/src/cli/commands/mcp.js +8 -2
  12. package/src/cli/commands/serve.js +155 -29
  13. package/src/cli/commands/setup-tests/base-test.js +8 -0
  14. package/src/cli/commands/setup-tests/firebase-auth.js +26 -0
  15. package/src/cli/commands/setup-tests/firebase-cli.js +9 -13
  16. package/src/cli/commands/setup-tests/index.js +4 -0
  17. package/src/cli/commands/setup-tests/java-installed.js +26 -0
  18. package/src/cli/commands/setup.js +2 -1
  19. package/src/cli/commands/test.js +13 -0
  20. package/src/cli/index.js +14 -0
  21. package/src/cli/utils/ui.js +27 -5
  22. package/src/manager/index.js +8 -3
  23. package/src/manager/libraries/ai/index.js +45 -1
  24. package/src/manager/libraries/ai/providers/anthropic-format.js +234 -0
  25. package/src/manager/libraries/ai/providers/anthropic.js +28 -49
  26. package/src/manager/libraries/ai/providers/claude-code.js +21 -47
  27. package/src/manager/libraries/ai/providers/openai.js +154 -19
  28. package/src/manager/libraries/ai/providers/test.js +242 -0
  29. package/src/manager/libraries/email/data/disposable-domains.json +465 -0
  30. package/src/mcp/client.js +48 -13
  31. package/src/mcp/handler.js +222 -69
  32. package/src/mcp/index.js +48 -18
  33. package/src/mcp/tools.js +150 -0
  34. package/src/mcp/utils.js +108 -0
  35. package/src/test/fixtures/firebase-project/firebase.json +1 -1
  36. package/src/test/test-accounts.js +31 -0
  37. package/test/ai/tools-live.js +170 -0
  38. package/test/email/marketing-lifecycle.js +10 -5
  39. package/test/helpers/ai-test-provider.js +202 -0
  40. package/test/helpers/ai-tools-format.js +350 -0
  41. package/test/mcp/discovery.js +53 -0
  42. package/test/mcp/oauth.js +161 -0
  43. package/test/mcp/protocol.js +268 -0
  44. package/test/mcp/roles.js +168 -0
  45. package/test/mcp/utils.js +245 -0
  46. package/test/routes/marketing/webhook.js +37 -33
  47. package/.claude/settings.local.json +0 -12
@@ -335,12 +335,12 @@ function OpenAI(assistant, key) {
335
335
  const self = this;
336
336
 
337
337
  self.assistant = assistant;
338
- self.Manager = assistant.Manager;
339
- self.user = assistant.user;
338
+ self.Manager = assistant?.Manager;
339
+ self.user = assistant?.user;
340
340
  self.key = key
341
- || self.Manager.config?.openai?.key
342
- || self.Manager.config?.openai?.global
343
- || self.Manager.config?.openai?.main
341
+ || self.Manager?.config?.openai?.key
342
+ || self.Manager?.config?.openai?.global
343
+ || self.Manager?.config?.openai?.main
344
344
  || process.env.OPENAI_API_KEY
345
345
  || process.env.BACKEND_MANAGER_OPENAI_API_KEY
346
346
 
@@ -451,12 +451,19 @@ OpenAI.prototype.request = function (options) {
451
451
  _log('Starting', options);
452
452
 
453
453
 
454
+ // Direct-messages mode: when a unified messages[] array is passed (incl.
455
+ // assistant toolCalls turns + role:'tool' results), it IS the full
456
+ // conversation — prompt/message/history are ignored and the array maps
457
+ // straight to the Responses API input (see formatMessages). The last user
458
+ // turn's text still feeds moderation.
459
+ const useMessages = Array.isArray(options.messages) && options.messages.length > 0;
460
+
454
461
  // Load prompt segments (one entry per role) and the user message
455
- const promptSegments = options.prompt.map((segment) => ({
462
+ const promptSegments = useMessages ? [] : options.prompt.map((segment) => ({
456
463
  role: segment.role,
457
464
  content: loadContent(segment, _log),
458
465
  }));
459
- const message = loadContent(options.message, _log);
466
+ const message = useMessages ? lastUserText(options.messages) : loadContent(options.message, _log);
460
467
  const user = options.user?.auth?.uid || assistant.request.geolocation.ip || 'unknown';
461
468
 
462
469
  // Log
@@ -477,9 +484,10 @@ OpenAI.prototype.request = function (options) {
477
484
  return reject(assistant.errorify(`Error loading message: ${message}`, {code: 400}));
478
485
  }
479
486
 
480
- // Moderate if needed
487
+ // Moderate if needed (skipped in direct-messages mode when the last turn
488
+ // carries no user text — e.g. a tool-result continuation turn)
481
489
  let moderation = null;
482
- if (options.moderate) {
490
+ if (options.moderate && !(useMessages && !message)) {
483
491
  moderation = await makeRequest('moderations', options, self, promptSegments, message, user, _log)
484
492
  .then(async (r) => {
485
493
  // {
@@ -883,6 +891,112 @@ function formatHistory(options, promptSegments, message, _log) {
883
891
  return formatted;
884
892
  }
885
893
 
/**
 * Map a unified messages[] array straight to the Responses API input array.
 *
 * Unified turn shapes:
 *   - { role: 'system'|'developer'|'user'|'assistant', content: string }
 *   - { role: 'assistant', content?, toolCalls: [{ id, name, arguments }] }
 *       → message item (if content) + function_call items
 *   - { role: 'tool', toolCallId, content } → function_call_output item
 *
 * @param {Array<object>} messages - Unified conversation turns, in order.
 * @param {Function} _log - Logger, forwarded to formatMessageContent.
 * @returns {Array<object>} Input items in the same order as the turns.
 */
function formatMessages(messages, _log) {
  const input = [];

  for (const m of messages) {
    // Tool result turn → function_call_output item
    if (m.role === 'tool') {
      input.push({
        type: 'function_call_output',
        call_id: m.toolCallId,
        // `??` rather than `||`: falsy results such as `false` or `0` are real
        // tool output and must not be collapsed into an empty string
        output: typeof m.content === 'string' ? m.content : JSON.stringify(m.content ?? ''),
      });
      continue;
    }

    // Assistant turn with tool calls → message item (if text) + function_call items
    if (m.role === 'assistant' && Array.isArray(m.toolCalls) && m.toolCalls.length) {
      const text = typeof m.content === 'string' ? m.content.trim() : '';

      // A tool-call turn frequently has no text; only emit a message item when it does
      if (text) {
        input.push({
          role: 'assistant',
          content: formatMessageContent(text, [], _log, 'responses', 'assistant'),
        });
      }

      for (const call of m.toolCalls) {
        input.push({
          type: 'function_call',
          call_id: call.id,
          name: call.name,
          // Arguments are sent as a JSON string; pre-serialized strings pass through
          arguments: typeof call.arguments === 'string' ? call.arguments : JSON.stringify(call.arguments || {}),
        });
      }

      continue;
    }

    // Plain text turn (a missing role is treated as a user turn)
    const role = m.role || 'user';
    const content = typeof m.content === 'string' ? m.content.trim() : String(m.content || '');

    input.push({
      role: role,
      content: formatMessageContent(content, m.attachments, _log, 'responses', role),
    });
  }

  return input;
}
952
+
/**
 * Text of the most recent user turn whose content is a string — feeds
 * moderation in direct-messages mode.
 *
 * User turns with non-string content are skipped, so an earlier string turn
 * can be returned even when a later user turn exists.
 *
 * @param {Array<object>} messages - Unified conversation turns.
 * @returns {string} The turn's content, or '' when there is no such turn.
 */
function lastUserText(messages) {
  if (!Array.isArray(messages)) {
    return '';
  }

  // Scan backwards in place rather than copying and reversing the conversation
  for (let i = messages.length - 1; i >= 0; i--) {
    const turn = messages[i];

    if (turn?.role === 'user' && typeof turn.content === 'string') {
      return turn.content;
    }
  }

  return '';
}
958
+
/**
 * Normalized function tools ({ name, description, parameters }) get the
 * Responses API envelope; anything carrying another `type` passes verbatim
 * (hosted tools like { type: 'web_search' }).
 *
 * @param {object} tool - A normalized function tool or a provider-native tool.
 * @returns {object} The enveloped function tool, or `tool` itself when untouched.
 */
function normalizeToolEntry(tool) {
  const isFunctionTool = tool && tool.name && (!tool.type || tool.type === 'function');

  if (!isFunctionTool) {
    return tool;
  }

  const entry = {
    type: 'function',
    name: tool.name,
    description: tool.description || '',
    parameters: tool.parameters || { type: 'object', properties: {} },
  };

  // Keep an explicit strict-mode choice: rebuilding the envelope would
  // otherwise drop it from tools that were already in Responses API shape
  if (typeof tool.strict === 'boolean') {
    entry.strict = tool.strict;
  }

  return entry;
}
974
+
/**
 * 'auto' | 'required' | 'none' pass through; { name } → a specific function tool.
 *
 * Objects that carry a non-function `type` are passed verbatim, mirroring
 * normalizeToolEntry, so a typed choice that also has a `name` is not
 * rewritten into a function choice.
 *
 * @param {string|object} choice - Tool choice as supplied by the caller.
 * @returns {string|object} The Responses API tool_choice value.
 */
function normalizeToolChoice(choice) {
  const isFunctionChoice = choice
    && typeof choice === 'object'
    && choice.name
    && (!choice.type || choice.type === 'function');

  if (isFunctionChoice) {
    return { type: 'function', name: choice.name };
  }

  return choice;
}
983
+
/**
 * Coerce a function_call `arguments` value into an object.
 *
 * Objects pass through untouched. Strings are parsed with JSON5. Anything
 * that is empty, unparseable, or parses to a primitive or null yields {}.
 *
 * @param {string|object} args - Raw arguments from a function_call item.
 * @returns {object} Parsed arguments; {} when none could be recovered.
 */
function parseArguments(args) {
  if (args && typeof args === 'object') {
    return args;
  }

  if (typeof args === 'string' && args.trim()) {
    try {
      const parsed = JSON5.parse(args);

      // Valid JSON is not necessarily an object ('"x"', '42', 'null') —
      // only hand back something callers can read properties from
      if (parsed && typeof parsed === 'object') {
        return parsed;
      }
    } catch (e) {
      // Best-effort by design: malformed arguments fall through to {}
    }
  }

  return {};
}
999
+
886
1000
  function attemptRequest(options, self, promptSegments, message, user, moderation, attempt, assistant, resolve, reject, _log) {
887
1001
  const retries = options.retries;
888
1002
  const triggers = options.retryTriggers;
@@ -961,6 +1075,14 @@ function attemptRequest(options, self, promptSegments, message, user, moderation
961
1075
  .join('\n')
962
1076
  .trim();
963
1077
 
1078
+ // Normalized tool calls (Responses API function_call items) + stop reason
1079
+ const toolCalls = output
1080
+ .filter((o) => o.type === 'function_call')
1081
+ .map((o) => ({ id: o.call_id, name: o.name, arguments: parseArguments(o.arguments) }));
1082
+ const stopReason = toolCalls.length
1083
+ ? 'tool_use'
1084
+ : (r.status === 'incomplete' && r.incomplete_details?.reason === 'max_output_tokens' ? 'max_tokens' : 'end');
1085
+
964
1086
  // Get model configuration
965
1087
  const modelConfig = getModelConfig(options.model);
966
1088
 
@@ -979,9 +1101,10 @@ function attemptRequest(options, self, promptSegments, message, user, moderation
979
1101
  _log('Response', outputText.length, typeof outputText, outputText);
980
1102
  _log('Tokens', self.tokens);
981
1103
 
982
- // Try to parse JSON response if needed
1104
+ // Try to parse JSON response if needed — never on a tool-call turn, where
1105
+ // empty text is the normal intermediate state (the caller continues the loop)
983
1106
  try {
984
- const parsed = options.response === 'json' ? JSON5.parse(outputText) : outputText;
1107
+ const parsed = options.response === 'json' && !toolCalls.length ? JSON5.parse(outputText) : outputText;
985
1108
 
986
1109
  // Return
987
1110
  return resolve({
@@ -989,6 +1112,9 @@ function attemptRequest(options, self, promptSegments, message, user, moderation
989
1112
  content: parsed,
990
1113
  tokens: self.tokens,
991
1114
  moderation: moderation,
1115
+ raw: r,
1116
+ toolCalls: toolCalls,
1117
+ stopReason: stopReason,
992
1118
  })
993
1119
  } catch (e) {
994
1120
  assistant.error('Error parsing response', r, e);
@@ -1055,8 +1181,12 @@ function makeRequest(mode, options, self, promptSegments, message, user, _log) {
1055
1181
  user: user,
1056
1182
  }
1057
1183
  } else if (mode === 'responses') {
1058
- // Format history for responses API
1059
- const history = formatHistory(options, promptSegments, message, _log);
1184
+ // Format input for the Responses API — direct-messages mode maps the
1185
+ // unified messages[] straight through; legacy mode builds from
1186
+ // prompt segments + history + message
1187
+ const history = Array.isArray(options.messages) && options.messages.length
1188
+ ? formatMessages(options.messages, _log)
1189
+ : formatHistory(options, promptSegments, message, _log);
1060
1190
 
1061
1191
  // Set request
1062
1192
  request.url = 'https://api.openai.com/v1/responses';
@@ -1080,15 +1210,17 @@ function makeRequest(mode, options, self, promptSegments, message, user, _log) {
1080
1210
  request.body.reasoning = reasoning;
1081
1211
  }
1082
1212
 
1083
- // Only include tools if `tools.list` is a non-empty array. When present, the
1084
- // response output may contain tool-call items (e.g. web_search_call)
1085
- // alongside the message the message extractor below already ignores
1086
- // non-message items, so this is purely additive.
1213
+ // Only include tools if `tools.list` is a non-empty array. Normalized
1214
+ // function tools ({ name, description, parameters }) get the Responses
1215
+ // `type: 'function'` envelope; hosted tools (web_search, code_interpreter)
1216
+ // pass verbatim. When present, the response output may contain tool-call
1217
+ // items alongside the message — function_call items are extracted into
1218
+ // the normalized `toolCalls` return field.
1087
1219
  if (Array.isArray(options.tools?.list) && options.tools.list.length) {
1088
- request.body.tools = options.tools.list;
1220
+ request.body.tools = options.tools.list.map(normalizeToolEntry);
1089
1221
 
1090
1222
  if (options.tools.choice) {
1091
- request.body.tool_choice = options.tools.choice;
1223
+ request.body.tool_choice = normalizeToolChoice(options.tools.choice);
1092
1224
  }
1093
1225
  }
1094
1226
  }
@@ -1186,5 +1318,8 @@ module.exports = OpenAI;
1186
1318
  module.exports._internals = {
1187
1319
  normalizePrompt,
1188
1320
  formatHistory,
1321
+ formatMessages,
1322
+ normalizeToolEntry,
1323
+ normalizeToolChoice,
1189
1324
  VALID_PROMPT_ROLES,
1190
1325
  };
@@ -0,0 +1,242 @@
1
+ /**
2
+ * Test provider — a deterministic, first-class AI provider for test suites
3
+ * (the `test` payment-processor precedent: a real implementation of the
4
+ * provider seam, not a mock injected into callers).
5
+ *
6
+ * REFUSES to run in production — only development/testing environments.
7
+ *
8
+ * Behavior is scripted by directives embedded in the LAST user message. The
9
+ * directives form a SEQUENCE consumed across the turns of a tool loop: call N
10
+ * executes directive N-1 (indexed by how many assistant turns follow the last
11
+ * user turn in options.messages). Directive content must not contain `]]`.
12
+ *
13
+ * [[tool:check_order {"orderNumber":"123"}]] — one tool call this step
14
+ * [[tools:[{"name":"a","arguments":{}},{"name":"b","arguments":{}}]]]
15
+ * — parallel tool calls this step
16
+ * [[reply:{"message":"done"}]] — final reply (JSON or text)
17
+ * [[delay:500]] — modifier: delay the NEXT step
18
+ * [[error:boom]] — throw at this step
19
+ *
20
+ * No directives (or script exhausted) → echo reply: `Echo: <text>` (wrapped as
21
+ * { message } when options.response === 'json').
22
+ *
23
+ * Returns the same shape as the real providers:
24
+ * { content, output, tokens, raw, toolCalls, stopReason }
25
+ */
26
+ const JSON5 = require('json5');
27
+
28
+ // The closing `]]` must not be followed by another `]` so directive values may
29
+ // END with a JSON `]` (e.g. [[tools:[...]]]); `]]` strictly INSIDE a value is
30
+ // still unsupported
31
+ const DIRECTIVE_REGEX = /\[\[(tool|tools|reply|delay|error)(?::([\s\S]*?))?\]\](?!\])/g;
32
+
/**
 * Deterministic AI provider for test suites.
 *
 * @param {object} [assistant] - Assistant instance; its `Manager` (if any) is kept.
 * @param {string} [key] - API key placeholder; falls back to 'test' when falsy.
 */
function TestProvider(assistant, key) {
  const self = this;

  // Counters start at zero; request() adds to them on every call
  const emptyTokens = {
    total: { count: 0, price: 0 },
    input: { count: 0, price: 0 },
    output: { count: 0, price: 0 },
  };

  Object.assign(self, {
    assistant: assistant,
    Manager: assistant?.Manager,
    key: key || 'test',
    tokens: emptyTokens,
  });

  return self;
}
48
+
/**
 * Execute one step of the script embedded in the last user message.
 *
 * The script comes from the last string-content user turn in
 * options.messages, falling back to options.message.content. The step to run
 * is chosen by counting the assistant turns that follow that user turn.
 *
 * @param {object} [options] - Request options (messages, message, response).
 * @returns {Promise<object>} { content, output, tokens, raw, toolCalls, stopReason }
 * @throws {Error} When the environment is not allowed, or on a scripted error step.
 */
TestProvider.prototype.request = async function (options) {
  const self = this;

  assertAllowedEnvironment(self.Manager);

  options = options || {};

  const messages = Array.isArray(options.messages) ? options.messages : [];
  const lastUserMessage = [...messages].reverse().find((m) => m.role === 'user' && typeof m.content === 'string');
  const scriptSource = lastUserMessage?.content || stringifyLoose(options.message?.content) || '';

  const { steps, cleanText } = parseScript(scriptSource);

  // Which step of the script is this call? One assistant turn is appended per
  // loop iteration, so call N sees N-1 assistant turns after the last user turn.
  const lastUserIdx = messages.lastIndexOf(lastUserMessage);
  const stepIndex = messages.slice(lastUserIdx + 1).filter((m) => m.role === 'assistant').length;

  // Past the end of the script (or no script at all) → default echo reply
  const step = steps[stepIndex] || { type: 'echo' };

  // Simulated token accounting so usage/cost paths execute (~4 chars per token)
  const inputChars = messages.reduce((n, m) => n + stringifyLoose(m.content).length, 0) || scriptSource.length;
  self.tokens.input.count += Math.ceil(inputChars / 4);

  if (step.delay) {
    // Capped at 30s so a mistyped delay cannot stall a suite for long
    await new Promise((resolve) => setTimeout(resolve, Math.min(step.delay, 30000)));
  }

  if (step.type === 'error') {
    // Input tokens were already counted above; refresh the total before
    // throwing so the counters stay consistent for anyone inspecting them
    finalizeTokens(self.tokens);

    throw new Error(step.message || 'Test provider scripted error');
  }

  if (step.type === 'tools') {
    const toolCalls = step.calls.map((call, i) => ({
      id: `call_test_${stepIndex}_${i}`,
      name: call.name,
      arguments: call.arguments || {},
    }));

    self.tokens.output.count += 10 * toolCalls.length;
    finalizeTokens(self.tokens);

    return {
      content: '',
      output: [],
      tokens: self.tokens,
      raw: { provider: 'test', step: stepIndex, toolCalls },
      toolCalls,
      stopReason: 'tool_use',
    };
  }

  // Final reply — scripted or echo
  let text;

  if (step.type === 'reply') {
    text = step.content;
  } else {
    text = `Echo: ${cleanText || '(empty)'}`;

    if (options.response === 'json') {
      text = JSON.stringify({ message: text });
    }
  }

  self.tokens.output.count += Math.ceil(text.length / 4);
  finalizeTokens(self.tokens);

  let parsed = text;

  if (options.response === 'json') {
    try {
      parsed = JSON5.parse(text);
    } catch (e) {
      // Loose by design — a scripted plain-text reply stays a string
    }
  }

  return {
    content: parsed,
    output: [{ type: 'text', text }],
    tokens: self.tokens,
    raw: { provider: 'test', step: stepIndex },
    toolCalls: [],
    stopReason: 'end',
  };
};
136
+
/**
 * Extract the directive script from a message.
 *
 * @param {*} rawSource - Message text (coerced to a string; falsy → '').
 * @returns {{steps: Array<object>, cleanText: string}} The ordered steps, and
 *   the message with every directive removed and trimmed (the echo source).
 */
function parseScript(rawSource) {
  // Consumers may markdown-escape user input before it reaches the provider
  // (\[\[tool:...\]\]) — unescape so directives still parse. This runs over
  // the whole message, directive payloads included.
  const source = String(rawSource || '').replace(/\\([\\`*_{}[\]()#+\-.!~|>])/g, '$1');

  const steps = [];
  let carriedDelay = 0;

  // The regex is a shared /g instance; start every scan from the beginning
  DIRECTIVE_REGEX.lastIndex = 0;

  for (const [, kind, payload] of source.matchAll(DIRECTIVE_REGEX)) {
    if (kind === 'delay') {
      // A delay is a modifier: remember it for the next real directive
      carriedDelay = parseInt(payload, 10) || 0;
    } else {
      const step = buildStep(kind, payload);

      step.delay = carriedDelay;
      carriedDelay = 0;

      steps.push(step);
    }
  }

  // Trailing delay with no following directive → delay the default echo
  if (carriedDelay) {
    steps.push({ type: 'echo', delay: carriedDelay });
  }

  return {
    steps,
    cleanText: source.replace(DIRECTIVE_REGEX, '').trim(),
  };
}
176
+
/**
 * Turn one parsed directive into a script step.
 *
 * @param {string} type - Directive name: 'error' | 'reply' | 'tools' | 'tool'.
 * @param {string} [value] - Raw text after the directive's colon, if any.
 * @returns {object} { type: 'error', message } | { type: 'reply', content }
 *   | { type: 'tools', calls }
 * @throws When a tools/tool JSON payload cannot be parsed by JSON5.
 */
function buildStep(type, value) {
  if (type === 'error') {
    return { type: 'error', message: (value || '').trim() };
  }

  if (type === 'reply') {
    return { type: 'reply', content: (value || '').trim() };
  }

  if (type === 'tools') {
    const parsed = JSON5.parse(value);

    // A single call object is accepted as a one-element list; consumers
    // iterate `calls`, so it must be an array
    const isSingleCall = parsed && typeof parsed === 'object' && !Array.isArray(parsed);

    return { type: 'tools', calls: isSingleCall ? [parsed] : parsed };
  }

  // tool:name {json} — the name runs up to the first whitespace, the rest is
  // the (optional) JSON arguments
  const trimmed = (value || '').trim();
  const spaceIdx = trimmed.search(/\s/);
  const name = spaceIdx < 0 ? trimmed : trimmed.slice(0, spaceIdx);
  const argsRaw = spaceIdx < 0 ? '' : trimmed.slice(spaceIdx).trim();

  return {
    type: 'tools',
    calls: [{ name, arguments: argsRaw ? JSON5.parse(argsRaw) : {} }],
  };
}
201
+
/**
 * Guard: throw unless running in a development or testing environment.
 *
 * @param {object} [Manager] - Manager instance; used as the authority when it
 *   exposes both isDevelopment() and isTesting().
 * @throws {Error} When the environment is not development/testing.
 */
function assertAllowedEnvironment(Manager) {
  const message = 'AI test provider is only available in development or testing environments';

  // The Manager's environment detection is the SSOT when in scope
  if (Manager && typeof Manager.isDevelopment === 'function' && typeof Manager.isTesting === 'function') {
    if (Manager.isDevelopment() || Manager.isTesting()) {
      return;
    }

    throw new Error(message);
  }

  // No Manager (pure unit tests) — allow only under explicit test/emulator signals
  if (process.env.BEM_TESTING === 'true') {
    return;
  }

  // Environment variables are strings, so a bare truthiness check would let
  // FUNCTIONS_EMULATOR=false (or =0) unlock the provider
  const emulator = String(process.env.FUNCTIONS_EMULATOR || '').trim().toLowerCase();

  if (emulator && emulator !== 'false' && emulator !== '0') {
    return;
  }

  throw new Error(message);
}
219
+
// Recompute the running total from the input and output counters (in place)
function finalizeTokens(tokens) {
  const { input, output, total } = tokens;

  total.count = input.count + output.count;
}
223
+
/**
 * Best-effort conversion of message content to text.
 *
 * @param {*} content - A string, an array of parts, or anything else.
 * @returns {string} Strings as-is; arrays as their parts' text joined by
 *   newlines; other truthy values via String(); '' for falsy values.
 */
function stringifyLoose(content) {
  if (typeof content === 'string') {
    return content;
  }

  if (Array.isArray(content)) {
    // A part is either a plain string or an object with a `text` field;
    // anything else contributes an empty line
    return content
      .map((part) => (typeof part === 'string' ? part : part?.text || ''))
      .join('\n');
  }

  return content ? String(content) : '';
}
235
+
236
+ module.exports = TestProvider;
237
+
238
+ // Exposed for unit tests. Not part of the public API.
239
+ module.exports._internals = {
240
+ parseScript,
241
+ buildStep,
242
+ };