mojulo 0.0.0 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. package/README.md +53 -4
  2. package/lib/audit-logger-new.js +11 -0
  3. package/lib/auth/gate.js +25 -0
  4. package/lib/auth/service.js +17 -0
  5. package/lib/auth/session.js +63 -0
  6. package/lib/builder/chat-processor.js +607 -0
  7. package/lib/builder/composer-bridge.js +82 -0
  8. package/lib/builder/evaluator.js +159 -0
  9. package/lib/builder/executor.js +252 -0
  10. package/lib/builder/index.js +48 -0
  11. package/lib/builder/session.js +248 -0
  12. package/lib/builder/system-prompt.js +422 -0
  13. package/lib/builder/tone-presets.js +75 -0
  14. package/lib/builder/tool-executors.js +1418 -0
  15. package/lib/builder/tools.js +338 -0
  16. package/lib/builder/validators.js +239 -0
  17. package/lib/composer/composer.js +225 -0
  18. package/lib/composer/index.js +40 -0
  19. package/lib/composer/protocols/00_base.txt +19 -0
  20. package/lib/composer/protocols/01_knowledge.txt +9 -0
  21. package/lib/composer/protocols/02_form-gathering.txt +32 -0
  22. package/lib/composer/protocols/03_appointments.txt +16 -0
  23. package/lib/composer/protocols/04_triage.txt +15 -0
  24. package/lib/composer/protocols/05_optical-read.txt +22 -0
  25. package/lib/composer/response-builder.js +98 -0
  26. package/lib/config-builder.js +650 -0
  27. package/lib/db/ids.js +10 -0
  28. package/lib/db/index.js +179 -0
  29. package/lib/db/repositories/apiKeys.js +72 -0
  30. package/lib/db/repositories/auditLogs.js +12 -0
  31. package/lib/db/repositories/botSpaces.js +12 -0
  32. package/lib/db/repositories/builderSessions.js +312 -0
  33. package/lib/db/repositories/deploymentEvents.js +12 -0
  34. package/lib/db/repositories/deployments.js +385 -0
  35. package/lib/db/repositories/documents.js +68 -0
  36. package/lib/db/repositories/mcpJobs.js +84 -0
  37. package/lib/deployers/bot-fleet.js +110 -0
  38. package/lib/deployers/bot-proxy.js +72 -0
  39. package/lib/deployers/build.js +89 -0
  40. package/lib/deployers/cloud-deploy.js +310 -0
  41. package/lib/deployers/docker.js +439 -0
  42. package/lib/deployers/fly.js +432 -0
  43. package/lib/deployers/index.js +38 -0
  44. package/lib/deployment-auth.js +36 -0
  45. package/lib/document-parser.js +171 -0
  46. package/lib/embedder/chunker.js +93 -0
  47. package/lib/embedder/local.js +101 -0
  48. package/lib/embedder/preview-rag.js +93 -0
  49. package/lib/envelope-schema.js +54 -0
  50. package/lib/fleet/scoped-sql.js +342 -0
  51. package/lib/form-schema-config/base.js +135 -0
  52. package/lib/form-schema-config/index.js +286 -0
  53. package/lib/form-schema-config/locales/af-ZA.js +153 -0
  54. package/lib/form-schema-config/locales/ar-AE.js +142 -0
  55. package/lib/form-schema-config/locales/ar-SA.js +164 -0
  56. package/lib/form-schema-config/locales/de-DE.js +152 -0
  57. package/lib/form-schema-config/locales/en-AU.js +161 -0
  58. package/lib/form-schema-config/locales/en-CA.js +115 -0
  59. package/lib/form-schema-config/locales/en-GB.js +132 -0
  60. package/lib/form-schema-config/locales/en-IN.js +219 -0
  61. package/lib/form-schema-config/locales/en-MY.js +171 -0
  62. package/lib/form-schema-config/locales/en-NG.js +198 -0
  63. package/lib/form-schema-config/locales/en-PH.js +186 -0
  64. package/lib/form-schema-config/locales/en-SG.js +153 -0
  65. package/lib/form-schema-config/locales/en-US.js +138 -0
  66. package/lib/form-schema-config/locales/es-ES.js +171 -0
  67. package/lib/form-schema-config/locales/es-MX.js +193 -0
  68. package/lib/form-schema-config/locales/fr-CA.js +138 -0
  69. package/lib/form-schema-config/locales/fr-FR.js +155 -0
  70. package/lib/form-schema-config/locales/hi-IN.js +219 -0
  71. package/lib/form-schema-config/locales/it-IT.js +157 -0
  72. package/lib/form-schema-config/locales/ja-JP.js +169 -0
  73. package/lib/form-schema-config/locales/ko-KR.js +140 -0
  74. package/lib/form-schema-config/locales/nl-NL.js +149 -0
  75. package/lib/form-schema-config/locales/pt-BR.js +168 -0
  76. package/lib/form-schema-config/locales/zh-CN.js +172 -0
  77. package/lib/form-schema-config/locales/zh-HK.js +142 -0
  78. package/lib/form-structure-schema.js +191 -0
  79. package/lib/llm-providers.js +828 -0
  80. package/lib/markdown.js +197 -0
  81. package/lib/mcp/catalysts/appointment-to-calendar.md +84 -0
  82. package/lib/mcp/catalysts/conversations-to-channel-digest.md +104 -0
  83. package/lib/mcp/catalysts/document-extract-to-store.md +92 -0
  84. package/lib/mcp/catalysts/knowledge-gap-miner.md +96 -0
  85. package/lib/mcp/catalysts/loader.js +144 -0
  86. package/lib/mcp/catalysts/qualify-lead-to-crm.md +83 -0
  87. package/lib/mcp/catalysts/scan-conversations-for-signal.md +92 -0
  88. package/lib/mcp/catalysts/submission-to-ticket.md +83 -0
  89. package/lib/mcp/catalysts/submissions-to-warehouse.md +103 -0
  90. package/lib/mcp/catalysts/weekly-submissions-digest.md +82 -0
  91. package/lib/mcp/jobs.js +64 -0
  92. package/lib/mcp/server.js +184 -0
  93. package/lib/mcp/session-binding.js +130 -0
  94. package/lib/mcp/tools/build.js +123 -0
  95. package/lib/mcp/tools/catalysts.js +477 -0
  96. package/lib/mcp/tools/context.js +325 -0
  97. package/lib/mcp/tools/fleet.js +391 -0
  98. package/lib/mcp/tools/jobs-tools.js +240 -0
  99. package/lib/mcp/tools/operate.js +314 -0
  100. package/lib/preview/build-preview-config.js +136 -0
  101. package/lib/rate-limiter.js +11 -0
  102. package/lib/resolve-api-key.js +142 -0
  103. package/lib/storage/index.js +40 -0
  104. package/messages/de.json +2136 -0
  105. package/messages/en.json +2136 -0
  106. package/messages/es.json +2136 -0
  107. package/messages/fr.json +2136 -0
  108. package/messages/it.json +2136 -0
  109. package/messages/ja.json +2136 -0
  110. package/messages/ko.json +2136 -0
  111. package/messages/nl.json +2136 -0
  112. package/messages/pl.json +2136 -0
  113. package/messages/pt.json +2136 -0
  114. package/messages/ru.json +2136 -0
  115. package/messages/uk.json +2136 -0
  116. package/messages/zh.json +2136 -0
  117. package/package.json +61 -5
  118. package/scripts/mcp-config.mjs +162 -0
  119. package/scripts/mcp-stdio-loader.mjs +42 -0
  120. package/scripts/mcp-stdio.mjs +108 -0
  121. package/scripts/mojulo-paths.mjs +48 -0
@@ -0,0 +1,1418 @@
1
+ /**
2
+ * Builder Tool Executors for Inverted Flow
3
+ *
4
+ * Executes tool calls from Claude during the inverted builder flow.
5
+ * These tools enable Claude to:
6
+ * - Process documents into RAG summaries
7
+ * - Infer user intent and confidence
8
+ * - Recommend protocols based on context
9
+ * - Generate configurations for each protocol
10
+ * - Compose bot identity
11
+ *
12
+ * Philosophy: "Claude proposes, User disposes"
13
+ */
14
+
15
+ import { validateToolInput } from './tools.js';
16
+ import { BuilderSessionRepository, SESSION_STATUS } from '@/lib/db/repositories/builderSessions.js';
17
+ import { DocumentRepository } from '@/lib/db/repositories/documents.js';
18
+ import { ApiKeyRepository } from '@/lib/db/repositories/apiKeys.js';
19
+ import { decryptApiKey } from '@/lib/deployment-auth.js';
20
+ import { getDefaultModelForTask, getAllowedProtocolsForModel } from '@/lib/llm-providers.js';
21
+ import { saveBuilderConfig } from './executor.js';
22
+ import { buildArtifact } from '@/lib/deployers/build.js';
23
+
24
/**
 * Resolve which LLM provider, API key and model a builder session should use.
 *
 * Key selection walks an ordered list of lookups and stops at the first hit:
 *   1. the exact key id stored in the session's preloaded builder config,
 *   2. any key belonging to the session's default provider,
 *   3. the first key found for anthropic, bedrock, openai, then ollama.
 * Ollama is deliberately last so that a user holding both a cloud key and an
 * Ollama key is only routed to local inference when they mark it as default.
 *
 * The `task` argument is forwarded to getDefaultModelForTask so callers can
 * request the model tier (reasoning / structured / summary) that matches
 * their workload.
 *
 * @param {Object} session - Builder session; `preloadedContext` is optional
 * @param {string} userId - User whose stored API keys are searched
 * @param {string} [task='reasoning'] - Task tier: reasoning | structured | summary
 * @returns {Promise<{ provider: string, apiKey: string, model: string }>}
 * @throws {Error} When the user has no key matching any lookup
 */
async function getLLMConfigFromSession(session, userId, task = 'reasoning') {
  const preloaded = session.preloadedContext || {};
  const preferredKeyId = preloaded.defaultApiKeyId;
  const preferredProvider = preloaded.defaultProvider;

  const userKeys = await ApiKeyRepository.findByUserId(userId);
  const firstKeyForProvider = (name) => userKeys.find((key) => key.provider === name);

  // Ordered from most to least specific; each entry is evaluated lazily.
  const lookups = [
    () => (preferredKeyId ? userKeys.find((key) => key.id === preferredKeyId) : undefined),
    () => (preferredProvider ? firstKeyForProvider(preferredProvider) : undefined),
    () => firstKeyForProvider('anthropic'),
    () => firstKeyForProvider('bedrock'),
    () => firstKeyForProvider('openai'),
    () => firstKeyForProvider('ollama'),
  ];

  let chosenKey;
  for (const lookup of lookups) {
    chosenKey = lookup();
    if (chosenKey) {
      break;
    }
  }

  if (!chosenKey) {
    throw new Error('No API key available for LLM operations');
  }

  const { provider } = chosenKey;
  return {
    provider,
    apiKey: decryptApiKey(chosenKey.encryptedKey),
    model: getDefaultModelForTask(provider, task),
  };
}
78
+
79
/**
 * Canned starter prompts keyed by bot intent.
 *
 * Used as the fallback source of suggested prompts when no domainDigest is
 * available to generate contextual ones. Each intent maps to three prompts.
 */
const STATIC_PROMPT_SUGGESTIONS = {
  // General customer support
  support_bot: ['How do I get started?', 'What are the pricing options?', 'I need help with my account'],
  // Lead capture / sales
  lead_gen: ['Tell me about your services', 'I want to get a quote', 'Schedule a demo'],
  // Booking flows
  appointment_scheduler: ['What times are available?', 'Book a consultation', 'Reschedule my appointment'],
  // Document Q&A
  knowledge_base: ['How does this work?', 'What are the features?', 'Show me the documentation'],
  // Feedback and surveys
  feedback_collector: ['I have a suggestion', 'Report an issue', 'Share my experience'],
  // New-user onboarding
  onboarding_assistant: ['Show me around', 'What can I do here?', 'Help me set up'],
  // Routing between departments
  triage_router: ['I need to talk to sales', 'Technical support please', 'Connect me to billing'],
};
119
+
120
/**
 * Get the static starter prompts for an intent type.
 *
 * Only the table's own keys count as known intents. A plain `table[intent]`
 * lookup would also resolve inherited members such as `constructor` or
 * `toString`, and return a function/object instead of a prompt array.
 * Anything that is not a known intent falls back to the support_bot prompts.
 *
 * @param {string} intent - Bot intent type
 * @returns {string[]} Array of suggested prompts
 */
function getStaticPromptsForIntent(intent) {
  const isKnownIntent =
    typeof intent === 'string' &&
    Object.prototype.hasOwnProperty.call(STATIC_PROMPT_SUGGESTIONS, intent);

  return isKnownIntent
    ? STATIC_PROMPT_SUGGESTIONS[intent]
    : STATIC_PROMPT_SUGGESTIONS.support_bot;
}
128
+
129
/**
 * Generate a contextual firstMessage and objective from a domain digest
 * using the session's LLM.
 *
 * This is a best-effort helper: every failure mode (no API key, LLM call
 * error, unparseable response, missing fields) is logged and reported as
 * `null` so the caller can fall back to static defaults.
 *
 * @param {string} domainDigest - Per-document LLM-composed digest of the corpus
 * @param {string} userMessage - Original user message describing what they want
 * @param {string} intent - Bot intent type
 * @param {string} organizationName - Organization name if available
 * @param {Object} session - Builder session for LLM config lookup
 * @param {string} userId - User ID for API key lookup
 * @returns {Promise<{ firstMessage: string, objective: string } | null>}
 */
async function generateContextualIdentity(domainDigest, userMessage, intent, organizationName, session, userId) {
  // Structured tier: the response is expected to be a single JSON object.
  let llmConfig;
  try {
    llmConfig = await getLLMConfigFromSession(session, userId, 'structured');
  } catch (err) {
    console.log('[Builder] No API key available for identity generation:', err.message);
    return null;
  }

  const { provider, apiKey, model } = llmConfig;
  const { generateSummary } = await import('@/lib/llm-providers.js');

  // Coerce the free-form inputs so a missing argument degrades the prompt
  // instead of throwing before the documented null-on-failure path.
  const requestText = String(userMessage ?? '');
  const digestText = String(domainDigest ?? '');
  const intentLabel = String(intent || 'support_bot').replace(/_/g, ' ');

  const identityPrompt = `Generate a contextual bot identity based on the following:

USER'S REQUEST:
${requestText.substring(0, 500)}

DOCUMENT SUMMARY (knowledge the bot will have):
${digestText.substring(0, 1500)}

BOT TYPE: ${intentLabel}
ORGANIZATION: ${organizationName || 'Not specified'}

Generate:
1. **firstMessage**: A warm, specific greeting (1-2 sentences) that:
- Introduces what the bot can help with based on the actual document content
- Mentions specific topics/services from the documents (not generic)
- Feels welcoming and helpful
- Max 150 characters

2. **objective**: A concise statement (1 sentence) describing the bot's purpose that:
- Is specific to the document content and user's request
- Mentions key capabilities based on the documents
- Max 200 characters

Return ONLY a JSON object with "firstMessage" and "objective" keys, no other text.
Example:
{"firstMessage": "Hi! I'm the Valley Dental assistant. I can help with appointment booking, insurance questions, or info about our services.", "objective": "Help visitors learn about dental services, pricing, insurance, and book appointments at Valley Dental."}`;

  // Keep the LLM call separate from parsing so the log says which step failed.
  let response;
  try {
    response = await generateSummary(
      provider,
      identityPrompt,
      apiKey,
      'Generate contextual bot identity',
      model
    );
  } catch (llmError) {
    console.warn('[Builder] Identity generation LLM call failed:', llmError.message);
    return null;
  }

  let identity;
  try {
    identity = parseIdentityJson(typeof response === 'string' ? response : '');
  } catch (parseError) {
    console.warn('[Builder] Failed to parse identity generation response:', parseError.message);
    return null;
  }

  if (!identity) {
    console.warn('[Builder] No JSON object found in identity generation response');
    return null;
  }

  // Both fields must be non-blank strings; anything else is unusable.
  const isNonBlankText = (value) => typeof value === 'string' && value.trim().length > 0;
  if (!isNonBlankText(identity.firstMessage) || !isNonBlankText(identity.objective)) {
    console.warn('[Builder] Missing required fields in identity generation');
    return null;
  }

  // Clean and truncate. The caps are looser than the limits stated in the
  // prompt, so a slightly over-length answer is kept rather than cut mid-word.
  const result = {
    firstMessage: identity.firstMessage.trim().substring(0, 200),
    objective: identity.objective.trim().substring(0, 250),
  };

  console.log('[Builder] Generated contextual identity:', result);
  return result;
}

/**
 * Pull the JSON object out of an LLM response that may wrap it in prose or a
 * markdown code fence.
 *
 * The shortest `{...}` span is tried first, which is correct for a flat
 * object. If that does not parse (e.g. a string value contains `}`), the
 * widest span from the first `{` to the last `}` is tried.
 *
 * @param {string} text - Raw LLM response text
 * @returns {Object|null} Parsed object, or null when the text contains no `{...}` span
 * @throws {SyntaxError} When a span exists but none of the candidates parse
 */
function parseIdentityJson(text) {
  const candidatePatterns = [/\{[\s\S]*?\}/, /\{[\s\S]*\}/];
  let firstError = null;

  for (const pattern of candidatePatterns) {
    const match = text.match(pattern);
    if (!match) {
      return null;
    }
    try {
      return JSON.parse(match[0]);
    } catch (err) {
      if (!firstError) {
        firstError = err;
      }
    }
  }

  throw firstError;
}
219
+
220
/**
 * Execute a modular builder tool call.
 *
 * The input is validated first; an invalid input is reported as a failed
 * result rather than thrown. The handler is then resolved from the handler
 * table's own properties only, so inherited names such as `constructor` are
 * treated as unknown tools. Any error thrown by the handler is logged and
 * converted into a failed result.
 *
 * `context` is passed to the handler untouched. It is not unpacked here, so a
 * missing context surfaces as a failed result from the handler instead of an
 * unhandled rejection from this function.
 *
 * @param {string} toolName - Name of the tool
 * @param {Object} input - Tool input
 * @param {Object} context - Execution context (session, user, etc.)
 * @returns {Promise<{ success: boolean, result?: any, error?: string }>}
 */
export async function executeBuilderTool(toolName, input, context) {
  // Validate input
  const validation = validateToolInput(toolName, input);
  if (!validation.valid) {
    return {
      success: false,
      error: `Invalid input: ${validation.error}`,
    };
  }

  try {
    const isRegistered = Object.prototype.hasOwnProperty.call(builderToolHandlers, toolName);
    const handler = isRegistered ? builderToolHandlers[toolName] : undefined;
    if (typeof handler !== 'function') {
      return {
        success: false,
        error: `Unknown modular tool: ${toolName}`,
      };
    }

    const result = await handler(input, context);
    return { success: true, result };
  } catch (error) {
    console.error(`[Builder] Tool execution error (${toolName}):`, error);
    return {
      success: false,
      // Optional chaining covers a thrown null/undefined.
      error: error?.message || 'Tool execution failed',
    };
  }
}
258
+
259
/**
 * Extract prepopulated bot settings from a free-text user message.
 *
 * Four independent groups of regexes are tried; within each group the first
 * pattern that yields a capture wins:
 *   - bot name:  "called X", "named X", "name it X", "call it X",
 *                "bot name X", or a quoted name followed by "bot"/"assistant"
 *   - resource:  "for X", "company X", "organization X", ... or a quoted
 *                name followed by company/organization/business/brand
 *   - greeting:  a quoted string after "greeting", "first message", ...
 *   - objective: a quoted string after "objective"/"purpose"/"goal", or the
 *                text following "should help ..." / "will assist ..."
 *
 * A matched bot name is stored twice: `displayName` keeps the text as
 * written, `botName` is a lowercase hyphenated slug of at most 30 characters
 * with no trailing hyphen.
 *
 * @param {string} userMessage - User's message
 * @returns {Object} Extracted settings; any of botName, displayName,
 *   resourceName, firstMessage, objective. Empty when nothing matched or the
 *   input is not a string.
 */
function extractPrepopulatedSettings(userMessage) {
  const settings = {};

  // Nothing can be extracted from a missing or non-text message.
  if (typeof userMessage !== 'string' || userMessage.length === 0) {
    return settings;
  }

  // Patterns for bot name detection
  // Matches: "called X", "named X", "bot name X", "name it X", "call it X"
  const botNamePatterns = [
    /(?:called|named|name it|call it)\s+["']?([a-zA-Z0-9][\w\s-]{0,30}[a-zA-Z0-9])["']?(?:\s|$|,|\.)/i,
    /bot\s+(?:name|called|named)\s+["']?([a-zA-Z0-9][\w\s-]{0,30}[a-zA-Z0-9])["']?(?:\s|$|,|\.)/i,
    /["']([a-zA-Z0-9][\w\s-]{0,30}[a-zA-Z0-9])["']\s+(?:bot|assistant)/i,
  ];

  // Patterns for resource/company name detection
  // Matches: "for X", "resource X", "company X", "organization X", "business X"
  const resourceNamePatterns = [
    /(?:for|resource|company|organization|business|brand)\s+(?:name\s+)?["']?([a-zA-Z0-9][\w\s&.-]{0,40}[a-zA-Z0-9])["']?(?:\s|$|,|\.)/i,
    /["']([a-zA-Z0-9][\w\s&.-]{0,40}[a-zA-Z0-9])["']\s+(?:company|organization|business|brand)/i,
  ];

  // Patterns for greeting/first message detection
  const greetingPatterns = [
    /(?:greeting|first message|welcome message|start with)\s*[:\s]+["'](.{5,150})["']/i,
    /greet(?:ing)?\s+(?:should be|as)\s+["'](.{5,150})["']/i,
  ];

  // Patterns for objective/purpose detection
  const objectivePatterns = [
    /(?:objective|purpose|goal)\s*[:\s]+["'](.{10,200})["']/i,
    /(?:should|will)\s+(?:help|assist)\s+(?:users?\s+)?(?:with\s+)?(.{10,150})/i,
  ];

  // Try to extract bot name
  for (const pattern of botNamePatterns) {
    const match = userMessage.match(pattern);
    if (match && match[1]) {
      const extracted = match[1].trim();
      // Sanitize for use as bot name (slug format). Truncating to 30 chars
      // can cut right after a separator, so trailing hyphens are stripped.
      settings.botName = extracted
        .toLowerCase()
        .replace(/[^a-z0-9\s-]/g, '')
        .replace(/\s+/g, '-')
        .replace(/-+/g, '-')
        .substring(0, 30)
        .replace(/-+$/, '');
      // Also store the display-friendly version
      settings.displayName = extracted;
      break;
    }
  }

  // Try to extract resource/organization name
  // Avoid matching common words that aren't company names
  const skipWords = ['my', 'the', 'a', 'an', 'this', 'that', 'our', 'their', 'your'];
  for (const pattern of resourceNamePatterns) {
    const match = userMessage.match(pattern);
    if (match && match[1]) {
      const extracted = match[1].trim();
      // A skip-word hit does not end the search; the next pattern is tried.
      if (!skipWords.includes(extracted.toLowerCase())) {
        settings.resourceName = extracted;
        break;
      }
    }
  }

  // Try to extract custom greeting
  for (const pattern of greetingPatterns) {
    const match = userMessage.match(pattern);
    if (match && match[1]) {
      settings.firstMessage = match[1].trim();
      break;
    }
  }

  // Try to extract objective
  for (const pattern of objectivePatterns) {
    const match = userMessage.match(pattern);
    if (match && match[1]) {
      settings.objective = match[1].trim();
      break;
    }
  }

  return settings;
}
347
+
348
/**
 * Embed a batch of `{ text, metadata }` chunks with the local embedder and
 * write them into the session's embeddings blob (`embeddings/<session.id>.json`).
 *
 * If a readable blob already exists for the session, the new chunks are
 * appended after its chunks; otherwise the blob is created. Keeping a single
 * blob per session means one similarity search covers every chunk regardless
 * of origin, with `metadata` left for later layers to interpret.
 *
 * On an embedding failure (or a vector-count mismatch) the error is
 * surfaced, and the blob is deleted only when no earlier chunks were loaded.
 *
 * @param {Array<{ text: string, metadata: any }>} chunks - Non-empty chunk list
 * @param {Object} session - Builder session; only `id` is read
 * @returns {Promise<{ storageKey: string, chunkCount: number, model: string }>}
 *   `chunkCount` is the total number of chunks in the blob after the write
 * @throws {Error} On empty input, embedding failure, or vector-count mismatch
 */
async function embedAndPersistChunks(chunks, session) {
  const { downloadToBuffer, uploadFile, deleteFile } = await import('@/lib/storage/index.js');
  const { generateEmbeddings, LOCAL_EMBEDDING_MODEL } = await import('@/lib/embedder/local.js');

  const hasChunks = Boolean(chunks) && chunks.length !== 0;
  if (!hasChunks) {
    throw new Error('embedAndPersistChunks: chunks must be a non-empty array');
  }

  const storageKey = `embeddings/${session.id}.json`;

  // Load whatever was persisted earlier in this session, if anything.
  const loadPriorChunks = async () => {
    try {
      const blob = await downloadToBuffer(storageKey);
      if (!blob) {
        return [];
      }
      const stored = JSON.parse(blob.toString('utf8'));
      return Array.isArray(stored.chunks) ? stored.chunks : [];
    } catch {
      // First write or unreadable prior blob — start fresh.
      return [];
    }
  };
  const priorChunks = await loadPriorChunks();

  // Only remove the blob when there was nothing usable in it to begin with.
  const discardBlobIfNothingPrior = async () => {
    if (priorChunks.length === 0) {
      await deleteFile(storageKey).catch(() => {});
    }
  };

  let vectors;
  try {
    const texts = chunks.map((chunk) => chunk.text);
    vectors = await generateEmbeddings(texts, { inputType: 'search_document' });
  } catch (err) {
    await discardBlobIfNothingPrior();
    throw new Error(`Local embedding failed: ${err.message}`);
  }

  if (vectors.length !== chunks.length) {
    await discardBlobIfNothingPrior();
    throw new Error(
      `Embedder returned ${vectors.length} vectors for ${chunks.length} chunks`
    );
  }

  const allChunks = priorChunks.concat(
    chunks.map((chunk, index) => ({
      text: chunk.text,
      embedding: vectors[index],
      metadata: chunk.metadata,
    }))
  );

  const blobBody = JSON.stringify({
    model: LOCAL_EMBEDDING_MODEL,
    chunkCount: allChunks.length,
    createdAt: new Date().toISOString(),
    chunks: allChunks,
  });
  await uploadFile(storageKey, Buffer.from(blobBody, 'utf8'));

  return { storageKey, chunkCount: allChunks.length, model: LOCAL_EMBEDDING_MODEL };
}
418
+
419
/**
 * Vector branch of process_documents: parse → chunk → embed locally →
 * persist into the session's embeddings blob, then record the results on the
 * builder session.
 *
 * Steps:
 *   1. Collect text for each document, preferring the row's cached
 *      `parsedText` and falling back to download + re-parse. Documents that
 *      fail to parse or yield no text are skipped.
 *   2. Chunk the parsed documents and hand them to embedAndPersistChunks.
 *   3. Build a `domainDigest` from per-document LLM summaries for use by
 *      later builder tools. If no LLM is available, or no summary call
 *      produces text, the digest falls back to a slice of the first chunks
 *      so the build can still progress.
 *   4. Store the `knowledge` and `embeddings` generated configs on the session.
 *
 * @param {Object[]} documents - Document rows (id, originalName, parsedText, storagePath)
 * @param {string[]} documentIds - IDs as requested by the caller; stored as-is
 * @param {Object} session - Builder session
 * @param {string} userId - Owner of the session
 * @returns {Promise<Object>} Summary of the run (counts, model, storageKey, message)
 * @throws {Error} When no document yields text, no chunks are produced, or
 *   embedding/persistence fails
 */
async function processDocumentsVector(documents, documentIds, session, userId) {
  const { downloadToBuffer } = await import('@/lib/storage/index.js');
  const { parseDocument } = await import('@/lib/document-parser.js');
  const { chunkDocuments } = await import('@/lib/embedder/chunker.js');
  const { LOCAL_EMBEDDING_MODEL } = await import('@/lib/embedder/local.js');

  // Parse all documents. Prefer the row's cached parsed_text; fall back to
  // download + re-parse for rows where it is missing or blank.
  const parsed = [];
  for (const doc of documents) {
    try {
      let text = doc.parsedText;
      if (!text || text.trim().length === 0) {
        const buffer = await downloadToBuffer(doc.storagePath);
        text = await parseDocument(buffer, doc.originalName);
      }
      if (text && text.trim().length > 0) {
        parsed.push({ id: doc.id, originalName: doc.originalName, text });
      }
    } catch (err) {
      console.error(`[Builder] Failed to parse ${doc.originalName}:`, err.message);
    }
  }
  if (parsed.length === 0) {
    throw new Error('Vector embedding: no documents parseable');
  }

  // Chunk.
  const chunks = chunkDocuments(parsed);
  if (chunks.length === 0) {
    throw new Error('Vector embedding: no chunks produced from documents');
  }

  console.log(
    `[Builder] Vector embedding ${chunks.length} chunks across ${parsed.length} docs locally (${LOCAL_EMBEDDING_MODEL})`
  );

  const { storageKey, chunkCount } = await embedAndPersistChunks(chunks, session);

  // Compose a domain digest for build-time tools. Summary tier: free-text
  // per-document summarization. The embeddings are already persisted at this
  // point, so a missing API key must not abort the run — it only means the
  // digest comes from the chunk-slice fallback below.
  let llmConfig = null;
  try {
    llmConfig = await getLLMConfigFromSession(session, userId, 'summary');
  } catch (err) {
    console.warn(
      '[Builder] No LLM available for document summaries; using chunk-slice digest:',
      err.message
    );
  }

  const summaryPrompt = `Analyze this document and provide a comprehensive summary that:

1. Identifies key terms, concepts, and topics covered
2. Highlights the main themes and subject areas
3. Lists important entities, processes, or procedures mentioned
4. Notes any technical specifications, data, or metrics

IMPORTANT: Generate the summary in the SAME LANGUAGE as the original document.

Synthesize the information into max 3 paragraphs, 200 words.

Keep the summary high-level, factual, and cohesive.`;

  // Only successful, non-blank summaries are collected. Failures are logged
  // and skipped rather than encoded as sentinel strings.
  const summarySections = [];
  if (llmConfig) {
    const { generateSummary } = await import('@/lib/llm-providers.js');
    const { provider, apiKey, model } = llmConfig;

    for (const doc of parsed) {
      try {
        const docSummary = await generateSummary(provider, doc.text, apiKey, summaryPrompt, model);
        if (typeof docSummary === 'string' && docSummary.trim().length > 0) {
          summarySections.push(`## ${doc.originalName}\n\n${docSummary}`);
        } else {
          console.error(`[Builder] Failed to summarize ${doc.originalName}:`, 'empty summary returned');
        }
      } catch (err) {
        console.error(`[Builder] Failed to summarize ${doc.originalName}:`, err.message);
      }
    }
  }

  const combinedSummary = summarySections.join('\n\n---\n\n');

  // Fallback surrogate: the text of the first 12 chunks, capped at 4000 chars.
  const domainDigest =
    combinedSummary ||
    chunks
      .slice(0, 12)
      .map((c) => c.text)
      .join('\n')
      .slice(0, 4000);

  await BuilderSessionRepository.updateGeneratedConfig(session.id, userId, 'knowledge', {
    domainDigest,
    documentIds,
    documentsProcessed: parsed.length,
    totalDocuments: documents.length,
    ragMode: 'vector',
  });

  await BuilderSessionRepository.updateGeneratedConfig(session.id, userId, 'embeddings', {
    storageKey,
    model: LOCAL_EMBEDDING_MODEL,
    chunkCount,
  });

  return {
    ragMode: 'vector',
    documentsProcessed: parsed.length,
    totalDocuments: documents.length,
    chunkCount,
    embeddingModel: LOCAL_EMBEDDING_MODEL,
    storageKey,
    message: `Embedded ${chunks.length} chunks from ${parsed.length} documents using ${LOCAL_EMBEDDING_MODEL} (total ${chunkCount} chunks in store).`,
  };
}
549
+
550
+ /**
551
+ * Tool handlers for inverted modular flow
552
+ */
553
+ const builderToolHandlers = {
554
+ /**
555
+ * Parse uploaded documents, embed them locally via the bundled
556
+ * multilingual-e5-small ONNX model, and stash the embedding blob on the
557
+ * session so save_modular_bot can copy it onto the deployment row. Also
558
+ * generates a build-time `domainDigest` on the session that's consumed by
559
+ * compose_identity and other downstream tools.
560
+ */
561
+ async process_documents(input, context) {
562
+ const { documentIds } = input;
563
+ const { session, userId } = context;
564
+
565
+ if (!documentIds || documentIds.length === 0) {
566
+ throw new Error('No document IDs provided');
567
+ }
568
+
569
+ const documents = await DocumentRepository.findByIds(documentIds);
570
+ if (documents.length === 0) {
571
+ throw new Error('No documents found with the provided IDs');
572
+ }
573
+
574
+ console.log(`[Builder] Processing ${documents.length} documents (vector mode)`);
575
+ return processDocumentsVector(documents, documentIds, session, userId);
576
+ },
577
+
578
+ /**
579
+ * Infer user intent from message and context
580
+ */
581
+ async infer_intent(input, context) {
582
+ const { userMessage, domainDigest } = input;
583
+ const { session, userId } = context;
584
+
585
+ // Intent classification based on keywords and context
586
+ // Note: 'faq' maps to knowledge_base (Q&A from documents), not support_bot
587
+ const intentPatterns = [
588
+ { intent: 'knowledge_base', keywords: ['faq', 'knowledge', 'documentation', 'docs', 'wiki', 'information', 'answer questions', 'q&a'], confidence: 0.9 },
589
+ { intent: 'support_bot', keywords: ['support', 'help desk', 'customer service', 'ticket', 'assist', 'troubleshoot'], confidence: 0.88 },
590
+ { intent: 'lead_gen', keywords: ['lead', 'capture', 'collect', 'form', 'contact', 'inquiry', 'sales'], confidence: 0.88 },
591
+ { intent: 'appointment_scheduler', keywords: ['appointment', 'booking', 'schedule', 'calendar', 'book', 'meeting'], confidence: 0.92 },
592
+ { intent: 'feedback_collector', keywords: ['feedback', 'survey', 'review', 'rating', 'opinion'], confidence: 0.87 },
593
+ { intent: 'onboarding_assistant', keywords: ['onboard', 'welcome', 'getting started', 'new user', 'tutorial'], confidence: 0.86 },
594
+ { intent: 'triage_router', keywords: ['triage', 'route', 'routing', 'redirect', 'transfer', 'dispatch', 'multi-bot', 'orchestrat'], confidence: 0.91 },
595
+ ];
596
+
597
+ const messageLower = userMessage.toLowerCase();
598
+ const summaryLower = (domainDigest || '').toLowerCase();
599
+ const combined = `${messageLower} ${summaryLower}`;
600
+
601
+ let bestMatch = { intent: 'support_bot', confidence: 0.7, reason: 'Default intent for general assistance' };
602
+
603
+ for (const pattern of intentPatterns) {
604
+ const matchCount = pattern.keywords.filter(kw => combined.includes(kw)).length;
605
+ if (matchCount > 0) {
606
+ const adjustedConfidence = Math.min(pattern.confidence + (matchCount * 0.02), 0.98);
607
+ if (adjustedConfidence > bestMatch.confidence) {
608
+ bestMatch = {
609
+ intent: pattern.intent,
610
+ confidence: adjustedConfidence,
611
+ reason: `Detected keywords: ${pattern.keywords.filter(kw => combined.includes(kw)).join(', ')}`,
612
+ };
613
+ }
614
+ }
615
+ }
616
+
617
+ // Extract prepopulated settings from user message
618
+ const prepopulatedSettings = extractPrepopulatedSettings(userMessage);
619
+
620
+ // Update session with inference and prepopulated settings
621
+ await BuilderSessionRepository.updateInference(session.id, userId, {
622
+ intent: bestMatch.intent,
623
+ confidence: bestMatch.confidence,
624
+ recommendedProtocols: {}, // Will be filled by recommend_protocols
625
+ });
626
+
627
+ // Store prepopulated settings in generatedConfigs for use by compose_identity
628
+ if (Object.keys(prepopulatedSettings).length > 0) {
629
+ await BuilderSessionRepository.updateGeneratedConfig(session.id, userId, 'prepopulated', prepopulatedSettings);
630
+ }
631
+
632
+ return {
633
+ intent: bestMatch.intent,
634
+ confidence: bestMatch.confidence,
635
+ reason: bestMatch.reason,
636
+ prepopulatedSettings: Object.keys(prepopulatedSettings).length > 0 ? prepopulatedSettings : undefined,
637
+ };
638
+ },
639
+
640
+ /**
641
+ * Recommend protocols based on inferred intent
642
+ */
643
+ async recommend_protocols(input, context) {
644
+ const { intent, domainDigest, userMessage } = input;
645
+ const { session, userId } = context;
646
+
647
+ const recommendations = {
648
+ knowledge: {
649
+ enabled: false,
650
+ reason: 'No document context detected',
651
+ presetConfig: null,
652
+ },
653
+ forms: {
654
+ enabled: false,
655
+ reason: 'No data collection intent detected',
656
+ presetConfig: null,
657
+ },
658
+ appointments: {
659
+ enabled: false,
660
+ reason: 'No booking/scheduling intent detected',
661
+ presetConfig: null,
662
+ },
663
+ triage: {
664
+ enabled: false,
665
+ reason: 'No routing intent detected',
666
+ presetConfig: null,
667
+ },
668
+ };
669
+
670
+ // Knowledge protocol
671
+ if (domainDigest || session.generatedConfigs?.knowledge?.domainDigest) {
672
+ recommendations.knowledge.enabled = true;
673
+ recommendations.knowledge.reason = 'Documents uploaded - knowledge base enabled';
674
+ recommendations.knowledge.presetConfig = {
675
+ domainDigest: domainDigest || session.generatedConfigs?.knowledge?.domainDigest,
676
+ documentIds: session.generatedConfigs?.knowledge?.documentIds || [],
677
+ };
678
+ }
679
+
680
+ // Forms protocol - only enable when user explicitly requests data collection
681
+ // Check for explicit collection keywords in the user's message
682
+ const messageLower = (userMessage || '').toLowerCase();
683
+ const explicitFormKeywords = [
684
+ 'collect', 'capture', 'gather', 'form', 'submit', 'input',
685
+ 'contact info', 'email address', 'phone number', 'sign up',
686
+ 'registration', 'lead', 'inquiry', 'feedback', 'survey'
687
+ ];
688
+ const hasExplicitFormRequest = explicitFormKeywords.some(kw => messageLower.includes(kw));
689
+
690
+ if (hasExplicitFormRequest) {
691
+ recommendations.forms.enabled = true;
692
+ recommendations.forms.reason = 'User requested data collection';
693
+ } else if (intent === 'lead_gen' || intent === 'feedback_collector') {
694
+ // These intents inherently require forms
695
+ recommendations.forms.enabled = true;
696
+ recommendations.forms.reason = `${intent} requires data collection`;
697
+ }
698
+
699
+ // Appointments protocol
700
+ if (intent === 'appointment_scheduler') {
701
+ recommendations.appointments.enabled = true;
702
+ recommendations.appointments.reason = 'Scheduling intent detected';
703
+ // Disable forms for pure appointment bots
704
+ recommendations.forms.enabled = false;
705
+ recommendations.forms.reason = 'Not needed for appointment-only flow';
706
+ }
707
+
708
+ // Triage protocol - detect routing/orchestration intent
709
+ const triageKeywords = [
710
+ 'triage', 'route', 'routing', 'redirect', 'transfer',
711
+ 'multi-bot', 'orchestrat', 'dispatch', 'forward',
712
+ 'different team', 'right department', 'connect to'
713
+ ];
714
+ const hasTriageRequest = triageKeywords.some(kw => messageLower.includes(kw));
715
+
716
+ // Also check if triage routes are already configured in the session
717
+ const hasExistingTriageRoutes = session.generatedConfigs?.triage?.routes?.length > 0;
718
+
719
+ if (hasTriageRequest || hasExistingTriageRoutes) {
720
+ recommendations.triage.enabled = true;
721
+ recommendations.triage.reason = hasExistingTriageRoutes
722
+ ? 'Triage routes configured'
723
+ : 'Routing intent detected';
724
+ if (hasExistingTriageRoutes) {
725
+ recommendations.triage.presetConfig = {
726
+ routes: session.generatedConfigs.triage.routes,
727
+ };
728
+ }
729
+ }
730
+
731
+ // Model-level gate: small Ollama models (qwen3, mistral-nemo) are only
732
+ // reliable at single-turn knowledge Q&A. Force-disable the stateful
733
+ // protocols regardless of what the heuristics above suggested. The
734
+ // wizard's enabledProtocols uses `formGathering`; this tool's shape uses
735
+ // `forms` — map across the boundary. The bot being built inherits the
736
+ // builder's default provider/model, so gating on preloadedContext is
737
+ // correct here.
738
+ const { defaultProvider, defaultModel } = session.preloadedContext || {};
739
+ const allowedForModel = getAllowedProtocolsForModel(defaultProvider, defaultModel);
740
+ if (allowedForModel) {
741
+ const toolKeyToWizardKey = {
742
+ knowledge: 'knowledge',
743
+ forms: 'formGathering',
744
+ appointments: 'appointments',
745
+ triage: 'triage',
746
+ };
747
+ const gateReason = `${defaultModel} only supports the knowledge protocol — switch to llama3.3 for multi-step flows.`;
748
+ for (const [toolKey, wizardKey] of Object.entries(toolKeyToWizardKey)) {
749
+ if (!allowedForModel.has(wizardKey) && recommendations[toolKey]) {
750
+ recommendations[toolKey].enabled = false;
751
+ recommendations[toolKey].reason = gateReason;
752
+ recommendations[toolKey].presetConfig = null;
753
+ }
754
+ }
755
+ }
756
+
757
+ // Update session with recommendations
758
+ await BuilderSessionRepository.updateInference(session.id, userId, {
759
+ intent,
760
+ confidence: session.intentConfidence || 0.85,
761
+ recommendedProtocols: recommendations,
762
+ });
763
+
764
+ return {
765
+ protocols: recommendations,
766
+ summary: Object.entries(recommendations)
767
+ .filter(([_, v]) => v.enabled)
768
+ .map(([k, _]) => k)
769
+ .join(', ') || 'None recommended',
770
+ };
771
+ },
772
+
773
+ /**
774
+ * Generate form schema based on context
775
+ */
776
+ async generate_form_schema(input, context) {
777
+ const { description, formType = 'custom', locale = 'en', afterSubmitChatMessage } = input;
778
+ const { session, userId } = context;
779
+
780
+ // Get LLM config from session (supports Anthropic, Bedrock, etc.)
781
+ // Structured tier: model returns a JSON schema parsed downstream.
782
+ const llmConfig = await getLLMConfigFromSession(session, userId, 'structured');
783
+ const { provider, apiKey, model } = llmConfig;
784
+
785
+ const { generateSummary } = await import('@/lib/llm-providers.js');
786
+ const { buildFormSchemaPrompt, isLocaleSupported, DEFAULT_LOCALE } = await import('@/lib/form-schema-config/index.js');
787
+
788
+ const resolvedLocale = isLocaleSupported(locale) ? locale : DEFAULT_LOCALE;
789
+
790
+ const basePrompt = `You are a form structure generator. Convert the description into a JSON form schema.
791
+
792
+ OUTPUT FORMAT: Return ONLY valid JSON matching this structure:
793
+ {
794
+ "sections": [
795
+ {
796
+ "id": "section-id",
797
+ "label": "Section Label",
798
+ "fields": [
799
+ {
800
+ "id": "fieldId",
801
+ "label": "Field Label",
802
+ "type": "text|email|tel|number|select|date|textarea|checkbox|radio",
803
+ "required": true|false,
804
+ "placeholder": "optional placeholder",
805
+ "options": ["for select/radio types"]
806
+ }
807
+ ]
808
+ }
809
+ ],
810
+ "afterSubmitMessage": "A contextual thank you message shown after form submission"
811
+ }
812
+
813
+ Keep forms concise - 4-8 fields maximum. Group related fields into sections.
814
+ The afterSubmitMessage should be friendly, contextual to the form purpose, and in the appropriate language for the locale.`;
815
+
816
+ const localePrompt = buildFormSchemaPrompt(resolvedLocale);
817
+ const fullPrompt = `${basePrompt}\n\n${localePrompt}`;
818
+
819
+ const response = await generateSummary(provider, description, apiKey, fullPrompt, model);
820
+
821
+ // Parse JSON response
822
+ let formSchema;
823
+ try {
824
+ const jsonMatch = response.match(/```(?:json)?\s*(\{[\s\S]*\})\s*```/);
825
+ const jsonString = jsonMatch ? jsonMatch[1] : response;
826
+ formSchema = JSON.parse(jsonString.trim());
827
+ } catch (parseError) {
828
+ console.error('[Builder] Failed to parse form schema:', parseError);
829
+ throw new Error('Failed to generate form structure');
830
+ }
831
+
832
+ // Validate structure
833
+ if (!Array.isArray(formSchema.sections) || formSchema.sections.length === 0) {
834
+ throw new Error('Generated form structure is invalid');
835
+ }
836
+
837
+ const fieldCount = formSchema.sections.reduce((acc, s) => acc + (s.fields?.length || 0), 0);
838
+
839
+ // Use provided afterSubmitChatMessage, or generated one from schema, or default
840
+ const resolvedAfterSubmitMessage = afterSubmitChatMessage
841
+ || formSchema.afterSubmitMessage
842
+ || 'Thank you for your submission! How can I help you further?';
843
+
844
+ // Remove afterSubmitMessage from schema (it's stored separately)
845
+ delete formSchema.afterSubmitMessage;
846
+
847
+ // Store in session - formSendHome defaults to true (send submissions to control plane)
848
+ // Save the original description as formStructureInput so it persists for edit mode
849
+ await BuilderSessionRepository.updateGeneratedConfig(session.id, userId, 'forms', {
850
+ formSchema,
851
+ formStructureInput: description,
852
+ fieldCount,
853
+ sectionCount: formSchema.sections.length,
854
+ formSendHome: true,
855
+ afterSubmitChatMessage: resolvedAfterSubmitMessage,
856
+ });
857
+
858
+ return {
859
+ formSchema,
860
+ fieldCount,
861
+ sectionCount: formSchema.sections.length,
862
+ formSendHome: true,
863
+ afterSubmitChatMessage: resolvedAfterSubmitMessage,
864
+ message: `Created form with ${fieldCount} fields`,
865
+ };
866
+ },
867
+
868
+ /**
869
+ * Generate appointment configuration
870
+ */
871
+ async generate_appointment_config(input, context) {
872
+ let { domainDigest, businessType, calendarProviders = [] } = input;
873
+ const { session, userId } = context;
874
+ // Same pattern as compose_identity / recommend_protocols: schema field is
875
+ // documentation, session is the source of truth.
876
+ domainDigest = domainDigest || session.generatedConfigs?.knowledge?.domainDigest;
877
+
878
+ // Generate basic appointment config structure
879
+ const config = {
880
+ destinations: [],
881
+ defaultDuration: 30,
882
+ bufferTime: 15,
883
+ maxAdvanceBooking: 30, // days
884
+ };
885
+
886
+ // If domainDigest contains service info, try to extract appointment types
887
+ if (domainDigest) {
888
+ // Basic extraction - could be enhanced with LLM
889
+ const serviceKeywords = ['consultation', 'meeting', 'session', 'appointment', 'call'];
890
+ for (const keyword of serviceKeywords) {
891
+ if (domainDigest.toLowerCase().includes(keyword)) {
892
+ config.destinations.push({
893
+ id: `${keyword}-${Date.now()}`,
894
+ provider: calendarProviders[0] || 'cal.com',
895
+ description: `${businessType || 'General'} ${keyword}`,
896
+ duration: 30,
897
+ });
898
+ break; // Just add one default for now
899
+ }
900
+ }
901
+ }
902
+
903
+ // Store in session
904
+ await BuilderSessionRepository.updateGeneratedConfig(session.id, userId, 'appointments', config);
905
+
906
+ return {
907
+ config,
908
+ message: config.destinations.length > 0
909
+ ? `Generated ${config.destinations.length} appointment type(s)`
910
+ : 'Appointment configuration ready - destinations need to be configured',
911
+ };
912
+ },
913
+
914
+ /**
915
+ * Generate triage routing configuration
916
+ */
917
+ async generate_triage_config(input, context) {
918
+ const { routes } = input;
919
+ const { session, userId } = context;
920
+
921
+ if (!routes || routes.length === 0) {
922
+ throw new Error('No triage routes provided');
923
+ }
924
+
925
+ // Slugify helper for generating deployment IDs from names
926
+ const slugify = (name) =>
927
+ name
928
+ .toLowerCase()
929
+ .trim()
930
+ .replace(/[^a-z0-9]+/g, '-')
931
+ .replace(/^-|-$/g, '');
932
+
933
+ // Process and validate routes
934
+ const processedRoutes = routes.map((route) => {
935
+ if (!route.name || !route.description || !route.url) {
936
+ throw new Error(`Invalid route: name, description, and url are required. Got: ${JSON.stringify(route)}`);
937
+ }
938
+
939
+ return {
940
+ deploymentId: route.deploymentId || slugify(route.name),
941
+ name: route.name.trim(),
942
+ description: route.description.trim(),
943
+ url: route.url.trim(),
944
+ };
945
+ });
946
+
947
+ // Check for duplicate deploymentIds
948
+ const deploymentIds = processedRoutes.map((r) => r.deploymentId);
949
+ const duplicates = deploymentIds.filter((id, index) => deploymentIds.indexOf(id) !== index);
950
+ if (duplicates.length > 0) {
951
+ throw new Error(`Duplicate deployment IDs detected: ${[...new Set(duplicates)].join(', ')}`);
952
+ }
953
+
954
+ console.log(`[Builder] Generated triage config with ${processedRoutes.length} routes`);
955
+
956
+ // Store in session's generated configs
957
+ await BuilderSessionRepository.updateGeneratedConfig(session.id, userId, 'triage', {
958
+ routes: processedRoutes,
959
+ routeCount: processedRoutes.length,
960
+ });
961
+
962
+ // Embed each route's description into the same cosine index that knowledge
963
+ // chunks use. The retrieval signal here is intent-match: when a user
964
+ // describes what they want, the LLM gets the matching route description
965
+ // pulled into context, reinforcing the JSON-list lookup it would do
966
+ // anyway. The deploymentId itself is authoritative on the JSON list — the
967
+ // embedding is contextual reinforcement, not the source of routing IDs.
968
+ const { chunkTriageRoutes } = await import('@/lib/embedder/chunker.js');
969
+ const { LOCAL_EMBEDDING_MODEL } = await import('@/lib/embedder/local.js');
970
+ const routeChunks = chunkTriageRoutes(processedRoutes);
971
+ if (routeChunks.length > 0) {
972
+ const { storageKey, chunkCount } = await embedAndPersistChunks(routeChunks, session);
973
+ await BuilderSessionRepository.updateGeneratedConfig(
974
+ session.id,
975
+ userId,
976
+ 'embeddings',
977
+ {
978
+ storageKey,
979
+ model: LOCAL_EMBEDDING_MODEL,
980
+ chunkCount,
981
+ }
982
+ );
983
+ }
984
+
985
+ return {
986
+ routes: processedRoutes,
987
+ routeCount: processedRoutes.length,
988
+ message: `Configured ${processedRoutes.length} triage route(s): ${processedRoutes.map((r) => r.name).join(', ')}`,
989
+ };
990
+ },
991
+
992
+ /**
993
+ * Generate Optical Read extraction fields
994
+ *
995
+ * Slugifies idName from label when missing, dedupes by idName, and persists
996
+ * the field list onto the session under generatedConfigs.opticalRead.
997
+ * Mirrors generate_triage_config in shape; the protocol's directional
998
+ * principle (templated-artifact prior, hint as load-bearing primitive) is
999
+ * encoded in the cartridge — this executor just normalizes the field list.
1000
+ */
1001
+ async generate_optical_read_config(input, context) {
1002
+ const { fields } = input;
1003
+ const { session, userId } = context;
1004
+
1005
+ if (!fields || fields.length === 0) {
1006
+ throw new Error('No optical read fields provided');
1007
+ }
1008
+
1009
+ const slugify = (label) =>
1010
+ (label || '')
1011
+ .toLowerCase()
1012
+ .trim()
1013
+ .replace(/[^a-z0-9]+/g, '_')
1014
+ .replace(/^_+|_+$/g, '');
1015
+
1016
+ const ID_NAME_PATTERN = /^[a-z][a-z0-9_]*$/;
1017
+
1018
+ const seen = new Set();
1019
+ const processedFields = [];
1020
+ for (const field of fields) {
1021
+ if (!field.label || !field.label.trim()) {
1022
+ throw new Error(`Invalid optical read field: label is required. Got: ${JSON.stringify(field)}`);
1023
+ }
1024
+ const idName = (field.idName && field.idName.trim()) || slugify(field.label);
1025
+ if (!ID_NAME_PATTERN.test(idName)) {
1026
+ throw new Error(`Invalid idName "${idName}": must be snake_case (lowercase letters, digits, underscores)`);
1027
+ }
1028
+ if (seen.has(idName)) {
1029
+ // Dedupe rather than throw — the chat builder may produce overlapping
1030
+ // labels ("Name", "Full Name") that slug to the same key.
1031
+ continue;
1032
+ }
1033
+ seen.add(idName);
1034
+ processedFields.push({
1035
+ label: field.label.trim(),
1036
+ idName,
1037
+ hint: field.hint ? field.hint.trim() : '',
1038
+ });
1039
+ }
1040
+
1041
+ if (processedFields.length === 0) {
1042
+ throw new Error('No valid optical read fields after normalization');
1043
+ }
1044
+
1045
+ console.log(`[Builder] Generated optical read config with ${processedFields.length} fields`);
1046
+
1047
+ await BuilderSessionRepository.updateGeneratedConfig(session.id, userId, 'opticalRead', {
1048
+ fields: processedFields,
1049
+ });
1050
+
1051
+ return {
1052
+ fields: processedFields,
1053
+ fieldCount: processedFields.length,
1054
+ message: `Configured ${processedFields.length} extraction field(s): ${processedFields.map((f) => f.idName).join(', ')}`,
1055
+ };
1056
+ },
1057
+
1058
+ /**
1059
+ * Compose bot identity from context
1060
+ */
1061
+ async compose_identity(input, context) {
1062
+ const { intent, domainDigest, organizationName, enabledProtocols, userMessage } = input;
1063
+ const { session, userId } = context;
1064
+
1065
+ // Check for prepopulated settings from infer_intent
1066
+ const prepopulated = session.generatedConfigs?.prepopulated || {};
1067
+
1068
+ // Use prepopulated resource name if available, otherwise use organizationName
1069
+ const effectiveOrgName = prepopulated.resourceName || organizationName;
1070
+
1071
+ // Generate bot name - use prepopulated if available
1072
+ let botName;
1073
+ if (prepopulated.botName) {
1074
+ // Use the prepopulated bot name directly (already sanitized)
1075
+ botName = prepopulated.botName;
1076
+ } else {
1077
+ // Generate bot name from organization and intent
1078
+ const sanitizedOrg = (effectiveOrgName || 'my')
1079
+ .toLowerCase()
1080
+ .replace(/[^a-z0-9]/g, '-')
1081
+ .replace(/-+/g, '-')
1082
+ .substring(0, 20);
1083
+
1084
+ const intentSuffix = {
1085
+ support_bot: 'support',
1086
+ lead_gen: 'leads',
1087
+ appointment_scheduler: 'booking',
1088
+ knowledge_base: 'help',
1089
+ feedback_collector: 'feedback',
1090
+ onboarding_assistant: 'onboard',
1091
+ triage_router: 'triage',
1092
+ }[intent] || 'bot';
1093
+
1094
+ botName = `${sanitizedOrg}-${intentSuffix}`;
1095
+ }
1096
+
1097
+ // Generate objective based on intent - use prepopulated if available
1098
+ const objectives = {
1099
+ support_bot: `Help users with questions and support requests${effectiveOrgName ? ` for ${effectiveOrgName}` : ''}`,
1100
+ lead_gen: `Collect contact information and qualify leads${effectiveOrgName ? ` for ${effectiveOrgName}` : ''}`,
1101
+ appointment_scheduler: `Help users book appointments${effectiveOrgName ? ` with ${effectiveOrgName}` : ''}`,
1102
+ knowledge_base: `Answer questions using the knowledge base${effectiveOrgName ? ` for ${effectiveOrgName}` : ''}`,
1103
+ feedback_collector: `Collect user feedback and suggestions${effectiveOrgName ? ` for ${effectiveOrgName}` : ''}`,
1104
+ onboarding_assistant: `Guide new users through getting started${effectiveOrgName ? ` with ${effectiveOrgName}` : ''}`,
1105
+ triage_router: `Route users to the right team or specialist${effectiveOrgName ? ` at ${effectiveOrgName}` : ''}`,
1106
+ };
1107
+
1108
+ // Generate first message - use prepopulated if available
1109
+ const firstMessages = {
1110
+ support_bot: `Hi! I'm here to help with any questions you might have${effectiveOrgName ? ` about ${effectiveOrgName}` : ''}. How can I assist you today?`,
1111
+ lead_gen: `Hello! I'd love to learn more about what you're looking for. How can I help you get started?`,
1112
+ appointment_scheduler: `Hi! I can help you schedule an appointment. What type of appointment are you looking to book?`,
1113
+ knowledge_base: `Hello! I have access to our knowledge base and can answer your questions. What would you like to know?`,
1114
+ feedback_collector: `Hi there! I'd love to hear your thoughts and feedback. What's on your mind?`,
1115
+ onboarding_assistant: `Welcome! I'm here to help you get started. Would you like a quick tour of the features?`,
1116
+ triage_router: `Hi! I can help connect you with the right team. What can I help you with today?`,
1117
+ };
1118
+
1119
+ // Generate contextual identity (firstMessage + objective) if domainDigest available
1120
+ // Note: suggestedPrompts are now set separately via set_suggested_prompts tool
1121
+ // to ensure proper localization in the same language as documents
1122
+ let contextualFirstMessage = null;
1123
+ let contextualObjective = null;
1124
+
1125
+ // For triage routers, build an effective digest from route descriptions (botSummaries).
1126
+ // Fall back to the session-stored digest (written by process_documents) when
1127
+ // the LLM doesn't pass it — the schema field is documentation, the session
1128
+ // is the source of truth.
1129
+ let effectiveDigest = domainDigest || session.generatedConfigs?.knowledge?.domainDigest;
1130
+ if (intent === 'triage_router' && (!domainDigest || domainDigest.trim().length === 0)) {
1131
+ const triageRoutes = session.generatedConfigs?.triage?.routes;
1132
+ if (triageRoutes && triageRoutes.length > 0) {
1133
+ // Build digest from route descriptions (which are target bots' botSummary)
1134
+ effectiveDigest = triageRoutes
1135
+ .map((route) => `${route.name}: ${route.description}`)
1136
+ .join('\n');
1137
+ console.log('[Builder] Using triage routes as effective domain digest for identity');
1138
+ }
1139
+ }
1140
+
1141
+ if (effectiveDigest && effectiveDigest.trim().length > 0) {
1142
+ // Try to generate contextual identity (firstMessage + objective) using LLM
1143
+ const effectiveUserMessage = userMessage || session.userMessage || '';
1144
+ try {
1145
+ const contextualIdentity = await generateContextualIdentity(
1146
+ effectiveDigest,
1147
+ effectiveUserMessage,
1148
+ intent,
1149
+ effectiveOrgName,
1150
+ session,
1151
+ userId
1152
+ );
1153
+ if (contextualIdentity) {
1154
+ contextualFirstMessage = contextualIdentity.firstMessage;
1155
+ contextualObjective = contextualIdentity.objective;
1156
+ console.log('[Builder] Using contextual identity from LLM');
1157
+ }
1158
+ } catch (err) {
1159
+ console.warn('[Builder] Failed to generate contextual identity:', err.message);
1160
+ }
1161
+ }
1162
+
1163
+ // Use static prompts as placeholder - Claude will set localized prompts via set_suggested_prompts
1164
+ const suggestedPrompts = getStaticPromptsForIntent(intent);
1165
+
1166
+ // Build identity with priority: prepopulated > contextual LLM > static templates
1167
+ const identity = {
1168
+ botName,
1169
+ displayName: prepopulated.displayName || (effectiveOrgName ? `${effectiveOrgName} Assistant` : 'Assistant'),
1170
+ objective: prepopulated.objective || contextualObjective || objectives[intent] || objectives.support_bot,
1171
+ firstMessage: prepopulated.firstMessage || contextualFirstMessage || firstMessages[intent] || firstMessages.support_bot,
1172
+ suggestedPrompts,
1173
+ };
1174
+
1175
+ // Store in session
1176
+ await BuilderSessionRepository.updateGeneratedConfig(session.id, userId, 'identity', identity);
1177
+
1178
+ // Also update core config with defaults from preloaded context
1179
+ const coreConfig = {
1180
+ provider: session.preloadedContext?.defaultProvider || 'anthropic',
1181
+ model: session.preloadedContext?.defaultModel || 'claude-sonnet-4-20250514',
1182
+ apiKeyId: session.preloadedContext?.defaultApiKeyId,
1183
+ botName: identity.botName,
1184
+ };
1185
+
1186
+ await BuilderSessionRepository.updateGeneratedConfig(session.id, userId, 'core', coreConfig);
1187
+
1188
+ // Build message indicating what was used
1189
+ const usedPrepopulated = [];
1190
+ if (prepopulated.botName) usedPrepopulated.push('bot name');
1191
+ if (prepopulated.displayName) usedPrepopulated.push('display name');
1192
+ if (prepopulated.resourceName) usedPrepopulated.push('organization');
1193
+ if (prepopulated.objective) usedPrepopulated.push('objective');
1194
+ if (prepopulated.firstMessage) usedPrepopulated.push('greeting');
1195
+
1196
+ const prepopulatedNote = usedPrepopulated.length > 0
1197
+ ? ` (using user-specified: ${usedPrepopulated.join(', ')})`
1198
+ : '';
1199
+
1200
+ return {
1201
+ identity,
1202
+ prepopulatedSettings: Object.keys(prepopulated).length > 0 ? prepopulated : undefined,
1203
+ message: `Composed identity: ${identity.botName}${prepopulatedNote}`,
1204
+ };
1205
+ },
1206
+
1207
+ /**
1208
+ * Set suggested prompts for the bot (allows Claude to provide localized prompts)
1209
+ */
1210
+ async set_suggested_prompts(input, context) {
1211
+ const { prompts } = input;
1212
+ const { session, userId } = context;
1213
+
1214
+ if (!prompts || !Array.isArray(prompts) || prompts.length === 0) {
1215
+ throw new Error('At least one prompt is required');
1216
+ }
1217
+
1218
+ // Clean and validate prompts
1219
+ const cleanedPrompts = prompts
1220
+ .filter((p) => typeof p === 'string' && p.trim().length > 0)
1221
+ .map((p) => p.trim())
1222
+ .slice(0, 5);
1223
+
1224
+ if (cleanedPrompts.length === 0) {
1225
+ throw new Error('No valid prompts provided');
1226
+ }
1227
+
1228
+ // Get current identity from session
1229
+ const currentIdentity = session.generatedConfigs?.identity || {};
1230
+
1231
+ // Update identity with new prompts
1232
+ const updatedIdentity = {
1233
+ ...currentIdentity,
1234
+ suggestedPrompts: cleanedPrompts,
1235
+ };
1236
+
1237
+ // Store in session
1238
+ await BuilderSessionRepository.updateGeneratedConfig(session.id, userId, 'identity', updatedIdentity);
1239
+
1240
+ console.log('[Builder] Set suggested prompts:', cleanedPrompts);
1241
+
1242
+ return {
1243
+ identity: updatedIdentity,
1244
+ promptCount: cleanedPrompts.length,
1245
+ message: `Set ${cleanedPrompts.length} suggested prompts`,
1246
+ };
1247
+ },
1248
+
1249
+ /**
1250
+ * Generate a structured prose summary of the bot for multi-bot orchestration
1251
+ */
1252
+ async generate_bot_summary(input, context) {
1253
+ const { session, userId } = context;
1254
+
1255
+ // Get LLM config from session (supports Anthropic, Bedrock, etc.)
1256
+ // Summary tier: prose generation for multi-bot orchestration metadata.
1257
+ const llmConfig = await getLLMConfigFromSession(session, userId, 'summary');
1258
+ const { provider, apiKey, model } = llmConfig;
1259
+
1260
+ const { generateSummary } = await import('@/lib/llm-providers.js');
1261
+
1262
+ // Gather context from session
1263
+ const { enabledProtocols, identityConfig, protocolData, generatedConfigs } = session;
1264
+ const identity = generatedConfigs?.identity || identityConfig || {};
1265
+
1266
+ // Build compact context for the LLM
1267
+ const contextParts = [];
1268
+
1269
+ // Bot identity
1270
+ contextParts.push(`Bot Name: ${identity.botName || 'Unnamed Bot'}`);
1271
+ contextParts.push(`Purpose: ${identity.objective || 'General assistant'}`);
1272
+
1273
+ // Knowledge context
1274
+ if (enabledProtocols.knowledge) {
1275
+ const domainDigest = protocolData?.knowledge?.domainDigest || generatedConfigs?.knowledge?.domainDigest;
1276
+ if (domainDigest) {
1277
+ // Extract first 500 chars of the digest for context
1278
+ contextParts.push(`Knowledge Base Topics: ${domainDigest.substring(0, 500)}`);
1279
+ }
1280
+ const docCount = protocolData?.knowledge?.documents?.length || generatedConfigs?.knowledge?.documentsProcessed || 0;
1281
+ if (docCount > 0) {
1282
+ contextParts.push(`Documents: ${docCount} document(s) processed`);
1283
+ }
1284
+ }
1285
+
1286
+ // Form collection context
1287
+ if (enabledProtocols.formGathering) {
1288
+ const formSchema = protocolData?.formGathering?.generatedFormJson || generatedConfigs?.forms?.formSchema;
1289
+ if (formSchema?.sections) {
1290
+ const fieldNames = formSchema.sections
1291
+ .flatMap(s => s.fields || [])
1292
+ .map(f => f.label)
1293
+ .slice(0, 8);
1294
+ contextParts.push(`Collects Information: ${fieldNames.join(', ')}`);
1295
+ }
1296
+ }
1297
+
1298
+ // Appointments context
1299
+ if (enabledProtocols.appointments) {
1300
+ const destinations = protocolData?.appointments?.destinations || generatedConfigs?.appointments?.destinations || [];
1301
+ if (destinations.length > 0) {
1302
+ const destNames = destinations.map(d => d.name || d.description).slice(0, 5);
1303
+ contextParts.push(`Appointment Types: ${destNames.join(', ')}`);
1304
+ }
1305
+ }
1306
+
1307
+ // Triage context
1308
+ if (enabledProtocols.triage) {
1309
+ const routes = protocolData?.triage?.routes || [];
1310
+ if (routes.length > 0) {
1311
+ const routeNames = routes.map(r => r.botName || r.name).slice(0, 5);
1312
+ contextParts.push(`Routes To: ${routeNames.join(', ')}`);
1313
+ }
1314
+ }
1315
+
1316
+ const contextString = contextParts.join('\n');
1317
+
1318
+ // System prompt for structured prose generation
1319
+ const systemPrompt = `You are generating a bot summary for a multi-bot orchestration system. Other bots will read this summary to understand what this bot does and when to route conversations to it.
1320
+
1321
+ Write a clear, concise description in 2-3 sentences that covers:
1322
+ 1. What the bot is and its primary purpose
1323
+ 2. What knowledge or information it has access to (if any)
1324
+ 3. What actions it can perform (collect info, book appointments, route to specialists)
1325
+
1326
+ Style guidelines:
1327
+ - Use third person ("This bot..." or "The [Name] assistant...")
1328
+ - Be specific about capabilities, not generic
1329
+ - Keep it under 150 words
1330
+ - No bullet points - flowing prose only
1331
+ - No markdown formatting
1332
+
1333
+ Return ONLY the summary text, nothing else.`;
1334
+
1335
+ const userPrompt = `Generate a bot summary based on this configuration:\n\n${contextString}`;
1336
+
1337
+ try {
1338
+ const botSummary = await generateSummary(
1339
+ provider,
1340
+ userPrompt,
1341
+ apiKey,
1342
+ systemPrompt,
1343
+ model
1344
+ );
1345
+
1346
+ const cleanedSummary = botSummary.trim();
1347
+
1348
+ // Store botSummary at top level of generatedConfigs (beside objective, paradigm)
1349
+ await BuilderSessionRepository.updateGeneratedConfig(session.id, userId, 'botSummary', cleanedSummary);
1350
+
1351
+ console.log('[Builder] Generated bot summary:', cleanedSummary.substring(0, 100) + '...');
1352
+
1353
+ return {
1354
+ botSummary: cleanedSummary,
1355
+ message: 'Bot summary generated successfully',
1356
+ };
1357
+ } catch (error) {
1358
+ console.error('[Builder] Failed to generate bot summary:', error.message);
1359
+ throw new Error(`Failed to generate bot summary: ${error.message}`);
1360
+ }
1361
+ },
1362
+
1363
+ /**
1364
+ * Save the bot's composed configuration to a deployment row, then build
1365
+ * the artifact so the user lands on the dashboard with a ready ZIP.
1366
+ *
1367
+ * Build failures don't fail the tool — the row stays `saved` and the
1368
+ * dashboard's Build button picks up where this left off.
1369
+ */
1370
+ async save_modular_bot(input, context) {
1371
+ const { sessionId, confirmedProtocols } = input;
1372
+ const { session, userId } = context;
1373
+
1374
+ if (session.id !== sessionId) {
1375
+ throw new Error('Session ID mismatch');
1376
+ }
1377
+
1378
+ const editingDeployment = session.generatedConfigs?._editingDeployment;
1379
+ const isUpdate = !!editingDeployment?.id;
1380
+
1381
+ await BuilderSessionRepository.updateStatus(sessionId, userId, SESSION_STATUS.DEPLOYING);
1382
+ await BuilderSessionRepository.confirmProtocols(sessionId, userId, confirmedProtocols);
1383
+ await BuilderSessionRepository.syncGeneratedConfigsToLegacy(sessionId, userId);
1384
+
1385
+ const updatedSession = await BuilderSessionRepository.findById(sessionId);
1386
+
1387
+ const result = await saveBuilderConfig(sessionId, userId, {
1388
+ botSpaceId: updatedSession.botSpaceId,
1389
+ redeploymentId: isUpdate ? editingDeployment.id : null,
1390
+ });
1391
+
1392
+ if (result.success) {
1393
+ await BuilderSessionRepository.updateStatus(sessionId, userId, SESSION_STATUS.DEPLOYED);
1394
+ await BuilderSessionRepository.linkDeployment(sessionId, userId, result.deploymentId);
1395
+
1396
+ let buildStatus = result.status;
1397
+ let buildError = null;
1398
+ try {
1399
+ const { deployment } = await buildArtifact(result.deploymentId);
1400
+ buildStatus = deployment.status;
1401
+ } catch (err) {
1402
+ console.error('[save_modular_bot] build after save failed:', err);
1403
+ buildError = err.message || 'Build failed';
1404
+ }
1405
+
1406
+ return { ...result, isUpdate, status: buildStatus, buildError };
1407
+ }
1408
+
1409
+ await BuilderSessionRepository.updateStatus(sessionId, userId, SESSION_STATUS.AWAITING_CONFIRM);
1410
+ return { ...result, isUpdate };
1411
+ },
1412
+ };
1413
+
1414
+ // Back-compat shim: chat sessions persisted before the rename still reference
1415
+ // the old tool name. Map it to the new handler so replays don't break.
1416
+ builderToolHandlers.deploy_modular_bot = builderToolHandlers.save_modular_bot;
1417
+
1418
+ export { builderToolHandlers };