mojulo 0.0.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. package/README.md +54 -4
  2. package/lib/audit-logger-new.js +11 -0
  3. package/lib/auth/gate.js +25 -0
  4. package/lib/auth/service.js +17 -0
  5. package/lib/auth/session.js +63 -0
  6. package/lib/builder/chat-processor.js +607 -0
  7. package/lib/builder/composer-bridge.js +82 -0
  8. package/lib/builder/evaluator.js +159 -0
  9. package/lib/builder/executor.js +252 -0
  10. package/lib/builder/index.js +48 -0
  11. package/lib/builder/session.js +248 -0
  12. package/lib/builder/system-prompt.js +422 -0
  13. package/lib/builder/tone-presets.js +75 -0
  14. package/lib/builder/tool-executors.js +1527 -0
  15. package/lib/builder/tools.js +338 -0
  16. package/lib/builder/validators.js +239 -0
  17. package/lib/composer/composer.js +225 -0
  18. package/lib/composer/index.js +40 -0
  19. package/lib/composer/protocols/00_base.txt +19 -0
  20. package/lib/composer/protocols/01_knowledge.txt +9 -0
  21. package/lib/composer/protocols/02_form-gathering.txt +32 -0
  22. package/lib/composer/protocols/03_appointments.txt +16 -0
  23. package/lib/composer/protocols/04_triage.txt +15 -0
  24. package/lib/composer/protocols/05_optical-read.txt +22 -0
  25. package/lib/composer/response-builder.js +98 -0
  26. package/lib/config-builder.js +650 -0
  27. package/lib/db/ids.js +10 -0
  28. package/lib/db/index.js +179 -0
  29. package/lib/db/repositories/apiKeys.js +72 -0
  30. package/lib/db/repositories/auditLogs.js +12 -0
  31. package/lib/db/repositories/botSpaces.js +12 -0
  32. package/lib/db/repositories/builderSessions.js +312 -0
  33. package/lib/db/repositories/deploymentEvents.js +12 -0
  34. package/lib/db/repositories/deployments.js +385 -0
  35. package/lib/db/repositories/documents.js +68 -0
  36. package/lib/db/repositories/mcpJobs.js +84 -0
  37. package/lib/deployers/bot-fleet.js +110 -0
  38. package/lib/deployers/bot-proxy.js +72 -0
  39. package/lib/deployers/build.js +89 -0
  40. package/lib/deployers/cloud-deploy.js +310 -0
  41. package/lib/deployers/docker.js +439 -0
  42. package/lib/deployers/fly.js +432 -0
  43. package/lib/deployers/index.js +38 -0
  44. package/lib/deployment-auth.js +36 -0
  45. package/lib/document-parser.js +171 -0
  46. package/lib/embedder/chunker.js +93 -0
  47. package/lib/embedder/local.js +101 -0
  48. package/lib/embedder/preview-rag.js +93 -0
  49. package/lib/envelope-schema.js +54 -0
  50. package/lib/fleet/scoped-sql.js +342 -0
  51. package/lib/form-schema-config/base.js +135 -0
  52. package/lib/form-schema-config/index.js +286 -0
  53. package/lib/form-schema-config/locales/af-ZA.js +153 -0
  54. package/lib/form-schema-config/locales/ar-AE.js +142 -0
  55. package/lib/form-schema-config/locales/ar-SA.js +164 -0
  56. package/lib/form-schema-config/locales/de-DE.js +152 -0
  57. package/lib/form-schema-config/locales/en-AU.js +161 -0
  58. package/lib/form-schema-config/locales/en-CA.js +115 -0
  59. package/lib/form-schema-config/locales/en-GB.js +132 -0
  60. package/lib/form-schema-config/locales/en-IN.js +219 -0
  61. package/lib/form-schema-config/locales/en-MY.js +171 -0
  62. package/lib/form-schema-config/locales/en-NG.js +198 -0
  63. package/lib/form-schema-config/locales/en-PH.js +186 -0
  64. package/lib/form-schema-config/locales/en-SG.js +153 -0
  65. package/lib/form-schema-config/locales/en-US.js +138 -0
  66. package/lib/form-schema-config/locales/es-ES.js +171 -0
  67. package/lib/form-schema-config/locales/es-MX.js +193 -0
  68. package/lib/form-schema-config/locales/fr-CA.js +138 -0
  69. package/lib/form-schema-config/locales/fr-FR.js +155 -0
  70. package/lib/form-schema-config/locales/hi-IN.js +219 -0
  71. package/lib/form-schema-config/locales/it-IT.js +157 -0
  72. package/lib/form-schema-config/locales/ja-JP.js +169 -0
  73. package/lib/form-schema-config/locales/ko-KR.js +140 -0
  74. package/lib/form-schema-config/locales/nl-NL.js +149 -0
  75. package/lib/form-schema-config/locales/pt-BR.js +168 -0
  76. package/lib/form-schema-config/locales/zh-CN.js +172 -0
  77. package/lib/form-schema-config/locales/zh-HK.js +142 -0
  78. package/lib/form-structure-schema.js +191 -0
  79. package/lib/llm-providers.js +828 -0
  80. package/lib/markdown.js +197 -0
  81. package/lib/mcp/catalysts/appointment-to-calendar.md +84 -0
  82. package/lib/mcp/catalysts/conversations-to-channel-digest.md +104 -0
  83. package/lib/mcp/catalysts/document-extract-to-store.md +92 -0
  84. package/lib/mcp/catalysts/knowledge-gap-miner.md +96 -0
  85. package/lib/mcp/catalysts/loader.js +144 -0
  86. package/lib/mcp/catalysts/qualify-lead-to-crm.md +83 -0
  87. package/lib/mcp/catalysts/scan-conversations-for-signal.md +92 -0
  88. package/lib/mcp/catalysts/submission-to-ticket.md +83 -0
  89. package/lib/mcp/catalysts/submissions-to-warehouse.md +103 -0
  90. package/lib/mcp/catalysts/weekly-submissions-digest.md +82 -0
  91. package/lib/mcp/jobs.js +64 -0
  92. package/lib/mcp/server.js +184 -0
  93. package/lib/mcp/session-binding.js +130 -0
  94. package/lib/mcp/tools/build.js +123 -0
  95. package/lib/mcp/tools/catalysts.js +477 -0
  96. package/lib/mcp/tools/context.js +325 -0
  97. package/lib/mcp/tools/fleet.js +391 -0
  98. package/lib/mcp/tools/jobs-tools.js +240 -0
  99. package/lib/mcp/tools/operate.js +314 -0
  100. package/lib/preview/build-preview-config.js +136 -0
  101. package/lib/rate-limiter.js +11 -0
  102. package/lib/resolve-api-key.js +142 -0
  103. package/lib/storage/index.js +40 -0
  104. package/messages/de.json +2136 -0
  105. package/messages/en.json +2136 -0
  106. package/messages/es.json +2136 -0
  107. package/messages/fr.json +2136 -0
  108. package/messages/it.json +2136 -0
  109. package/messages/ja.json +2136 -0
  110. package/messages/ko.json +2136 -0
  111. package/messages/nl.json +2136 -0
  112. package/messages/pl.json +2136 -0
  113. package/messages/pt.json +2136 -0
  114. package/messages/ru.json +2136 -0
  115. package/messages/uk.json +2136 -0
  116. package/messages/zh.json +2136 -0
  117. package/package.json +68 -5
  118. package/scripts/mcp-config.mjs +162 -0
  119. package/scripts/mcp-stdio-loader.mjs +42 -0
  120. package/scripts/mcp-stdio.mjs +108 -0
  121. package/scripts/mojulo-paths.mjs +48 -0
@@ -0,0 +1,391 @@
1
+ /**
2
+ * MCP Ring 2.5 — fleet read tools.
3
+ *
4
+ * Peers of the dashboard's `/data` pane. Same fan-out substrate (bot-fleet.js),
5
+ * same posture (no conversation content crosses to the control plane —
6
+ * aggregates pass through process memory for the duration of one call).
7
+ *
8
+ * Shape rules enforced here (mirroring MCP-side feedback):
9
+ *
10
+ * - Every fleet tool returns `unreachable: [{ botId, botName, reason, status? }]`
11
+ * in the same shape so the agent can detect "all bots responded?" with
12
+ * one check across tools.
13
+ * - Cache-backed tools return `cache: { fromCache, cachedAt, ttlMs }`
14
+ * so the agent can answer "is this current?" honestly.
15
+ * - Descriptions advertise rollup-only nature and point at the per-bot
16
+ * drill-in tool (`get_conversation`) for actual content.
17
+ * - Descriptions name operational realism (~1–3s warm, ~30s cold fan-out).
18
+ * - Tool names are prefixed `fleet_*` so the agent can tell at a glance
19
+ * which surface it's hitting.
20
+ *
21
+ * The fan-out path stays opportunistic: a slow / down bot doesn't block the
22
+ * tool — its row lands in `unreachable[]` instead.
23
+ */
24
+
25
+ import { fanOut, listConnectedDeployments } from '@/lib/deployers/bot-fleet';
26
+ import { registerTool } from '@/lib/mcp/server';
27
+
28
/** Lifetime of a cached fleet summary, in milliseconds (60 seconds). */
const CACHE_TTL_MS = 60 * 1000;

/**
 * In-process cache of fleet summaries.
 * Each entry maps a cache key to `{ expiresAt, cachedAt, payload }`.
 */
const summaryCache = new Map();
30
+
31
/**
 * Reshapes a list of unreachable entries into the common
 * `{ botId, botName, reason, status?, message? }` row shape.
 *
 * @param {Array<object>|null|undefined} unreachable - Entries carrying `id`,
 *   `botName`, `reason` and optionally `status` / `message`.
 * @returns {Array<object>} One row per entry; an empty array when the input is
 *   null or undefined. `status` and `message` are only copied when truthy.
 */
function normalizeUnreachable(unreachable) {
  const entries = unreachable || [];
  return entries.map((entry) => {
    const row = {
      botId: entry.id,
      botName: entry.botName,
      reason: entry.reason,
    };
    if (entry.status) row.status = entry.status;
    if (entry.message) row.message = entry.message;
    return row;
  });
}
40
+
41
/**
 * Extracts the failed entries of a fan-out result list as
 * `{ botId, botName, reason, status?, message? }` rows.
 *
 * @param {Array<object>} results - Fan-out results; entries with a falsy `ok`
 *   are treated as failures and must carry `deployment` and `reason`.
 * @returns {Array<object>} One row per failed result, in input order.
 */
function unreachableFromResults(results) {
  return results.flatMap((result) => {
    if (result.ok) return [];
    const row = {
      botId: result.deployment.id,
      botName: result.deployment.botName,
      reason: result.reason,
    };
    if (result.status) row.status = result.status;
    if (result.message) row.message = result.message;
    return [row];
  });
}
52
+
53
/**
 * Narrows a deployment list to the requested ids.
 *
 * @param {Array<object>} all - Deployments, each with an `id`.
 * @param {Array<string>|undefined} ids - Ids to keep.
 * @returns {Array<object>} `all` itself when `ids` is not a non-empty array,
 *   otherwise a new array holding only the deployments whose id is listed.
 */
function filterDeployments(all, ids) {
  const hasFilter = Array.isArray(ids) && ids.length > 0;
  if (!hasFilter) {
    return all;
  }
  const allowed = new Set(ids);
  return all.filter((deployment) => allowed.has(deployment.id));
}
58
+
59
/**
 * Builds the cache key for a summary request: the two date bounds followed by
 * the sorted, comma-joined deployment ids, separated by `|`.
 *
 * @param {string|undefined} startDate - Lower bound; falsy becomes ''.
 * @param {string|undefined} endDate - Upper bound; falsy becomes ''.
 * @param {Array<object>} deployments - Deployments in scope, each with an `id`.
 * @returns {string} Key that does not depend on the order of `deployments`.
 */
function summaryCacheKey(startDate, endDate, deployments) {
  const sortedIds = deployments.map((deployment) => deployment.id);
  sortedIds.sort();
  return [startDate || '', endDate || '', sortedIds.join(',')].join('|');
}
63
+
64
/** Upper bound on entries kept in `summaryCache` after pruning. */
const SUMMARY_CACHE_MAX_ENTRIES = 64;

/** Builds `/api/analytics/<endpoint>` with the optional startDate/endDate query. */
function buildAnalyticsPath(endpoint, startDate, endDate) {
  const qs = new URLSearchParams();
  if (startDate) qs.set('startDate', String(startDate));
  if (endDate) qs.set('endDate', String(endDate));
  const query = qs.toString();
  return `/api/analytics/${endpoint}${query ? `?${query}` : ''}`;
}

/** Folds the successful per-bot summary results into fleet totals, daily rows, heatmap cells and per-bot rows. */
function aggregateSummaryResults(results) {
  const totals = { conversations: 0, turns: 0 };
  const dailyMap = new Map();
  const heatMap = new Map();
  const perBot = [];

  for (const r of results) {
    if (!r.ok) continue;
    const d = r.data || {};
    const t = d.totals || {};
    totals.conversations += t.conversations || 0;
    totals.turns += t.turns || 0;
    perBot.push({
      botId: r.deployment.id,
      botName: r.deployment.botName,
      conversations: t.conversations || 0,
      turns: t.turns || 0,
      avgTurnsPerConversation: t.avgTurnsPerConversation || 0,
      firstAt: t.firstAt || null,
      lastAt: t.lastAt || null,
    });
    for (const row of d.daily || []) {
      const cur = dailyMap.get(row.date) || { conversations: 0, turns: 0 };
      cur.conversations += row.conversations || 0;
      cur.turns += row.turns || 0;
      dailyMap.set(row.date, cur);
    }
    for (const cell of d.heatmap || []) {
      const k = `${cell.dow}-${cell.hour}`;
      heatMap.set(k, (heatMap.get(k) || 0) + (cell.turns || 0));
    }
  }

  const daily = Array.from(dailyMap.entries())
    .map(([date, v]) => ({ date, ...v }))
    .sort((a, b) => (a.date < b.date ? -1 : a.date > b.date ? 1 : 0));
  const heatmap = Array.from(heatMap.entries()).map(([k, turns]) => {
    const [dow, hour] = k.split('-').map(Number);
    return { dow, hour, turns };
  });
  const topBots = [...perBot].sort((a, b) => b.turns - a.turns).slice(0, 10);

  return { totals, daily, heatmap, topBots, perBot };
}

/** Sums per-protocol turn and conversation counts across the successful protocol_stats results. */
function aggregateProtocolMix(results) {
  const protoMap = new Map();
  for (const r of results) {
    if (!r.ok) continue;
    for (const row of (r.data && r.data.rows) || []) {
      const cur = protoMap.get(row.protocol) || { turns: 0, conversationsTouched: 0 };
      cur.turns += row.turns || 0;
      cur.conversationsTouched += row.conversations_touched || 0;
      protoMap.set(row.protocol, cur);
    }
  }
  return Array.from(protoMap.entries())
    .map(([protocol, v]) => ({ protocol, ...v }))
    .sort((a, b) => b.turns - a.turns);
}

/** Stores a payload in `summaryCache`, then prunes expired entries and, if still over budget, the oldest ones. */
function storeSummaryInCache(cacheKey, payload, now) {
  summaryCache.set(cacheKey, {
    expiresAt: now + CACHE_TTL_MS,
    cachedAt: now,
    payload,
  });
  if (summaryCache.size <= SUMMARY_CACHE_MAX_ENTRIES) return;
  for (const [key, entry] of summaryCache) {
    if (entry.expiresAt <= now) summaryCache.delete(key);
  }
  // Expired-only pruning leaves the map unbounded when many distinct keys are
  // still live; Map iterates in insertion order, so this drops the oldest.
  for (const key of summaryCache.keys()) {
    if (summaryCache.size <= SUMMARY_CACHE_MAX_ENTRIES) break;
    summaryCache.delete(key);
  }
}

/**
 * MCP handler for `fleet_analytics_summary`: fleet-wide activity rollup.
 *
 * Serves from `summaryCache` when an unexpired entry exists for the same date
 * bounds and deployment set; otherwise fans out to every selected deployment
 * for `/api/analytics/summary` and `/api/analytics/protocol_stats`, aggregates
 * the successful answers and caches the payload for `CACHE_TTL_MS`.
 *
 * @param {object} [input] - `{ startDate?, endDate?, deploymentIds? }`.
 * @param {object} [_ctx] - MCP context (unused).
 * @returns {Promise<object>} `{ totals, daily, heatmap, topBots, protocolMix,
 *   perBot, unreachable, cache: { fromCache, cachedAt, ttlMs } }`.
 *   `unreachable` reflects the summary fan-out only.
 */
async function fleetAnalyticsSummaryHandler(input, _ctx) {
  const { startDate, endDate, deploymentIds } = input || {};
  const all = await listConnectedDeployments();
  const deployments = filterDeployments(all, deploymentIds);

  const cacheKey = summaryCacheKey(startDate, endDate, deployments);
  const cached = summaryCache.get(cacheKey);
  if (cached && cached.expiresAt > Date.now()) {
    return {
      ...cached.payload,
      cache: {
        fromCache: true,
        cachedAt: new Date(cached.cachedAt).toISOString(),
        ttlMs: CACHE_TTL_MS,
      },
    };
  }

  // The two fan-outs are independent, so run them concurrently: a cold call
  // then costs one fan-out round-trip instead of two back to back.
  const [summaryFan, protoFan] = await Promise.all([
    fanOut(buildAnalyticsPath('summary', startDate, endDate), { deployments }),
    fanOut(buildAnalyticsPath('protocol_stats', startDate, endDate), { deployments }),
  ]);

  const { totals, daily, heatmap, topBots, perBot } = aggregateSummaryResults(
    summaryFan.results,
  );

  const payload = {
    totals: {
      ...totals,
      avgTurnsPerConversation: totals.conversations
        ? Number((totals.turns / totals.conversations).toFixed(2))
        : 0,
      activeBots: summaryFan.reachableCount,
      totalBots: summaryFan.totalCount,
    },
    daily,
    heatmap,
    topBots,
    protocolMix: aggregateProtocolMix(protoFan.results),
    perBot,
    unreachable: unreachableFromResults(summaryFan.results),
  };

  const now = Date.now();
  storeSummaryInCache(cacheKey, payload, now);

  return {
    ...payload,
    cache: {
      fromCache: false,
      cachedAt: new Date(now).toISOString(),
      ttlMs: CACHE_TTL_MS,
    },
  };
}
190
+
191
/** Coerces a caller-supplied paging value to an integer within [min, max]; missing or non-numeric input yields `fallback`. */
function clampPageParam(value, min, max, fallback) {
  if (value === undefined || value === null || value === '') return fallback;
  const parsed = Number(value);
  if (!Number.isFinite(parsed)) return fallback;
  return Math.min(Math.max(Math.trunc(parsed), min), max);
}

/**
 * MCP handler for `fleet_query_conversations`: locates conversations across
 * the selected deployments and returns one merged, paginated page of
 * conversation summaries (never turn content).
 *
 * @param {object} [input] - `{ startDate?, endDate?, conversationId?,
 *   deploymentIds?, limit = 50, offset = 0 }`. `limit` is clamped to 1..200
 *   and `offset` to >= 0, matching the advertised input schema.
 * @param {object} [_ctx] - MCP context (unused).
 * @returns {Promise<object>} `{ conversations, pagination, fleet, unreachable }`.
 *   `pagination.total` counts the merged rows fetched for this call (capped at
 *   500), not every matching conversation in the fleet.
 * @throws {Error} When none of startDate, endDate or conversationId is given.
 */
async function fleetQueryConversationsHandler(input, _ctx) {
  const {
    startDate,
    endDate,
    conversationId,
    deploymentIds,
    limit: rawLimit,
    offset: rawOffset,
  } = input || {};

  if (!startDate && !endDate && !conversationId) {
    throw new Error(
      'fleet_query_conversations requires at least one of startDate, endDate, or conversationId. The single-bot /api/conversations contract requires a filter — same here.',
    );
  }

  // The handler used to trust these blindly: a negative offset or a string
  // limit silently corrupted the slice arithmetic below.
  const limit = clampPageParam(rawLimit, 1, 200, 50);
  const offset = clampPageParam(rawOffset, 0, Number.MAX_SAFE_INTEGER, 0);

  const cap = 500;
  // A correct merged page [offset, offset + limit) needs each bot's first
  // offset + limit rows. The previous fixed per-bot window of 50 made deeper
  // pages unreachable. Never request fewer than the old 50.
  // NOTE(review): assumes the per-bot /api/conversations endpoint returns rows
  // most-recent-first and accepts `limit` up to 500 — confirm against the bot API.
  const perBotLimit = Math.max(50, Math.min(offset + limit, cap));

  const all = await listConnectedDeployments();
  const deployments = filterDeployments(all, deploymentIds);

  const qs = new URLSearchParams();
  if (startDate) qs.set('startDate', String(startDate));
  if (endDate) qs.set('endDate', String(endDate));
  if (conversationId) qs.set('conversationId', String(conversationId));
  qs.set('limit', String(perBotLimit));
  qs.set('offset', '0');
  const path = `/api/conversations?${qs.toString()}`;

  const { results, totalCount, reachableCount, unreachableCount } =
    await fanOut(path, { deployments });

  const merged = [];
  // True when some bot filled its whole window, i.e. it may hold more rows.
  let anyBotSaturated = false;
  for (const r of results) {
    if (!r.ok) continue;
    const convos = (r.data && r.data.conversations) || [];
    if (convos.length >= perBotLimit) anyBotSaturated = true;
    for (const c of convos) {
      merged.push({
        botId: r.deployment.id,
        botName: r.deployment.botName,
        conversationId: c.conversation_id,
        startedAt: c.started_at,
        lastActivity: c.last_activity,
        turnCount: c.turn_count,
      });
    }
  }

  // Most recent first. Returns 0 for ties so the comparator is consistent;
  // rows without a lastActivity sort last.
  merged.sort((a, b) => {
    const left = a.lastActivity ?? '';
    const right = b.lastActivity ?? '';
    if (left === right) return 0;
    return left < right ? 1 : -1;
  });

  const truncated = merged.length > cap;
  const capped = truncated ? merged.slice(0, cap) : merged;
  const page = capped.slice(offset, offset + limit);
  const pageEnd = offset + page.length;

  return {
    conversations: page,
    pagination: {
      limit,
      offset,
      total: capped.length,
      returned: page.length,
      // More rows exist if some were fetched past this page, or if a bot
      // filled its window and the next page is still below the cap. The
      // second case can be a false positive when a bot holds exactly
      // `perBotLimit` rows; the following page then comes back empty.
      hasMore: pageEnd < capped.length || (anyBotSaturated && pageEnd < cap),
      truncated,
    },
    fleet: {
      totalBots: totalCount,
      reachableBots: reachableCount,
      unreachableBots: unreachableCount,
    },
    unreachable: unreachableFromResults(results),
  };
}
261
+
262
/**
 * MCP handler for `verify_fleet_chains`: asks every selected deployment for
 * `/api/verify/all` (30s per-call timeout) and rolls the answers up.
 *
 * @param {object} [input] - `{ startDate?, endDate?, deploymentIds? }`.
 * @param {object} [_ctx] - MCP context (unused).
 * @returns {Promise<object>} `{ valid, totalTurns, invalidTurns,
 *   conversationsVerified, failed, perBot, fleet, unreachable }`. `valid` is
 *   true only when no invalid turn was reported and no bot was unreachable.
 */
async function verifyFleetChainsHandler(input, _ctx) {
  const { startDate, endDate, deploymentIds } = input || {};
  const connected = await listConnectedDeployments();
  const deployments = filterDeployments(connected, deploymentIds);

  const params = new URLSearchParams();
  if (startDate) params.set('startDate', String(startDate));
  if (endDate) params.set('endDate', String(endDate));
  const query = params.toString();
  const path = query ? `/api/verify/all?${query}` : '/api/verify/all';

  const fan = await fanOut(path, { deployments, timeoutMs: 30_000 });

  const sums = { totalTurns: 0, invalidTurns: 0, conversationsVerified: 0 };
  const failed = [];
  const perBot = [];

  for (const result of fan.results) {
    if (result.ok) {
      const report = result.data || {};
      const botId = result.deployment.id;
      const botName = result.deployment.botName;
      const botTotal = report.totalTurns || 0;
      const botInvalid = report.invalidTurns || 0;
      const botVerified = report.conversationsVerified || 0;

      sums.totalTurns += botTotal;
      sums.invalidTurns += botInvalid;
      sums.conversationsVerified += botVerified;

      perBot.push({
        botId,
        botName,
        valid: Boolean(report.valid),
        totalTurns: botTotal,
        invalidTurns: botInvalid,
        conversationsVerified: botVerified,
      });

      for (const failure of report.failed || []) {
        failed.push({
          botId,
          botName,
          conversationId: failure.conversationId,
          turn: failure.turn,
          timestamp: failure.timestamp,
          reason: failure.reason,
        });
      }
    }
  }

  // A fleet with unreachable bots cannot be declared valid.
  const fleetValid = sums.invalidTurns === 0 && fan.unreachableCount === 0;

  return {
    valid: fleetValid,
    totalTurns: sums.totalTurns,
    invalidTurns: sums.invalidTurns,
    conversationsVerified: sums.conversationsVerified,
    failed,
    perBot,
    fleet: {
      totalBots: fan.totalCount,
      reachableBots: fan.reachableCount,
      unreachableBots: fan.unreachableCount,
    },
    unreachable: unreachableFromResults(fan.results),
  };
}
322
+
323
/**
 * Registers the three fleet read tools with the MCP server, in this order:
 * `fleet_analytics_summary`, `fleet_query_conversations`, `verify_fleet_chains`.
 */
export function registerFleetTools() {
  // Small builders for the schema fragments every fleet tool repeats.
  const stringField = (description) => ({ type: 'string', description });
  const deploymentIdsField = (description) => ({
    type: 'array',
    items: { type: 'string' },
    description,
  });

  const tools = [
    {
      name: 'fleet_analytics_summary',
      description:
        "Fleet-wide activity rollup: totals, daily turn/conversation counts, top bots by activity, protocol usage mix, plus a per-bot breakdown keyed by botId. Aggregates and metadata only — for conversation content, use get_conversation against a specific bot (use fleet_query_conversations first to locate which bot a conversation lives on). Hits a 60s in-process cache shared with the dashboard's Analytics tab; cold (cache miss) takes ~1–3s warm, up to ~30s when every bot has to fan-out fresh. Response includes `cache: { fromCache, cachedAt, ttlMs }` so you can tell whether the data is current and `unreachable: [{ botId, botName, reason }]` so you can detect partial fleets. Optional `startDate`/`endDate` ISO bounds; optional `deploymentIds` to scope to a subset (default: every connected bot).",
      inputSchema: {
        type: 'object',
        properties: {
          startDate: stringField('ISO timestamp lower bound on turn timestamp.'),
          endDate: stringField('ISO timestamp upper bound on turn timestamp.'),
          deploymentIds: deploymentIdsField(
            'Optional — restrict the fan-out to these deployment ids. Defaults to every connected bot.',
          ),
        },
      },
      handler: fleetAnalyticsSummaryHandler,
    },
    {
      name: 'fleet_query_conversations',
      description:
        "Locate conversations across every connected bot. Returns conversation summaries only — `{ botId, botName, conversationId, startedAt, lastActivity, turnCount }` — never turn content. To read a conversation's actual content, call `get_conversation` against the bot named by `botId`; the two-step pattern (fleet-locate, then per-bot-read) preserves the 'conversation data never crosses to the control plane' posture. Requires at least one of `startDate` / `endDate` / `conversationId` — same contract as the per-bot /api/conversations endpoint, which refuses unfiltered scans. Operational realism: typically 1–3s, up to ~30s cold across a large fleet. Response includes `unreachable: [{ botId, botName, reason }]` to flag bots that didn't answer.",
      inputSchema: {
        type: 'object',
        properties: {
          startDate: stringField('ISO timestamp lower bound on first-turn timestamp.'),
          endDate: stringField('ISO timestamp upper bound on first-turn timestamp.'),
          conversationId: stringField('Optional substring match on conversation id.'),
          deploymentIds: deploymentIdsField(
            'Optional — restrict the fan-out to these deployment ids.',
          ),
          limit: { type: 'integer', minimum: 1, maximum: 200, default: 50 },
          offset: { type: 'integer', minimum: 0, default: 0 },
        },
      },
      handler: fleetQueryConversationsHandler,
    },
    {
      name: 'verify_fleet_chains',
      description:
        "Walk the tamper-evident hash chain across every connected bot in one call. This is the audit story scaled to fleet level — each bot still owns its own chain, but the aggregate roll-up is something only the control plane can produce. Returns `{ valid, totalTurns, invalidTurns, conversationsVerified, failed: [{ botId, botName, conversationId, turn, reason }], perBot: [...], unreachable: [...] }`. `valid: true` requires zero invalid turns AND zero unreachable bots — a fleet with dark bots can't be conclusively audited. Optional `startDate` / `endDate` ISO bounds narrow which conversations are walked. Operational realism: walks every turn on each reachable bot, so heavy fleets at full history take longer than other fleet tools — typically 2–10s, more on bots with very large databases.",
      inputSchema: {
        type: 'object',
        properties: {
          startDate: stringField('ISO timestamp lower bound on first-turn timestamp.'),
          endDate: stringField('ISO timestamp upper bound on first-turn timestamp.'),
          deploymentIds: deploymentIdsField(
            'Optional — restrict the audit to these deployment ids.',
          ),
        },
      },
      handler: verifyFleetChainsHandler,
    },
  ];

  for (const tool of tools) {
    registerTool(tool);
  }
}
384
+
385
+ // Exported for tests.
386
+ export {
387
+ fleetAnalyticsSummaryHandler,
388
+ fleetQueryConversationsHandler,
389
+ verifyFleetChainsHandler,
390
+ normalizeUnreachable,
391
+ };
@@ -0,0 +1,240 @@
1
+ /**
2
+ * MCP Phase 2 — long-running tools wrapped as jobs.
3
+ *
4
+ * `process_documents` and `save_modular_bot` from BUILDER_TOOLS can take 10s+
5
+ * (document parsing + embedding + per-doc LLM summary; or build pipeline +
6
+ * docker package + LLM identity composition). MCP clients don't all surface
7
+ * progress notifications well, so we wrap them: return { jobId } immediately,
8
+ * the model polls via `poll_job`.
9
+ *
10
+ * Also adds an in-protocol document upload path (`upload_document_from_url`)
11
+ * since the web flow uses a separate form-data endpoint that MCP can't reach.
12
+ */
13
+
14
+ import { BUILDER_TOOLS } from '@/lib/builder/tools';
15
+ import { executeBuilderTool } from '@/lib/builder/tool-executors';
16
+ import { BuilderSessionRepository } from '@/lib/db/repositories/builderSessions';
17
+ import { DocumentRepository } from '@/lib/db/repositories/documents';
18
+ import {
19
+ getOrCreateBuilderSession,
20
+ } from '@/lib/mcp/session-binding';
21
+ import { registerTool } from '@/lib/mcp/server';
22
+ import { startJob, getJob } from '@/lib/mcp/jobs';
23
+ import { parseDocument } from '@/lib/document-parser';
24
+ import { uploadFile } from '@/lib/storage';
25
+
26
/** Names of the BUILDER_TOOLS entries that are re-registered as background jobs. */
const JOB_TOOL_NAMES = [
  'process_documents',
  'save_modular_bot',
];
27
+
28
/**
 * Looks up a tool definition in BUILDER_TOOLS by name.
 *
 * @param {string} name - Tool name to find.
 * @returns {object} The first BUILDER_TOOLS entry whose `name` matches.
 * @throws {Error} When no entry carries that name.
 */
function findBuilderToolSchema(name) {
  for (const candidate of BUILDER_TOOLS) {
    if (candidate.name === name) {
      return candidate;
    }
  }
  throw new Error(`BUILDER_TOOLS is missing ${name}`);
}
33
+
34
/**
 * Builds the MCP handler that runs a builder tool as a background job.
 *
 * The returned handler resolves the builder session bound to the MCP session,
 * then hands `startJob` a task and returns whatever `startJob` returns.
 *
 * @param {string} toolName - Builder tool to execute inside the job.
 * @returns {(input: object, mcpContext: object) => Promise<*>} Handler reading
 *   `mcpSessionId` and `userId` from `mcpContext`.
 */
function makeJobHandler(toolName) {
  return async function handle(input, mcpContext) {
    const session = await getOrCreateBuilderSession(
      mcpContext.mcpSessionId,
      mcpContext.userId
    );

    return startJob({
      tool: toolName,
      mcpSessionId: mcpContext.mcpSessionId,
      builderSessionId: session.id,
      task: async () => {
        // Re-fetch session on the worker side — there could be writes from
        // other tools between job start and run.
        const fresh = await BuilderSessionRepository.findById(session.id);
        if (!fresh) {
          // Without this guard a vanished session reaches the tool as `null`
          // and fails with an unrelated property-access error.
          throw new Error(
            `Builder session ${session.id} no longer exists; cannot run ${toolName}`
          );
        }
        const ctx = { session: fresh, userId: mcpContext.userId };
        const result = await executeBuilderTool(toolName, input, ctx);
        if (!result.success) {
          throw new Error(result.error || `${toolName} failed`);
        }
        return result.result;
      },
    });
  };
}
59
+
60
/** MIME type by lower-case file extension, used when no content type is known. */
const MIME_BY_EXT = {
  // Binary document formats.
  pdf: 'application/pdf',
  docx: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  // Text formats.
  txt: 'text/plain',
  md: 'text/markdown',
  html: 'text/html',
  htm: 'text/html',
};

/** Largest accepted document in bytes: 25 MB — same ballpark as the web upload form. */
const MAX_DOC_BYTES = 25 * 2 ** 20;
70
+
71
/** Reduces a caller-supplied name to a single safe path segment for use inside a storage key. */
function toSafeStorageName(name) {
  // Keep only the last path segment so `../` or absolute paths cannot escape
  // the `documents/` prefix.
  const lastSegment = String(name || '').split(/[\\/]/).pop();
  const cleaned = lastSegment
    .replace(/[^\p{L}\p{N}._\- ]+/gu, '_')
    .replace(/^\.+/, '');
  return cleaned || 'document';
}

/** Reads a fetch response body into a Buffer, failing as soon as it is known to exceed `maxBytes`. */
async function readBodyWithLimit(resp, maxBytes) {
  // Reject from the declared length before pulling any bytes.
  const declaredLength = Number(resp.headers.get('content-length'));
  if (Number.isFinite(declaredLength) && declaredLength > maxBytes) {
    throw new Error(`Document too large: ${declaredLength} bytes (max ${maxBytes})`);
  }

  const reader = resp.body?.getReader?.();
  if (!reader) {
    // No readable stream available: fall back to buffering, then check.
    const ab = await resp.arrayBuffer();
    if (ab.byteLength > maxBytes) {
      throw new Error(`Document too large: ${ab.byteLength} bytes (max ${maxBytes})`);
    }
    return Buffer.from(ab);
  }

  const chunks = [];
  let received = 0;
  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    received += value.byteLength;
    if (received > maxBytes) {
      try {
        await reader.cancel();
      } catch {
        // Best-effort cleanup; the size error below is what matters.
      }
      throw new Error(`Document too large: more than ${maxBytes} bytes (max ${maxBytes})`);
    }
    chunks.push(value);
  }
  return Buffer.concat(chunks, received);
}

/**
 * MCP handler for `upload_document_from_url`: ingests one document supplied as
 * a URL, base64 content or already-extracted text, stores the bytes and
 * creates the document record.
 *
 * @param {object} [input] - `{ url?, base64?, text?, fileName? }`; exactly one
 *   of `url` / `base64` / `text`. `fileName` is required with `base64` and
 *   `text`, and inferred from the URL path when omitted with `url`.
 * @param {object} [_mcpContext] - MCP context (unused).
 * @returns {Promise<object>} `{ documentId, originalName, mimeType, sizeBytes, message }`.
 * @throws {Error} On missing or conflicting sources, a non-http(s) URL, a
 *   failed fetch, empty or oversized content, or an unparseable document.
 */
async function uploadDocumentFromUrlHandler(input, _mcpContext) {
  const { url, base64, text, fileName } = input || {};

  const providedCount = [url, base64, text].filter(Boolean).length;
  if (providedCount === 0) {
    throw new Error('Provide one of `url`, `base64` (with `fileName`), or `text` (with `fileName`).');
  }
  if (providedCount > 1) {
    throw new Error('Provide only one of `url`, `base64`, or `text` — they are mutually exclusive.');
  }

  let buffer;
  let resolvedName = fileName;
  let resolvedMime;
  // When the caller hands us already-extracted text (e.g. piped from another
  // MCP server like Google Docs), skip the parser — the text IS the parsed
  // form.
  let preExtractedText = null;

  if (url) {
    if (!/^https?:\/\//i.test(url)) {
      throw new Error('url must be http(s)://');
    }
    // NOTE(review): `url` is caller-controlled and fetched from this process.
    // Nothing here blocks loopback, private or link-local targets (SSRF);
    // consider an allow-list or a resolved-address check.
    const resp = await fetch(url);
    if (!resp.ok) {
      throw new Error(`Fetch failed: ${resp.status} ${resp.statusText}`);
    }
    buffer = await readBodyWithLimit(resp, MAX_DOC_BYTES);
    resolvedMime = resp.headers.get('content-type')?.split(';')[0]?.trim() || undefined;
    if (!resolvedName) {
      const path = new URL(url).pathname;
      resolvedName = path.split('/').pop() || 'document';
    }
  } else if (base64) {
    if (!fileName) {
      throw new Error('fileName is required when uploading via base64.');
    }
    buffer = Buffer.from(base64, 'base64');
    if (buffer.length === 0) {
      throw new Error('base64 decoded to zero bytes.');
    }
    if (buffer.length > MAX_DOC_BYTES) {
      throw new Error(`Document too large: ${buffer.length} bytes (max ${MAX_DOC_BYTES})`);
    }
  } else {
    if (!fileName) {
      throw new Error('fileName is required when uploading via text.');
    }
    if (typeof text !== 'string' || text.trim().length === 0) {
      throw new Error('text must be a non-empty string.');
    }
    buffer = Buffer.from(text, 'utf8');
    if (buffer.length > MAX_DOC_BYTES) {
      throw new Error(`Document too large: ${buffer.length} bytes (max ${MAX_DOC_BYTES})`);
    }
    preExtractedText = text;
    resolvedMime = 'text/plain';
  }

  if (!resolvedMime) {
    const ext = (resolvedName || '').split('.').pop()?.toLowerCase();
    // Own-property check: a plain lookup would resolve inherited keys such as
    // `constructor` and hand back a function as the MIME type.
    resolvedMime = Object.prototype.hasOwnProperty.call(MIME_BY_EXT, ext)
      ? MIME_BY_EXT[ext]
      : 'application/octet-stream';
  }

  let parsedText;
  if (preExtractedText !== null) {
    parsedText = preExtractedText;
  } else {
    // Parse the document up front; if it's unparseable, surface the failure now
    // rather than have process_documents skip it silently.
    try {
      parsedText = await parseDocument(buffer, resolvedName);
    } catch (err) {
      throw new Error(`Failed to parse document: ${err.message}`, { cause: err });
    }
  }

  // The display name is stored as supplied; only the storage key is sanitized.
  const storagePath = `documents/${Date.now()}-${toSafeStorageName(resolvedName)}`;
  await uploadFile(storagePath, buffer, null, { contentType: resolvedMime });

  const document = await DocumentRepository.create({
    storagePath,
    originalName: resolvedName,
    mimeType: resolvedMime,
    sizeBytes: buffer.length,
    parsedText,
  });

  return {
    documentId: document.id,
    originalName: document.originalName,
    mimeType: document.mimeType,
    sizeBytes: document.sizeBytes,
    message: `Uploaded ${document.originalName}. Pass its id to process_documents.`,
  };
}
172
+
173
/**
 * MCP handler for `poll_job`: reports the current state of a background job.
 *
 * @param {object} [input] - `{ jobId }`.
 * @param {object} [_mcpContext] - MCP context (unused).
 * @returns {Promise<object>} `{ jobId, tool, status, progress, result, error }`
 *   copied from the stored job.
 * @throws {Error} When `jobId` is missing or no job with that id exists.
 */
async function pollJobHandler(input, _mcpContext) {
  const jobId = (input || {}).jobId;
  if (!jobId) {
    throw new Error('jobId is required');
  }

  const job = await getJob(jobId);
  if (!job) {
    throw new Error(`Job not found: ${jobId}`);
  }

  const { id, tool, status, progress, result, error } = job;
  return { jobId: id, tool, status, progress, result, error };
}
187
+
188
/**
 * Registers the job-backed tools with the MCP server: one wrapper per entry of
 * JOB_TOOL_NAMES (reusing the builder tool's name and input schema), followed
 * by `upload_document_from_url` and `poll_job`.
 */
export function registerJobsTools() {
  JOB_TOOL_NAMES.forEach((name) => {
    const builderTool = findBuilderToolSchema(name);
    const description = `${builderTool.description}\n\n(This tool runs as a background job. It returns { jobId } immediately; call \`poll_job\` until status is "done" or "error".)`;
    registerTool({
      name: builderTool.name,
      description,
      inputSchema: builderTool.input_schema,
      handler: makeJobHandler(name),
    });
  });

  const uploadProperties = {
    url: {
      type: 'string',
      description: 'http(s) URL to fetch the document from. Mutually exclusive with `base64` and `text`.',
    },
    base64: {
      type: 'string',
      description: 'Base64-encoded document content. Requires `fileName`. Mutually exclusive with `url` and `text`.',
    },
    text: {
      type: 'string',
      description: 'Already-extracted plain-text content (skips the parser). Requires `fileName`. Mutually exclusive with `url` and `base64`. Use this when the source MCP server already returned text content — avoids needlessly base64-encoding through the model.',
    },
    fileName: {
      type: 'string',
      description: 'Original filename (used for display). Required with `base64` and `text`; optional with `url` (inferred from path).',
    },
  };
  registerTool({
    name: 'upload_document_from_url',
    description:
      'Upload a document (PDF / DOCX / TXT / MD / HTML) to the control plane for the bot under construction. Provide exactly one of: `url` to fetch from, `base64` binary content with `fileName`, or `text` plain-text content with `fileName` (use this when piping already-extracted text from another MCP server like Google Docs — skips the binary round-trip). Returns a `documentId` you can pass to `process_documents`.',
    inputSchema: { type: 'object', properties: uploadProperties },
    handler: uploadDocumentFromUrlHandler,
  });

  const pollSchema = {
    type: 'object',
    properties: {
      jobId: { type: 'string', description: 'The jobId returned from the original tool call.' },
    },
    required: ['jobId'],
  };
  registerTool({
    name: 'poll_job',
    description:
      'Check the status of a background job started by `process_documents`, `save_modular_bot`, or other job-based tools. Returns { status: "pending" | "running" | "done" | "error", progress, result, error }.',
    inputSchema: pollSchema,
    handler: pollJobHandler,
  });
}