mojulo 0.0.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. package/README.md +54 -4
  2. package/lib/audit-logger-new.js +11 -0
  3. package/lib/auth/gate.js +25 -0
  4. package/lib/auth/service.js +17 -0
  5. package/lib/auth/session.js +63 -0
  6. package/lib/builder/chat-processor.js +607 -0
  7. package/lib/builder/composer-bridge.js +82 -0
  8. package/lib/builder/evaluator.js +159 -0
  9. package/lib/builder/executor.js +252 -0
  10. package/lib/builder/index.js +48 -0
  11. package/lib/builder/session.js +248 -0
  12. package/lib/builder/system-prompt.js +422 -0
  13. package/lib/builder/tone-presets.js +75 -0
  14. package/lib/builder/tool-executors.js +1527 -0
  15. package/lib/builder/tools.js +338 -0
  16. package/lib/builder/validators.js +239 -0
  17. package/lib/composer/composer.js +225 -0
  18. package/lib/composer/index.js +40 -0
  19. package/lib/composer/protocols/00_base.txt +19 -0
  20. package/lib/composer/protocols/01_knowledge.txt +9 -0
  21. package/lib/composer/protocols/02_form-gathering.txt +32 -0
  22. package/lib/composer/protocols/03_appointments.txt +16 -0
  23. package/lib/composer/protocols/04_triage.txt +15 -0
  24. package/lib/composer/protocols/05_optical-read.txt +22 -0
  25. package/lib/composer/response-builder.js +98 -0
  26. package/lib/config-builder.js +650 -0
  27. package/lib/db/ids.js +10 -0
  28. package/lib/db/index.js +179 -0
  29. package/lib/db/repositories/apiKeys.js +72 -0
  30. package/lib/db/repositories/auditLogs.js +12 -0
  31. package/lib/db/repositories/botSpaces.js +12 -0
  32. package/lib/db/repositories/builderSessions.js +312 -0
  33. package/lib/db/repositories/deploymentEvents.js +12 -0
  34. package/lib/db/repositories/deployments.js +385 -0
  35. package/lib/db/repositories/documents.js +68 -0
  36. package/lib/db/repositories/mcpJobs.js +84 -0
  37. package/lib/deployers/bot-fleet.js +110 -0
  38. package/lib/deployers/bot-proxy.js +72 -0
  39. package/lib/deployers/build.js +89 -0
  40. package/lib/deployers/cloud-deploy.js +310 -0
  41. package/lib/deployers/docker.js +439 -0
  42. package/lib/deployers/fly.js +432 -0
  43. package/lib/deployers/index.js +38 -0
  44. package/lib/deployment-auth.js +36 -0
  45. package/lib/document-parser.js +171 -0
  46. package/lib/embedder/chunker.js +93 -0
  47. package/lib/embedder/local.js +101 -0
  48. package/lib/embedder/preview-rag.js +93 -0
  49. package/lib/envelope-schema.js +54 -0
  50. package/lib/fleet/scoped-sql.js +342 -0
  51. package/lib/form-schema-config/base.js +135 -0
  52. package/lib/form-schema-config/index.js +286 -0
  53. package/lib/form-schema-config/locales/af-ZA.js +153 -0
  54. package/lib/form-schema-config/locales/ar-AE.js +142 -0
  55. package/lib/form-schema-config/locales/ar-SA.js +164 -0
  56. package/lib/form-schema-config/locales/de-DE.js +152 -0
  57. package/lib/form-schema-config/locales/en-AU.js +161 -0
  58. package/lib/form-schema-config/locales/en-CA.js +115 -0
  59. package/lib/form-schema-config/locales/en-GB.js +132 -0
  60. package/lib/form-schema-config/locales/en-IN.js +219 -0
  61. package/lib/form-schema-config/locales/en-MY.js +171 -0
  62. package/lib/form-schema-config/locales/en-NG.js +198 -0
  63. package/lib/form-schema-config/locales/en-PH.js +186 -0
  64. package/lib/form-schema-config/locales/en-SG.js +153 -0
  65. package/lib/form-schema-config/locales/en-US.js +138 -0
  66. package/lib/form-schema-config/locales/es-ES.js +171 -0
  67. package/lib/form-schema-config/locales/es-MX.js +193 -0
  68. package/lib/form-schema-config/locales/fr-CA.js +138 -0
  69. package/lib/form-schema-config/locales/fr-FR.js +155 -0
  70. package/lib/form-schema-config/locales/hi-IN.js +219 -0
  71. package/lib/form-schema-config/locales/it-IT.js +157 -0
  72. package/lib/form-schema-config/locales/ja-JP.js +169 -0
  73. package/lib/form-schema-config/locales/ko-KR.js +140 -0
  74. package/lib/form-schema-config/locales/nl-NL.js +149 -0
  75. package/lib/form-schema-config/locales/pt-BR.js +168 -0
  76. package/lib/form-schema-config/locales/zh-CN.js +172 -0
  77. package/lib/form-schema-config/locales/zh-HK.js +142 -0
  78. package/lib/form-structure-schema.js +191 -0
  79. package/lib/llm-providers.js +828 -0
  80. package/lib/markdown.js +197 -0
  81. package/lib/mcp/catalysts/appointment-to-calendar.md +84 -0
  82. package/lib/mcp/catalysts/conversations-to-channel-digest.md +104 -0
  83. package/lib/mcp/catalysts/document-extract-to-store.md +92 -0
  84. package/lib/mcp/catalysts/knowledge-gap-miner.md +96 -0
  85. package/lib/mcp/catalysts/loader.js +144 -0
  86. package/lib/mcp/catalysts/qualify-lead-to-crm.md +83 -0
  87. package/lib/mcp/catalysts/scan-conversations-for-signal.md +92 -0
  88. package/lib/mcp/catalysts/submission-to-ticket.md +83 -0
  89. package/lib/mcp/catalysts/submissions-to-warehouse.md +103 -0
  90. package/lib/mcp/catalysts/weekly-submissions-digest.md +82 -0
  91. package/lib/mcp/jobs.js +64 -0
  92. package/lib/mcp/server.js +184 -0
  93. package/lib/mcp/session-binding.js +130 -0
  94. package/lib/mcp/tools/build.js +123 -0
  95. package/lib/mcp/tools/catalysts.js +477 -0
  96. package/lib/mcp/tools/context.js +325 -0
  97. package/lib/mcp/tools/fleet.js +391 -0
  98. package/lib/mcp/tools/jobs-tools.js +240 -0
  99. package/lib/mcp/tools/operate.js +314 -0
  100. package/lib/preview/build-preview-config.js +136 -0
  101. package/lib/rate-limiter.js +11 -0
  102. package/lib/resolve-api-key.js +142 -0
  103. package/lib/storage/index.js +40 -0
  104. package/messages/de.json +2136 -0
  105. package/messages/en.json +2136 -0
  106. package/messages/es.json +2136 -0
  107. package/messages/fr.json +2136 -0
  108. package/messages/it.json +2136 -0
  109. package/messages/ja.json +2136 -0
  110. package/messages/ko.json +2136 -0
  111. package/messages/nl.json +2136 -0
  112. package/messages/pl.json +2136 -0
  113. package/messages/pt.json +2136 -0
  114. package/messages/ru.json +2136 -0
  115. package/messages/uk.json +2136 -0
  116. package/messages/zh.json +2136 -0
  117. package/package.json +68 -5
  118. package/scripts/mcp-config.mjs +162 -0
  119. package/scripts/mcp-stdio-loader.mjs +42 -0
  120. package/scripts/mcp-stdio.mjs +108 -0
  121. package/scripts/mojulo-paths.mjs +48 -0
@@ -0,0 +1,432 @@
1
+ /**
2
+ * Fly.io Machines API deployer.
3
+ *
4
+ * Deploys the published GHCR bot image (ghcr.io/zombico/mojulo-bot) to a
5
+ * user-owned Fly app using their API token. Per-bot config is injected via
6
+ * the Machines API `files` field — the image stays bot-agnostic.
7
+ *
8
+ * Patterns enforced (see lite-template/integration/CLOUD_DEPLOY_GUIDE.md):
9
+ * - Pattern 1: One image, config injected per machine via base64 files[].
10
+ * - Pattern 2: Volume named "data", find-or-create idempotent, always
11
+ * re-attached on update.
12
+ * - Pattern 3: Deterministic app name = ${md5(userId).slice(0,8)}-${botName}.
13
+ * - Pattern 4: Lifecycle ops are thin platform mappings.
14
+ * - Pattern 5: deploy() accepts an onProgress callback for audit-trail
15
+ * events; the lifecycle wrapper persists them.
16
+ *
17
+ * Credentials are passed by constructor parameter — never read globally —
18
+ * so future per-user token storage drops in without changing this file.
19
+ */
20
+
21
+ import crypto from 'crypto';
22
+
23
// Base URL of the Fly Machines REST API; _request() prefixes every path with it.
const FLY_API_BASE = 'https://api.machines.dev/v1';

// Machine sizing used when the FlyDeployer constructor is not given `defaultGuest`.
const DEFAULT_GUEST = { cpu_kind: 'shared', cpus: 1, memory_mb: 1024 };
// Region used when the FlyDeployer constructor is not given `defaultRegion`.
const DEFAULT_REGION = 'iad';
// Data-volume size in GB used when the FlyDeployer constructor is not given `defaultVolumeGb`.
const DEFAULT_VOLUME_GB = 1;

// Fly's max app-name length; computeAppName truncates to this.
const FLY_APP_NAME_MAX_LENGTH = 63;
// Port the bot's Express server listens on inside the container (matches lite-template/server.js).
const BOT_INTERNAL_PORT = 3000;
// Healthcheck cadence injected into the Fly machine config.
const HEALTHCHECK_CONFIG = {
  interval: '15s',
  timeout: '5s',
  grace_period: '20s',
};
39
+
40
/**
 * Deployer that provisions and manages a bot on Fly.io through the Machines
 * REST API (plus one GraphQL mutation for IP allocation).
 *
 * All credentials and defaults arrive through the constructor; the only
 * environment variables read are the image fallbacks MOJULO_CLOUD_IMAGE and
 * BOT_IMAGE.
 */
export class FlyDeployer {
  /**
   * @param {Object} [options]
   * @param {string} options.apiToken Fly API token, sent as a Bearer credential. Required.
   * @param {string} [options.orgSlug='personal'] Organization slug sent when creating the app.
   * @param {string} [options.image] Image reference; falls back to MOJULO_CLOUD_IMAGE, then BOT_IMAGE.
   * @param {string} [options.defaultRegion] Region used when deploy() receives none.
   * @param {Object} [options.defaultGuest] Guest sizing used when deploy() receives none.
   * @param {number} [options.defaultVolumeGb] Volume size (GB) used when deploy() receives none.
   * @throws {Error} If apiToken is missing, or the resolved image is empty or contains no "/".
   */
  constructor({
    apiToken,
    orgSlug = 'personal',
    image,
    defaultRegion = DEFAULT_REGION,
    defaultGuest = DEFAULT_GUEST,
    defaultVolumeGb = DEFAULT_VOLUME_GB,
  } = {}) {
    if (!apiToken) {
      throw new Error('FlyDeployer requires apiToken');
    }
    this.apiToken = apiToken;
    this.orgSlug = orgSlug;
    // Cloud always wants a public registry pin. In the common case BOT_IMAGE
    // already points at GHCR, so fall back to it. Set MOJULO_CLOUD_IMAGE only
    // when the cloud tag needs to diverge from the local docker-compose tag
    // (e.g. BOT_IMAGE is a laptop tag like `mojulo/bot:latest`). The registry
    // check below catches that case regardless of which env var supplied it.
    this.image =
      image ||
      process.env.MOJULO_CLOUD_IMAGE ||
      process.env.BOT_IMAGE;
    if (!this.image || !this.image.includes('/')) {
      throw new Error(
        `Cloud image "${this.image}" has no registry prefix; Fly will route it through Docker Hub. ` +
          `Set MOJULO_CLOUD_IMAGE or BOT_IMAGE to a fully-qualified image like ghcr.io/owner/name:tag.`
      );
    }
    this.defaultRegion = defaultRegion;
    this.defaultGuest = defaultGuest;
    this.defaultVolumeGb = defaultVolumeGb;
  }

  /**
   * Deterministic app name from user identity + bot name. Same inputs always
   * yield the same name, which makes deploys self-healing: lose the
   * control-plane row, redeploy, and you find the existing app + volume.
   *
   * The name is `<first 8 hex chars of md5(userId)>-<botName>`, lower-cased,
   * with every run of characters outside [a-z0-9-] collapsed to one hyphen,
   * truncated to FLY_APP_NAME_MAX_LENGTH, and with edge hyphens removed.
   *
   * @param {Object} params
   * @param {string|number} [params.userId='local'] Stringified before hashing.
   * @param {string} params.botName
   * @returns {string}
   * @throws {Error} If botName is empty or missing.
   */
  static computeAppName({ userId = 'local', botName }) {
    if (!botName) throw new Error('computeAppName requires botName');
    const userHash = crypto
      .createHash('md5')
      .update(String(userId))
      .digest('hex')
      .substring(0, 8);
    return `${userHash}-${botName}`
      .toLowerCase()
      .replace(/[^a-z0-9-]/g, '-')
      .replace(/-+/g, '-')
      // Truncate BEFORE stripping edge hyphens: cutting at the length limit can
      // itself expose a trailing "-", which would make `<name>.fly.dev` an
      // invalid hostname label.
      .slice(0, FLY_APP_NAME_MAX_LENGTH)
      .replace(/(^-|-$)/g, '');
  }

  /**
   * Issue an authenticated JSON request against the Machines API.
   *
   * @param {string} pathSuffix Path appended to FLY_API_BASE (leading slash included).
   * @param {Object} [options] fetch() options; `headers` are merged over the defaults.
   * @returns {Promise<any>} Parsed JSON body, or null when the body is empty.
   * @throws {Error} On a non-2xx response; the error carries `status` and `body`.
   */
  async _request(pathSuffix, options = {}) {
    const res = await fetch(`${FLY_API_BASE}${pathSuffix}`, {
      ...options,
      headers: {
        Authorization: `Bearer ${this.apiToken}`,
        'Content-Type': 'application/json',
        ...(options.headers || {}),
      },
    });
    const text = await res.text();
    if (!res.ok) {
      const err = new Error(`Fly API ${res.status} ${pathSuffix}: ${text}`);
      err.status = res.status;
      err.body = text;
      throw err;
    }
    return text ? JSON.parse(text) : null;
  }

  /**
   * Provision (or update) a bot. Idempotent against an existing app + volume
   * keyed by `appName`.
   *
   * @param {Object} params
   * @param {string} params.appName Deterministic app name (use computeAppName)
   * @param {Array} params.configFiles [{ guestPath, contents }] — written into the container at machine create
   * @param {Object} params.env Env vars for the bot process (LLM key, MOJULO_API_KEY, DOCKER_RUN, etc.)
   * @param {string} [params.region]
   * @param {Object} [params.guest] cpu_kind, cpus, memory_mb
   * @param {number} [params.volumeGb]
   * @param {Function} [params.onProgress] called with { step, message } per lifecycle event
   * @returns {Promise<{appName: string, url: string, machineId: string, volumeId: string}>}
   * @throws {Error} If appName is missing or any provisioning request fails.
   */
  async deploy(params) {
    const {
      appName,
      configFiles = [],
      env = {},
      region = this.defaultRegion,
      guest = this.defaultGuest,
      volumeGb = this.defaultVolumeGb,
      onProgress = () => {},
    } = params;

    if (!appName) throw new Error('deploy requires appName');

    onProgress({ step: 'app', message: `Ensuring Fly app ${appName} exists` });
    await this._ensureApp(appName);

    onProgress({ step: 'ips', message: `Ensuring public IPs for ${appName}` });
    await this._ensureIps(appName);

    onProgress({ step: 'volume', message: `Ensuring data volume in ${region}` });
    const volumeId = await this._ensureVolume(appName, region, volumeGb);

    onProgress({ step: 'machine_config', message: 'Building machine config' });
    const machineConfig = this._buildMachineConfig({
      env,
      configFiles,
      volumeId,
      guest,
    });

    onProgress({ step: 'machine', message: 'Creating or updating machine' });
    const machine = await this._ensureMachine(appName, region, machineConfig);

    onProgress({ step: 'wait', message: 'Waiting for machine to start' });
    await this._waitForStart(appName, machine.id).catch((err) => {
      // Healthcheck/start timeout is informational, not fatal — the machine
      // may still come healthy moments later. Surface as a progress event
      // and let the caller poll status.
      onProgress({ step: 'wait_timeout', message: err.message });
    });

    const url = `https://${appName}.fly.dev`;
    onProgress({ step: 'complete', message: `Deployed at ${url}` });

    return { appName, url, machineId: machine.id, volumeId };
  }

  /**
   * Create the Fly app, treating "already exists" responses as success.
   *
   * Any HTTP 422, or an error body matching the "already exists/taken"
   * patterns below, is interpreted as the app being left over from a prior
   * deploy. Every other error is rethrown.
   *
   * @param {string} appName
   */
  async _ensureApp(appName) {
    try {
      await this._request('/apps', {
        method: 'POST',
        body: JSON.stringify({ app_name: appName, org_slug: this.orgSlug }),
      });
    } catch (err) {
      const body = err.body || err.message || '';
      if (
        err.status === 422 ||
        /already (exists|been taken|taken)/i.test(body) ||
        /name has already/i.test(body)
      ) {
        // App from a prior deploy — fall through and reuse.
        return;
      }
      throw err;
    }
  }

  /**
   * Allocate shared_v4 + v6 IPs if missing. Without this, the *.fly.dev
   * hostname has no DNS records and external requests can never reach the
   * app — the machine starts fine, health checks pass on Fly's internal
   * network, but autostart-on-request never fires because nothing routes in.
   *
   * The Machines REST API can list IPs, but allocation has historically
   * required GraphQL (see CLOUD_DEPLOY_GUIDE.md "GraphQL IP allocation").
   * Same Bearer token works for both endpoints.
   *
   * A 404 from the listing call is treated as "no IPs yet".
   *
   * @param {string} appName
   */
  async _ensureIps(appName) {
    let existing = [];
    try {
      existing = (await this._request(`/apps/${appName}/ips`)) || [];
    } catch (err) {
      if (err.status !== 404) throw err;
    }
    const hasV4 = existing.some((ip) =>
      ['shared_v4', 'v4'].includes(ip.type)
    );
    const hasV6 = existing.some((ip) => ip.type === 'v6');

    if (!hasV4) await this._allocateIp(appName, 'shared_v4');
    if (!hasV6) await this._allocateIp(appName, 'v6');
  }

  /**
   * Allocate one IP address of the given type through Fly's GraphQL API.
   *
   * @param {string} appName Sent as the mutation's `appId`.
   * @param {string} type IP type, e.g. 'shared_v4' or 'v6'.
   * @throws {Error} On a non-2xx response, or on a GraphQL error whose
   *   message does not contain "already".
   */
  async _allocateIp(appName, type) {
    const query = `
      mutation($input: AllocateIPAddressInput!) {
        allocateIpAddress(input: $input) {
          ipAddress { address type }
        }
      }
    `;
    const res = await fetch('https://api.fly.io/graphql', {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${this.apiToken}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        query,
        variables: { input: { appId: appName, type } },
      }),
    });
    const text = await res.text();
    if (!res.ok) {
      throw new Error(
        `Fly GraphQL ${res.status} allocateIpAddress(${type}): ${text}`
      );
    }
    const data = JSON.parse(text);
    if (data.errors && data.errors.length) {
      const msg = data.errors[0].message || '';
      // Race: another deploy raced us and already allocated this type.
      if (/already/i.test(msg)) return;
      throw new Error(`Fly GraphQL allocateIpAddress(${type}): ${msg}`);
    }
  }

  /**
   * Find the app's "data" volume, or create it when none exists.
   *
   * @param {string} appName
   * @param {string} region Region for a newly created volume (an existing
   *   volume is reused whatever region it is in).
   * @param {number} sizeGb Size for a newly created volume.
   * @returns {Promise<string>} Volume id.
   */
  async _ensureVolume(appName, region, sizeGb) {
    // The Fly Machines API does NOT enforce volume-name uniqueness — POSTing
    // a second volume with name "data" silently creates a second volume,
    // which orphans the first. List-first is the only correct approach.
    const volumes = await this._request(`/apps/${appName}/volumes`).catch(
      (err) => {
        if (err.status === 404) return [];
        throw err;
      }
    );
    const existing = (volumes || []).find(
      (v) => v.name === 'data' && v.state !== 'destroyed'
    );
    if (existing) return existing.id;

    const vol = await this._request(`/apps/${appName}/volumes`, {
      method: 'POST',
      body: JSON.stringify({ name: 'data', size_gb: sizeGb, region }),
    });
    return vol.id;
  }

  /**
   * Build the machine `config` payload sent on create and update.
   *
   * @param {Object} params
   * @param {Object} params.env Extra env vars; merged over `DOCKER_RUN: 'true'`.
   * @param {Array<{guestPath: string, contents: string|Buffer}>} params.configFiles
   *   Files to inject; contents are base64-encoded into `raw_value`.
   * @param {string} [params.volumeId] When truthy, mounted at /data.
   * @param {Object} params.guest Guest sizing passed through unchanged.
   * @returns {Object} Machine config object.
   */
  _buildMachineConfig({ env, configFiles, volumeId, guest }) {
    const files = configFiles.map((f) => ({
      guest_path: f.guestPath,
      raw_value: Buffer.from(f.contents).toString('base64'),
    }));

    const config = {
      image: this.image,
      env: { DOCKER_RUN: 'true', ...env },
      files,
      services: [
        {
          ports: [
            { port: 80, handlers: ['http'], force_https: true },
            { port: 443, handlers: ['tls', 'http'] },
          ],
          protocol: 'tcp',
          internal_port: BOT_INTERNAL_PORT,
          autostart: true,
          autostop: 'stop',
        },
      ],
      checks: {
        httpget: {
          type: 'http',
          port: BOT_INTERNAL_PORT,
          path: '/health',
          ...HEALTHCHECK_CONFIG,
        },
      },
      guest,
      restart: { policy: 'on-failure', max_retries: 3 },
    };

    // Always include mounts last so that any future merge operations on this
    // object don't clobber the volume attach (failure mode #5 in the
    // cloud-deploy guide).
    if (volumeId) {
      config.mounts = [{ volume: volumeId, path: '/data' }];
    }

    return config;
  }

  /**
   * Update the app's first machine if one exists, otherwise create one.
   *
   * @param {string} appName
   * @param {string} region
   * @param {Object} machineConfig Output of _buildMachineConfig.
   * @returns {Promise<Object>} On update, the machine record from the listing
   *   taken before the update; on create, the API's create response.
   */
  async _ensureMachine(appName, region, machineConfig) {
    let existing = [];
    try {
      existing = (await this._request(`/apps/${appName}/machines`)) || [];
    } catch (err) {
      if (err.status !== 404) throw err;
    }

    if (existing.length > 0) {
      const machine = existing[0];
      // Update path: re-send the full config including the mounts array.
      // Fly's machines API does not merge mounts — omitting them detaches
      // the volume.
      await this._request(`/apps/${appName}/machines/${machine.id}`, {
        method: 'POST',
        body: JSON.stringify({ region, config: machineConfig }),
      });
      return machine;
    }

    return this._request(`/apps/${appName}/machines`, {
      method: 'POST',
      body: JSON.stringify({ region, config: machineConfig }),
    });
  }

  /**
   * Block until the machine reports the `started` state or the wait times out.
   *
   * @param {string} appName
   * @param {string} machineId
   * @param {number} [timeoutSeconds=60] Passed as the endpoint's `timeout` query parameter.
   * @returns {Promise<any>} The wait endpoint's response; rejects on a non-2xx response.
   */
  async _waitForStart(appName, machineId, timeoutSeconds = 60) {
    return this._request(
      `/apps/${appName}/machines/${machineId}/wait?state=started&timeout=${timeoutSeconds}`
    );
  }

  /**
   * Stop all machines but keep volume + app. Reversible via resume().
   *
   * Best effort: errors from individual stop calls are ignored.
   *
   * @param {string} appName
   * @returns {Promise<{ok: true, paused: number}>} `paused` is the number of
   *   machines listed, not the number of stop calls that succeeded.
   */
  async pause(appName) {
    const machines = await this._listMachines(appName);
    for (const m of machines) {
      if (m.state !== 'stopped') {
        await this._request(`/apps/${appName}/machines/${m.id}/stop`, {
          method: 'POST',
        }).catch(() => {});
      }
    }
    return { ok: true, paused: machines.length };
  }

  /**
   * Start every machine currently in the `stopped` state.
   *
   * Best effort: errors from individual start calls are ignored.
   *
   * @param {string} appName
   * @returns {Promise<{ok: true, resumed: number}>} `resumed` is the number of
   *   machines listed, not the number of start calls that succeeded.
   */
  async resume(appName) {
    const machines = await this._listMachines(appName);
    for (const m of machines) {
      if (m.state === 'stopped') {
        await this._request(`/apps/${appName}/machines/${m.id}/start`, {
          method: 'POST',
        }).catch(() => {});
      }
    }
    return { ok: true, resumed: machines.length };
  }

  /**
   * Stop + delete every machine, then delete the app. App deletion cascades
   * to volume + IPs on Fly. Use `force=true` on machine delete to avoid
   * hangs when a machine is in a transitional state.
   *
   * Per-machine stop/delete errors are ignored; the app delete rethrows
   * anything other than a 404.
   *
   * @param {string} appName
   * @returns {Promise<{ok: true}>}
   */
  async destroy(appName) {
    const machines = await this._listMachines(appName);
    for (const m of machines) {
      await this._request(`/apps/${appName}/machines/${m.id}/stop`, {
        method: 'POST',
      }).catch(() => {});
      await this._request(`/apps/${appName}/machines/${m.id}?force=true`, {
        method: 'DELETE',
      }).catch(() => {});
    }
    await this._request(`/apps/${appName}`, { method: 'DELETE' }).catch(
      (err) => {
        if (err.status !== 404) throw err;
      }
    );
    return { ok: true };
  }

  /**
   * Summarize the app's machine states.
   *
   * @param {string} appName
   * @returns {Promise<Object>} `{ status: 'not_found' }` when the app is
   *   missing or has no machines; otherwise `{ status, url, machines, states }`
   *   where status is 'running' (all started), 'stopped' (all stopped) or 'mixed'.
   */
  async getStatus(appName) {
    let machines;
    try {
      machines = await this._listMachines(appName);
    } catch (err) {
      if (err.status === 404) return { status: 'not_found' };
      throw err;
    }
    if (!machines.length) return { status: 'not_found' };

    const states = machines.map((m) => m.state);
    const allStarted = states.every((s) => s === 'started');
    const allStopped = states.every((s) => s === 'stopped');
    const status = allStarted ? 'running' : allStopped ? 'stopped' : 'mixed';
    return {
      status,
      url: `https://${appName}.fly.dev`,
      machines: machines.length,
      states,
    };
  }

  /**
   * List the app's machines, mapping a 404 or empty body to an empty array.
   *
   * @param {string} appName
   * @returns {Promise<Array<Object>>}
   */
  async _listMachines(appName) {
    try {
      const machines = await this._request(`/apps/${appName}/machines`);
      return machines || [];
    } catch (err) {
      if (err.status === 404) return [];
      throw err;
    }
  }
}
@@ -0,0 +1,38 @@
1
+ import { DockerDeployer } from './docker.js';
2
+ import { FlyDeployer } from './fly.js';
3
+
4
// Process-wide DockerDeployer instance, created on first request.
let _deployer = null;

/**
 * Return the shared local deployment provider, constructing the
 * DockerDeployer the first time it is asked for.
 *
 * @returns {Promise<DockerDeployer>}
 */
export async function getDeploymentProvider() {
  if (_deployer) {
    return _deployer;
  }
  _deployer = new DockerDeployer();
  return _deployer;
}
10
+
11
/**
 * Forward a deploy request to the shared deployment provider.
 *
 * @param {Object} params Passed through to the provider's deploy() unchanged.
 * @returns {Promise<any>} Whatever the provider's deploy() resolves to.
 */
export async function deploy(params) {
  return (await getDeploymentProvider()).deploy(params);
}
15
+
16
/**
 * Forward a destroy request to the shared deployment provider.
 *
 * @param {string} appId Passed through to the provider's destroy() unchanged.
 * @returns {Promise<any>} Whatever the provider's destroy() resolves to.
 */
export async function destroy(appId) {
  return (await getDeploymentProvider()).destroy(appId);
}
20
+
21
/**
 * Build a fresh cloud deployer for the named provider on every call; nothing
 * is cached, the caller supplies the user's credentials each time.
 *
 * Only 'fly' is registered. Register additional providers here.
 *
 * @param {string} provider Provider identifier.
 * @param {Object} [credentials] `flyApiToken` / `flyOrgSlug`; when falsy they
 *   fall back to FLY_API_TOKEN / FLY_ORG_SLUG, and the org slug finally to 'personal'.
 * @returns {FlyDeployer}
 * @throws {Error} For any provider other than 'fly'.
 */
export function getCloudDeployer(provider, credentials = {}) {
  if (provider !== 'fly') {
    throw new Error(`Unknown cloud provider: ${provider}`);
  }

  const apiToken = credentials.flyApiToken || process.env.FLY_API_TOKEN;
  const orgSlug =
    credentials.flyOrgSlug || process.env.FLY_ORG_SLUG || 'personal';
  return new FlyDeployer({ apiToken, orgSlug });
}
37
+
38
// Provider identifiers exported for callers; getCloudDeployer() currently accepts only 'fly'.
export const CLOUD_PROVIDERS = ['fly'];
@@ -0,0 +1,36 @@
1
+ import crypto from 'crypto';
2
+
3
// Set once the fallback-key warning has been logged, so it appears at most
// once per process.
let warnedAboutFallbackKey = false;

/**
 * Derive the 32-byte AES key used to encrypt the stored LLM API key.
 *
 * When API_KEY_ENCRYPTION_KEY is set, the key is its SHA-256 digest.
 * Otherwise the key is the SHA-256 digest of a fixed string literal. That
 * fallback is NOT machine-specific or secret — it is identical on every
 * install — so it only keeps encrypt/decrypt round-trips working for local
 * development. A warning is logged once if the fallback is used while
 * NODE_ENV is 'production'.
 *
 * @returns {Buffer} 32-byte key.
 */
function getEncryptionKey() {
  const envKey = process.env.API_KEY_ENCRYPTION_KEY;
  if (envKey) {
    return crypto.createHash('sha256').update(envKey).digest();
  }
  if (process.env.NODE_ENV === 'production' && !warnedAboutFallbackKey) {
    warnedAboutFallbackKey = true;
    console.warn(
      'API_KEY_ENCRYPTION_KEY is not set; stored API keys are being encrypted with the ' +
        'built-in development key. Set API_KEY_ENCRYPTION_KEY in production.'
    );
  }
  return crypto.createHash('sha256').update('mojulo-lite-local-dev').digest();
}
13
+
14
// Cipher used for stored API keys.
const ALGO = 'aes-256-gcm';

/**
 * Encrypt an API key with AES-256-GCM under the key from getEncryptionKey().
 *
 * @param {string} plaintext Key to protect, encoded as UTF-8.
 * @returns {string} Base64 of: 12-byte random IV, then the auth tag, then the ciphertext.
 */
export function encryptApiKey(plaintext) {
  const nonce = crypto.randomBytes(12);
  const cipher = crypto.createCipheriv(ALGO, getEncryptionKey(), nonce);
  const head = cipher.update(plaintext, 'utf8');
  const tail = cipher.final();
  const authTag = cipher.getAuthTag();
  return Buffer.concat([nonce, authTag, head, tail]).toString('base64');
}
23
+
24
/**
 * Decrypt a value produced by encryptApiKey().
 *
 * Expected layout after base64-decoding: 12-byte IV, 16-byte GCM auth tag,
 * then the ciphertext.
 *
 * @param {string} encrypted Base64 payload.
 * @returns {string} The decrypted API key as UTF-8 text.
 * @throws {Error} If the payload is too short to hold an IV and a full tag,
 *   or if authentication fails (wrong key or tampered data).
 */
export function decryptApiKey(encrypted) {
  const ivLength = 12;
  const tagLength = 16;

  const data = Buffer.from(encrypted, 'base64');
  // Reject truncated payloads up front: otherwise the tag slice silently
  // shrinks and GCM would be asked to verify a weaker, shorter tag.
  if (data.length < ivLength + tagLength) {
    throw new Error('Encrypted API key is malformed: payload too short');
  }

  const iv = data.subarray(0, ivLength);
  const tag = data.subarray(ivLength, ivLength + tagLength);
  const ciphertext = data.subarray(ivLength + tagLength);

  // Pin the tag length so only full 16-byte tags are ever accepted.
  const decipher = crypto.createDecipheriv(ALGO, getEncryptionKey(), iv, {
    authTagLength: tagLength,
  });
  decipher.setAuthTag(tag);
  return Buffer.concat([decipher.update(ciphertext), decipher.final()]).toString('utf8');
}
33
+
34
/**
 * Create a new bot API key: the prefix "bot_" followed by 24 random bytes
 * rendered as 48 hex characters.
 *
 * @returns {string}
 */
export function generateApiKey() {
  const secret = crypto.randomBytes(24).toString('hex');
  return 'bot_' + secret;
}
@@ -0,0 +1,171 @@
1
+ /**
2
+ * Document Parser Utility
3
+ * Handles parsing of various document types
4
+ * - PDFs: Uses pdf2json (Node.js native, no browser APIs)
5
+ * - DOCX/PPTX/XLSX: Uses officeparser
6
+ */
7
+
8
+ const PDFParser = require('pdf2json');
9
+ const officeParser = require('officeparser');
10
+ const { writeFile, unlink } = require('fs/promises');
11
+ const { join } = require('path');
12
+ const { tmpdir } = require('os');
13
+
14
/**
 * Extract text from a PDF buffer with pdf2json.
 *
 * The parser's raw text content is used when it is non-blank. Otherwise the
 * text is rebuilt from the parsed page data: the first run of every text
 * item is URI-decoded (falling back to the undecoded value when decoding
 * fails) and the pieces are joined with single spaces.
 *
 * @param {Buffer} buffer - PDF file buffer
 * @returns {Promise<string>} Extracted text; rejects on a parser error
 */
async function parsePDF(buffer) {
  // Decode a single text item; keep the raw value if it is not valid percent-encoding.
  const readTextItem = (textItem) => {
    const encoded = textItem?.R?.[0]?.T || '';
    try {
      return encoded ? decodeURIComponent(encoded) : '';
    } catch (decodeError) {
      return encoded;
    }
  };

  return new Promise((resolve, reject) => {
    const pdfParser = new PDFParser();

    pdfParser.on('pdfParser_dataError', (errData) => {
      reject(new Error(errData.parserError));
    });

    pdfParser.on('pdfParser_dataReady', (pdfData) => {
      try {
        let text = pdfParser.getRawTextContent();

        console.log('PDF text length:', text ? text.length : 0);
        console.log('PDF text sample:', text ? text.substring(0, 200) : 'empty');

        const rawIsBlank = !text || text.trim().length === 0;
        if (rawIsBlank) {
          console.log('Trying alternative text extraction from pdfData');
          text = (pdfData?.Pages || [])
            .flatMap((page) => page?.Texts || [])
            .map(readTextItem)
            .filter(Boolean)
            .join(' ');
          console.log('Alternative extraction length:', text.length);
        }

        resolve(text);
      } catch (error) {
        reject(error);
      }
    });

    // Kick off parsing; results arrive through the handlers registered above.
    pdfParser.parseBuffer(buffer);
  });
}
75
+
76
/**
 * Parse Office documents using officeparser (requires file path).
 *
 * The buffer is written to a uniquely named temp file, parsed, and the temp
 * file is removed afterwards whether or not parsing succeeded.
 *
 * @param {Buffer} buffer - File buffer
 * @param {string} fileName - Original file name; only used to name the temp file
 * @returns {Promise<string>} Extracted text
 */
async function parseOfficeDocument(buffer, fileName) {
  // The caller-supplied name must never act as a path: strip separators and
  // anything else outside a conservative filename alphabet, and keep only the
  // tail so the extension survives on very long names.
  const safeName = String(fileName)
    .replace(/[^A-Za-z0-9._-]/g, '_')
    .slice(-100);
  // pid + monotonic nanosecond clock keep concurrent uploads of the same file
  // name from sharing (and deleting) each other's temp file.
  const uniqueTag = `${Date.now()}-${process.pid}-${process.hrtime.bigint()}`;
  const tempFilePath = join(tmpdir(), `temp-${uniqueTag}-${safeName}`);

  try {
    await writeFile(tempFilePath, buffer);
    const extractedText = await officeParser.parseOfficeAsync(tempFilePath);
    return extractedText;
  } finally {
    try {
      await unlink(tempFilePath);
    } catch (unlinkError) {
      console.warn(`Failed to delete temp file ${tempFilePath}:`, unlinkError);
    }
  }
}
97
+
98
/**
 * Undo character-positioned PDF extraction, where every character comes out
 * separated by whitespace ("P l a n d ' a s s u r" instead of
 * "Plan d'assurance").
 *
 * Detection: the first 4000 characters are split on whitespace. If that
 * yields fewer than 20 tokens, or fewer than half of the tokens are a single
 * character long, the text is returned untouched.
 *
 * Repair: the whole text is cut at every run of 3+ spaces/tabs and at every
 * newline run — those are taken as real word breaks. All remaining
 * whitespace inside each piece is removed, empty pieces are dropped, and the
 * pieces are joined with single spaces.
 *
 * Known limitation: letters that are themselves separated by 3+ spaces
 * (e.g. a widely tracked "L   L   C") stay separate words.
 *
 * @param {string} text Extracted document text.
 * @returns {string} The original text, or the repaired text when the
 *   heuristic fires.
 */
function normalizeCharacterSpacing(text) {
  const tokens = text.slice(0, 4000).split(/\s+/).filter(Boolean);
  if (tokens.length < 20) return text;

  let singleCharCount = 0;
  for (const token of tokens) {
    if (token.length === 1) singleCharCount += 1;
  }
  const singleCharRatio = singleCharCount / tokens.length;
  if (singleCharRatio < 0.5) return text;

  console.log(
    `Detected character-positioned PDF text (${(singleCharRatio * 100).toFixed(0)}% single-char tokens) — normalizing.`
  );

  const words = [];
  for (const chunk of text.split(/[ \t]{3,}|\r?\n+/)) {
    const word = chunk.replace(/\s+/g, '');
    if (word) words.push(word);
  }
  return words.join(' ');
}
138
+
139
/**
 * Extract text from an uploaded document, choosing the parser by file
 * extension: ".pdf" goes to parsePDF (parsed straight from the buffer),
 * everything else goes to parseOfficeDocument. The extracted text is then
 * passed through normalizeCharacterSpacing.
 *
 * @param {Buffer} buffer - File buffer
 * @param {string} fileName - Original file name; its extension selects the parser
 * @returns {Promise<string>} Extracted text
 * @throws {Error} "Failed to parse <fileName>: <reason>" when parsing fails or
 *   yields only whitespace; the underlying error is attached as `cause`.
 */
async function parseDocument(buffer, fileName) {
  try {
    const fileExtension = fileName.toLowerCase().split('.').pop();

    let extractedText;

    // PDFs are parsed in-process from the buffer.
    if (fileExtension === 'pdf') {
      extractedText = await parsePDF(buffer);
    }
    // Use officeparser for other document types
    else {
      extractedText = await parseOfficeDocument(buffer, fileName);
    }

    if (!extractedText || extractedText.trim().length === 0) {
      throw new Error(`No text extracted from ${fileName}`);
    }

    return normalizeCharacterSpacing(extractedText);
  } catch (error) {
    console.error(`Error parsing ${fileName}:`, error);
    // Keep the original error (and its stack) reachable for callers and logs.
    throw new Error(`Failed to parse ${fileName}: ${error.message}`, { cause: error });
  }
}
170
+
171
+ module.exports = { parseDocument };