mojulo 0.0.0 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +53 -4
- package/lib/audit-logger-new.js +11 -0
- package/lib/auth/gate.js +25 -0
- package/lib/auth/service.js +17 -0
- package/lib/auth/session.js +63 -0
- package/lib/builder/chat-processor.js +607 -0
- package/lib/builder/composer-bridge.js +82 -0
- package/lib/builder/evaluator.js +159 -0
- package/lib/builder/executor.js +252 -0
- package/lib/builder/index.js +48 -0
- package/lib/builder/session.js +248 -0
- package/lib/builder/system-prompt.js +422 -0
- package/lib/builder/tone-presets.js +75 -0
- package/lib/builder/tool-executors.js +1418 -0
- package/lib/builder/tools.js +338 -0
- package/lib/builder/validators.js +239 -0
- package/lib/composer/composer.js +225 -0
- package/lib/composer/index.js +40 -0
- package/lib/composer/protocols/00_base.txt +19 -0
- package/lib/composer/protocols/01_knowledge.txt +9 -0
- package/lib/composer/protocols/02_form-gathering.txt +32 -0
- package/lib/composer/protocols/03_appointments.txt +16 -0
- package/lib/composer/protocols/04_triage.txt +15 -0
- package/lib/composer/protocols/05_optical-read.txt +22 -0
- package/lib/composer/response-builder.js +98 -0
- package/lib/config-builder.js +650 -0
- package/lib/db/ids.js +10 -0
- package/lib/db/index.js +179 -0
- package/lib/db/repositories/apiKeys.js +72 -0
- package/lib/db/repositories/auditLogs.js +12 -0
- package/lib/db/repositories/botSpaces.js +12 -0
- package/lib/db/repositories/builderSessions.js +312 -0
- package/lib/db/repositories/deploymentEvents.js +12 -0
- package/lib/db/repositories/deployments.js +385 -0
- package/lib/db/repositories/documents.js +68 -0
- package/lib/db/repositories/mcpJobs.js +84 -0
- package/lib/deployers/bot-fleet.js +110 -0
- package/lib/deployers/bot-proxy.js +72 -0
- package/lib/deployers/build.js +89 -0
- package/lib/deployers/cloud-deploy.js +310 -0
- package/lib/deployers/docker.js +439 -0
- package/lib/deployers/fly.js +432 -0
- package/lib/deployers/index.js +38 -0
- package/lib/deployment-auth.js +36 -0
- package/lib/document-parser.js +171 -0
- package/lib/embedder/chunker.js +93 -0
- package/lib/embedder/local.js +101 -0
- package/lib/embedder/preview-rag.js +93 -0
- package/lib/envelope-schema.js +54 -0
- package/lib/fleet/scoped-sql.js +342 -0
- package/lib/form-schema-config/base.js +135 -0
- package/lib/form-schema-config/index.js +286 -0
- package/lib/form-schema-config/locales/af-ZA.js +153 -0
- package/lib/form-schema-config/locales/ar-AE.js +142 -0
- package/lib/form-schema-config/locales/ar-SA.js +164 -0
- package/lib/form-schema-config/locales/de-DE.js +152 -0
- package/lib/form-schema-config/locales/en-AU.js +161 -0
- package/lib/form-schema-config/locales/en-CA.js +115 -0
- package/lib/form-schema-config/locales/en-GB.js +132 -0
- package/lib/form-schema-config/locales/en-IN.js +219 -0
- package/lib/form-schema-config/locales/en-MY.js +171 -0
- package/lib/form-schema-config/locales/en-NG.js +198 -0
- package/lib/form-schema-config/locales/en-PH.js +186 -0
- package/lib/form-schema-config/locales/en-SG.js +153 -0
- package/lib/form-schema-config/locales/en-US.js +138 -0
- package/lib/form-schema-config/locales/es-ES.js +171 -0
- package/lib/form-schema-config/locales/es-MX.js +193 -0
- package/lib/form-schema-config/locales/fr-CA.js +138 -0
- package/lib/form-schema-config/locales/fr-FR.js +155 -0
- package/lib/form-schema-config/locales/hi-IN.js +219 -0
- package/lib/form-schema-config/locales/it-IT.js +157 -0
- package/lib/form-schema-config/locales/ja-JP.js +169 -0
- package/lib/form-schema-config/locales/ko-KR.js +140 -0
- package/lib/form-schema-config/locales/nl-NL.js +149 -0
- package/lib/form-schema-config/locales/pt-BR.js +168 -0
- package/lib/form-schema-config/locales/zh-CN.js +172 -0
- package/lib/form-schema-config/locales/zh-HK.js +142 -0
- package/lib/form-structure-schema.js +191 -0
- package/lib/llm-providers.js +828 -0
- package/lib/markdown.js +197 -0
- package/lib/mcp/catalysts/appointment-to-calendar.md +84 -0
- package/lib/mcp/catalysts/conversations-to-channel-digest.md +104 -0
- package/lib/mcp/catalysts/document-extract-to-store.md +92 -0
- package/lib/mcp/catalysts/knowledge-gap-miner.md +96 -0
- package/lib/mcp/catalysts/loader.js +144 -0
- package/lib/mcp/catalysts/qualify-lead-to-crm.md +83 -0
- package/lib/mcp/catalysts/scan-conversations-for-signal.md +92 -0
- package/lib/mcp/catalysts/submission-to-ticket.md +83 -0
- package/lib/mcp/catalysts/submissions-to-warehouse.md +103 -0
- package/lib/mcp/catalysts/weekly-submissions-digest.md +82 -0
- package/lib/mcp/jobs.js +64 -0
- package/lib/mcp/server.js +184 -0
- package/lib/mcp/session-binding.js +130 -0
- package/lib/mcp/tools/build.js +123 -0
- package/lib/mcp/tools/catalysts.js +477 -0
- package/lib/mcp/tools/context.js +325 -0
- package/lib/mcp/tools/fleet.js +391 -0
- package/lib/mcp/tools/jobs-tools.js +240 -0
- package/lib/mcp/tools/operate.js +314 -0
- package/lib/preview/build-preview-config.js +136 -0
- package/lib/rate-limiter.js +11 -0
- package/lib/resolve-api-key.js +142 -0
- package/lib/storage/index.js +40 -0
- package/messages/de.json +2136 -0
- package/messages/en.json +2136 -0
- package/messages/es.json +2136 -0
- package/messages/fr.json +2136 -0
- package/messages/it.json +2136 -0
- package/messages/ja.json +2136 -0
- package/messages/ko.json +2136 -0
- package/messages/nl.json +2136 -0
- package/messages/pl.json +2136 -0
- package/messages/pt.json +2136 -0
- package/messages/ru.json +2136 -0
- package/messages/uk.json +2136 -0
- package/messages/zh.json +2136 -0
- package/package.json +61 -5
- package/scripts/mcp-config.mjs +162 -0
- package/scripts/mcp-stdio-loader.mjs +42 -0
- package/scripts/mcp-stdio.mjs +108 -0
- package/scripts/mojulo-paths.mjs +48 -0
|
@@ -0,0 +1,432 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fly.io Machines API deployer.
|
|
3
|
+
*
|
|
4
|
+
* Deploys the published GHCR bot image (ghcr.io/zombico/mojulo-bot) to a
|
|
5
|
+
* user-owned Fly app using their API token. Per-bot config is injected via
|
|
6
|
+
* the Machines API `files` field — the image stays bot-agnostic.
|
|
7
|
+
*
|
|
8
|
+
* Patterns enforced (see lite-template/integration/CLOUD_DEPLOY_GUIDE.md):
|
|
9
|
+
* - Pattern 1: One image, config injected per machine via base64 files[].
|
|
10
|
+
* - Pattern 2: Volume named "data", find-or-create idempotent, always
|
|
11
|
+
* re-attached on update.
|
|
12
|
+
* - Pattern 3: Deterministic app name = ${md5(userId).slice(0,8)}-${botName}.
|
|
13
|
+
* - Pattern 4: Lifecycle ops are thin platform mappings.
|
|
14
|
+
* - Pattern 5: deploy() accepts an onProgress callback for audit-trail
|
|
15
|
+
* events; the lifecycle wrapper persists them.
|
|
16
|
+
*
|
|
17
|
+
* Credentials are passed by constructor parameter — never read globally —
|
|
18
|
+
* so future per-user token storage drops in without changing this file.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import crypto from 'crypto';
|
|
22
|
+
|
|
23
|
+
const FLY_API_BASE = 'https://api.machines.dev/v1';

const DEFAULT_GUEST = { cpu_kind: 'shared', cpus: 1, memory_mb: 1024 };
const DEFAULT_REGION = 'iad';
const DEFAULT_VOLUME_GB = 1;

// Fly's max app-name length; computeAppName truncates to this.
const FLY_APP_NAME_MAX_LENGTH = 63;
// Port the bot's Express server listens on inside the container (matches lite-template/server.js).
const BOT_INTERNAL_PORT = 3000;
// Healthcheck cadence injected into the Fly machine config.
const HEALTHCHECK_CONFIG = {
  interval: '15s',
  timeout: '5s',
  grace_period: '20s',
};

/**
 * Deployer that provisions and manages a bot on Fly.io through the Machines
 * REST API (plus one GraphQL mutation for IP allocation).
 *
 * All credentials arrive through the constructor; the only environment
 * variables read here are the image fallbacks MOJULO_CLOUD_IMAGE / BOT_IMAGE.
 */
export class FlyDeployer {
  /**
   * @param {Object} [options]
   * @param {string} options.apiToken            Fly API token, sent as a Bearer token on every request.
   * @param {string} [options.orgSlug]           Fly organisation slug used when creating the app.
   * @param {string} [options.image]             Image reference; falls back to MOJULO_CLOUD_IMAGE, then BOT_IMAGE.
   * @param {string} [options.defaultRegion]     Region used when deploy() is not given one.
   * @param {Object} [options.defaultGuest]      Guest sizing used when deploy() is not given one.
   * @param {number} [options.defaultVolumeGb]   Volume size used when deploy() is not given one.
   * @throws {Error} When apiToken is missing, or the resolved image is empty or contains no "/".
   */
  constructor({
    apiToken,
    orgSlug = 'personal',
    image,
    defaultRegion = DEFAULT_REGION,
    defaultGuest = DEFAULT_GUEST,
    defaultVolumeGb = DEFAULT_VOLUME_GB,
  } = {}) {
    if (!apiToken) {
      throw new Error('FlyDeployer requires apiToken');
    }
    this.apiToken = apiToken;
    this.orgSlug = orgSlug;
    // Cloud always wants a public registry pin. In the common case BOT_IMAGE
    // already points at GHCR, so fall back to it. Set MOJULO_CLOUD_IMAGE only
    // when the cloud tag needs to diverge from the local docker-compose tag
    // (e.g. BOT_IMAGE is a laptop tag like `mojulo/bot:latest`). The registry
    // check below catches that case regardless of which env var supplied it.
    this.image =
      image ||
      process.env.MOJULO_CLOUD_IMAGE ||
      process.env.BOT_IMAGE;
    if (!this.image || !this.image.includes('/')) {
      throw new Error(
        `Cloud image "${this.image}" has no registry prefix; Fly will route it through Docker Hub. ` +
          `Set MOJULO_CLOUD_IMAGE or BOT_IMAGE to a fully-qualified image like ghcr.io/owner/name:tag.`
      );
    }
    this.defaultRegion = defaultRegion;
    this.defaultGuest = defaultGuest;
    this.defaultVolumeGb = defaultVolumeGb;
  }

  /**
   * Deterministic app name from user identity + bot name. Same inputs always
   * yield the same name, which makes deploys self-healing: lose the
   * control-plane row, redeploy, and you find the existing app + volume.
   *
   * The name is `<first 8 hex chars of md5(userId)>-<botName>`, lower-cased,
   * with every character outside [a-z0-9-] replaced by "-", hyphen runs
   * collapsed, edge hyphens stripped, and the result cut to
   * FLY_APP_NAME_MAX_LENGTH characters.
   *
   * @param {Object} params
   * @param {string|number} [params.userId='local']
   * @param {string} params.botName
   * @returns {string}
   * @throws {Error} When botName is falsy.
   */
  static computeAppName({ userId = 'local', botName }) {
    if (!botName) throw new Error('computeAppName requires botName');
    const userHash = crypto
      .createHash('md5')
      .update(String(userId))
      .digest('hex')
      .substring(0, 8);
    const truncated = `${userHash}-${botName}`
      .toLowerCase()
      .replace(/[^a-z0-9-]/g, '-')
      .replace(/-+/g, '-')
      .replace(/(^-|-$)/g, '')
      .slice(0, FLY_APP_NAME_MAX_LENGTH);
    // The cut can land right after a separator; strip it again so a long bot
    // name never produces an app name ending in "-". Hyphen runs are already
    // collapsed, so at most one hyphen can trail.
    return truncated.replace(/-$/, '');
  }

  /**
   * Issue an authenticated JSON request against the Machines API.
   *
   * @param {string} pathSuffix  Path appended to FLY_API_BASE (starts with "/").
   * @param {Object} [options]   fetch() options; caller headers override the defaults.
   * @returns {Promise<any>} Parsed JSON body, or null when the body is empty.
   * @throws {Error} On a non-2xx response; the error carries `status` and `body`.
   */
  async _request(pathSuffix, options = {}) {
    const res = await fetch(`${FLY_API_BASE}${pathSuffix}`, {
      ...options,
      headers: {
        Authorization: `Bearer ${this.apiToken}`,
        'Content-Type': 'application/json',
        ...(options.headers || {}),
      },
    });
    const text = await res.text();
    if (!res.ok) {
      const err = new Error(`Fly API ${res.status} ${pathSuffix}: ${text}`);
      err.status = res.status;
      err.body = text;
      throw err;
    }
    return text ? JSON.parse(text) : null;
  }

  /**
   * Provision (or update) a bot. Idempotent against an existing app + volume
   * keyed by `appName`.
   *
   * Steps, each announced through onProgress: ensure app, ensure IPs, ensure
   * volume, build machine config, create/update machine, wait for start.
   *
   * @param {Object} params
   * @param {string} params.appName       Deterministic app name (use computeAppName)
   * @param {Array}  params.configFiles   [{ guestPath, contents }] — written into the container at machine create
   * @param {Object} params.env           Env vars for the bot process (LLM key, MOJULO_API_KEY, DOCKER_RUN, etc.)
   * @param {string} [params.region]
   * @param {Object} [params.guest]       cpu_kind, cpus, memory_mb
   * @param {number} [params.volumeGb]
   * @param {Function} [params.onProgress] called with { step, message } per lifecycle event
   * @returns {Promise<{appName: string, url: string, machineId: string, volumeId: string}>}
   * @throws {Error} When appName is missing or any non-tolerated Fly API call fails.
   */
  async deploy(params) {
    const {
      appName,
      configFiles = [],
      env = {},
      region = this.defaultRegion,
      guest = this.defaultGuest,
      volumeGb = this.defaultVolumeGb,
      onProgress = () => {},
    } = params;

    if (!appName) throw new Error('deploy requires appName');

    onProgress({ step: 'app', message: `Ensuring Fly app ${appName} exists` });
    await this._ensureApp(appName);

    onProgress({ step: 'ips', message: `Ensuring public IPs for ${appName}` });
    await this._ensureIps(appName);

    onProgress({ step: 'volume', message: `Ensuring data volume in ${region}` });
    const volumeId = await this._ensureVolume(appName, region, volumeGb);

    onProgress({ step: 'machine_config', message: 'Building machine config' });
    const machineConfig = this._buildMachineConfig({
      env,
      configFiles,
      volumeId,
      guest,
    });

    onProgress({ step: 'machine', message: 'Creating or updating machine' });
    const machine = await this._ensureMachine(appName, region, machineConfig);

    onProgress({ step: 'wait', message: 'Waiting for machine to start' });
    await this._waitForStart(appName, machine.id).catch((err) => {
      // Healthcheck/start timeout is informational, not fatal — the machine
      // may still come healthy moments later. Surface as a progress event
      // and let the caller poll status.
      onProgress({ step: 'wait_timeout', message: err.message });
    });

    const url = `https://${appName}.fly.dev`;
    onProgress({ step: 'complete', message: `Deployed at ${url}` });

    return { appName, url, machineId: machine.id, volumeId };
  }

  /**
   * Create the Fly app, treating "already exists" responses as success.
   *
   * NOTE(review): every HTTP 422 is treated as "app already exists". If Fly
   * also uses 422 for other validation failures (e.g. an invalid name), those
   * are swallowed here and surface later in the deploy — confirm against the
   * Machines API.
   *
   * @param {string} appName
   * @returns {Promise<void>}
   */
  async _ensureApp(appName) {
    try {
      await this._request('/apps', {
        method: 'POST',
        body: JSON.stringify({ app_name: appName, org_slug: this.orgSlug }),
      });
    } catch (err) {
      const body = err.body || err.message || '';
      if (
        err.status === 422 ||
        /already (exists|been taken|taken)/i.test(body) ||
        /name has already/i.test(body)
      ) {
        // App from a prior deploy — fall through and reuse.
        return;
      }
      throw err;
    }
  }

  /**
   * Allocate shared_v4 + v6 IPs if missing. Without this, the *.fly.dev
   * hostname has no DNS records and external requests can never reach the
   * app — the machine starts fine, health checks pass on Fly's internal
   * network, but autostart-on-request never fires because nothing routes in.
   *
   * The Machines REST API can list IPs, but allocation has historically
   * required GraphQL (see CLOUD_DEPLOY_GUIDE.md "GraphQL IP allocation").
   * Same Bearer token works for both endpoints.
   *
   * A 404 from the list call is treated as "no IPs yet".
   *
   * @param {string} appName
   * @returns {Promise<void>}
   */
  async _ensureIps(appName) {
    let existing = [];
    try {
      existing = (await this._request(`/apps/${appName}/ips`)) || [];
    } catch (err) {
      if (err.status !== 404) throw err;
    }
    const hasV4 = existing.some((ip) =>
      ['shared_v4', 'v4'].includes(ip.type)
    );
    const hasV6 = existing.some((ip) => ip.type === 'v6');

    if (!hasV4) await this._allocateIp(appName, 'shared_v4');
    if (!hasV6) await this._allocateIp(appName, 'v6');
  }

  /**
   * Allocate one IP of the given type through Fly's GraphQL endpoint.
   *
   * @param {string} appName
   * @param {string} type  IP type passed to the mutation (e.g. 'shared_v4', 'v6').
   * @returns {Promise<void>}
   * @throws {Error} On a non-2xx response or a GraphQL error whose message does not contain "already".
   */
  async _allocateIp(appName, type) {
    const query = `
      mutation($input: AllocateIPAddressInput!) {
        allocateIpAddress(input: $input) {
          ipAddress { address type }
        }
      }
    `;
    const res = await fetch('https://api.fly.io/graphql', {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${this.apiToken}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        query,
        variables: { input: { appId: appName, type } },
      }),
    });
    const text = await res.text();
    if (!res.ok) {
      throw new Error(
        `Fly GraphQL ${res.status} allocateIpAddress(${type}): ${text}`
      );
    }
    const data = JSON.parse(text);
    if (data.errors && data.errors.length) {
      const msg = data.errors[0].message || '';
      // Race: another deploy raced us and already allocated this type.
      if (/already/i.test(msg)) return;
      throw new Error(`Fly GraphQL allocateIpAddress(${type}): ${msg}`);
    }
  }

  /**
   * Find the app's "data" volume, creating it when none exists.
   *
   * NOTE(review): an existing volume is reused whatever region it lives in;
   * `region` is only applied when a new volume is created — confirm this is
   * intended when a redeploy changes region.
   *
   * @param {string} appName
   * @param {string} region
   * @param {number} sizeGb
   * @returns {Promise<string>} Volume id.
   */
  async _ensureVolume(appName, region, sizeGb) {
    // The Fly Machines API does NOT enforce volume-name uniqueness — POSTing
    // a second volume with name "data" silently creates a second volume,
    // which orphans the first. List-first is the only correct approach.
    const volumes = await this._request(`/apps/${appName}/volumes`).catch(
      (err) => {
        if (err.status === 404) return [];
        throw err;
      }
    );
    const existing = (volumes || []).find(
      (v) => v.name === 'data' && v.state !== 'destroyed'
    );
    if (existing) return existing.id;

    const vol = await this._request(`/apps/${appName}/volumes`, {
      method: 'POST',
      body: JSON.stringify({ name: 'data', size_gb: sizeGb, region }),
    });
    return vol.id;
  }

  /**
   * Build the machine config payload sent to the Machines API.
   *
   * @param {Object} params
   * @param {Object} params.env          Extra env vars; merged over the default DOCKER_RUN='true'.
   * @param {Array}  params.configFiles  [{ guestPath, contents }]; contents are base64-encoded into files[].raw_value.
   * @param {string} [params.volumeId]   When set, mounted at /data.
   * @param {Object} params.guest        Guest sizing, passed through unchanged.
   * @returns {Object} Machine config object.
   */
  _buildMachineConfig({ env, configFiles, volumeId, guest }) {
    const files = configFiles.map((f) => ({
      guest_path: f.guestPath,
      raw_value: Buffer.from(f.contents).toString('base64'),
    }));

    const config = {
      image: this.image,
      env: { DOCKER_RUN: 'true', ...env },
      files,
      services: [
        {
          ports: [
            { port: 80, handlers: ['http'], force_https: true },
            { port: 443, handlers: ['tls', 'http'] },
          ],
          protocol: 'tcp',
          internal_port: BOT_INTERNAL_PORT,
          autostart: true,
          autostop: 'stop',
        },
      ],
      checks: {
        httpget: {
          type: 'http',
          port: BOT_INTERNAL_PORT,
          path: '/health',
          ...HEALTHCHECK_CONFIG,
        },
      },
      guest,
      restart: { policy: 'on-failure', max_retries: 3 },
    };

    // Always include mounts last so that any future merge operations on this
    // object don't clobber the volume attach (failure mode #5 in the
    // cloud-deploy guide).
    if (volumeId) {
      config.mounts = [{ volume: volumeId, path: '/data' }];
    }

    return config;
  }

  /**
   * Update the app's first listed machine, or create one when none exists.
   *
   * On the update path the machine object from the listing is returned (not
   * the update response); callers in this class only read its `id`.
   *
   * @param {string} appName
   * @param {string} region
   * @param {Object} machineConfig  Output of _buildMachineConfig.
   * @returns {Promise<Object>} Machine object carrying at least `id`.
   */
  async _ensureMachine(appName, region, machineConfig) {
    // _listMachines already maps a 404 (app not visible yet) to an empty list.
    const existing = await this._listMachines(appName);

    if (existing.length > 0) {
      const machine = existing[0];
      // Update path: re-send the full config including the mounts array.
      // Fly's machines API does not merge mounts — omitting them detaches
      // the volume.
      await this._request(`/apps/${appName}/machines/${machine.id}`, {
        method: 'POST',
        body: JSON.stringify({ region, config: machineConfig }),
      });
      return machine;
    }

    return this._request(`/apps/${appName}/machines`, {
      method: 'POST',
      body: JSON.stringify({ region, config: machineConfig }),
    });
  }

  /**
   * Call the machine `wait` endpoint for state=started.
   *
   * @param {string} appName
   * @param {string} machineId
   * @param {number} [timeoutSeconds=60]  Passed as the `timeout` query parameter.
   * @returns {Promise<any>} Parsed response body.
   */
  async _waitForStart(appName, machineId, timeoutSeconds = 60) {
    return this._request(
      `/apps/${appName}/machines/${machineId}/wait?state=started&timeout=${timeoutSeconds}`
    );
  }

  /**
   * Stop all machines but keep volume + app. Reversible via resume().
   *
   * Best-effort: individual stop failures are ignored. `paused` is the number
   * of machines listed for the app, not the number of stop calls that succeeded.
   *
   * @param {string} appName
   * @returns {Promise<{ok: boolean, paused: number}>}
   */
  async pause(appName) {
    const machines = await this._listMachines(appName);
    for (const m of machines) {
      if (m.state !== 'stopped') {
        await this._request(`/apps/${appName}/machines/${m.id}/stop`, {
          method: 'POST',
        }).catch(() => {});
      }
    }
    return { ok: true, paused: machines.length };
  }

  /**
   * Start every machine currently in the 'stopped' state.
   *
   * Best-effort: individual start failures are ignored. `resumed` is the
   * number of machines listed for the app, not the number actually started.
   *
   * @param {string} appName
   * @returns {Promise<{ok: boolean, resumed: number}>}
   */
  async resume(appName) {
    const machines = await this._listMachines(appName);
    for (const m of machines) {
      if (m.state === 'stopped') {
        await this._request(`/apps/${appName}/machines/${m.id}/start`, {
          method: 'POST',
        }).catch(() => {});
      }
    }
    return { ok: true, resumed: machines.length };
  }

  /**
   * Stop + delete every machine, then delete the app. App deletion cascades
   * to volume + IPs on Fly (per the original author's note — not verifiable
   * from this file). Use `force=true` on machine delete to avoid
   * hangs when a machine is in a transitional state.
   *
   * Machine stop/delete failures are ignored; a 404 on app delete is treated
   * as already destroyed. Any other app-delete failure is thrown.
   *
   * @param {string} appName
   * @returns {Promise<{ok: boolean}>}
   */
  async destroy(appName) {
    const machines = await this._listMachines(appName);
    for (const m of machines) {
      await this._request(`/apps/${appName}/machines/${m.id}/stop`, {
        method: 'POST',
      }).catch(() => {});
      await this._request(`/apps/${appName}/machines/${m.id}?force=true`, {
        method: 'DELETE',
      }).catch(() => {});
    }
    await this._request(`/apps/${appName}`, { method: 'DELETE' }).catch(
      (err) => {
        if (err.status !== 404) throw err;
      }
    );
    return { ok: true };
  }

  /**
   * Summarise the state of the app's machines.
   *
   * @param {string} appName
   * @returns {Promise<Object>} `{ status: 'not_found' }` when the app has no
   *   machines (or does not exist); otherwise `{ status, url, machines, states }`
   *   where status is 'running' (all started), 'stopped' (all stopped) or 'mixed'.
   */
  async getStatus(appName) {
    // _listMachines converts a 404 into [], so a missing app and an app with
    // no machines both land on the not_found branch below.
    const machines = await this._listMachines(appName);
    if (!machines.length) return { status: 'not_found' };

    const states = machines.map((m) => m.state);
    const allStarted = states.every((s) => s === 'started');
    const allStopped = states.every((s) => s === 'stopped');
    const status = allStarted ? 'running' : allStopped ? 'stopped' : 'mixed';
    return {
      status,
      url: `https://${appName}.fly.dev`,
      machines: machines.length,
      states,
    };
  }

  /**
   * List the app's machines; a 404 (unknown app) yields an empty list.
   *
   * @param {string} appName
   * @returns {Promise<Array>}
   */
  async _listMachines(appName) {
    try {
      const machines = await this._request(`/apps/${appName}/machines`);
      return machines || [];
    } catch (err) {
      if (err.status === 404) return [];
      throw err;
    }
  }
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { DockerDeployer } from './docker.js';
|
|
2
|
+
import { FlyDeployer } from './fly.js';
|
|
3
|
+
|
|
4
|
+
// Module-level cache: the DockerDeployer is built once and reused.
let _deployer = null;

/**
 * Return the shared deployment provider, constructing a DockerDeployer on
 * first use and handing back the same instance on every later call.
 *
 * @returns {Promise<DockerDeployer>}
 */
export async function getDeploymentProvider() {
  if (_deployer) {
    return _deployer;
  }
  _deployer = new DockerDeployer();
  return _deployer;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Forward a deploy request to the shared deployment provider.
 *
 * @param {Object} params  Passed through unchanged to the provider's deploy().
 * @returns {Promise<any>} Whatever the provider's deploy() resolves to.
 */
export async function deploy(params) {
  const activeProvider = await getDeploymentProvider();
  const outcome = activeProvider.deploy(params);
  return outcome;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Forward a destroy request to the shared deployment provider.
 *
 * @param {string} appId  Identifier passed through unchanged to the provider's destroy().
 * @returns {Promise<any>} Whatever the provider's destroy() resolves to.
 */
export async function destroy(appId) {
  const activeProvider = await getDeploymentProvider();
  const outcome = activeProvider.destroy(appId);
  return outcome;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Construct a cloud deployer instance per-call. Cloud deployers are
 * stateless aside from credentials, so there's no caching layer — the
 * caller passes the user's token in.
 *
 * Currently registered: 'fly'. Add new providers here.
 *
 * For 'fly', each credential falls back to an environment variable when it
 * is missing or empty: flyApiToken → FLY_API_TOKEN, flyOrgSlug →
 * FLY_ORG_SLUG → 'personal'.
 *
 * @param {string} provider                  Provider key; only 'fly' is recognised.
 * @param {Object|null} [credentials]        Per-user credentials; null/undefined means "use env only".
 * @param {string} [credentials.flyApiToken]
 * @param {string} [credentials.flyOrgSlug]
 * @returns {FlyDeployer}
 * @throws {Error} When the provider is unknown (FlyDeployer itself throws when no token resolves).
 */
export function getCloudDeployer(provider, credentials = {}) {
  if (provider === 'fly') {
    // The default parameter only covers `undefined`; an explicit null (e.g.
    // "no stored credentials") must still reach the env-var fallbacks
    // instead of crashing on a property read.
    const { flyApiToken, flyOrgSlug } = credentials ?? {};
    return new FlyDeployer({
      apiToken: flyApiToken || process.env.FLY_API_TOKEN,
      orgSlug: flyOrgSlug || process.env.FLY_ORG_SLUG || 'personal',
    });
  }
  throw new Error(`Unknown cloud provider: ${provider}`);
}

export const CLOUD_PROVIDERS = ['fly'];
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import crypto from 'crypto';
|
|
2
|
+
|
|
3
|
+
// Lite encrypts the stored LLM API key with a 32-byte key obtained by
// SHA-256 hashing a secret string. Production deployments should set
// API_KEY_ENCRYPTION_KEY; when it is unset or empty, the fixed literal
// 'mojulo-lite-local-dev' is hashed instead so round-trips keep working for
// local dev. That fallback is a constant baked into the source, not a
// host-specific value.
/**
 * Derive the AES key used by encryptApiKey / decryptApiKey.
 *
 * @returns {Buffer} 32-byte SHA-256 digest of the configured secret.
 */
function getEncryptionKey() {
  const secret = process.env.API_KEY_ENCRYPTION_KEY || 'mojulo-lite-local-dev';
  const hasher = crypto.createHash('sha256');
  hasher.update(secret);
  return hasher.digest();
}

const ALGO = 'aes-256-gcm';
|
|
15
|
+
|
|
16
|
+
/**
 * Encrypt an API key with ALGO using the key from getEncryptionKey().
 *
 * Output layout (before base64): 12-byte random IV, then the auth tag,
 * then the ciphertext.
 *
 * @param {string} plaintext  Value to encrypt; encoded as UTF-8.
 * @returns {string} Base64 string of IV + auth tag + ciphertext.
 */
export function encryptApiKey(plaintext) {
  const nonce = crypto.randomBytes(12);
  const cipher = crypto.createCipheriv(ALGO, getEncryptionKey(), nonce);
  const head = cipher.update(plaintext, 'utf8');
  const tail = cipher.final();
  // The tag is only available once final() has run.
  const authTag = cipher.getAuthTag();
  return Buffer.concat([nonce, authTag, head, tail]).toString('base64');
}
|
|
23
|
+
|
|
24
|
+
/**
 * Decrypt a value produced by encryptApiKey.
 *
 * Expected layout after base64-decoding: 12-byte IV, 16-byte auth tag,
 * then the ciphertext (which may be empty).
 *
 * @param {string} encrypted  Base64 string of IV + auth tag + ciphertext.
 * @returns {string} The decrypted UTF-8 plaintext.
 * @throws {Error} When the blob is too short to hold an IV and a full tag,
 *   or when authentication fails (wrong key or tampered data).
 */
export function decryptApiKey(encrypted) {
  const IV_BYTES = 12;
  const TAG_BYTES = 16;

  const data = Buffer.from(encrypted, 'base64');
  // A truncated blob would otherwise yield a short (or empty) auth tag and
  // fail later with an opaque crypto error — or, on Node versions that
  // accept short GCM tags, weaken authentication. Reject it up front.
  if (data.length < IV_BYTES + TAG_BYTES) {
    throw new Error('Encrypted API key is malformed: payload too short');
  }

  const iv = data.subarray(0, IV_BYTES);
  const tag = data.subarray(IV_BYTES, IV_BYTES + TAG_BYTES);
  const ciphertext = data.subarray(IV_BYTES + TAG_BYTES);

  // Pin the tag length so only full 16-byte tags are ever accepted.
  const decipher = crypto.createDecipheriv(ALGO, getEncryptionKey(), iv, {
    authTagLength: TAG_BYTES,
  });
  decipher.setAuthTag(tag);
  return Buffer.concat([decipher.update(ciphertext), decipher.final()]).toString('utf8');
}
|
|
33
|
+
|
|
34
|
+
/**
 * Generate a fresh bot API key: the prefix "bot_" followed by 24 random
 * bytes rendered as 48 lowercase hex characters.
 *
 * @returns {string}
 */
export function generateApiKey() {
  const entropy = crypto.randomBytes(24);
  const suffix = entropy.toString('hex');
  return `bot_${suffix}`;
}
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document Parser Utility
|
|
3
|
+
* Handles parsing of various document types
|
|
4
|
+
* - PDFs: Uses pdf2json (Node.js native, no browser APIs)
|
|
5
|
+
* - DOCX/PPTX/XLSX: Uses officeparser
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
const PDFParser = require('pdf2json');
|
|
9
|
+
const officeParser = require('officeparser');
|
|
10
|
+
const { writeFile, unlink } = require('fs/promises');
|
|
11
|
+
const { join } = require('path');
|
|
12
|
+
const { tmpdir } = require('os');
|
|
13
|
+
|
|
14
|
+
/**
 * Rebuild text from pdf2json's parsed page structure. Used as a fallback
 * when the parser's raw text output is empty.
 *
 * Only the first run (`R[0].T`) of each text item is read. Each value is
 * URI-decoded; when decoding fails (e.g. a literal "%"), the raw value is
 * kept instead.
 *
 * @param {Object} pdfData - Parsed document as delivered with 'pdfParser_dataReady'
 * @returns {string} Text items joined by single spaces
 */
function extractTextFromPages(pdfData) {
  const textParts = [];
  for (const page of pdfData?.Pages || []) {
    for (const textItem of page?.Texts || []) {
      const encodedText = textItem?.R?.[0]?.T || '';
      if (!encodedText) continue;
      try {
        textParts.push(decodeURIComponent(encodedText));
      } catch (decodeError) {
        // Malformed escape sequence — keep the raw text rather than drop it.
        textParts.push(encodedText);
      }
    }
  }
  return textParts.join(' ');
}

/**
 * Parse PDF using pdf2json (Node.js native parser)
 *
 * Resolves with the parser's raw text content, falling back to a manual
 * walk over the parsed pages when the raw text is empty or whitespace-only.
 * Only text lengths are logged, never document content.
 *
 * @param {Buffer} buffer - PDF file buffer
 * @returns {Promise<string>} Extracted text (may be empty)
 */
async function parsePDF(buffer) {
  return new Promise((resolve, reject) => {
    const pdfParser = new PDFParser();

    pdfParser.on('pdfParser_dataError', (errData) => {
      // The payload may be { parserError }, a bare error, or missing.
      // Pass an existing Error through untouched so its message and stack
      // survive; stringify anything else.
      const failure = errData?.parserError ?? errData;
      reject(failure instanceof Error ? failure : new Error(String(failure)));
    });

    pdfParser.on('pdfParser_dataReady', (pdfData) => {
      try {
        // Try getRawTextContent() first
        let text = pdfParser.getRawTextContent();

        console.log('PDF text length:', text ? text.length : 0);

        // If getRawTextContent() returns empty, try alternative extraction
        if (!text || text.trim().length === 0) {
          console.log('Trying alternative text extraction from pdfData');
          text = extractTextFromPages(pdfData);
          console.log('Alternative extraction length:', text.length);
        }

        resolve(text);
      } catch (error) {
        reject(error);
      }
    });

    // Parse the buffer
    pdfParser.parseBuffer(buffer);
  });
}
|
|
75
|
+
|
|
76
|
+
/**
 * Parse Office documents using officeparser (requires file path)
 *
 * The buffer is written to a uniquely named file directly inside the OS temp
 * directory, parsed from there, and the file is removed afterwards whether
 * or not parsing succeeded.
 *
 * @param {Buffer} buffer - File buffer
 * @param {string} fileName - Original file name; only a sanitised form is used on disk
 * @returns {Promise<string>} Extracted text
 */
async function parseOfficeDocument(buffer, fileName) {
  // fileName comes from the upload and must not steer where we write:
  // replacing everything outside [A-Za-z0-9._-] removes path separators, so
  // "../../x.docx" cannot escape the temp directory. The tail is kept so the
  // extension survives (presumably officeparser uses it to pick a format —
  // verify), and the length cap avoids over-long file names.
  const safeName = String(fileName).replace(/[^A-Za-z0-9._-]/g, '_').slice(-100);
  // Date.now() alone collides when two uploads with the same name arrive in
  // the same millisecond; add a random component.
  const uniqueTag = `${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
  const tempFilePath = join(tmpdir(), `temp-${uniqueTag}-${safeName}`);

  try {
    await writeFile(tempFilePath, buffer);
    const extractedText = await officeParser.parseOfficeAsync(tempFilePath);
    return extractedText;
  } finally {
    try {
      await unlink(tempFilePath);
    } catch (unlinkError) {
      console.warn(`Failed to delete temp file ${tempFilePath}:`, unlinkError);
    }
  }
}
|
|
97
|
+
|
|
98
|
+
/**
 * Undo "character-positioned" extraction, where every character comes out
 * separated by whitespace ("P l a n d ' a s s u r" instead of
 * "Plan d'assurance").
 *
 * Detection: the first 4000 characters are split on whitespace. If that
 * sample has fewer than 20 tokens, or fewer than half of its tokens are a
 * single character long, the text is returned untouched.
 *
 * Repair: the full text is cut into words at runs of 3 or more spaces/tabs
 * or at newlines; all remaining whitespace inside each word is removed, and
 * the non-empty words are joined with single spaces. Line breaks are
 * therefore flattened into spaces in repaired text.
 *
 * Known limitation: letters of one word separated by 3+ spaces (e.g. an
 * acronym with wide positioning gaps) stay split into separate words.
 *
 * @param {string} text - Extracted document text
 * @returns {string} The original text, or the repaired text when the heuristic fires
 */
function normalizeCharacterSpacing(text) {
  const probeTokens = text.slice(0, 4000).split(/\s+/).filter(Boolean);
  if (probeTokens.length < 20) return text;

  let singleCharCount = 0;
  for (const token of probeTokens) {
    if (token.length === 1) singleCharCount += 1;
  }
  const singleCharRatio = singleCharCount / probeTokens.length;
  if (singleCharRatio < 0.5) return text;

  console.log(
    `Detected character-positioned PDF text (${(singleCharRatio * 100).toFixed(0)}% single-char tokens) — normalizing.`
  );

  const words = [];
  for (const chunk of text.split(/[ \t]{3,}|\r?\n+/)) {
    const word = chunk.replace(/\s+/g, '');
    if (word) words.push(word);
  }
  return words.join(' ');
}
|
|
138
|
+
|
|
139
|
+
/**
 * Extract text from an uploaded document.
 *
 * Files whose name ends in ".pdf" (case-insensitive) go through parsePDF;
 * everything else goes through parseOfficeDocument. The extracted text is
 * then passed through normalizeCharacterSpacing.
 *
 * @param {Buffer} buffer - File buffer
 * @param {string} fileName - Original file name; its extension selects the parser
 * @returns {Promise<string>} Extracted text
 * @throws {Error} `Failed to parse <fileName>: <reason>` when parsing fails or
 *   yields no text; the underlying error is attached as `cause`.
 */
async function parseDocument(buffer, fileName) {
  try {
    const fileExtension = fileName.toLowerCase().split('.').pop();

    let extractedText;

    // PDFs are handled by pdf2json via parsePDF
    if (fileExtension === 'pdf') {
      extractedText = await parsePDF(buffer);
    }
    // Use officeparser for other document types
    else {
      extractedText = await parseOfficeDocument(buffer, fileName);
    }

    if (!extractedText || extractedText.trim().length === 0) {
      throw new Error(`No text extracted from ${fileName}`);
    }

    return normalizeCharacterSpacing(extractedText);
  } catch (error) {
    console.error(`Error parsing ${fileName}:`, error);
    // Keep the original error (and its stack) reachable for callers/loggers.
    throw new Error(`Failed to parse ${fileName}: ${error.message}`, { cause: error });
  }
}
|
|
170
|
+
|
|
171
|
+
module.exports = { parseDocument };
|