mojulo 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -9,13 +9,14 @@
9
9
  claude mcp add mojulo --command "npx -y mojulo"
10
10
 
11
11
  # 2. Configure at least one LLM provider key
12
- npx -y mojulo-config set anthropic sk-ant-...
12
+ # (mojulo-config ships inside the mojulo package, so -p mojulo is required)
13
+ npx -y -p mojulo mojulo-config set anthropic sk-ant-...
13
14
 
14
15
  # 3. In a Claude session, ask:
15
16
  # "build me a triage bot for my dental practice"
16
17
  ```
17
18
 
18
- Compiled bots land in `~/.mojulo/data/artifacts/`. Run them with `docker compose up`, or set a Fly token (`mojulo-config set fly fo1_...`) and ask Claude to deploy to the cloud.
19
+ Compiled bots land in `~/.mojulo/data/artifacts/`. Run them with `docker compose up`, or set a Fly token (`npx -y -p mojulo mojulo-config set fly fo1_...`) and ask Claude to deploy to the cloud.
19
20
 
20
21
  On first connect, Claude calls `forward_context` to read mojulo's glossary, lifecycle, and tool index — so the session orients itself before doing anything.
21
22
 
@@ -217,6 +217,105 @@ Example:
217
217
  }
218
218
  }
219
219
 
220
+ /**
221
+ * Extract user-stated bot settings from the prompt via LLM.
222
+ *
223
+ * Replaces the English-locked regex in extractPrepopulatedSettings — the LLM
224
+ * handles "llámalo Maverick" / "把它叫做小助手" / possessives / multi-turn
225
+ * mentions that the regex misses. Returns the same shape so compose_identity
226
+ * consumes it unchanged. The botName is slug-normalized here (same rules as
227
+ * the regex path) so downstream code doesn't have to know which extractor ran.
228
+ *
229
+ * Returns null when no API key is available (signal to caller to fall back to
230
+ * regex). Returns {} on parse/validation failure.
231
+ */
232
+ async function extractPrepopulatedSettingsLLM(userMessage, session, userId) {
233
+ let llmConfig;
234
+ try {
235
+ llmConfig = await getLLMConfigFromSession(session, userId, 'summary');
236
+ } catch (err) {
237
+ console.log('[Builder] No API key for prepopulated extraction:', err.message);
238
+ return null;
239
+ }
240
+
241
+ const { provider, apiKey, model } = llmConfig;
242
+ const { generateSummary } = await import('@/lib/llm-providers.js');
243
+
244
+ const prompt = `Extract user-specified settings from this bot-building request. The user may write in any language (English, Spanish, Chinese, Polish, Arabic, etc.).
245
+
246
+ USER REQUEST:
247
+ ${userMessage.substring(0, 1000)}
248
+
249
+ Extract these fields IF — and only if — the user explicitly states them. Do not invent or infer.
250
+
251
+ - displayName: The proper-noun name the user gave the bot (e.g. "Maverick", "小助手", "Pelusa"). Preserve original script and capitalization. Omit if not stated.
252
+ - resourceName: The organization/company/brand the bot is for (e.g. "Acme Dental", "Valley Coffee"). Omit if not stated.
253
+ - firstMessage: An exact greeting/welcome message the user dictated in quotes. Omit if not stated.
254
+ - objective: An exact purpose/goal the user dictated in quotes. Omit if not stated.
255
+
256
+ Return ONLY a JSON object with whichever fields are present, no other text. Empty object {} if nothing was stated.
257
+
258
+ Examples:
259
+ - "build me a bot called Maverick for Acme Dental" → {"displayName": "Maverick", "resourceName": "Acme Dental"}
260
+ - "llámalo Sparky" → {"displayName": "Sparky"}
261
+ - "把这个机器人叫做小助手,给阳光咖啡馆用的" → {"displayName": "小助手", "resourceName": "阳光咖啡馆"}
262
+ - "I need a support bot" → {}`;
263
+
264
+ try {
265
+ const response = await generateSummary(
266
+ provider,
267
+ prompt,
268
+ apiKey,
269
+ 'Extract bot settings from user request',
270
+ model
271
+ );
272
+
273
+ const jsonMatch = response.match(/\{[\s\S]*\}/);
274
+ if (!jsonMatch) return {};
275
+
276
+ const extracted = JSON.parse(jsonMatch[0]);
277
+ const settings = {};
278
+
279
+ if (typeof extracted.displayName === 'string' && extracted.displayName.trim()) {
280
+ const displayName = extracted.displayName.trim().substring(0, 40);
281
+ settings.displayName = displayName;
282
+ // Slug for botName: collapse to a-z0-9-, max 30. For non-ASCII names
283
+ // the slug ends up empty — botName is then left unset (no hash fallback)
284
+ // while displayName still carries the original name.
285
+ const slug = displayName
286
+ .toLowerCase()
287
+ .replace(/[^a-z0-9\s-]/g, '')
288
+ .replace(/\s+/g, '-')
289
+ .replace(/-+/g, '-')
290
+ .replace(/^-|-$/g, '')
291
+ .substring(0, 30);
292
+ if (slug) {
293
+ settings.botName = slug;
294
+ }
295
+ // No slug fallback: leaving botName unset lets compose_identity generate
296
+ // an org-derived slug, which is more useful than a hash to the operator.
297
+ }
298
+
299
+ if (typeof extracted.resourceName === 'string' && extracted.resourceName.trim()) {
300
+ settings.resourceName = extracted.resourceName.trim().substring(0, 60);
301
+ }
302
+
303
+ if (typeof extracted.firstMessage === 'string' && extracted.firstMessage.trim()) {
304
+ settings.firstMessage = extracted.firstMessage.trim().substring(0, 200);
305
+ }
306
+
307
+ if (typeof extracted.objective === 'string' && extracted.objective.trim()) {
308
+ settings.objective = extracted.objective.trim().substring(0, 250);
309
+ }
310
+
311
+ console.log('[Builder] LLM-extracted prepopulated settings:', settings);
312
+ return settings;
313
+ } catch (err) {
314
+ console.warn('[Builder] Failed to LLM-extract prepopulated settings:', err.message);
315
+ return {};
316
+ }
317
+ }
318
+
220
319
  /**
221
320
  * Execute a modular tool call
222
321
  * @param {string} toolName - Name of the tool
@@ -614,8 +713,13 @@ const builderToolHandlers = {
614
713
  }
615
714
  }
616
715
 
617
- // Extract prepopulated settings from user message
618
- const prepopulatedSettings = extractPrepopulatedSettings(userMessage);
716
+ // Extract prepopulated settings from user message.
717
+ // LLM extractor handles any language; regex is the fallback when no API
718
+ // key is configured (returns null) or when the LLM yields nothing useful.
719
+ const llmSettings = await extractPrepopulatedSettingsLLM(userMessage, session, userId);
720
+ const prepopulatedSettings = (llmSettings && Object.keys(llmSettings).length > 0)
721
+ ? llmSettings
722
+ : extractPrepopulatedSettings(userMessage);
619
723
 
620
724
  // Update session with inference and prepopulated settings
621
725
  await BuilderSessionRepository.updateInference(session.id, userId, {
@@ -1395,15 +1499,20 @@ Return ONLY the summary text, nothing else.`;
1395
1499
 
1396
1500
  let buildStatus = result.status;
1397
1501
  let buildError = null;
1502
+ // artifactPath is the absolute on-disk zip — surfaced so MCP/stdio
1503
+ // callers (which have no HTTP server to hit downloadUrl against) can
1504
+ // hand the user a real path. Web flow keeps using downloadUrl.
1505
+ let artifactPath = null;
1398
1506
  try {
1399
- const { deployment } = await buildArtifact(result.deploymentId);
1400
- buildStatus = deployment.status;
1507
+ const built = await buildArtifact(result.deploymentId);
1508
+ buildStatus = built.deployment.status;
1509
+ artifactPath = built.artifactPath;
1401
1510
  } catch (err) {
1402
1511
  console.error('[save_modular_bot] build after save failed:', err);
1403
1512
  buildError = err.message || 'Build failed';
1404
1513
  }
1405
1514
 
1406
- return { ...result, isUpdate, status: buildStatus, buildError };
1515
+ return { ...result, isUpdate, status: buildStatus, buildError, artifactPath };
1407
1516
  }
1408
1517
 
1409
1518
  await BuilderSessionRepository.updateStatus(sessionId, userId, SESSION_STATUS.AWAITING_CONFIRM);
@@ -75,7 +75,7 @@ That density runs through the whole body — numbered synthesis steps, mapping r
75
75
  1. **Build.** Pick which protocols (capabilities) the bot needs, generate their configs, upload any documents the bot should know, compose the bot's identity. Either drive this step-by-step through the build tools, or just describe the user's goal and let the build tools sequence themselves starting from \`infer_intent\`.
76
76
 
77
77
  *Builder-session scope.* Build tools share state via a **builder session** keyed on the \`mcp-session-id\` header your client sends. The session row persists in the control plane's SQLite, but the header→session binding is held in process memory. So: the same client reconnecting during a single control-plane process lifetime resumes its in-progress config, while a **control-plane restart drops the binding** and the user's next build tool call starts a fresh bot (the orphaned row stays in SQLite). Inside the same connection, \`start_new_bot\` deliberately discards in-progress config and starts over.
78
- 2. **Deploy.** \`save_modular_bot\` compiles the configured bot into a downloadable zip artifact. The user runs it locally (Docker) or in the cloud (Fly.io). The container image is bot-agnostic — per-bot config is injected at start time, so the same image runs every bot the user has.
78
+ 2. **Deploy.** \`save_modular_bot\` compiles the configured bot into a zip artifact on disk and returns its absolute path in \`artifactPath\`. The user runs it locally (\`unzip\` + \`docker compose up\`) or in the cloud (Fly.io). Over stdio MCP the zip lives under \`$MOJULO_HOME/data/artifacts/\` (default \`~/.mojulo/data/artifacts/\`) — hand the user the \`artifactPath\` value verbatim. The legacy \`downloadUrl\` field in the response is a Next.js-route path; ignore it over stdio. The container image is bot-agnostic — per-bot config is injected at start time, so the same image runs every bot the user has.
79
79
  3. **Connect.** Once the bot starts, it phones home to the control plane with its URL. From then on the control plane can reach it through a bearer-authenticated proxy. **Conversation data stays in the bot's SQLite forever** — the control plane only stores \`url\` and \`last_seen_at\`. Any tool that needs transcript data proxies through to the bot in real time.
80
80
  4. **Operate.** Use the operate tools to read what bots have captured. Use the catalyst tools to turn that captured signal into action via the user's other installed MCPs.
81
81
  5. **Operate the fleet.** Once multiple bots are connected, fleet-level questions ("how is the whole fleet doing?", "which bots saw the most activity?", "find any conversation across every bot that mentioned X") have their own surface — the \`fleet_*\` tools. They fan out across every connected bot and aggregate in process memory; conversation content still stays on each bot. The natural two-step pattern is **fleet-locate** with \`fleet_query_conversations\` → **per-bot-read** with \`get_conversation\`. Same posture as single-bot operate, just batched. Cross-bot catalysts (the new category fleet aggregation enables) come from \`recommend_catalysts\` with \`scope: 'fleet'\`.
@@ -103,7 +103,7 @@ That density runs through the whole body — numbered synthesis steps, mapping r
103
103
  ### Build, documents and artifact compilation
104
104
  - \`upload_document_from_url\` — **sync**, ~1–5s. Upload a PDF / DOCX / TXT / MD / HTML the bot should learn from. Accepts a URL, base64, or pre-extracted text. → returns \`{ documentId, originalName, mimeType, sizeBytes, message }\`. Pass \`documentId\` into \`process_documents\`.
105
105
  - \`process_documents\` — **async**, returns \`{ jobId }\`. ~10–30s **per document** (parse + chunk + embed + per-doc LLM summary). Many or large docs can run minutes. Makes documents available to the \`knowledge\` protocol.
106
- - \`save_modular_bot\` — **async**, returns \`{ jobId }\`. ~10–60s in prebuilt-image mode (compose cartridges + write config + zip); longer when the control plane is in offline-build mode (\`MOJULO_OFFLINE_BUILD=1\` bundles full bot source). Compiles the configured bot into the downloadable artifact.
106
+ - \`save_modular_bot\` — **async**, returns \`{ jobId }\`. ~10–60s in prebuilt-image mode (compose cartridges + write config + zip); longer when the control plane is in offline-build mode (\`MOJULO_OFFLINE_BUILD=1\` bundles full bot source). Compiles the configured bot into a zip on disk. Polled result: \`{ deploymentId, status, botName, artifactPath, buildError, ... }\`. \`artifactPath\` is the absolute path to the zip — that's the value to surface to the user.
107
107
  - \`poll_job\` — **sync**. Check the status of any async job. → returns \`{ jobId, tool, status: "pending" | "running" | "done" | "error", progress, result, error }\`. Reasonable polling cadence is every 2–5s.
108
108
 
109
109
  ### Operate (fleet)
package/package.json CHANGED
@@ -1,12 +1,17 @@
1
1
  {
2
2
  "name": "mojulo",
3
- "version": "0.1.0",
3
+ "version": "0.1.1",
4
4
  "license": "Apache-2.0",
5
5
  "description": "Mojulo — MCP server for building self-hosted chatbots from inside Claude.",
6
6
  "author": "Franz Ombico",
7
7
  "homepage": "https://github.com/zombico/mojulo",
8
- "repository": { "type": "git", "url": "git+https://github.com/zombico/mojulo.git" },
9
- "bugs": { "url": "https://github.com/zombico/mojulo/issues" },
8
+ "repository": {
9
+ "type": "git",
10
+ "url": "git+https://github.com/zombico/mojulo.git"
11
+ },
12
+ "bugs": {
13
+ "url": "https://github.com/zombico/mojulo/issues"
14
+ },
10
15
  "keywords": [
11
16
  "mcp",
12
17
  "claude",
@@ -18,7 +23,9 @@
18
23
  "self-hosted",
19
24
  "mojulo"
20
25
  ],
21
- "engines": { "node": ">=20" },
26
+ "engines": {
27
+ "node": ">=20"
28
+ },
22
29
  "bin": {
23
30
  "mojulo": "./scripts/mcp-stdio.mjs",
24
31
  "mojulo-config": "./scripts/mcp-config.mjs"