mojulo 0.0.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +54 -4
- package/lib/audit-logger-new.js +11 -0
- package/lib/auth/gate.js +25 -0
- package/lib/auth/service.js +17 -0
- package/lib/auth/session.js +63 -0
- package/lib/builder/chat-processor.js +607 -0
- package/lib/builder/composer-bridge.js +82 -0
- package/lib/builder/evaluator.js +159 -0
- package/lib/builder/executor.js +252 -0
- package/lib/builder/index.js +48 -0
- package/lib/builder/session.js +248 -0
- package/lib/builder/system-prompt.js +422 -0
- package/lib/builder/tone-presets.js +75 -0
- package/lib/builder/tool-executors.js +1527 -0
- package/lib/builder/tools.js +338 -0
- package/lib/builder/validators.js +239 -0
- package/lib/composer/composer.js +225 -0
- package/lib/composer/index.js +40 -0
- package/lib/composer/protocols/00_base.txt +19 -0
- package/lib/composer/protocols/01_knowledge.txt +9 -0
- package/lib/composer/protocols/02_form-gathering.txt +32 -0
- package/lib/composer/protocols/03_appointments.txt +16 -0
- package/lib/composer/protocols/04_triage.txt +15 -0
- package/lib/composer/protocols/05_optical-read.txt +22 -0
- package/lib/composer/response-builder.js +98 -0
- package/lib/config-builder.js +650 -0
- package/lib/db/ids.js +10 -0
- package/lib/db/index.js +179 -0
- package/lib/db/repositories/apiKeys.js +72 -0
- package/lib/db/repositories/auditLogs.js +12 -0
- package/lib/db/repositories/botSpaces.js +12 -0
- package/lib/db/repositories/builderSessions.js +312 -0
- package/lib/db/repositories/deploymentEvents.js +12 -0
- package/lib/db/repositories/deployments.js +385 -0
- package/lib/db/repositories/documents.js +68 -0
- package/lib/db/repositories/mcpJobs.js +84 -0
- package/lib/deployers/bot-fleet.js +110 -0
- package/lib/deployers/bot-proxy.js +72 -0
- package/lib/deployers/build.js +89 -0
- package/lib/deployers/cloud-deploy.js +310 -0
- package/lib/deployers/docker.js +439 -0
- package/lib/deployers/fly.js +432 -0
- package/lib/deployers/index.js +38 -0
- package/lib/deployment-auth.js +36 -0
- package/lib/document-parser.js +171 -0
- package/lib/embedder/chunker.js +93 -0
- package/lib/embedder/local.js +101 -0
- package/lib/embedder/preview-rag.js +93 -0
- package/lib/envelope-schema.js +54 -0
- package/lib/fleet/scoped-sql.js +342 -0
- package/lib/form-schema-config/base.js +135 -0
- package/lib/form-schema-config/index.js +286 -0
- package/lib/form-schema-config/locales/af-ZA.js +153 -0
- package/lib/form-schema-config/locales/ar-AE.js +142 -0
- package/lib/form-schema-config/locales/ar-SA.js +164 -0
- package/lib/form-schema-config/locales/de-DE.js +152 -0
- package/lib/form-schema-config/locales/en-AU.js +161 -0
- package/lib/form-schema-config/locales/en-CA.js +115 -0
- package/lib/form-schema-config/locales/en-GB.js +132 -0
- package/lib/form-schema-config/locales/en-IN.js +219 -0
- package/lib/form-schema-config/locales/en-MY.js +171 -0
- package/lib/form-schema-config/locales/en-NG.js +198 -0
- package/lib/form-schema-config/locales/en-PH.js +186 -0
- package/lib/form-schema-config/locales/en-SG.js +153 -0
- package/lib/form-schema-config/locales/en-US.js +138 -0
- package/lib/form-schema-config/locales/es-ES.js +171 -0
- package/lib/form-schema-config/locales/es-MX.js +193 -0
- package/lib/form-schema-config/locales/fr-CA.js +138 -0
- package/lib/form-schema-config/locales/fr-FR.js +155 -0
- package/lib/form-schema-config/locales/hi-IN.js +219 -0
- package/lib/form-schema-config/locales/it-IT.js +157 -0
- package/lib/form-schema-config/locales/ja-JP.js +169 -0
- package/lib/form-schema-config/locales/ko-KR.js +140 -0
- package/lib/form-schema-config/locales/nl-NL.js +149 -0
- package/lib/form-schema-config/locales/pt-BR.js +168 -0
- package/lib/form-schema-config/locales/zh-CN.js +172 -0
- package/lib/form-schema-config/locales/zh-HK.js +142 -0
- package/lib/form-structure-schema.js +191 -0
- package/lib/llm-providers.js +828 -0
- package/lib/markdown.js +197 -0
- package/lib/mcp/catalysts/appointment-to-calendar.md +84 -0
- package/lib/mcp/catalysts/conversations-to-channel-digest.md +104 -0
- package/lib/mcp/catalysts/document-extract-to-store.md +92 -0
- package/lib/mcp/catalysts/knowledge-gap-miner.md +96 -0
- package/lib/mcp/catalysts/loader.js +144 -0
- package/lib/mcp/catalysts/qualify-lead-to-crm.md +83 -0
- package/lib/mcp/catalysts/scan-conversations-for-signal.md +92 -0
- package/lib/mcp/catalysts/submission-to-ticket.md +83 -0
- package/lib/mcp/catalysts/submissions-to-warehouse.md +103 -0
- package/lib/mcp/catalysts/weekly-submissions-digest.md +82 -0
- package/lib/mcp/jobs.js +64 -0
- package/lib/mcp/server.js +184 -0
- package/lib/mcp/session-binding.js +130 -0
- package/lib/mcp/tools/build.js +123 -0
- package/lib/mcp/tools/catalysts.js +477 -0
- package/lib/mcp/tools/context.js +325 -0
- package/lib/mcp/tools/fleet.js +391 -0
- package/lib/mcp/tools/jobs-tools.js +240 -0
- package/lib/mcp/tools/operate.js +314 -0
- package/lib/preview/build-preview-config.js +136 -0
- package/lib/rate-limiter.js +11 -0
- package/lib/resolve-api-key.js +142 -0
- package/lib/storage/index.js +40 -0
- package/messages/de.json +2136 -0
- package/messages/en.json +2136 -0
- package/messages/es.json +2136 -0
- package/messages/fr.json +2136 -0
- package/messages/it.json +2136 -0
- package/messages/ja.json +2136 -0
- package/messages/ko.json +2136 -0
- package/messages/nl.json +2136 -0
- package/messages/pl.json +2136 -0
- package/messages/pt.json +2136 -0
- package/messages/ru.json +2136 -0
- package/messages/uk.json +2136 -0
- package/messages/zh.json +2136 -0
- package/package.json +68 -5
- package/scripts/mcp-config.mjs +162 -0
- package/scripts/mcp-stdio-loader.mjs +42 -0
- package/scripts/mcp-stdio.mjs +108 -0
- package/scripts/mojulo-paths.mjs +48 -0
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure text chunker for vector embeddings.
|
|
3
|
+
*
|
|
4
|
+
* 512-character target with 50-char overlap. Locale-agnostic by design —
|
|
5
|
+
* the embedding model handles tokenization on its end, so we just need
|
|
6
|
+
* stable, overlapping windows. No sentence-aware splitting; the embedding
|
|
7
|
+
* model recovers semantic boundaries fine from raw substrings.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
const DEFAULT_CHUNK_SIZE = 512;
const DEFAULT_OVERLAP = 50;

/**
 * Resolve a numeric chunking option.
 *
 * Returns the floored value when it is a finite number >= `minimum`,
 * otherwise `fallback`. Non-numbers (including numeric strings) are rejected
 * so that `start + chunkSize` can never turn into string concatenation.
 */
function coerceCount(value, minimum, fallback) {
  const usable = typeof value === 'number' && Number.isFinite(value) && value >= minimum;
  return usable ? Math.floor(value) : fallback;
}

/**
 * Split a single document into chunks suitable for embedding.
 *
 * Whitespace runs are collapsed to single spaces first, then the text is cut
 * into fixed-size windows that advance by `chunkSize - overlap` characters
 * (at least 1). Text that fits in one window is returned as a single chunk.
 *
 * NOTE: windows are cut by UTF-16 code unit (`String.prototype.slice`), so a
 * boundary can fall between the two halves of a surrogate pair.
 *
 * @param {string} text
 * @param {Object} [opts]
 * @param {number} [opts.chunkSize=512] Positive window size; invalid values
 *   (non-number, NaN, < 1) fall back to the default.
 * @param {number} [opts.overlap=50] Non-negative overlap between consecutive
 *   windows; invalid values fall back to the default.
 * @returns {Array<{ text: string, chunkIndex: number }>} Empty array for
 *   empty, whitespace-only or non-string input.
 */
export function chunkText(text, opts = {}) {
  if (!text || typeof text !== 'string') return [];

  // `opts?.` also tolerates an explicit `null`, which a default parameter
  // does not cover.
  const chunkSize = coerceCount(opts?.chunkSize, 1, DEFAULT_CHUNK_SIZE);
  const overlap = coerceCount(opts?.overlap, 0, DEFAULT_OVERLAP);

  const normalized = text.replace(/\s+/g, ' ').trim();
  if (normalized.length === 0) return [];

  if (normalized.length <= chunkSize) {
    return [{ text: normalized, chunkIndex: 0 }];
  }

  // Guarantee forward progress even when overlap >= chunkSize.
  const stride = Math.max(1, chunkSize - overlap);
  const chunks = [];
  let chunkIndex = 0;
  for (let start = 0; start < normalized.length; start += stride) {
    const slice = normalized.slice(start, start + chunkSize).trim();
    if (slice.length === 0) continue;
    chunks.push({ text: slice, chunkIndex });
    chunkIndex++;
    // This window already reached the end of the text; a further window
    // would only repeat the overlap.
    if (start + chunkSize >= normalized.length) break;
  }
  return chunks;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Chunk every document in a batch and tag each chunk with where it came from.
 *
 * Each document's `text` is passed to chunkText() with the same `opts`; the
 * resulting chunks are flattened into one list in document order.
 *
 * @param {Array<{ id: string, originalName: string, text: string }>} docs
 *   A missing (null/undefined) batch is treated as empty.
 * @param {Object} [opts] Forwarded unchanged to chunkText().
 * @returns {Array<{ text: string, metadata: { documentId: string, originalName: string, chunkIndex: number } }>}
 */
export function chunkDocuments(docs, opts = {}) {
  const batch = [...(docs || [])];
  return batch.flatMap((doc) =>
    chunkText(doc.text, opts).map((piece) => ({
      text: piece.text,
      metadata: {
        documentId: doc.id,
        originalName: doc.originalName,
        chunkIndex: piece.chunkIndex,
      },
    }))
  );
}
|
|
71
|
+
|
|
72
|
+
// Triage routes are embedded into the same cosine index as document chunks.
// Each route becomes exactly one chunk (no sub-chunking) so its description
// stays a single retrieval unit. The metadata carries source 'triage-route'
// plus the deploymentId; per the original note, vector-rag.js reads
// metadata.source at retrieval time to format the snippet so the LLM sees the
// deploymentId inline. Routes whose description is missing or whitespace-only
// are skipped.
export function chunkTriageRoutes(routes) {
  return [...(routes || [])].flatMap((route) => {
    const description = (route.description || '').replace(/\s+/g, ' ').trim();
    if (!description) return [];
    return [
      {
        text: description,
        metadata: {
          source: 'triage-route',
          deploymentId: route.deploymentId,
          originalName: route.name,
          chunkIndex: 0,
        },
      },
    ];
  });
}
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local embedding client for the control plane.
|
|
3
|
+
*
|
|
4
|
+
* Loads multilingual-e5-small from the
|
|
5
|
+
* pre-fetched ONNX cache at lib/embedder/models/ via @huggingface/transformers
|
|
6
|
+
* and runs inference in-process. No network calls.
|
|
7
|
+
*
|
|
8
|
+
* The same model + dtype combo runs in the lite-template artifact at
|
|
9
|
+
* runtime, so corpus and query vectors live in the same geometric space.
|
|
10
|
+
*
|
|
11
|
+
* e5 models expect prefixed inputs:
|
|
12
|
+
* - 'passage: <text>' for corpus chunks
|
|
13
|
+
* - 'query: <text>' for retrieval queries
|
|
14
|
+
* This module owns that convention so callers stay model-agnostic.
|
|
15
|
+
*
|
|
16
|
+
* The cache dir resolves to MOJULO_MODELS_DIR if set (npx flow lands this at
|
|
17
|
+
* ~/.mojulo/models), else the bundled lib/embedder/models/ next to this file
|
|
18
|
+
* (clone-and-run flow).
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import path from 'node:path';
|
|
22
|
+
import { fileURLToPath } from 'node:url';
|
|
23
|
+
import { pipeline, env } from '@huggingface/transformers';
|
|
24
|
+
|
|
25
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Model identity. The id/dtype pair is what gets loaded; the exported
// name/dimension are what callers see.
const MODEL_ID = 'Xenova/multilingual-e5-small';
const DTYPE = 'q8';

export const LOCAL_EMBEDDING_MODEL = 'multilingual-e5-small';
export const LOCAL_EMBEDDING_DIM = 384;

// Cache location has two modes, selected by MOJULO_MODELS_DIR:
//   - set (npx flow): the cache lives in that directory and is filled lazily
//     on first use, so remote downloads are allowed and a fresh directory
//     self-heals without a separate postinstall step.
//   - unset (clone-and-run flow): the cache is the `models` directory next to
//     this file, filled by postinstall; remote downloads stay off to keep the
//     offline-after-install posture.
const USER_CACHE = Boolean(process.env.MOJULO_MODELS_DIR);

Object.assign(env, {
  cacheDir: process.env.MOJULO_MODELS_DIR || path.resolve(__dirname, 'models'),
  allowRemoteModels: USER_CACHE,
  allowLocalModels: true,
});

// Shared in-flight/resolved model load; null until first requested and again
// after a failed load.
let extractorPromise = null;
|
|
46
|
+
|
|
47
|
+
/**
 * Kick off (or join) the embedding-model load and wait for it to finish.
 *
 * Goes through the same getExtractor() promise as generateEmbeddings(), so
 * calling it repeatedly, or alongside real embedding work, does not start a
 * second load. Intended for server cold-start so the model fetch (about
 * 113MB per the original note) overlaps with the user's first exchanges
 * instead of blocking their first RAG action.
 *
 * Rejects if the model fails to load; fire-and-forget callers should attach a
 * .catch() to avoid an unhandled rejection.
 *
 * @returns {Promise<void>}
 */
export async function preloadModel() {
  const pendingExtractor = getExtractor();
  await pendingExtractor;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Return the shared feature-extraction pipeline, starting the load on first
 * use.
 *
 * The promise is cached in `extractorPromise`, so concurrent callers share one
 * load. If the load fails, the cache is cleared so a later call can retry,
 * and the rejection is rethrown as an Error that carries a remediation hint
 * plus the original error as `cause` (so its stack is not lost).
 *
 * @returns {Promise<Function>} Resolves to the pipeline returned by
 *   `pipeline('feature-extraction', ...)`.
 */
function getExtractor() {
  if (extractorPromise) return extractorPromise;

  extractorPromise = pipeline('feature-extraction', MODEL_ID, { dtype: DTYPE }).catch((err) => {
    // Drop the failed promise so the next caller retries instead of
    // receiving the cached rejection forever.
    extractorPromise = null;

    // In the user-cache mode the model is downloaded *into* the cache dir.
    const hint = USER_CACHE
      ? `Lazy download into ${env.cacheDir} failed — check network / disk and retry.`
      : `Run "node scripts/fetch-embed-model.js" first.`;
    // A rejection is not guaranteed to be an Error instance.
    const reason = err instanceof Error ? err.message : String(err);

    throw new Error(
      `Failed to load embedding model from ${env.cacheDir}. ${hint} Cause: ${reason}`,
      { cause: err }
    );
  });

  return extractorPromise;
}
|
|
74
|
+
|
|
75
|
+
/**
 * Embed a list of texts with the local model.
 *
 * Each text is prefixed with 'passage: ' or 'query: ' depending on
 * `inputType`, then run through the shared extractor with mean pooling and
 * normalization. The result is one vector per input text, in input order
 * (384 dimensions per LOCAL_EMBEDDING_DIM).
 *
 * @param {string[]} texts Non-empty array of texts.
 * @param {Object} options
 * @param {'search_document' | 'search_query'} options.inputType Required;
 *   'search_document' for corpus chunks, 'search_query' for retrieval queries.
 * @returns {Promise<number[][]>}
 * @throws {Error} If `texts` is not a non-empty array or `inputType` is not
 *   one of the two accepted values.
 */
export async function generateEmbeddings(texts, { inputType } = {}) {
  const hasTexts = Array.isArray(texts) && texts.length > 0;
  if (!hasTexts) {
    throw new Error('generateEmbeddings: texts must be a non-empty array');
  }

  // A Map lookup compares keys strictly, matching the original `!==` checks.
  const prefixByInputType = new Map([
    ['search_document', 'passage: '],
    ['search_query', 'query: '],
  ]);
  const prefix = prefixByInputType.get(inputType);
  if (prefix === undefined) {
    throw new Error(
      "generateEmbeddings: inputType is required ('search_document' | 'search_query')"
    );
  }

  const extractor = await getExtractor();
  const prefixedTexts = texts.map((t) => prefix + t);
  const tensor = await extractor(prefixedTexts, { pooling: 'mean', normalize: true });
  return tensor.tolist();
}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* VectorRAGPreview — in-memory mirror of lite-template/helper/vector-rag.js
|
|
3
|
+
* for the wizard's /api/preview/chat path.
|
|
4
|
+
*
|
|
5
|
+
* The deployed bot loads embeddings.json from disk; the preview hydrates
|
|
6
|
+
* the same payload from a downloadToBuffer() call. Same retrieval semantics,
|
|
7
|
+
* same query-side prefix, same cosine math.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { generateEmbeddings } from './local.js';
|
|
11
|
+
|
|
12
|
+
/**
 * Cosine similarity of two equal-length numeric vectors.
 *
 * @param {number[]} a
 * @param {number[]} b
 * @returns {number} dot(a, b) / (|a| * |b|), or 0 when either magnitude is 0.
 * @throws {Error} If the vectors differ in length.
 */
function cosineSimilarity(a, b) {
  const dims = a.length;
  if (dims !== b.length) throw new Error('Vectors must have same length');

  let dotProduct = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  let idx = 0;
  while (idx < dims) {
    const x = a[idx];
    const y = b[idx];
    dotProduct += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
    idx += 1;
  }

  const magnitude = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
  if (magnitude === 0) return 0;
  return dotProduct / magnitude;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Rank stored chunks against a query vector and keep the best `k`.
 *
 * @param {number[]} queryEmbedding
 * @param {Array<{ text: string, embedding: number[], metadata?: Object }>} chunks
 * @param {number} [k=3] Maximum number of hits to return.
 * @returns {Array<{ text: string, score: number, metadata: (Object|undefined) }>}
 *   Hits sorted by descending cosine similarity.
 */
function findSimilar(queryEmbedding, chunks, k = 3) {
  const ranked = [];
  for (const candidate of chunks) {
    ranked.push({
      text: candidate.text,
      score: cosineSimilarity(queryEmbedding, candidate.embedding),
      metadata: candidate.metadata,
    });
  }
  ranked.sort((left, right) => right.score - left.score);
  return ranked.slice(0, k);
}
|
|
35
|
+
|
|
36
|
+
/**
 * In-memory vector retrieval over a pre-embedded chunk payload.
 *
 * Holds the chunks handed to the constructor, embeds each query with
 * generateEmbeddings() and ranks chunks by cosine similarity via
 * findSimilar().
 */
export default class VectorRAGPreview {
  /**
   * @param {{ chunks: Array<{ text: string, embedding: number[], metadata?: Object }>, model?: string }} payload
   *   A missing payload or a non-array `chunks` yields an empty, not-loaded
   *   instance.
   */
  constructor(payload) {
    this.chunks = Array.isArray(payload?.chunks) ? payload.chunks : [];
    this.model = payload?.model || null;
    this.isLoaded = this.chunks.length > 0;
    this.lastSearchResults = null;
    this.mode = 'vector';
  }

  // No-op for parity with VectorRAG.initialize() (lite-template runtime).
  async initialize() {
    return;
  }

  /**
   * Retrieve the best-matching chunks for `query` as one formatted string.
   *
   * Also records structured hits, readable through getLastSearchResults().
   * Those are cleared at the start of every call, so a search that returns
   * '' (nothing loaded, query shorter than 3 characters, no hits) never
   * leaves the previous search's hits behind.
   *
   * @param {string} query
   * @param {number} [maxResults=3]
   * @returns {Promise<string>} Snippets joined by a '---' separator line,
   *   numbered when there is more than one; '' when there is nothing to return.
   */
  async search(query, maxResults = 3) {
    // Reset up front: every early return below must report "no results".
    this.lastSearchResults = null;

    if (!this.isLoaded) return '';
    const cleanQuery = (query || '').trim();
    if (cleanQuery.length < 3) return '';

    const [queryVec] = await generateEmbeddings([cleanQuery], { inputType: 'search_query' });
    const top = findSimilar(queryVec, this.chunks, maxResults);

    if (top.length === 0) return '';

    this.lastSearchResults = top.map((hit) => ({
      filename: hit.metadata?.originalName || 'unknown',
      content: hit.text,
      score: hit.score,
      chunkIndex: hit.metadata?.chunkIndex ?? null,
      source: hit.metadata?.source || 'document',
      deploymentId: hit.metadata?.deploymentId || null,
    }));

    return top
      .map((hit, i) => {
        // Only number the snippets when there is more than one.
        const prefix = top.length > 1 ? `[${i + 1}] ` : '';
        const isRoute = hit.metadata?.source === 'triage-route';
        if (isRoute) {
          // Triage routes surface their deploymentId inline in the snippet.
          const id = hit.metadata?.deploymentId || 'unknown';
          const name = hit.metadata?.originalName || id;
          return `${prefix}[Triage route — deploymentId: ${id} | name: ${name}]:\n${hit.text}`;
        }
        const filename = hit.metadata?.originalName || 'document';
        return `${prefix}[From ${filename}]:\n${hit.text}`;
      })
      .join('\n\n---\n\n');
  }

  /**
   * @returns {Array<Object>|null} Structured hits from the most recent
   *   search(), or null if that search produced none.
   */
  getLastSearchResults() {
    return this.lastSearchResults;
  }
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
// Canonical envelope shape — mirror of lite-template/helper/envelope-schema.js.
|
|
2
|
+
// Two npm packages, no shared layer; keep in sync.
|
|
3
|
+
//
|
|
4
|
+
// canonical source: lite-template/helper/envelope-schema.js
|
|
5
|
+
// mirror: control/lib/envelope-schema.js
|
|
6
|
+
|
|
7
|
+
// JSON-schema-style description of the response envelope: a required string
// `answer`, optional `suggestions`, and one optional closed object per
// protocol (form, triage, appointment, extraction). Built inside a function
// scope so the small constructors below do not leak into the module; every
// call returns a fresh object, so no two schema nodes share a reference.
export const ENVELOPE_SCHEMA = (() => {
  const string = () => ({ type: 'string' });
  const boolean = () => ({ type: 'boolean' });
  // Free-form key/value bag.
  const openObject = () => ({ type: 'object', additionalProperties: true });
  // Object that rejects any property not listed.
  const closedObject = (properties) => ({
    type: 'object',
    additionalProperties: false,
    properties,
  });

  return {
    type: 'object',
    required: ['answer'],
    additionalProperties: false,
    properties: {
      answer: string(),
      suggestions: { type: 'array', items: string() },

      form: closedObject({
        fields: openObject(),
        remaining: { type: 'integer', minimum: 0 },
        complete: boolean(),
      }),

      triage: closedObject({
        deploymentId: string(),
        starterPrompt: string(),
      }),

      appointment: closedObject({
        showLaunchButton: boolean(),
        calendarId: string(),
      }),

      extraction: closedObject({
        fields: openObject(),
        confidence: { type: 'string', enum: ['high', 'medium', 'low'] },
        notes: string(),
        showUploadButton: boolean(),
      }),
    },
  };
})();
|
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* In-memory SQLite assembly + SELECT-only validator for the SQL Explorer.
|
|
3
|
+
*
|
|
4
|
+
* On every query, we:
|
|
5
|
+
* 1. Spin up a fresh `:memory:` better-sqlite3 DB.
|
|
6
|
+
* 2. Fan out to every connected bot for its rollup endpoints.
|
|
7
|
+
* 3. Load returned rows into named tables that mirror Prime's schema
|
|
8
|
+
* surface (bots, daily_bot_stats, bot_health, protocol_stats).
|
|
9
|
+
* 4. Validate the user query (SELECT/WITH only, single statement, no
|
|
10
|
+
* ATTACH/PRAGMA/destructive verbs).
|
|
11
|
+
* 5. Run it with row + duration caps.
|
|
12
|
+
* 6. Discard the DB.
|
|
13
|
+
*
|
|
14
|
+
* Nothing crosses to the control-plane SQLite. The aggregates live only in
|
|
15
|
+
* process memory for the duration of one request.
|
|
16
|
+
*
|
|
17
|
+
* Scoping note: in single-user mode the in-memory DB only contains the
|
|
18
|
+
* user's own deployments — there's nothing to scope away, so the CTE
|
|
19
|
+
* rewrite from Prime is intentionally omitted here. The seam is the same
|
|
20
|
+
* table set, so a future multi-tenant variant can wrap the validated
|
|
21
|
+
* query without changing the surface.
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
import Database from 'better-sqlite3';
|
|
25
|
+
import { fanOut, listConnectedDeployments } from '@/lib/deployers/bot-fleet';
|
|
26
|
+
|
|
27
|
+
const ROW_CAP = 10_000;
|
|
28
|
+
const QUERY_TIMEOUT_MS = 30_000;
|
|
29
|
+
|
|
30
|
+
// Statement verbs and maintenance commands that must never appear in a user
// query once literals, comments and quoted identifiers have been removed.
const FORBIDDEN_KEYWORDS = [
  'INSERT', 'UPDATE', 'DELETE', 'REPLACE',
  'DROP', 'ALTER', 'CREATE', 'TRUNCATE',
  'ATTACH', 'DETACH', 'PRAGMA', 'VACUUM',
  'REINDEX', 'ANALYZE',
];

// One precompiled matcher per keyword (non-global, so safe to reuse).
// `replace(...)` is SQLite's scalar string function and is legitimate inside a
// SELECT; only REPLACE not followed by an opening parenthesis (the
// `REPLACE INTO ...` statement form) is rejected.
const FORBIDDEN_PATTERNS = FORBIDDEN_KEYWORDS.map((kw) => ({
  kw,
  re: kw === 'REPLACE' ? /\bREPLACE\b(?!\s*\()/ : new RegExp(`\\b${kw}\\b`),
}));

// Opening quote character -> its closing character, for every quoting form
// the SQLite tokenizer accepts: 'string', "identifier", `identifier`,
// [identifier].
const QUOTE_CLOSERS = new Map([
  ["'", "'"],
  ['"', '"'],
  ['`', '`'],
  ['[', ']'],
]);

/**
 * Blank out everything in `sql` that cannot contain a keyword or a statement
 * separator, so the validator's text scans only ever see real SQL tokens.
 *
 * - `-- line` and slash-star block comments become a single space.
 * - Single-quoted string literals become a single space
 *   (so `SELECT 'DELETE'` is not mistaken for a DELETE).
 * - Quoted identifiers ("x", `x`, [x]) are reduced to their empty delimiters:
 *   their contents are names, not keywords, and must not trip the scan.
 *
 * All quoting forms are tracked because a quote character inside one form
 * (for example a `'` inside backticks) would otherwise be misread as the
 * start of a string literal and hide the SQL that follows it.
 *
 * @param {string} sql
 * @returns {string}
 */
function stripCommentsAndStrings(sql) {
  let out = '';
  let i = 0;
  while (i < sql.length) {
    const c = sql[i];
    const next = sql[i + 1];

    // Line comment: skip up to (not including) the newline.
    if (c === '-' && next === '-') {
      while (i < sql.length && sql[i] !== '\n') i++;
      out += ' ';
      continue;
    }

    // Block comment: skip through the terminator, or to the end of input if
    // it is never closed.
    if (c === '/' && next === '*') {
      const end = sql.indexOf('*/', i + 2);
      i = end === -1 ? sql.length : end + 2;
      out += ' ';
      continue;
    }

    const closer = QUOTE_CLOSERS.get(c);
    if (closer !== undefined) {
      i++;
      while (i < sql.length) {
        if (sql[i] === closer) {
          // A doubled closing quote is an escaped quote character; bracket
          // identifiers have no escape and end at the first `]`.
          if (c !== '[' && sql[i + 1] === closer) {
            i += 2;
            continue;
          }
          i++;
          break;
        }
        i++;
      }
      out += c === "'" ? ' ' : c + closer;
      continue;
    }

    out += c;
    i++;
  }
  return out;
}

/**
 * Check that `sql` is a single read-only query.
 *
 * Accepts exactly one statement that begins with SELECT or WITH
 * (case-insensitive) and contains none of FORBIDDEN_KEYWORDS outside of
 * string literals, comments and quoted identifiers. Trailing semicolons are
 * allowed.
 *
 * @param {string} sql
 * @returns {{ ok: true } | { ok: false, error: string }}
 */
export function validateUserSql(sql) {
  if (typeof sql !== 'string' || !sql.trim()) {
    return { ok: false, error: 'Empty query' };
  }
  const stripped = stripCommentsAndStrings(sql).trim();
  if (!stripped) return { ok: false, error: 'Empty query' };

  // Single statement only — reject anything past a non-trailing semicolon.
  const noTrailing = stripped.replace(/;+\s*$/, '');
  if (noTrailing.includes(';')) {
    return { ok: false, error: 'Multiple statements are not allowed' };
  }

  const upper = noTrailing.toUpperCase();

  // Must start with SELECT or WITH.
  if (!/^\s*(SELECT|WITH)\b/.test(upper)) {
    return { ok: false, error: 'Only SELECT queries are allowed' };
  }

  // Reject forbidden keywords anywhere (a WITH prefix can front an
  // INSERT/UPDATE/DELETE, so the leading-keyword check alone is not enough).
  const hit = FORBIDDEN_PATTERNS.find(({ re }) => re.test(upper));
  if (hit) {
    return { ok: false, error: `Disallowed keyword: ${hit.kw}` };
  }
  return { ok: true };
}
|
|
113
|
+
|
|
114
|
+
/**
 * Prepare and run the (already validated) user query against `db`, enforcing
 * the read-only, row-cap and wall-clock limits while rows are produced.
 *
 * Rows are pulled one at a time with `iterate()` instead of `all()`, so the
 * result is never materialised beyond ROW_CAP and the elapsed-time check runs
 * between rows rather than only after the whole query has finished.
 * Limitation: the check can only run when SQLite hands a row back; a query
 * that computes for a long time without emitting a row still blocks, because
 * better-sqlite3 executes synchronously.
 *
 * Prepare errors (e.g. SQL syntax errors) and execution errors are both
 * reported as `{ error }` rather than thrown.
 *
 * @param {import('better-sqlite3').Database} db
 * @param {string} sql
 * @returns {{ rows: Object[], columns: string[], truncated: boolean } | { error: string }}
 */
function runCappedQuery(db, sql) {
  const startedAt = Date.now();
  const timedOut = () => Date.now() - startedAt > QUERY_TIMEOUT_MS;

  try {
    const stmt = db.prepare(sql);

    // Authoritative read-only guard, independent of the text-based validator:
    // the statement must return rows and must not write. Compared against
    // `false` so a driver build that lacks either flag is not locked out.
    if (stmt.reader === false || stmt.readonly === false) {
      return { error: 'Only SELECT queries are allowed' };
    }

    const rows = [];
    let truncated = false;
    for (const row of stmt.iterate()) {
      if (rows.length >= ROW_CAP) {
        // One row beyond the cap exists; leaving the loop closes the iterator.
        truncated = true;
        break;
      }
      rows.push(row);
      if (timedOut()) {
        return { error: `Query exceeded ${QUERY_TIMEOUT_MS}ms` };
      }
    }
    if (timedOut()) {
      return { error: `Query exceeded ${QUERY_TIMEOUT_MS}ms` };
    }

    const columns = stmt.columns().map((c) => c.name);
    return { rows, columns, truncated };
  } catch (err) {
    return { error: err instanceof Error ? err.message : String(err) };
  }
}

/**
 * Build an in-memory SQLite populated with fleet rollups and run `sql` against
 * it.
 *
 * Steps: validate the query, list connected deployments, create the four
 * tables (bots, daily_bot_stats, bot_health, protocol_stats), fill `bots`
 * from the deployment list, fan out to the three analytics endpoints and load
 * the successful responses, then run the query under the row and duration
 * caps. The database is closed on every exit path.
 *
 * @param {string} sql User-supplied query.
 * @returns {Promise<
 *   { rows: Object[], columns: string[], rowCount: number, truncated: boolean, fleet: Object }
 *   | { error: string }
 * >} `fleet` reports total/reachable/unreachable counts and the list of
 *   unreachable deployments, taken from the bot_health fan-out.
 */
export async function runScopedSql(sql) {
  const v = validateUserSql(sql);
  if (!v.ok) return { error: v.error };

  const deployments = await listConnectedDeployments();
  const fleetMeta = {
    totalCount: deployments.length,
    reachableCount: 0,
    unreachableCount: 0,
    unreachable: [],
  };

  const db = new Database(':memory:');
  try {
    db.exec(`
      CREATE TABLE bots (
        id TEXT PRIMARY KEY,
        bot_name TEXT NOT NULL,
        url TEXT,
        last_seen_at TEXT,
        created_at TEXT,
        cloud_provider TEXT,
        cloud_status TEXT,
        flow_type TEXT
      );
      CREATE TABLE daily_bot_stats (
        bot_id TEXT NOT NULL,
        date TEXT NOT NULL,
        conversations INTEGER NOT NULL,
        turns INTEGER NOT NULL,
        avg_turns REAL NOT NULL
      );
      CREATE TABLE bot_health (
        bot_id TEXT PRIMARY KEY,
        bot_name TEXT NOT NULL,
        conversations_7d INTEGER NOT NULL,
        turns_7d INTEGER NOT NULL,
        avg_turns_7d REAL NOT NULL,
        conversations_total INTEGER NOT NULL,
        turns_total INTEGER NOT NULL,
        last_activity_at TEXT
      );
      CREATE TABLE protocol_stats (
        bot_id TEXT NOT NULL,
        protocol TEXT NOT NULL,
        turns INTEGER NOT NULL,
        conversations_touched INTEGER NOT NULL
      );
    `);

    // Populate the control-plane-known `bots` table without any fan-out.
    const insertBot = db.prepare(`
      INSERT INTO bots (id, bot_name, url, last_seen_at, created_at, cloud_provider, cloud_status, flow_type)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    `);
    for (const d of deployments) {
      insertBot.run(
        d.id,
        d.botName,
        d.url,
        // NOTE(review): assumes lastSeenAt / createdAt are Date objects when
        // present — confirm against listConnectedDeployments().
        d.lastSeenAt ? d.lastSeenAt.toISOString() : null,
        d.createdAt ? d.createdAt.toISOString() : null,
        d.cloudProvider,
        d.cloudStatus,
        d.flowType,
      );
    }

    // Fan out for the three rollup tables in parallel.
    const [daily, health, proto] = await Promise.all([
      fanOut('/api/analytics/daily_stats', { deployments }),
      fanOut('/api/analytics/bot_health', { deployments }),
      fanOut('/api/analytics/protocol_stats', { deployments }),
    ]);

    // Use bot_health's reachability as the canonical fleet status, since
    // it's the cheapest call.
    fleetMeta.reachableCount = health.reachableCount;
    fleetMeta.unreachableCount = health.unreachableCount;
    fleetMeta.unreachable = health.results
      .filter((r) => !r.ok)
      .map((r) => ({
        id: r.deployment.id,
        botName: r.deployment.botName,
        reason: r.reason,
        status: r.status,
      }));

    // Only successful responses contribute rows; missing numeric fields
    // default to 0 so the NOT NULL columns are always satisfied.
    const insertDaily = db.prepare(`
      INSERT INTO daily_bot_stats (bot_id, date, conversations, turns, avg_turns)
      VALUES (?, ?, ?, ?, ?)
    `);
    for (const r of daily.results) {
      if (!r.ok) continue;
      for (const row of (r.data && r.data.rows) || []) {
        insertDaily.run(
          r.deployment.id,
          row.date,
          row.conversations || 0,
          row.turns || 0,
          row.avg_turns || 0,
        );
      }
    }

    const insertHealth = db.prepare(`
      INSERT INTO bot_health (bot_id, bot_name, conversations_7d, turns_7d, avg_turns_7d, conversations_total, turns_total, last_activity_at)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    `);
    for (const r of health.results) {
      if (!r.ok) continue;
      const d = r.data || {};
      insertHealth.run(
        r.deployment.id,
        r.deployment.botName,
        d.conversations7d || 0,
        d.turns7d || 0,
        d.avgTurns7d || 0,
        d.conversationsTotal || 0,
        d.turnsTotal || 0,
        d.lastActivityAt || null,
      );
    }

    const insertProto = db.prepare(`
      INSERT INTO protocol_stats (bot_id, protocol, turns, conversations_touched)
      VALUES (?, ?, ?, ?)
    `);
    for (const r of proto.results) {
      if (!r.ok) continue;
      for (const row of (r.data && r.data.rows) || []) {
        insertProto.run(
          r.deployment.id,
          row.protocol,
          row.turns || 0,
          row.conversations_touched || 0,
        );
      }
    }

    const outcome = runCappedQuery(db, sql);
    if ('error' in outcome) return { error: outcome.error };

    return {
      rows: outcome.rows,
      columns: outcome.columns,
      rowCount: outcome.rows.length,
      truncated: outcome.truncated,
      fleet: fleetMeta,
    };
  } finally {
    db.close();
  }
}
|
|
287
|
+
|
|
288
|
+
/**
 * Static description of the four in-memory tables, shaped as
 * `{ name, description, columns: [{ name, description }] }` per table, so the
 * schema reference panel can be rendered without a round-trip.
 *
 * Declared as compact [table, description, [[column, description], ...]]
 * tuples and expanded into objects below.
 */
export const FLEET_SCHEMA = [
  ['bots', 'One row per registered deployment (control-plane state)', [
    ['id', 'Deployment id'],
    ['bot_name', 'Display name'],
    ['url', 'Bot URL if connected'],
    ['last_seen_at', 'Last successful proxy call'],
    ['created_at', 'Deployment created at'],
    ['cloud_provider', 'fly | null'],
    ['cloud_status', 'running | paused | failed | null'],
    ['flow_type', 'Configured flow type'],
  ]],
  ['daily_bot_stats', 'Per-bot per-day conversation and turn counts', [
    ['bot_id', 'FK → bots.id'],
    ['date', 'YYYY-MM-DD'],
    ['conversations', 'Distinct conversation count'],
    ['turns', 'Turn count'],
    ['avg_turns', 'turns / conversations'],
  ]],
  ['bot_health', 'One row per bot — last 7 days plus all-time totals', [
    ['bot_id', 'FK → bots.id'],
    ['bot_name', 'Display name'],
    ['conversations_7d', 'Last 7 days'],
    ['turns_7d', 'Last 7 days'],
    ['avg_turns_7d', 'Last 7 days'],
    ['conversations_total', 'All time'],
    ['turns_total', 'All time'],
    ['last_activity_at', 'Most recent turn timestamp'],
  ]],
  ['protocol_stats', 'Per-bot per-protocol turn and conversation counts', [
    ['bot_id', 'FK → bots.id'],
    ['protocol', 'form | triage | appointment | extraction'],
    ['turns', 'Turns whose machine_state touched this protocol'],
    ['conversations_touched', 'Distinct conversations'],
  ]],
].map(([tableName, tableDescription, columnPairs]) => ({
  name: tableName,
  description: tableDescription,
  columns: columnPairs.map(([columnName, columnDescription]) => ({
    name: columnName,
    description: columnDescription,
  })),
}));
|