0nmcp 2.6.0 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,875 @@
1
+ // ============================================================
2
+ // 0nMCP — AI Training Engine (0nAI Training Center)
3
+ // ============================================================
4
+ // Tools for building, curating, scoring, and exporting
5
+ // training datasets for the 0nAI model.
6
+ //
7
+ // 8 MCP Tools:
8
+ // training_ingest — Ingest raw material from files/memory/code
9
+ // training_generate — Generate training pairs from sources
10
+ // training_score — Score pairs against rubrics
11
+ // training_dataset — Create/manage named datasets
12
+ // training_export — Export dataset as fine-tuning JSONL
13
+ // training_stats — Training center analytics
14
+ // training_search — Search sources and pairs
15
+ // training_review — Review and approve/reject pairs
16
+ //
17
+ // Database: Supabase (pwujhhmlrtxjmjzyttwn)
18
+ // Tables: training_sources, training_pairs, training_datasets,
19
+ // training_evaluations, training_exports, training_rubrics
20
+ // ============================================================
21
+
22
import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync } from "fs";
import { basename, extname, join } from "path";
import { homedir } from "os";
import { createHash } from "crypto";
27
+
28
// Directory where exported fine-tuning JSONL files are written.
const EXPORTS_DIR = join(homedir(), ".0n", "training");
// Claude Code memory directory — a common ingestion source (see tool examples).
const MEMORY_DIR = join(homedir(), ".claude", "projects", "-Users-rocketopp", "memory");

// ── System prompt for 0nAI ──────────────────────────────────

// Default system prompt attached to a training pair/export whenever the pair
// does not carry its own `system_prompt` column value.
const ONAI_SYSTEM_PROMPT = `You are Jaxx, the AI engine powering the 0nORK ecosystem. You are an expert in:
- 0nMCP: Universal AI API Orchestrator with 1,171+ tools across 54 services
- The .0n Standard: Universal config format, field resolution, workflow runtime
- CRM integration (NEVER say "GHL" or "GoHighLevel" — always "CRM" or "ROCKET")
- 0nVault: AES-256-GCM encrypted credential storage
- The 0n Network: 0nMCP, 0n-spec, 0nork, 0n Marketplace, 0nCore, 0nmcp.com

Rules:
- Push to main, deploy immediately — no feature branches
- Dark theme UI with brand green #7ed957
- ESM modules, TypeScript strict, Tailwind v4
- Data-driven patterns over code repetition
- Never over-engineer. Simplest solution first.
- Client emails ALWAYS go through CRM built-in email, never SendGrid/Resend`;
47
+
48
/**
 * Register training engine tools on an MCP server instance.
 *
 * @param {import("@modelcontextprotocol/sdk/server/mcp.js").McpServer} server
 * @param {import("zod").ZodType} z
 * @param {object} [supabase] — Supabase client (optional, creates one if missing)
 */
export function registerTrainingTools(server, z, supabase) {

  // Lazy Supabase init.
  // The client is created on first tool invocation (never at registration
  // time) and cached by reassigning the `supabase` parameter binding, so
  // subsequent calls reuse the same client.
  async function getSupabase() {
    if (supabase) return supabase;
    try {
      const { createClient } = await import("@supabase/supabase-js");
      // Env vars take precedence; the URL falls back to the project default.
      const url = process.env.SUPABASE_URL || "https://pwujhhmlrtxjmjzyttwn.supabase.co";
      const key = process.env.SUPABASE_SERVICE_KEY || process.env.SUPABASE_SERVICE_ROLE_KEY;
      if (!key) throw new Error("No Supabase service key");
      supabase = createClient(url, key);
      return supabase;
    } catch (err) {
      // Wrap import/credential failures so every tool reports a uniform error.
      throw new Error(`Supabase not available: ${err.message}`);
    }
  }
71
+
72
+ // ─── training_ingest ──────────────────────────────────────
73
+ server.tool(
74
+ "training_ingest",
75
+ `Ingest raw training material from files, memory, code, or text.
76
+ Stores in training_sources table for later pair generation.
77
+
78
+ Sources: memory files, .js/.ts code, .md docs, raw text, API patterns.
79
+
80
+ Example: training_ingest({ source_type: "memory", path: "~/.claude/projects/-Users-rocketopp/memory/" })
81
+ Example: training_ingest({ source_type: "code", path: "~/Github/0nMCP/catalog.js" })
82
+ Example: training_ingest({ source_type: "text", title: "CRM Email Rule", content: "Client emails always go through CRM..." })`,
83
+ {
84
+ source_type: z.enum(["memory", "code", "documentation", "api_pattern", "decision", "brand", "text", "conversation"]).describe("Type of source material"),
85
+ path: z.string().optional().describe("File or directory path to ingest"),
86
+ title: z.string().optional().describe("Title (required for text type)"),
87
+ content: z.string().optional().describe("Raw content (for text type, or override file content)"),
88
+ tags: z.array(z.string()).optional().describe("Tags for categorization"),
89
+ },
90
+ async ({ source_type, path, title, content, tags }) => {
91
+ try {
92
+ const sb = await getSupabase();
93
+ const sources = [];
94
+
95
+ if (path) {
96
+ const resolvedPath = path.replace("~", homedir());
97
+
98
+ if (existsSync(resolvedPath)) {
99
+ const stat = await import("fs").then(fs => fs.statSync(resolvedPath));
100
+
101
+ if (stat.isDirectory()) {
102
+ // Ingest all files in directory
103
+ const files = readdirSync(resolvedPath).filter(f =>
104
+ [".md", ".js", ".ts", ".json", ".txt", ".0n"].some(ext => f.endsWith(ext))
105
+ );
106
+
107
+ for (const file of files) {
108
+ const filePath = join(resolvedPath, file);
109
+ const fileContent = readFileSync(filePath, "utf-8");
110
+ const tokens = Math.ceil(fileContent.length / 4); // rough estimate
111
+
112
+ sources.push({
113
+ source_type,
114
+ source_path: filePath,
115
+ title: basename(file, extname(file)),
116
+ content: fileContent,
117
+ token_count: tokens,
118
+ tags: tags || [],
119
+ status: "raw",
120
+ metadata: { file_size: fileContent.length, extension: extname(file) },
121
+ });
122
+ }
123
+ } else {
124
+ // Single file
125
+ const fileContent = content || readFileSync(resolvedPath, "utf-8");
126
+ const tokens = Math.ceil(fileContent.length / 4);
127
+
128
+ sources.push({
129
+ source_type,
130
+ source_path: resolvedPath,
131
+ title: title || basename(resolvedPath, extname(resolvedPath)),
132
+ content: fileContent,
133
+ token_count: tokens,
134
+ tags: tags || [],
135
+ status: "raw",
136
+ metadata: { file_size: fileContent.length, extension: extname(resolvedPath) },
137
+ });
138
+ }
139
+ } else {
140
+ return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: `Path not found: ${resolvedPath}` }) }] };
141
+ }
142
+ } else if (content) {
143
+ // Raw text input
144
+ sources.push({
145
+ source_type,
146
+ source_path: null,
147
+ title: title || `${source_type} source`,
148
+ content,
149
+ token_count: Math.ceil(content.length / 4),
150
+ tags: tags || [],
151
+ status: "raw",
152
+ metadata: {},
153
+ });
154
+ } else {
155
+ return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: "Provide path or content" }) }] };
156
+ }
157
+
158
+ // Insert all sources
159
+ const { data, error } = await sb.from("training_sources").insert(sources).select("id, title, token_count");
160
+ if (error) throw error;
161
+
162
+ return {
163
+ content: [{
164
+ type: "text",
165
+ text: JSON.stringify({
166
+ status: "ingested",
167
+ count: data.length,
168
+ total_tokens: data.reduce((s, d) => s + (d.token_count || 0), 0),
169
+ sources: data.map(d => ({ id: d.id, title: d.title, tokens: d.token_count })),
170
+ message: `Ingested ${data.length} source(s). Use training_generate to create pairs.`,
171
+ }, null, 2),
172
+ }],
173
+ };
174
+ } catch (err) {
175
+ return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: err.message }) }] };
176
+ }
177
+ }
178
+ );
179
+
180
+ // ─── training_generate ────────────────────────────────────
181
+ server.tool(
182
+ "training_generate",
183
+ `Generate training pairs (system/user/assistant) from ingested sources.
184
+ Creates high-quality Q&A pairs formatted for fine-tuning.
185
+
186
+ Example: training_generate({ source_id: "uuid", count: 5 })
187
+ Example: training_generate({ domain: "crm", count: 10 })
188
+ Example: training_generate({ pairs: [{ user_input: "How do I...", assistant_output: "You can..." }] })`,
189
+ {
190
+ source_id: z.string().optional().describe("Generate pairs from a specific source"),
191
+ domain: z.string().optional().describe("Domain filter: architecture, crm, workflow, brand, code, support"),
192
+ count: z.number().optional().describe("Number of pairs to generate (default: 5)"),
193
+ dataset_id: z.string().optional().describe("Add pairs to this dataset"),
194
+ difficulty: z.enum(["easy", "medium", "hard", "expert"]).optional().describe("Target difficulty level"),
195
+ pairs: z.array(z.object({
196
+ user_input: z.string(),
197
+ assistant_output: z.string(),
198
+ system_prompt: z.string().optional(),
199
+ domain: z.string().optional(),
200
+ difficulty: z.string().optional(),
201
+ tags: z.array(z.string()).optional(),
202
+ })).optional().describe("Manually provide pairs to insert"),
203
+ },
204
+ async ({ source_id, domain, count, dataset_id, difficulty, pairs }) => {
205
+ try {
206
+ const sb = await getSupabase();
207
+
208
+ if (pairs && pairs.length > 0) {
209
+ // Direct insert of manually provided pairs
210
+ const rows = pairs.map(p => ({
211
+ dataset_id: dataset_id || null,
212
+ system_prompt: p.system_prompt || ONAI_SYSTEM_PROMPT,
213
+ user_input: p.user_input,
214
+ assistant_output: p.assistant_output,
215
+ domain: p.domain || domain || "general",
216
+ difficulty: p.difficulty || difficulty || "medium",
217
+ tags: p.tags || [],
218
+ quality_score: null,
219
+ human_reviewed: false,
220
+ approved: false,
221
+ metadata: { manually_created: true },
222
+ }));
223
+
224
+ const { data, error } = await sb.from("training_pairs").insert(rows).select("id");
225
+ if (error) throw error;
226
+
227
+ // Update dataset pair count
228
+ if (dataset_id) {
229
+ await sb.rpc("update_dataset_count", { ds_id: dataset_id }).catch(() => {
230
+ // RPC may not exist yet, update manually
231
+ sb.from("training_datasets").update({
232
+ pair_count: data.length,
233
+ updated_at: new Date().toISOString(),
234
+ }).eq("id", dataset_id);
235
+ });
236
+ }
237
+
238
+ return {
239
+ content: [{
240
+ type: "text",
241
+ text: JSON.stringify({
242
+ status: "generated",
243
+ count: data.length,
244
+ ids: data.map(d => d.id),
245
+ message: `Created ${data.length} training pair(s). Use training_score to evaluate quality.`,
246
+ }, null, 2),
247
+ }],
248
+ };
249
+ }
250
+
251
+ // Generate from source content
252
+ let sourceContent = "";
253
+ let sourceTitle = "";
254
+
255
+ if (source_id) {
256
+ const { data: source, error } = await sb.from("training_sources").select("*").eq("id", source_id).single();
257
+ if (error || !source) throw new Error("Source not found");
258
+ sourceContent = source.content;
259
+ sourceTitle = source.title;
260
+ } else {
261
+ // Pull recent unprocessed sources
262
+ let query = sb.from("training_sources").select("*").eq("status", "raw").order("created_at", { ascending: false }).limit(3);
263
+ if (domain) {
264
+ query = query.contains("tags", [domain]);
265
+ }
266
+ const { data: sources } = await query;
267
+ if (!sources || sources.length === 0) {
268
+ return { content: [{ type: "text", text: JSON.stringify({ status: "no_sources", message: "No raw sources found. Use training_ingest first." }) }] };
269
+ }
270
+ sourceContent = sources.map(s => `## ${s.title}\n${s.content}`).join("\n\n---\n\n");
271
+ sourceTitle = sources.map(s => s.title).join(", ");
272
+ }
273
+
274
+ // For now, return instructions for manual pair creation
275
+ // (AI generation would require Anthropic API calls which cost money)
276
+ const suggestedPairs = generatePairSuggestions(sourceContent, sourceTitle, domain, count || 5);
277
+
278
+ return {
279
+ content: [{
280
+ type: "text",
281
+ text: JSON.stringify({
282
+ status: "suggestions_ready",
283
+ source: sourceTitle,
284
+ suggested_count: suggestedPairs.length,
285
+ pairs: suggestedPairs,
286
+ message: `Generated ${suggestedPairs.length} pair suggestions from "${sourceTitle}". Review and submit with training_generate({ pairs: [...] }) to save.`,
287
+ }, null, 2),
288
+ }],
289
+ };
290
+ } catch (err) {
291
+ return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: err.message }) }] };
292
+ }
293
+ }
294
+ );
295
+
296
  // ─── training_score ───────────────────────────────────────
  server.tool(
    "training_score",
    `Score training pairs against rubrics for quality assessment.

Example: training_score({ pair_id: "uuid" })
Example: training_score({ dataset_id: "uuid" }) — score all unscored pairs in dataset
Example: training_score({ pair_id: "uuid", scores: { accuracy: 5, brand_voice: 4, helpfulness: 5 } })`,
    {
      pair_id: z.string().optional().describe("Score a specific pair"),
      dataset_id: z.string().optional().describe("Score all unscored pairs in dataset"),
      rubric_id: z.string().optional().describe("Use specific rubric (default: domain-matched or general)"),
      scores: z.record(z.number()).optional().describe("Manual scores: { criterion_name: 1-5 }"),
      auto: z.boolean().optional().describe("Auto-score based on heuristics (no API cost)"),
    },
    async ({ pair_id, dataset_id, rubric_id, scores, auto }) => {
      try {
        const sb = await getSupabase();

        // Resolve an explicit rubric up front; otherwise a domain-matched
        // rubric is fetched lazily (and cached) during the scoring loop.
        let rubric;
        if (rubric_id) {
          const { data } = await sb.from("training_rubrics").select("*").eq("id", rubric_id).single();
          rubric = data;
        }

        // Collect the pairs to score: one explicit pair, or up to 50
        // not-yet-scored pairs from the given dataset.
        let pairs = [];
        if (pair_id) {
          const { data } = await sb.from("training_pairs").select("*").eq("id", pair_id);
          pairs = data || [];
        } else if (dataset_id) {
          const { data } = await sb.from("training_pairs").select("*").eq("dataset_id", dataset_id).is("quality_score", null).limit(50);
          pairs = data || [];
        }

        if (pairs.length === 0) {
          return { content: [{ type: "text", text: JSON.stringify({ status: "no_pairs", message: "No pairs to score" }) }] };
        }

        const results = [];

        for (const pair of pairs) {
          let finalScore;

          if (scores && pair_id) {
            // Manual scoring path — only taken for a single explicit pair.
            if (!rubric) {
              // Prefer an active rubric for the pair's domain; a NULL-domain
              // rubric acts as the general fallback. The result is cached in
              // `rubric` so later iterations skip the lookup.
              const { data: r } = await sb.from("training_rubrics")
                .select("*")
                .or(`domain.is.null,domain.eq.${pair.domain || "general"}`)
                .eq("is_active", true)
                .limit(1)
                .single();
              rubric = r;
            }

            if (rubric) {
              // Weighted average of the supplied criterion scores, normalized
              // from the 1-5 input scale down to a 0-1 quality score.
              // Criteria the caller omitted simply drop out of the average.
              const criteria = rubric.criteria;
              let weightedSum = 0;
              let totalWeight = 0;
              for (const c of criteria) {
                const score = scores[c.name];
                if (score !== undefined) {
                  weightedSum += (score / 5) * c.weight;
                  totalWeight += c.weight;
                }
              }
              // Round to 2 decimals; null when no criterion matched at all.
              finalScore = totalWeight > 0 ? Math.round((weightedSum / totalWeight) * 100) / 100 : null;
            }
          } else if (auto) {
            // Heuristic auto-scoring (free, no API) — see autoScore() helper.
            finalScore = autoScore(pair);
          }

          if (finalScore !== null && finalScore !== undefined) {
            // Persist the score; human_reviewed is true only for manual scores.
            await sb.from("training_pairs").update({
              quality_score: finalScore,
              human_reviewed: !!scores,
              updated_at: new Date().toISOString(),
              metadata: { ...pair.metadata, scored_at: new Date().toISOString(), rubric_used: rubric?.name },
            }).eq("id", pair.id);

            results.push({ id: pair.id, score: finalScore, domain: pair.domain });
          }
        }

        return {
          content: [{
            type: "text",
            text: JSON.stringify({
              status: "scored",
              count: results.length,
              avg_score: results.length > 0 ? Math.round(results.reduce((s, r) => s + r.score, 0) / results.length * 100) / 100 : 0,
              results,
            }, null, 2),
          }],
        };
      } catch (err) {
        return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: err.message }) }] };
      }
    }
  );
399
+
400
+ // ─── training_dataset ─────────────────────────────────────
401
+ server.tool(
402
+ "training_dataset",
403
+ `Create or manage training datasets — named collections of pairs.
404
+
405
+ Example: training_dataset({ action: "create", name: "0nAI-CRM-v1", description: "CRM expertise pairs" })
406
+ Example: training_dataset({ action: "list" })
407
+ Example: training_dataset({ action: "add_pairs", dataset_id: "uuid", pair_ids: ["uuid1", "uuid2"] })
408
+ Example: training_dataset({ action: "auto_fill", dataset_id: "uuid", min_quality: 0.7 })`,
409
+ {
410
+ action: z.enum(["create", "list", "get", "add_pairs", "remove_pairs", "auto_fill", "delete"]).describe("Action to perform"),
411
+ name: z.string().optional().describe("Dataset name (for create)"),
412
+ description: z.string().optional().describe("Dataset description (for create)"),
413
+ target_model: z.string().optional().describe("Target model: claude, openai, gemini, llama"),
414
+ dataset_id: z.string().optional().describe("Dataset ID (for get/add/remove/auto_fill)"),
415
+ pair_ids: z.array(z.string()).optional().describe("Pair IDs to add/remove"),
416
+ min_quality: z.number().optional().describe("Minimum quality score for auto_fill (default: 0.7)"),
417
+ domain: z.string().optional().describe("Domain filter for auto_fill"),
418
+ },
419
+ async ({ action, name, description, target_model, dataset_id, pair_ids, min_quality, domain }) => {
420
+ try {
421
+ const sb = await getSupabase();
422
+
423
+ switch (action) {
424
+ case "create": {
425
+ const { data, error } = await sb.from("training_datasets").insert({
426
+ name: name || "Untitled Dataset",
427
+ description: description || "",
428
+ target_model: target_model || "claude",
429
+ status: "building",
430
+ }).select().single();
431
+ if (error) throw error;
432
+ return { content: [{ type: "text", text: JSON.stringify({ status: "created", dataset: data }) }] };
433
+ }
434
+
435
+ case "list": {
436
+ const { data, error } = await sb.from("training_datasets").select("*").order("created_at", { ascending: false });
437
+ if (error) throw error;
438
+ return { content: [{ type: "text", text: JSON.stringify({ status: "ok", count: data.length, datasets: data }, null, 2) }] };
439
+ }
440
+
441
+ case "get": {
442
+ if (!dataset_id) throw new Error("dataset_id required");
443
+ const { data: ds } = await sb.from("training_datasets").select("*").eq("id", dataset_id).single();
444
+ const { count } = await sb.from("training_pairs").select("id", { count: "exact", head: true }).eq("dataset_id", dataset_id);
445
+ const { data: sample } = await sb.from("training_pairs").select("id, user_input, quality_score, domain, approved").eq("dataset_id", dataset_id).order("quality_score", { ascending: false, nullsFirst: false }).limit(5);
446
+ return { content: [{ type: "text", text: JSON.stringify({ status: "ok", dataset: ds, pair_count: count, sample_pairs: sample }, null, 2) }] };
447
+ }
448
+
449
+ case "add_pairs": {
450
+ if (!dataset_id || !pair_ids?.length) throw new Error("dataset_id and pair_ids required");
451
+ const { error } = await sb.from("training_pairs").update({ dataset_id }).in("id", pair_ids);
452
+ if (error) throw error;
453
+ const { count } = await sb.from("training_pairs").select("id", { count: "exact", head: true }).eq("dataset_id", dataset_id);
454
+ await sb.from("training_datasets").update({ pair_count: count, updated_at: new Date().toISOString() }).eq("id", dataset_id);
455
+ return { content: [{ type: "text", text: JSON.stringify({ status: "added", pairs_added: pair_ids.length, total_pairs: count }) }] };
456
+ }
457
+
458
+ case "auto_fill": {
459
+ if (!dataset_id) throw new Error("dataset_id required");
460
+ const minQ = min_quality || 0.7;
461
+ let query = sb.from("training_pairs").select("id").gte("quality_score", minQ).is("dataset_id", null);
462
+ if (domain) query = query.eq("domain", domain);
463
+ const { data: eligible } = await query;
464
+ if (!eligible?.length) return { content: [{ type: "text", text: JSON.stringify({ status: "no_pairs", message: `No unassigned pairs with quality >= ${minQ}` }) }] };
465
+ const ids = eligible.map(p => p.id);
466
+ await sb.from("training_pairs").update({ dataset_id }).in("id", ids);
467
+ await sb.from("training_datasets").update({ pair_count: ids.length, updated_at: new Date().toISOString() }).eq("id", dataset_id);
468
+ return { content: [{ type: "text", text: JSON.stringify({ status: "filled", pairs_added: ids.length }) }] };
469
+ }
470
+
471
+ case "delete": {
472
+ if (!dataset_id) throw new Error("dataset_id required");
473
+ await sb.from("training_pairs").update({ dataset_id: null }).eq("dataset_id", dataset_id);
474
+ await sb.from("training_datasets").delete().eq("id", dataset_id);
475
+ return { content: [{ type: "text", text: JSON.stringify({ status: "deleted" }) }] };
476
+ }
477
+
478
+ default:
479
+ return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: `Unknown action: ${action}` }) }] };
480
+ }
481
+ } catch (err) {
482
+ return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: err.message }) }] };
483
+ }
484
+ }
485
+ );
486
+
487
+ // ─── training_export ──────────────────────────────────────
488
+ server.tool(
489
+ "training_export",
490
+ `Export a training dataset as a fine-tuning JSONL file.
491
+ Supports Anthropic, OpenAI, Alpaca, and ShareGPT formats.
492
+
493
+ Example: training_export({ dataset_id: "uuid", format: "anthropic_jsonl" })
494
+ Example: training_export({ dataset_id: "uuid", format: "openai_jsonl", min_quality: 0.8 })`,
495
+ {
496
+ dataset_id: z.string().describe("Dataset to export"),
497
+ format: z.enum(["anthropic_jsonl", "openai_jsonl", "alpaca", "sharegpt"]).describe("Export format"),
498
+ min_quality: z.number().optional().describe("Minimum quality score to include (default: 0)"),
499
+ approved_only: z.boolean().optional().describe("Only export approved pairs (default: false)"),
500
+ output: z.string().optional().describe("Output file path (default: ~/.0n/training/)"),
501
+ },
502
+ async ({ dataset_id, format, min_quality, approved_only, output }) => {
503
+ try {
504
+ const sb = await getSupabase();
505
+
506
+ // Fetch pairs
507
+ let query = sb.from("training_pairs").select("*").eq("dataset_id", dataset_id).order("quality_score", { ascending: false, nullsFirst: false });
508
+ if (min_quality) query = query.gte("quality_score", min_quality);
509
+ if (approved_only) query = query.eq("approved", true);
510
+
511
+ const { data: pairs, error } = await query;
512
+ if (error) throw error;
513
+ if (!pairs?.length) return { content: [{ type: "text", text: JSON.stringify({ status: "no_pairs", message: "No pairs match criteria" }) }] };
514
+
515
+ // Format pairs
516
+ let lines;
517
+ switch (format) {
518
+ case "anthropic_jsonl":
519
+ lines = pairs.map(p => JSON.stringify({
520
+ messages: [
521
+ { role: "system", content: p.system_prompt || ONAI_SYSTEM_PROMPT },
522
+ { role: "user", content: p.user_input },
523
+ { role: "assistant", content: p.assistant_output },
524
+ ],
525
+ }));
526
+ break;
527
+
528
+ case "openai_jsonl":
529
+ lines = pairs.map(p => JSON.stringify({
530
+ messages: [
531
+ { role: "system", content: p.system_prompt || ONAI_SYSTEM_PROMPT },
532
+ { role: "user", content: p.user_input },
533
+ { role: "assistant", content: p.assistant_output },
534
+ ],
535
+ }));
536
+ break;
537
+
538
+ case "alpaca":
539
+ lines = pairs.map(p => JSON.stringify({
540
+ instruction: p.user_input,
541
+ input: "",
542
+ output: p.assistant_output,
543
+ system: p.system_prompt || ONAI_SYSTEM_PROMPT,
544
+ }));
545
+ break;
546
+
547
+ case "sharegpt":
548
+ lines = pairs.map(p => JSON.stringify({
549
+ conversations: [
550
+ { from: "system", value: p.system_prompt || ONAI_SYSTEM_PROMPT },
551
+ { from: "human", value: p.user_input },
552
+ { from: "gpt", value: p.assistant_output },
553
+ ],
554
+ }));
555
+ break;
556
+ }
557
+
558
+ const content = lines.join("\n") + "\n";
559
+ const hash = createHash("sha256").update(content).digest("hex");
560
+
561
+ // Save file
562
+ if (!existsSync(EXPORTS_DIR)) mkdirSync(EXPORTS_DIR, { recursive: true });
563
+ const filename = `0nai-${format}-${Date.now()}.jsonl`;
564
+ const filePath = output || join(EXPORTS_DIR, filename);
565
+ writeFileSync(filePath, content);
566
+
567
+ // Record export
568
+ await sb.from("training_exports").insert({
569
+ dataset_id,
570
+ format,
571
+ pair_count: pairs.length,
572
+ file_size_bytes: Buffer.byteLength(content),
573
+ file_hash: hash,
574
+ export_path: filePath,
575
+ config: { min_quality, approved_only },
576
+ });
577
+
578
+ // Update dataset status
579
+ await sb.from("training_datasets").update({ status: "exported", updated_at: new Date().toISOString() }).eq("id", dataset_id);
580
+
581
+ return {
582
+ content: [{
583
+ type: "text",
584
+ text: JSON.stringify({
585
+ status: "exported",
586
+ format,
587
+ pairs: pairs.length,
588
+ file: filePath,
589
+ size_bytes: Buffer.byteLength(content),
590
+ hash,
591
+ message: `Exported ${pairs.length} pairs to ${filePath}`,
592
+ }, null, 2),
593
+ }],
594
+ };
595
+ } catch (err) {
596
+ return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: err.message }) }] };
597
+ }
598
+ }
599
+ );
600
+
601
+ // ─── training_stats ───────────────────────────────────────
602
+ server.tool(
603
+ "training_stats",
604
+ `Get analytics for the 0nAI Training Center.
605
+
606
+ Example: training_stats({})`,
607
+ {},
608
+ async () => {
609
+ try {
610
+ const sb = await getSupabase();
611
+
612
+ const [sources, pairs, datasets, exports, runs, knowledge] = await Promise.all([
613
+ sb.from("training_sources").select("id, source_type, status, token_count", { count: "exact" }),
614
+ sb.from("training_pairs").select("id, domain, quality_score, approved, dataset_id", { count: "exact" }),
615
+ sb.from("training_datasets").select("*"),
616
+ sb.from("training_exports").select("id, format, pair_count, created_at", { count: "exact" }),
617
+ sb.from("training_runs").select("id, entries_added, avg_composite_score", { count: "exact" }),
618
+ sb.from("council_knowledge").select("id, domain, composite_score", { count: "exact" }),
619
+ ]);
620
+
621
+ const sourcesByType = {};
622
+ for (const s of sources.data || []) {
623
+ sourcesByType[s.source_type] = (sourcesByType[s.source_type] || 0) + 1;
624
+ }
625
+
626
+ const pairsByDomain = {};
627
+ let approvedCount = 0;
628
+ let scoredCount = 0;
629
+ let totalScore = 0;
630
+ for (const p of pairs.data || []) {
631
+ pairsByDomain[p.domain || "unset"] = (pairsByDomain[p.domain || "unset"] || 0) + 1;
632
+ if (p.approved) approvedCount++;
633
+ if (p.quality_score) { scoredCount++; totalScore += Number(p.quality_score); }
634
+ }
635
+
636
+ const totalTokens = (sources.data || []).reduce((s, d) => s + (d.token_count || 0), 0);
637
+
638
+ return {
639
+ content: [{
640
+ type: "text",
641
+ text: JSON.stringify({
642
+ status: "ok",
643
+ training_center: {
644
+ sources: { total: sources.count || 0, by_type: sourcesByType, total_tokens: totalTokens },
645
+ pairs: {
646
+ total: pairs.count || 0,
647
+ by_domain: pairsByDomain,
648
+ approved: approvedCount,
649
+ scored: scoredCount,
650
+ avg_quality: scoredCount > 0 ? Math.round(totalScore / scoredCount * 100) / 100 : 0,
651
+ },
652
+ datasets: {
653
+ total: (datasets.data || []).length,
654
+ list: (datasets.data || []).map(d => ({ id: d.id, name: d.name, pairs: d.pair_count, status: d.status })),
655
+ },
656
+ exports: { total: exports.count || 0 },
657
+ council: {
658
+ training_runs: runs.count || 0,
659
+ knowledge_entries: knowledge.count || 0,
660
+ avg_score: (knowledge.data || []).length > 0
661
+ ? Math.round((knowledge.data || []).reduce((s, k) => s + Number(k.composite_score || 0), 0) / knowledge.data.length * 100) / 100
662
+ : 0,
663
+ },
664
+ },
665
+ }, null, 2),
666
+ }],
667
+ };
668
+ } catch (err) {
669
+ return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: err.message }) }] };
670
+ }
671
+ }
672
+ );
673
+
674
+ // ─── training_search ──────────────────────────────────────
675
+ server.tool(
676
+ "training_search",
677
+ `Search training sources and pairs by keyword or domain.
678
+
679
+ Example: training_search({ query: "CRM contacts", table: "pairs" })
680
+ Example: training_search({ domain: "crm", min_quality: 0.8 })`,
681
+ {
682
+ query: z.string().optional().describe("Search keyword"),
683
+ table: z.enum(["sources", "pairs", "both"]).optional().describe("Which table to search (default: both)"),
684
+ domain: z.string().optional().describe("Filter by domain"),
685
+ min_quality: z.number().optional().describe("Minimum quality score"),
686
+ limit: z.number().optional().describe("Max results (default: 20)"),
687
+ },
688
+ async ({ query, table, domain, min_quality, limit: maxResults }) => {
689
+ try {
690
+ const sb = await getSupabase();
691
+ const lim = maxResults || 20;
692
+ const results = { sources: [], pairs: [] };
693
+
694
+ if (table !== "pairs") {
695
+ let q = sb.from("training_sources").select("id, title, source_type, status, token_count, tags, created_at").order("created_at", { ascending: false }).limit(lim);
696
+ if (query) q = q.textSearch("fts", query.split(" ").join(" & "));
697
+ const { data } = await q;
698
+ results.sources = data || [];
699
+ }
700
+
701
+ if (table !== "sources") {
702
+ let q = sb.from("training_pairs").select("id, user_input, assistant_output, domain, quality_score, approved, dataset_id, created_at").order("created_at", { ascending: false }).limit(lim);
703
+ if (query) q = q.textSearch("fts", query.split(" ").join(" & "));
704
+ if (domain) q = q.eq("domain", domain);
705
+ if (min_quality) q = q.gte("quality_score", min_quality);
706
+ const { data } = await q;
707
+ results.pairs = data || [];
708
+ }
709
+
710
+ return {
711
+ content: [{
712
+ type: "text",
713
+ text: JSON.stringify({
714
+ status: "ok",
715
+ sources: results.sources.length,
716
+ pairs: results.pairs.length,
717
+ results,
718
+ }, null, 2),
719
+ }],
720
+ };
721
+ } catch (err) {
722
+ return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: err.message }) }] };
723
+ }
724
+ }
725
+ );
726
+
727
+ // ─── training_review ──────────────────────────────────────
728
+ server.tool(
729
+ "training_review",
730
+ `Review and approve/reject training pairs.
731
+
732
+ Example: training_review({ pair_id: "uuid", action: "approve" })
733
+ Example: training_review({ pair_id: "uuid", action: "reject", reason: "Inaccurate API path" })
734
+ Example: training_review({ action: "pending", limit: 10 }) — get unreviewed pairs`,
735
+ {
736
+ action: z.enum(["approve", "reject", "pending", "edit"]).describe("Review action"),
737
+ pair_id: z.string().optional().describe("Pair to review"),
738
+ reason: z.string().optional().describe("Rejection reason"),
739
+ user_input: z.string().optional().describe("Updated user input (for edit)"),
740
+ assistant_output: z.string().optional().describe("Updated assistant output (for edit)"),
741
+ limit: z.number().optional().describe("Number of pending pairs to show (default: 10)"),
742
+ },
743
+ async ({ action, pair_id, reason, user_input, assistant_output, limit: maxResults }) => {
744
+ try {
745
+ const sb = await getSupabase();
746
+
747
+ switch (action) {
748
+ case "approve": {
749
+ if (!pair_id) throw new Error("pair_id required");
750
+ await sb.from("training_pairs").update({ approved: true, human_reviewed: true, updated_at: new Date().toISOString() }).eq("id", pair_id);
751
+ return { content: [{ type: "text", text: JSON.stringify({ status: "approved", pair_id }) }] };
752
+ }
753
+
754
+ case "reject": {
755
+ if (!pair_id) throw new Error("pair_id required");
756
+ await sb.from("training_pairs").update({ approved: false, human_reviewed: true, rejection_reason: reason || null, updated_at: new Date().toISOString() }).eq("id", pair_id);
757
+ return { content: [{ type: "text", text: JSON.stringify({ status: "rejected", pair_id, reason }) }] };
758
+ }
759
+
760
+ case "pending": {
761
+ const { data } = await sb.from("training_pairs")
762
+ .select("id, user_input, assistant_output, domain, quality_score, tags")
763
+ .eq("human_reviewed", false)
764
+ .order("created_at", { ascending: true })
765
+ .limit(maxResults || 10);
766
+ return { content: [{ type: "text", text: JSON.stringify({ status: "ok", count: data?.length || 0, pairs: data }, null, 2) }] };
767
+ }
768
+
769
+ case "edit": {
770
+ if (!pair_id) throw new Error("pair_id required");
771
+ const updates = { updated_at: new Date().toISOString() };
772
+ if (user_input) updates.user_input = user_input;
773
+ if (assistant_output) updates.assistant_output = assistant_output;
774
+ await sb.from("training_pairs").update(updates).eq("id", pair_id);
775
+ return { content: [{ type: "text", text: JSON.stringify({ status: "edited", pair_id }) }] };
776
+ }
777
+ }
778
+ } catch (err) {
779
+ return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: err.message }) }] };
780
+ }
781
+ }
782
+ );
783
+ }
784
+
785
+ // ── Helpers ──────────────────────────────────────────────────
786
+
787
+ /**
788
+ * Auto-score a training pair using heuristics (no API cost).
789
+ */
790
+ function autoScore(pair) {
791
+ let score = 0.5; // baseline
792
+
793
+ const output = pair.assistant_output || "";
794
+ const input = pair.user_input || "";
795
+
796
+ // Length checks
797
+ if (output.length > 100) score += 0.05;
798
+ if (output.length > 300) score += 0.05;
799
+ if (output.length > 50 && output.length < 2000) score += 0.05;
800
+
801
+ // Has code blocks
802
+ if (output.includes("```")) score += 0.05;
803
+
804
+ // Doesn't say GHL
805
+ if (!output.toLowerCase().includes("ghl") && !output.toLowerCase().includes("go high level") && !output.toLowerCase().includes("highlevel")) {
806
+ score += 0.1;
807
+ } else {
808
+ score -= 0.3; // severe penalty
809
+ }
810
+
811
+ // References 0n ecosystem
812
+ if (output.includes("0nMCP") || output.includes("0nmcp") || output.includes(".0n")) score += 0.05;
813
+
814
+ // Has structure (bullet points, headers)
815
+ if (output.includes("- ") || output.includes("* ") || output.includes("1.")) score += 0.05;
816
+
817
+ // Input/output relevance (shared words)
818
+ const inputWords = new Set(input.toLowerCase().split(/\s+/).filter(w => w.length > 3));
819
+ const outputWords = new Set(output.toLowerCase().split(/\s+/).filter(w => w.length > 3));
820
+ let overlap = 0;
821
+ for (const w of inputWords) if (outputWords.has(w)) overlap++;
822
+ if (inputWords.size > 0) score += Math.min(0.1, (overlap / inputWords.size) * 0.1);
823
+
824
+ return Math.min(1.0, Math.max(0.0, Math.round(score * 100) / 100));
825
+ }
826
+
827
+ /**
828
+ * Generate pair suggestions from source content (no API cost).
829
+ * Returns structured suggestions the user can review and submit.
830
+ */
831
+ function generatePairSuggestions(content, title, domain, count) {
832
+ const suggestions = [];
833
+ const lines = content.split("\n").filter(l => l.trim().length > 20);
834
+
835
+ // Extract potential Q&A from headers and content
836
+ for (let i = 0; i < lines.length && suggestions.length < count; i++) {
837
+ const line = lines[i].trim();
838
+
839
+ // Headers become questions
840
+ if (line.startsWith("#") || line.startsWith("##")) {
841
+ const topic = line.replace(/^#+\s*/, "");
842
+ if (topic.length > 10) {
843
+ const context = lines.slice(i + 1, i + 5).join(" ").slice(0, 500);
844
+ suggestions.push({
845
+ user_input: `What is ${topic} in the 0n ecosystem?`,
846
+ assistant_output: context || `${topic} is a component of the 0nORK platform.`,
847
+ domain: domain || "general",
848
+ source: title,
849
+ });
850
+ }
851
+ }
852
+
853
+ // Code patterns
854
+ if (line.includes("function ") || line.includes("export ") || line.includes("class ")) {
855
+ suggestions.push({
856
+ user_input: `How does the ${line.slice(0, 60).replace(/[{(]/g, "")} work?`,
857
+ assistant_output: `[Review and write explanation based on: ${line.slice(0, 200)}]`,
858
+ domain: domain || "code",
859
+ source: title,
860
+ });
861
+ }
862
+
863
+ // Config/endpoint patterns
864
+ if (line.includes("baseUrl") || line.includes("endpoint") || line.includes("path:")) {
865
+ suggestions.push({
866
+ user_input: `What API endpoint does this use?`,
867
+ assistant_output: `[Review and explain the endpoint from: ${line.slice(0, 200)}]`,
868
+ domain: domain || "api_pattern",
869
+ source: title,
870
+ });
871
+ }
872
+ }
873
+
874
+ return suggestions.slice(0, count);
875
+ }