0nmcp 2.6.0 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +233 -695
- package/cli.js +9 -1
- package/crm/objects.js +5 -69
- package/crm/users.js +5 -80
- package/engine/index.js +338 -2
- package/engine/multi-ai.js +525 -0
- package/engine/plugin-builder.js +578 -0
- package/engine/plugin-registry.js +419 -0
- package/engine/plugin.js +448 -0
- package/engine/training-feed.js +520 -0
- package/engine/training.js +875 -0
- package/index.js +9 -1
- package/lib/stats.json +1 -1
- package/package.json +12 -2
|
@@ -0,0 +1,875 @@
|
|
|
1
|
+
// ============================================================
|
|
2
|
+
// 0nMCP — AI Training Engine (0nAI Training Center)
|
|
3
|
+
// ============================================================
|
|
4
|
+
// Tools for building, curating, scoring, and exporting
|
|
5
|
+
// training datasets for the 0nAI model.
|
|
6
|
+
//
|
|
7
|
+
// 8 MCP Tools:
|
|
8
|
+
// training_ingest — Ingest raw material from files/memory/code
|
|
9
|
+
// training_generate — Generate training pairs from sources
|
|
10
|
+
// training_score — Score pairs against rubrics
|
|
11
|
+
// training_dataset — Create/manage named datasets
|
|
12
|
+
// training_export — Export dataset as fine-tuning JSONL
|
|
13
|
+
// training_stats — Training center analytics
|
|
14
|
+
// training_search — Search sources and pairs
|
|
15
|
+
// training_review — Review and approve/reject pairs
|
|
16
|
+
//
|
|
17
|
+
// Database: Supabase (pwujhhmlrtxjmjzyttwn)
|
|
18
|
+
// Tables: training_sources, training_pairs, training_datasets,
|
|
19
|
+
// training_evaluations, training_exports, training_rubrics
|
|
20
|
+
// ============================================================
|
|
21
|
+
|
|
22
|
+
import { readFileSync, readdirSync, existsSync } from "fs";
|
|
23
|
+
import { writeFileSync, mkdirSync } from "fs";
|
|
24
|
+
import { join, basename, extname } from "path";
|
|
25
|
+
import { homedir } from "os";
|
|
26
|
+
import { createHash } from "crypto";
|
|
27
|
+
|
|
28
|
+
const EXPORTS_DIR = join(homedir(), ".0n", "training");
|
|
29
|
+
const MEMORY_DIR = join(homedir(), ".claude", "projects", "-Users-rocketopp", "memory");
|
|
30
|
+
|
|
31
|
+
// ── System prompt for 0nAI ──────────────────────────────────
|
|
32
|
+
|
|
33
|
+
const ONAI_SYSTEM_PROMPT = `You are Jaxx, the AI engine powering the 0nORK ecosystem. You are an expert in:
|
|
34
|
+
- 0nMCP: Universal AI API Orchestrator with 1,171+ tools across 54 services
|
|
35
|
+
- The .0n Standard: Universal config format, field resolution, workflow runtime
|
|
36
|
+
- CRM integration (NEVER say "GHL" or "GoHighLevel" — always "CRM" or "ROCKET")
|
|
37
|
+
- 0nVault: AES-256-GCM encrypted credential storage
|
|
38
|
+
- The 0n Network: 0nMCP, 0n-spec, 0nork, 0n Marketplace, 0nCore, 0nmcp.com
|
|
39
|
+
|
|
40
|
+
Rules:
|
|
41
|
+
- Push to main, deploy immediately — no feature branches
|
|
42
|
+
- Dark theme UI with brand green #7ed957
|
|
43
|
+
- ESM modules, TypeScript strict, Tailwind v4
|
|
44
|
+
- Data-driven patterns over code repetition
|
|
45
|
+
- Never over-engineer. Simplest solution first.
|
|
46
|
+
- Client emails ALWAYS go through CRM built-in email, never SendGrid/Resend`;
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Register training engine tools on an MCP server instance.
|
|
50
|
+
*
|
|
51
|
+
* @param {import("@modelcontextprotocol/sdk/server/mcp.js").McpServer} server
|
|
52
|
+
* @param {import("zod").ZodType} z
|
|
53
|
+
* @param {object} [supabase] — Supabase client (optional, creates one if missing)
|
|
54
|
+
*/
|
|
55
|
+
export function registerTrainingTools(server, z, supabase) {
|
|
56
|
+
|
|
57
|
+
// Lazy Supabase init
|
|
58
|
+
async function getSupabase() {
|
|
59
|
+
if (supabase) return supabase;
|
|
60
|
+
try {
|
|
61
|
+
const { createClient } = await import("@supabase/supabase-js");
|
|
62
|
+
const url = process.env.SUPABASE_URL || "https://pwujhhmlrtxjmjzyttwn.supabase.co";
|
|
63
|
+
const key = process.env.SUPABASE_SERVICE_KEY || process.env.SUPABASE_SERVICE_ROLE_KEY;
|
|
64
|
+
if (!key) throw new Error("No Supabase service key");
|
|
65
|
+
supabase = createClient(url, key);
|
|
66
|
+
return supabase;
|
|
67
|
+
} catch (err) {
|
|
68
|
+
throw new Error(`Supabase not available: ${err.message}`);
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
// ─── training_ingest ──────────────────────────────────────
|
|
73
|
+
server.tool(
|
|
74
|
+
"training_ingest",
|
|
75
|
+
`Ingest raw training material from files, memory, code, or text.
|
|
76
|
+
Stores in training_sources table for later pair generation.
|
|
77
|
+
|
|
78
|
+
Sources: memory files, .js/.ts code, .md docs, raw text, API patterns.
|
|
79
|
+
|
|
80
|
+
Example: training_ingest({ source_type: "memory", path: "~/.claude/projects/-Users-rocketopp/memory/" })
|
|
81
|
+
Example: training_ingest({ source_type: "code", path: "~/Github/0nMCP/catalog.js" })
|
|
82
|
+
Example: training_ingest({ source_type: "text", title: "CRM Email Rule", content: "Client emails always go through CRM..." })`,
|
|
83
|
+
{
|
|
84
|
+
source_type: z.enum(["memory", "code", "documentation", "api_pattern", "decision", "brand", "text", "conversation"]).describe("Type of source material"),
|
|
85
|
+
path: z.string().optional().describe("File or directory path to ingest"),
|
|
86
|
+
title: z.string().optional().describe("Title (required for text type)"),
|
|
87
|
+
content: z.string().optional().describe("Raw content (for text type, or override file content)"),
|
|
88
|
+
tags: z.array(z.string()).optional().describe("Tags for categorization"),
|
|
89
|
+
},
|
|
90
|
+
async ({ source_type, path, title, content, tags }) => {
|
|
91
|
+
try {
|
|
92
|
+
const sb = await getSupabase();
|
|
93
|
+
const sources = [];
|
|
94
|
+
|
|
95
|
+
if (path) {
|
|
96
|
+
const resolvedPath = path.replace("~", homedir());
|
|
97
|
+
|
|
98
|
+
if (existsSync(resolvedPath)) {
|
|
99
|
+
const stat = await import("fs").then(fs => fs.statSync(resolvedPath));
|
|
100
|
+
|
|
101
|
+
if (stat.isDirectory()) {
|
|
102
|
+
// Ingest all files in directory
|
|
103
|
+
const files = readdirSync(resolvedPath).filter(f =>
|
|
104
|
+
[".md", ".js", ".ts", ".json", ".txt", ".0n"].some(ext => f.endsWith(ext))
|
|
105
|
+
);
|
|
106
|
+
|
|
107
|
+
for (const file of files) {
|
|
108
|
+
const filePath = join(resolvedPath, file);
|
|
109
|
+
const fileContent = readFileSync(filePath, "utf-8");
|
|
110
|
+
const tokens = Math.ceil(fileContent.length / 4); // rough estimate
|
|
111
|
+
|
|
112
|
+
sources.push({
|
|
113
|
+
source_type,
|
|
114
|
+
source_path: filePath,
|
|
115
|
+
title: basename(file, extname(file)),
|
|
116
|
+
content: fileContent,
|
|
117
|
+
token_count: tokens,
|
|
118
|
+
tags: tags || [],
|
|
119
|
+
status: "raw",
|
|
120
|
+
metadata: { file_size: fileContent.length, extension: extname(file) },
|
|
121
|
+
});
|
|
122
|
+
}
|
|
123
|
+
} else {
|
|
124
|
+
// Single file
|
|
125
|
+
const fileContent = content || readFileSync(resolvedPath, "utf-8");
|
|
126
|
+
const tokens = Math.ceil(fileContent.length / 4);
|
|
127
|
+
|
|
128
|
+
sources.push({
|
|
129
|
+
source_type,
|
|
130
|
+
source_path: resolvedPath,
|
|
131
|
+
title: title || basename(resolvedPath, extname(resolvedPath)),
|
|
132
|
+
content: fileContent,
|
|
133
|
+
token_count: tokens,
|
|
134
|
+
tags: tags || [],
|
|
135
|
+
status: "raw",
|
|
136
|
+
metadata: { file_size: fileContent.length, extension: extname(resolvedPath) },
|
|
137
|
+
});
|
|
138
|
+
}
|
|
139
|
+
} else {
|
|
140
|
+
return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: `Path not found: ${resolvedPath}` }) }] };
|
|
141
|
+
}
|
|
142
|
+
} else if (content) {
|
|
143
|
+
// Raw text input
|
|
144
|
+
sources.push({
|
|
145
|
+
source_type,
|
|
146
|
+
source_path: null,
|
|
147
|
+
title: title || `${source_type} source`,
|
|
148
|
+
content,
|
|
149
|
+
token_count: Math.ceil(content.length / 4),
|
|
150
|
+
tags: tags || [],
|
|
151
|
+
status: "raw",
|
|
152
|
+
metadata: {},
|
|
153
|
+
});
|
|
154
|
+
} else {
|
|
155
|
+
return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: "Provide path or content" }) }] };
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
// Insert all sources
|
|
159
|
+
const { data, error } = await sb.from("training_sources").insert(sources).select("id, title, token_count");
|
|
160
|
+
if (error) throw error;
|
|
161
|
+
|
|
162
|
+
return {
|
|
163
|
+
content: [{
|
|
164
|
+
type: "text",
|
|
165
|
+
text: JSON.stringify({
|
|
166
|
+
status: "ingested",
|
|
167
|
+
count: data.length,
|
|
168
|
+
total_tokens: data.reduce((s, d) => s + (d.token_count || 0), 0),
|
|
169
|
+
sources: data.map(d => ({ id: d.id, title: d.title, tokens: d.token_count })),
|
|
170
|
+
message: `Ingested ${data.length} source(s). Use training_generate to create pairs.`,
|
|
171
|
+
}, null, 2),
|
|
172
|
+
}],
|
|
173
|
+
};
|
|
174
|
+
} catch (err) {
|
|
175
|
+
return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: err.message }) }] };
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
);
|
|
179
|
+
|
|
180
|
+
// ─── training_generate ────────────────────────────────────
|
|
181
|
+
server.tool(
|
|
182
|
+
"training_generate",
|
|
183
|
+
`Generate training pairs (system/user/assistant) from ingested sources.
|
|
184
|
+
Creates high-quality Q&A pairs formatted for fine-tuning.
|
|
185
|
+
|
|
186
|
+
Example: training_generate({ source_id: "uuid", count: 5 })
|
|
187
|
+
Example: training_generate({ domain: "crm", count: 10 })
|
|
188
|
+
Example: training_generate({ pairs: [{ user_input: "How do I...", assistant_output: "You can..." }] })`,
|
|
189
|
+
{
|
|
190
|
+
source_id: z.string().optional().describe("Generate pairs from a specific source"),
|
|
191
|
+
domain: z.string().optional().describe("Domain filter: architecture, crm, workflow, brand, code, support"),
|
|
192
|
+
count: z.number().optional().describe("Number of pairs to generate (default: 5)"),
|
|
193
|
+
dataset_id: z.string().optional().describe("Add pairs to this dataset"),
|
|
194
|
+
difficulty: z.enum(["easy", "medium", "hard", "expert"]).optional().describe("Target difficulty level"),
|
|
195
|
+
pairs: z.array(z.object({
|
|
196
|
+
user_input: z.string(),
|
|
197
|
+
assistant_output: z.string(),
|
|
198
|
+
system_prompt: z.string().optional(),
|
|
199
|
+
domain: z.string().optional(),
|
|
200
|
+
difficulty: z.string().optional(),
|
|
201
|
+
tags: z.array(z.string()).optional(),
|
|
202
|
+
})).optional().describe("Manually provide pairs to insert"),
|
|
203
|
+
},
|
|
204
|
+
async ({ source_id, domain, count, dataset_id, difficulty, pairs }) => {
|
|
205
|
+
try {
|
|
206
|
+
const sb = await getSupabase();
|
|
207
|
+
|
|
208
|
+
if (pairs && pairs.length > 0) {
|
|
209
|
+
// Direct insert of manually provided pairs
|
|
210
|
+
const rows = pairs.map(p => ({
|
|
211
|
+
dataset_id: dataset_id || null,
|
|
212
|
+
system_prompt: p.system_prompt || ONAI_SYSTEM_PROMPT,
|
|
213
|
+
user_input: p.user_input,
|
|
214
|
+
assistant_output: p.assistant_output,
|
|
215
|
+
domain: p.domain || domain || "general",
|
|
216
|
+
difficulty: p.difficulty || difficulty || "medium",
|
|
217
|
+
tags: p.tags || [],
|
|
218
|
+
quality_score: null,
|
|
219
|
+
human_reviewed: false,
|
|
220
|
+
approved: false,
|
|
221
|
+
metadata: { manually_created: true },
|
|
222
|
+
}));
|
|
223
|
+
|
|
224
|
+
const { data, error } = await sb.from("training_pairs").insert(rows).select("id");
|
|
225
|
+
if (error) throw error;
|
|
226
|
+
|
|
227
|
+
// Update dataset pair count
|
|
228
|
+
if (dataset_id) {
|
|
229
|
+
await sb.rpc("update_dataset_count", { ds_id: dataset_id }).catch(() => {
|
|
230
|
+
// RPC may not exist yet, update manually
|
|
231
|
+
sb.from("training_datasets").update({
|
|
232
|
+
pair_count: data.length,
|
|
233
|
+
updated_at: new Date().toISOString(),
|
|
234
|
+
}).eq("id", dataset_id);
|
|
235
|
+
});
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
return {
|
|
239
|
+
content: [{
|
|
240
|
+
type: "text",
|
|
241
|
+
text: JSON.stringify({
|
|
242
|
+
status: "generated",
|
|
243
|
+
count: data.length,
|
|
244
|
+
ids: data.map(d => d.id),
|
|
245
|
+
message: `Created ${data.length} training pair(s). Use training_score to evaluate quality.`,
|
|
246
|
+
}, null, 2),
|
|
247
|
+
}],
|
|
248
|
+
};
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
// Generate from source content
|
|
252
|
+
let sourceContent = "";
|
|
253
|
+
let sourceTitle = "";
|
|
254
|
+
|
|
255
|
+
if (source_id) {
|
|
256
|
+
const { data: source, error } = await sb.from("training_sources").select("*").eq("id", source_id).single();
|
|
257
|
+
if (error || !source) throw new Error("Source not found");
|
|
258
|
+
sourceContent = source.content;
|
|
259
|
+
sourceTitle = source.title;
|
|
260
|
+
} else {
|
|
261
|
+
// Pull recent unprocessed sources
|
|
262
|
+
let query = sb.from("training_sources").select("*").eq("status", "raw").order("created_at", { ascending: false }).limit(3);
|
|
263
|
+
if (domain) {
|
|
264
|
+
query = query.contains("tags", [domain]);
|
|
265
|
+
}
|
|
266
|
+
const { data: sources } = await query;
|
|
267
|
+
if (!sources || sources.length === 0) {
|
|
268
|
+
return { content: [{ type: "text", text: JSON.stringify({ status: "no_sources", message: "No raw sources found. Use training_ingest first." }) }] };
|
|
269
|
+
}
|
|
270
|
+
sourceContent = sources.map(s => `## ${s.title}\n${s.content}`).join("\n\n---\n\n");
|
|
271
|
+
sourceTitle = sources.map(s => s.title).join(", ");
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
// For now, return instructions for manual pair creation
|
|
275
|
+
// (AI generation would require Anthropic API calls which cost money)
|
|
276
|
+
const suggestedPairs = generatePairSuggestions(sourceContent, sourceTitle, domain, count || 5);
|
|
277
|
+
|
|
278
|
+
return {
|
|
279
|
+
content: [{
|
|
280
|
+
type: "text",
|
|
281
|
+
text: JSON.stringify({
|
|
282
|
+
status: "suggestions_ready",
|
|
283
|
+
source: sourceTitle,
|
|
284
|
+
suggested_count: suggestedPairs.length,
|
|
285
|
+
pairs: suggestedPairs,
|
|
286
|
+
message: `Generated ${suggestedPairs.length} pair suggestions from "${sourceTitle}". Review and submit with training_generate({ pairs: [...] }) to save.`,
|
|
287
|
+
}, null, 2),
|
|
288
|
+
}],
|
|
289
|
+
};
|
|
290
|
+
} catch (err) {
|
|
291
|
+
return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: err.message }) }] };
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
);
|
|
295
|
+
|
|
296
|
+
// ─── training_score ───────────────────────────────────────
|
|
297
|
+
server.tool(
|
|
298
|
+
"training_score",
|
|
299
|
+
`Score training pairs against rubrics for quality assessment.
|
|
300
|
+
|
|
301
|
+
Example: training_score({ pair_id: "uuid" })
|
|
302
|
+
Example: training_score({ dataset_id: "uuid" }) — score all unscored pairs in dataset
|
|
303
|
+
Example: training_score({ pair_id: "uuid", scores: { accuracy: 5, brand_voice: 4, helpfulness: 5 } })`,
|
|
304
|
+
{
|
|
305
|
+
pair_id: z.string().optional().describe("Score a specific pair"),
|
|
306
|
+
dataset_id: z.string().optional().describe("Score all unscored pairs in dataset"),
|
|
307
|
+
rubric_id: z.string().optional().describe("Use specific rubric (default: domain-matched or general)"),
|
|
308
|
+
scores: z.record(z.number()).optional().describe("Manual scores: { criterion_name: 1-5 }"),
|
|
309
|
+
auto: z.boolean().optional().describe("Auto-score based on heuristics (no API cost)"),
|
|
310
|
+
},
|
|
311
|
+
async ({ pair_id, dataset_id, rubric_id, scores, auto }) => {
|
|
312
|
+
try {
|
|
313
|
+
const sb = await getSupabase();
|
|
314
|
+
|
|
315
|
+
// Get rubric
|
|
316
|
+
let rubric;
|
|
317
|
+
if (rubric_id) {
|
|
318
|
+
const { data } = await sb.from("training_rubrics").select("*").eq("id", rubric_id).single();
|
|
319
|
+
rubric = data;
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
// Get pairs to score
|
|
323
|
+
let pairs = [];
|
|
324
|
+
if (pair_id) {
|
|
325
|
+
const { data } = await sb.from("training_pairs").select("*").eq("id", pair_id);
|
|
326
|
+
pairs = data || [];
|
|
327
|
+
} else if (dataset_id) {
|
|
328
|
+
const { data } = await sb.from("training_pairs").select("*").eq("dataset_id", dataset_id).is("quality_score", null).limit(50);
|
|
329
|
+
pairs = data || [];
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
if (pairs.length === 0) {
|
|
333
|
+
return { content: [{ type: "text", text: JSON.stringify({ status: "no_pairs", message: "No pairs to score" }) }] };
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
const results = [];
|
|
337
|
+
|
|
338
|
+
for (const pair of pairs) {
|
|
339
|
+
let finalScore;
|
|
340
|
+
|
|
341
|
+
if (scores && pair_id) {
|
|
342
|
+
// Manual scoring
|
|
343
|
+
if (!rubric) {
|
|
344
|
+
const { data: r } = await sb.from("training_rubrics")
|
|
345
|
+
.select("*")
|
|
346
|
+
.or(`domain.is.null,domain.eq.${pair.domain || "general"}`)
|
|
347
|
+
.eq("is_active", true)
|
|
348
|
+
.limit(1)
|
|
349
|
+
.single();
|
|
350
|
+
rubric = r;
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
if (rubric) {
|
|
354
|
+
const criteria = rubric.criteria;
|
|
355
|
+
let weightedSum = 0;
|
|
356
|
+
let totalWeight = 0;
|
|
357
|
+
for (const c of criteria) {
|
|
358
|
+
const score = scores[c.name];
|
|
359
|
+
if (score !== undefined) {
|
|
360
|
+
weightedSum += (score / 5) * c.weight;
|
|
361
|
+
totalWeight += c.weight;
|
|
362
|
+
}
|
|
363
|
+
}
|
|
364
|
+
finalScore = totalWeight > 0 ? Math.round((weightedSum / totalWeight) * 100) / 100 : null;
|
|
365
|
+
}
|
|
366
|
+
} else if (auto) {
|
|
367
|
+
// Heuristic auto-scoring (free, no API)
|
|
368
|
+
finalScore = autoScore(pair);
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
if (finalScore !== null && finalScore !== undefined) {
|
|
372
|
+
await sb.from("training_pairs").update({
|
|
373
|
+
quality_score: finalScore,
|
|
374
|
+
human_reviewed: !!scores,
|
|
375
|
+
updated_at: new Date().toISOString(),
|
|
376
|
+
metadata: { ...pair.metadata, scored_at: new Date().toISOString(), rubric_used: rubric?.name },
|
|
377
|
+
}).eq("id", pair.id);
|
|
378
|
+
|
|
379
|
+
results.push({ id: pair.id, score: finalScore, domain: pair.domain });
|
|
380
|
+
}
|
|
381
|
+
}
|
|
382
|
+
|
|
383
|
+
return {
|
|
384
|
+
content: [{
|
|
385
|
+
type: "text",
|
|
386
|
+
text: JSON.stringify({
|
|
387
|
+
status: "scored",
|
|
388
|
+
count: results.length,
|
|
389
|
+
avg_score: results.length > 0 ? Math.round(results.reduce((s, r) => s + r.score, 0) / results.length * 100) / 100 : 0,
|
|
390
|
+
results,
|
|
391
|
+
}, null, 2),
|
|
392
|
+
}],
|
|
393
|
+
};
|
|
394
|
+
} catch (err) {
|
|
395
|
+
return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: err.message }) }] };
|
|
396
|
+
}
|
|
397
|
+
}
|
|
398
|
+
);
|
|
399
|
+
|
|
400
|
+
// ─── training_dataset ─────────────────────────────────────
|
|
401
|
+
server.tool(
|
|
402
|
+
"training_dataset",
|
|
403
|
+
`Create or manage training datasets — named collections of pairs.
|
|
404
|
+
|
|
405
|
+
Example: training_dataset({ action: "create", name: "0nAI-CRM-v1", description: "CRM expertise pairs" })
|
|
406
|
+
Example: training_dataset({ action: "list" })
|
|
407
|
+
Example: training_dataset({ action: "add_pairs", dataset_id: "uuid", pair_ids: ["uuid1", "uuid2"] })
|
|
408
|
+
Example: training_dataset({ action: "auto_fill", dataset_id: "uuid", min_quality: 0.7 })`,
|
|
409
|
+
{
|
|
410
|
+
action: z.enum(["create", "list", "get", "add_pairs", "remove_pairs", "auto_fill", "delete"]).describe("Action to perform"),
|
|
411
|
+
name: z.string().optional().describe("Dataset name (for create)"),
|
|
412
|
+
description: z.string().optional().describe("Dataset description (for create)"),
|
|
413
|
+
target_model: z.string().optional().describe("Target model: claude, openai, gemini, llama"),
|
|
414
|
+
dataset_id: z.string().optional().describe("Dataset ID (for get/add/remove/auto_fill)"),
|
|
415
|
+
pair_ids: z.array(z.string()).optional().describe("Pair IDs to add/remove"),
|
|
416
|
+
min_quality: z.number().optional().describe("Minimum quality score for auto_fill (default: 0.7)"),
|
|
417
|
+
domain: z.string().optional().describe("Domain filter for auto_fill"),
|
|
418
|
+
},
|
|
419
|
+
async ({ action, name, description, target_model, dataset_id, pair_ids, min_quality, domain }) => {
|
|
420
|
+
try {
|
|
421
|
+
const sb = await getSupabase();
|
|
422
|
+
|
|
423
|
+
switch (action) {
|
|
424
|
+
case "create": {
|
|
425
|
+
const { data, error } = await sb.from("training_datasets").insert({
|
|
426
|
+
name: name || "Untitled Dataset",
|
|
427
|
+
description: description || "",
|
|
428
|
+
target_model: target_model || "claude",
|
|
429
|
+
status: "building",
|
|
430
|
+
}).select().single();
|
|
431
|
+
if (error) throw error;
|
|
432
|
+
return { content: [{ type: "text", text: JSON.stringify({ status: "created", dataset: data }) }] };
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
case "list": {
|
|
436
|
+
const { data, error } = await sb.from("training_datasets").select("*").order("created_at", { ascending: false });
|
|
437
|
+
if (error) throw error;
|
|
438
|
+
return { content: [{ type: "text", text: JSON.stringify({ status: "ok", count: data.length, datasets: data }, null, 2) }] };
|
|
439
|
+
}
|
|
440
|
+
|
|
441
|
+
case "get": {
|
|
442
|
+
if (!dataset_id) throw new Error("dataset_id required");
|
|
443
|
+
const { data: ds } = await sb.from("training_datasets").select("*").eq("id", dataset_id).single();
|
|
444
|
+
const { count } = await sb.from("training_pairs").select("id", { count: "exact", head: true }).eq("dataset_id", dataset_id);
|
|
445
|
+
const { data: sample } = await sb.from("training_pairs").select("id, user_input, quality_score, domain, approved").eq("dataset_id", dataset_id).order("quality_score", { ascending: false, nullsFirst: false }).limit(5);
|
|
446
|
+
return { content: [{ type: "text", text: JSON.stringify({ status: "ok", dataset: ds, pair_count: count, sample_pairs: sample }, null, 2) }] };
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
case "add_pairs": {
|
|
450
|
+
if (!dataset_id || !pair_ids?.length) throw new Error("dataset_id and pair_ids required");
|
|
451
|
+
const { error } = await sb.from("training_pairs").update({ dataset_id }).in("id", pair_ids);
|
|
452
|
+
if (error) throw error;
|
|
453
|
+
const { count } = await sb.from("training_pairs").select("id", { count: "exact", head: true }).eq("dataset_id", dataset_id);
|
|
454
|
+
await sb.from("training_datasets").update({ pair_count: count, updated_at: new Date().toISOString() }).eq("id", dataset_id);
|
|
455
|
+
return { content: [{ type: "text", text: JSON.stringify({ status: "added", pairs_added: pair_ids.length, total_pairs: count }) }] };
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
case "auto_fill": {
|
|
459
|
+
if (!dataset_id) throw new Error("dataset_id required");
|
|
460
|
+
const minQ = min_quality || 0.7;
|
|
461
|
+
let query = sb.from("training_pairs").select("id").gte("quality_score", minQ).is("dataset_id", null);
|
|
462
|
+
if (domain) query = query.eq("domain", domain);
|
|
463
|
+
const { data: eligible } = await query;
|
|
464
|
+
if (!eligible?.length) return { content: [{ type: "text", text: JSON.stringify({ status: "no_pairs", message: `No unassigned pairs with quality >= ${minQ}` }) }] };
|
|
465
|
+
const ids = eligible.map(p => p.id);
|
|
466
|
+
await sb.from("training_pairs").update({ dataset_id }).in("id", ids);
|
|
467
|
+
await sb.from("training_datasets").update({ pair_count: ids.length, updated_at: new Date().toISOString() }).eq("id", dataset_id);
|
|
468
|
+
return { content: [{ type: "text", text: JSON.stringify({ status: "filled", pairs_added: ids.length }) }] };
|
|
469
|
+
}
|
|
470
|
+
|
|
471
|
+
case "delete": {
|
|
472
|
+
if (!dataset_id) throw new Error("dataset_id required");
|
|
473
|
+
await sb.from("training_pairs").update({ dataset_id: null }).eq("dataset_id", dataset_id);
|
|
474
|
+
await sb.from("training_datasets").delete().eq("id", dataset_id);
|
|
475
|
+
return { content: [{ type: "text", text: JSON.stringify({ status: "deleted" }) }] };
|
|
476
|
+
}
|
|
477
|
+
|
|
478
|
+
default:
|
|
479
|
+
return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: `Unknown action: ${action}` }) }] };
|
|
480
|
+
}
|
|
481
|
+
} catch (err) {
|
|
482
|
+
return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: err.message }) }] };
|
|
483
|
+
}
|
|
484
|
+
}
|
|
485
|
+
);
|
|
486
|
+
|
|
487
|
+
// ─── training_export ──────────────────────────────────────
|
|
488
|
+
server.tool(
|
|
489
|
+
"training_export",
|
|
490
|
+
`Export a training dataset as a fine-tuning JSONL file.
|
|
491
|
+
Supports Anthropic, OpenAI, Alpaca, and ShareGPT formats.
|
|
492
|
+
|
|
493
|
+
Example: training_export({ dataset_id: "uuid", format: "anthropic_jsonl" })
|
|
494
|
+
Example: training_export({ dataset_id: "uuid", format: "openai_jsonl", min_quality: 0.8 })`,
|
|
495
|
+
{
|
|
496
|
+
dataset_id: z.string().describe("Dataset to export"),
|
|
497
|
+
format: z.enum(["anthropic_jsonl", "openai_jsonl", "alpaca", "sharegpt"]).describe("Export format"),
|
|
498
|
+
min_quality: z.number().optional().describe("Minimum quality score to include (default: 0)"),
|
|
499
|
+
approved_only: z.boolean().optional().describe("Only export approved pairs (default: false)"),
|
|
500
|
+
output: z.string().optional().describe("Output file path (default: ~/.0n/training/)"),
|
|
501
|
+
},
|
|
502
|
+
async ({ dataset_id, format, min_quality, approved_only, output }) => {
|
|
503
|
+
try {
|
|
504
|
+
const sb = await getSupabase();
|
|
505
|
+
|
|
506
|
+
// Fetch pairs
|
|
507
|
+
let query = sb.from("training_pairs").select("*").eq("dataset_id", dataset_id).order("quality_score", { ascending: false, nullsFirst: false });
|
|
508
|
+
if (min_quality) query = query.gte("quality_score", min_quality);
|
|
509
|
+
if (approved_only) query = query.eq("approved", true);
|
|
510
|
+
|
|
511
|
+
const { data: pairs, error } = await query;
|
|
512
|
+
if (error) throw error;
|
|
513
|
+
if (!pairs?.length) return { content: [{ type: "text", text: JSON.stringify({ status: "no_pairs", message: "No pairs match criteria" }) }] };
|
|
514
|
+
|
|
515
|
+
// Format pairs
|
|
516
|
+
let lines;
|
|
517
|
+
switch (format) {
|
|
518
|
+
case "anthropic_jsonl":
|
|
519
|
+
lines = pairs.map(p => JSON.stringify({
|
|
520
|
+
messages: [
|
|
521
|
+
{ role: "system", content: p.system_prompt || ONAI_SYSTEM_PROMPT },
|
|
522
|
+
{ role: "user", content: p.user_input },
|
|
523
|
+
{ role: "assistant", content: p.assistant_output },
|
|
524
|
+
],
|
|
525
|
+
}));
|
|
526
|
+
break;
|
|
527
|
+
|
|
528
|
+
case "openai_jsonl":
|
|
529
|
+
lines = pairs.map(p => JSON.stringify({
|
|
530
|
+
messages: [
|
|
531
|
+
{ role: "system", content: p.system_prompt || ONAI_SYSTEM_PROMPT },
|
|
532
|
+
{ role: "user", content: p.user_input },
|
|
533
|
+
{ role: "assistant", content: p.assistant_output },
|
|
534
|
+
],
|
|
535
|
+
}));
|
|
536
|
+
break;
|
|
537
|
+
|
|
538
|
+
case "alpaca":
|
|
539
|
+
lines = pairs.map(p => JSON.stringify({
|
|
540
|
+
instruction: p.user_input,
|
|
541
|
+
input: "",
|
|
542
|
+
output: p.assistant_output,
|
|
543
|
+
system: p.system_prompt || ONAI_SYSTEM_PROMPT,
|
|
544
|
+
}));
|
|
545
|
+
break;
|
|
546
|
+
|
|
547
|
+
case "sharegpt":
|
|
548
|
+
lines = pairs.map(p => JSON.stringify({
|
|
549
|
+
conversations: [
|
|
550
|
+
{ from: "system", value: p.system_prompt || ONAI_SYSTEM_PROMPT },
|
|
551
|
+
{ from: "human", value: p.user_input },
|
|
552
|
+
{ from: "gpt", value: p.assistant_output },
|
|
553
|
+
],
|
|
554
|
+
}));
|
|
555
|
+
break;
|
|
556
|
+
}
|
|
557
|
+
|
|
558
|
+
const content = lines.join("\n") + "\n";
|
|
559
|
+
const hash = createHash("sha256").update(content).digest("hex");
|
|
560
|
+
|
|
561
|
+
// Save file
|
|
562
|
+
if (!existsSync(EXPORTS_DIR)) mkdirSync(EXPORTS_DIR, { recursive: true });
|
|
563
|
+
const filename = `0nai-${format}-${Date.now()}.jsonl`;
|
|
564
|
+
const filePath = output || join(EXPORTS_DIR, filename);
|
|
565
|
+
writeFileSync(filePath, content);
|
|
566
|
+
|
|
567
|
+
// Record export
|
|
568
|
+
await sb.from("training_exports").insert({
|
|
569
|
+
dataset_id,
|
|
570
|
+
format,
|
|
571
|
+
pair_count: pairs.length,
|
|
572
|
+
file_size_bytes: Buffer.byteLength(content),
|
|
573
|
+
file_hash: hash,
|
|
574
|
+
export_path: filePath,
|
|
575
|
+
config: { min_quality, approved_only },
|
|
576
|
+
});
|
|
577
|
+
|
|
578
|
+
// Update dataset status
|
|
579
|
+
await sb.from("training_datasets").update({ status: "exported", updated_at: new Date().toISOString() }).eq("id", dataset_id);
|
|
580
|
+
|
|
581
|
+
return {
|
|
582
|
+
content: [{
|
|
583
|
+
type: "text",
|
|
584
|
+
text: JSON.stringify({
|
|
585
|
+
status: "exported",
|
|
586
|
+
format,
|
|
587
|
+
pairs: pairs.length,
|
|
588
|
+
file: filePath,
|
|
589
|
+
size_bytes: Buffer.byteLength(content),
|
|
590
|
+
hash,
|
|
591
|
+
message: `Exported ${pairs.length} pairs to ${filePath}`,
|
|
592
|
+
}, null, 2),
|
|
593
|
+
}],
|
|
594
|
+
};
|
|
595
|
+
} catch (err) {
|
|
596
|
+
return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: err.message }) }] };
|
|
597
|
+
}
|
|
598
|
+
}
|
|
599
|
+
);
|
|
600
|
+
|
|
601
|
+
// ─── training_stats ───────────────────────────────────────
server.tool(
  "training_stats",
  `Get analytics for the 0nAI Training Center.

Example: training_stats({})`,
  {},
  async () => {
    try {
      const sb = await getSupabase();

      // Fetch every table in parallel. Individual query failures are
      // tolerated below via `|| []` / `|| 0` so one missing or empty
      // table does not sink the whole analytics report.
      const [sources, pairs, datasets, exports, runs, knowledge] = await Promise.all([
        sb.from("training_sources").select("id, source_type, status, token_count", { count: "exact" }),
        sb.from("training_pairs").select("id, domain, quality_score, approved, dataset_id", { count: "exact" }),
        sb.from("training_datasets").select("*"),
        sb.from("training_exports").select("id, format, pair_count, created_at", { count: "exact" }),
        sb.from("training_runs").select("id, entries_added, avg_composite_score", { count: "exact" }),
        sb.from("council_knowledge").select("id, domain, composite_score", { count: "exact" }),
      ]);

      // Histogram of sources by source_type.
      const sourcesByType = {};
      for (const s of sources.data || []) {
        sourcesByType[s.source_type] = (sourcesByType[s.source_type] || 0) + 1;
      }

      // Pair aggregates: per-domain counts, approvals, and quality average.
      const pairsByDomain = {};
      let approvedCount = 0;
      let scoredCount = 0;
      let totalScore = 0;
      for (const p of pairs.data || []) {
        pairsByDomain[p.domain || "unset"] = (pairsByDomain[p.domain || "unset"] || 0) + 1;
        if (p.approved) approvedCount++;
        // Null check (not truthiness): a legitimate quality_score of 0 must
        // still count as "scored" instead of being dropped from the average.
        if (p.quality_score != null) { scoredCount++; totalScore += Number(p.quality_score); }
      }

      const totalTokens = (sources.data || []).reduce((s, d) => s + (d.token_count || 0), 0);

      return {
        content: [{
          type: "text",
          text: JSON.stringify({
            status: "ok",
            training_center: {
              sources: { total: sources.count || 0, by_type: sourcesByType, total_tokens: totalTokens },
              pairs: {
                total: pairs.count || 0,
                by_domain: pairsByDomain,
                approved: approvedCount,
                scored: scoredCount,
                avg_quality: scoredCount > 0 ? Math.round(totalScore / scoredCount * 100) / 100 : 0,
              },
              datasets: {
                total: (datasets.data || []).length,
                list: (datasets.data || []).map(d => ({ id: d.id, name: d.name, pairs: d.pair_count, status: d.status })),
              },
              exports: { total: exports.count || 0 },
              council: {
                training_runs: runs.count || 0,
                knowledge_entries: knowledge.count || 0,
                avg_score: (knowledge.data || []).length > 0
                  ? Math.round((knowledge.data || []).reduce((s, k) => s + Number(k.composite_score || 0), 0) / knowledge.data.length * 100) / 100
                  : 0,
              },
            },
          }, null, 2),
        }],
      };
    } catch (err) {
      return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: err.message }) }] };
    }
  }
);
|
|
673
|
+
|
|
674
|
+
// ─── training_search ──────────────────────────────────────
server.tool(
  "training_search",
  `Search training sources and pairs by keyword or domain.

Example: training_search({ query: "CRM contacts", table: "pairs" })
Example: training_search({ domain: "crm", min_quality: 0.8 })`,
  {
    query: z.string().optional().describe("Search keyword"),
    table: z.enum(["sources", "pairs", "both"]).optional().describe("Which table to search (default: both)"),
    domain: z.string().optional().describe("Filter by domain"),
    min_quality: z.number().optional().describe("Minimum quality score"),
    limit: z.number().optional().describe("Max results (default: 20)"),
  },
  async ({ query, table, domain, min_quality, limit: maxResults }) => {
    try {
      const sb = await getSupabase();
      // Nullish coalescing so an explicit limit of 0 is honored, not replaced by 20.
      const lim = maxResults ?? 20;
      const results = { sources: [], pairs: [] };

      // `table` undefined or "both" searches both tables.
      if (table !== "pairs") {
        let q = sb.from("training_sources").select("id, title, source_type, status, token_count, tags, created_at").order("created_at", { ascending: false }).limit(lim);
        if (query) q = q.textSearch("fts", query.split(" ").join(" & "));
        const { data, error } = await q;
        // Surface DB errors instead of silently reporting empty results.
        if (error) throw new Error(`training_sources search failed: ${error.message}`);
        results.sources = data || [];
      }

      if (table !== "sources") {
        let q = sb.from("training_pairs").select("id, user_input, assistant_output, domain, quality_score, approved, dataset_id, created_at").order("created_at", { ascending: false }).limit(lim);
        if (query) q = q.textSearch("fts", query.split(" ").join(" & "));
        if (domain) q = q.eq("domain", domain);
        // Null check (not truthiness) so min_quality: 0 still applies the filter.
        if (min_quality != null) q = q.gte("quality_score", min_quality);
        const { data, error } = await q;
        if (error) throw new Error(`training_pairs search failed: ${error.message}`);
        results.pairs = data || [];
      }

      return {
        content: [{
          type: "text",
          text: JSON.stringify({
            status: "ok",
            sources: results.sources.length,
            pairs: results.pairs.length,
            results,
          }, null, 2),
        }],
      };
    } catch (err) {
      return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: err.message }) }] };
    }
  }
);
|
|
726
|
+
|
|
727
|
+
// ─── training_review ──────────────────────────────────────
server.tool(
  "training_review",
  `Review and approve/reject training pairs.

Example: training_review({ pair_id: "uuid", action: "approve" })
Example: training_review({ pair_id: "uuid", action: "reject", reason: "Inaccurate API path" })
Example: training_review({ action: "pending", limit: 10 }) — get unreviewed pairs`,
  {
    action: z.enum(["approve", "reject", "pending", "edit"]).describe("Review action"),
    pair_id: z.string().optional().describe("Pair to review"),
    reason: z.string().optional().describe("Rejection reason"),
    user_input: z.string().optional().describe("Updated user input (for edit)"),
    assistant_output: z.string().optional().describe("Updated assistant output (for edit)"),
    limit: z.number().optional().describe("Number of pending pairs to show (default: 10)"),
  },
  async ({ action, pair_id, reason, user_input, assistant_output, limit: maxResults }) => {
    try {
      const sb = await getSupabase();

      switch (action) {
        case "approve": {
          if (!pair_id) throw new Error("pair_id required");
          // Check the update result: a DB error must not be reported as success.
          const { error } = await sb.from("training_pairs").update({ approved: true, human_reviewed: true, updated_at: new Date().toISOString() }).eq("id", pair_id);
          if (error) throw new Error(error.message);
          return { content: [{ type: "text", text: JSON.stringify({ status: "approved", pair_id }) }] };
        }

        case "reject": {
          if (!pair_id) throw new Error("pair_id required");
          const { error } = await sb.from("training_pairs").update({ approved: false, human_reviewed: true, rejection_reason: reason || null, updated_at: new Date().toISOString() }).eq("id", pair_id);
          if (error) throw new Error(error.message);
          return { content: [{ type: "text", text: JSON.stringify({ status: "rejected", pair_id, reason }) }] };
        }

        case "pending": {
          // Oldest-first queue of pairs no human has reviewed yet.
          const { data, error } = await sb.from("training_pairs")
            .select("id, user_input, assistant_output, domain, quality_score, tags")
            .eq("human_reviewed", false)
            .order("created_at", { ascending: true })
            .limit(maxResults || 10);
          if (error) throw new Error(error.message);
          return { content: [{ type: "text", text: JSON.stringify({ status: "ok", count: data?.length || 0, pairs: data }, null, 2) }] };
        }

        case "edit": {
          if (!pair_id) throw new Error("pair_id required");
          const updates = { updated_at: new Date().toISOString() };
          if (user_input) updates.user_input = user_input;
          if (assistant_output) updates.assistant_output = assistant_output;
          const { error } = await sb.from("training_pairs").update(updates).eq("id", pair_id);
          if (error) throw new Error(error.message);
          return { content: [{ type: "text", text: JSON.stringify({ status: "edited", pair_id }) }] };
        }

        default:
          // Unreachable with the zod enum, but guarantees the handler always
          // returns a response instead of undefined.
          return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: `Unknown action: ${action}` }) }] };
      }
    } catch (err) {
      return { content: [{ type: "text", text: JSON.stringify({ status: "failed", error: err.message }) }] };
    }
  }
);
|
|
783
|
+
}
|
|
784
|
+
|
|
785
|
+
// ── Helpers ──────────────────────────────────────────────────

/**
 * Auto-score a training pair using heuristics (no API cost).
 *
 * Starts from a 0.5 baseline, then applies small bonuses for length,
 * code blocks, 0n-ecosystem references, structure, and prompt/answer
 * word overlap, plus a severe penalty for legacy-brand mentions.
 *
 * @param {{user_input?: string, assistant_output?: string}} pair
 * @returns {number} Score clamped to [0, 1], rounded to 2 decimals.
 */
function autoScore(pair) {
  const answer = pair.assistant_output || "";
  const prompt = pair.user_input || "";
  let score = 0.5; // baseline

  // Length tiers.
  if (answer.length > 100) score += 0.05;
  if (answer.length > 300) score += 0.05;
  if (answer.length > 50 && answer.length < 2000) score += 0.05;

  // Contains fenced code.
  if (answer.includes("```")) score += 0.05;

  // Legacy-brand mentions are heavily penalized; their absence is rewarded.
  const lowerAnswer = answer.toLowerCase();
  const bannedTerms = ["ghl", "go high level", "highlevel"];
  if (bannedTerms.some((term) => lowerAnswer.includes(term))) {
    score -= 0.3; // severe penalty
  } else {
    score += 0.1;
  }

  // Mentions the 0n ecosystem.
  if (["0nMCP", "0nmcp", ".0n"].some((ref) => answer.includes(ref))) score += 0.05;

  // Structured output (bullets or numbered lists).
  if (["- ", "* ", "1."].some((marker) => answer.includes(marker))) score += 0.05;

  // Prompt/answer relevance via shared significant words (length > 3).
  const significantWords = (text) =>
    new Set(text.toLowerCase().split(/\s+/).filter((w) => w.length > 3));
  const promptWords = significantWords(prompt);
  const answerWords = significantWords(answer);
  let shared = 0;
  for (const word of promptWords) {
    if (answerWords.has(word)) shared++;
  }
  if (promptWords.size > 0) score += Math.min(0.1, (shared / promptWords.size) * 0.1);

  // Clamp to [0, 1] and round to two decimals.
  return Math.min(1.0, Math.max(0.0, Math.round(score * 100) / 100));
}
|
|
826
|
+
|
|
827
|
+
/**
 * Generate pair suggestions from source content (no API cost).
 * Returns structured suggestions the user can review and submit.
 *
 * @param {string} content - Raw source text to mine for Q&A candidates.
 * @param {string} title - Source title, recorded on each suggestion.
 * @param {string|null|undefined} domain - Domain tag; falls back per pattern
 *   ("general" for headers, "code" for code, "api_pattern" for endpoints).
 * @param {number} count - Maximum number of suggestions to return.
 * @returns {Array<{user_input: string, assistant_output: string, domain: string, source: string}>}
 */
function generatePairSuggestions(content, title, domain, count) {
  const suggestions = [];
  // Drop short/noise lines; only substantive lines seed suggestions.
  const lines = content.split("\n").filter(l => l.trim().length > 20);

  // Extract potential Q&A from headers and content.
  for (let i = 0; i < lines.length && suggestions.length < count; i++) {
    const line = lines[i].trim();

    // Markdown headers become questions, with the following lines as context.
    // (startsWith("#") already matches "##"; the redundant second check was removed.)
    if (line.startsWith("#")) {
      const topic = line.replace(/^#+\s*/, "");
      if (topic.length > 10) {
        const context = lines.slice(i + 1, i + 5).join(" ").slice(0, 500);
        suggestions.push({
          user_input: `What is ${topic} in the 0n ecosystem?`,
          assistant_output: context || `${topic} is a component of the 0nORK platform.`,
          domain: domain || "general",
          source: title,
        });
      }
    }

    // Code definitions become "how does it work" stubs for a human to complete.
    if (line.includes("function ") || line.includes("export ") || line.includes("class ")) {
      suggestions.push({
        user_input: `How does the ${line.slice(0, 60).replace(/[{(]/g, "")} work?`,
        assistant_output: `[Review and write explanation based on: ${line.slice(0, 200)}]`,
        domain: domain || "code",
        source: title,
      });
    }

    // Config/endpoint patterns become endpoint-explanation stubs.
    if (line.includes("baseUrl") || line.includes("endpoint") || line.includes("path:")) {
      suggestions.push({
        user_input: `What API endpoint does this use?`,
        assistant_output: `[Review and explain the endpoint from: ${line.slice(0, 200)}]`,
        domain: domain || "api_pattern",
        source: title,
      });
    }
  }

  // One iteration may push up to three suggestions, overshooting `count`;
  // the final slice enforces the cap.
  return suggestions.slice(0, count);
}
|