research-copilot 0.2.20 → 0.2.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/app/out/main/index.mjs +2585 -48
- package/app/out/renderer/assets/{MilkdownMarkdownEditor-CCdZ2mtg.css → MilkdownMarkdownEditor-BW0Pt28W.css} +16 -1
- package/app/out/renderer/assets/{MilkdownMarkdownEditor-Bj7JSjF5.js → MilkdownMarkdownEditor-OhCrq3X0.js} +56 -51
- package/app/out/renderer/assets/{arc-CPL9nDFE.js → arc-DLr0RP8F.js} +1 -1
- package/app/out/renderer/assets/{blockDiagram-c4efeb88-BFOajDNs.js → blockDiagram-c4efeb88-XhKChw2n.js} +8 -8
- package/app/out/renderer/assets/{c4Diagram-c83219d4-LeqnQ2-5.js → c4Diagram-c83219d4-DDoJmoIQ.js} +3 -3
- package/app/out/renderer/assets/{channel-jk5Np8ud.js → channel-CJCgJSqV.js} +1 -1
- package/app/out/renderer/assets/{classDiagram-beda092f-CxOqB6OU.js → classDiagram-beda092f-CAmimZpz.js} +6 -6
- package/app/out/renderer/assets/{classDiagram-v2-2358418a-CyP_5qLa.js → classDiagram-v2-2358418a-Bma4E_Eg.js} +10 -10
- package/app/out/renderer/assets/{clone-PHFwh58n.js → clone-C338dmoI.js} +1 -1
- package/app/out/renderer/assets/{createText-1719965b-CE_0jsfj.js → createText-1719965b-_up4NJqB.js} +2 -2
- package/app/out/renderer/assets/{edges-96097737-DBk1JhZS.js → edges-96097737-Bpp6hVLn.js} +3 -3
- package/app/out/renderer/assets/{erDiagram-0228fc6a-DnR_LkSB.js → erDiagram-0228fc6a-bjTh_7ap.js} +5 -5
- package/app/out/renderer/assets/{flowDb-c6c81e3f-CJrZUKlS.js → flowDb-c6c81e3f-BjVV4DVk.js} +1 -1
- package/app/out/renderer/assets/{flowDiagram-50d868cf-CfNfrt17.js → flowDiagram-50d868cf-gmeaaZ6z.js} +12 -12
- package/app/out/renderer/assets/{flowDiagram-v2-4f6560a1-BGQtiK3j.js → flowDiagram-v2-4f6560a1-nem5zs2M.js} +12 -12
- package/app/out/renderer/assets/{flowchart-elk-definition-6af322e1-BXLraghz.js → flowchart-elk-definition-6af322e1-DPaGAYRw.js} +6 -6
- package/app/out/renderer/assets/{ganttDiagram-a2739b55-CAwaEMMm.js → ganttDiagram-a2739b55-CnAti19E.js} +3 -3
- package/app/out/renderer/assets/{gitGraphDiagram-82fe8481-vuSEC6ny.js → gitGraphDiagram-82fe8481-DQWHD3SJ.js} +2 -2
- package/app/out/renderer/assets/{graph-CZfltE7S.js → graph-DKiKgH8m.js} +1 -1
- package/app/out/renderer/assets/{index-DIZJXKQ6.js → index-4s-c5d65.js} +3 -3
- package/app/out/renderer/assets/{index-5325376f-DWTrHDEo.js → index-5325376f-G-0aO-2i.js} +6 -6
- package/app/out/renderer/assets/{index-CwPfquqm.js → index-9q_P5ULR.js} +4 -4
- package/app/out/renderer/assets/{index-EaGZvaBp.js → index-B1A3JxQj.js} +3 -3
- package/app/out/renderer/assets/{index-C2tqvXjC.js → index-BBUrmGmY.js} +6 -6
- package/app/out/renderer/assets/{index-D_7yOLk3.js → index-BQho5LH-.js} +6 -6
- package/app/out/renderer/assets/{index-B6f2bVW_.js → index-BUVlmsgO.js} +3 -3
- package/app/out/renderer/assets/{index-DpXI4mHb.js → index-BzEthrJ4.js} +3 -3
- package/app/out/renderer/assets/{index-CUsEKU8Q.js → index-C1YzkB4z.js} +93 -36
- package/app/out/renderer/assets/{index-CMfKxpBP.js → index-CGo665vD.js} +3 -3
- package/app/out/renderer/assets/{index-B5Mkpo9f.js → index-CPZaxR35.js} +3 -3
- package/app/out/renderer/assets/{index-BpdWQuss.js → index-CSyD1mbL.js} +3 -3
- package/app/out/renderer/assets/{index-DB8ImtMy.js → index-Cf7vlFSn.js} +3 -3
- package/app/out/renderer/assets/{index-CyDfvefg.js → index-CluH1o2q.js} +6 -6
- package/app/out/renderer/assets/{index-7dcVwInU.js → index-Cw1n3klA.js} +5 -5
- package/app/out/renderer/assets/{index-Ul-Kq9b2.js → index-DFzvntIw.js} +3 -3
- package/app/out/renderer/assets/{index-t0-md-MG.js → index-DHzyAhWM.js} +4 -4
- package/app/out/renderer/assets/{index-Cc9coKGN.js → index-DhliHfCM.js} +6 -6
- package/app/out/renderer/assets/{index-K0o5fHYG.js → index-DkVFbCxC.js} +3 -3
- package/app/out/renderer/assets/{index-DiCqe1UR.js → index-DpZJP5MT.js} +6 -6
- package/app/out/renderer/assets/{index-CaYWMBXT.js → index-Gfd_DiMG.js} +3 -3
- package/app/out/renderer/assets/{index-Di3HmXc-.js → index-jOvNAYyP.js} +3 -3
- package/app/out/renderer/assets/{index-B4V7cFWJ.js → index-rrJkk8KV.js} +6 -6
- package/app/out/renderer/assets/{index-BgAs-p8D.js → index-vfSerSmF.js} +1 -1
- package/app/out/renderer/assets/{infoDiagram-8eee0895-BmPESCfj.js → infoDiagram-8eee0895-BCnBkXXS.js} +2 -2
- package/app/out/renderer/assets/{journeyDiagram-c64418c1-BGsCbfr_.js → journeyDiagram-c64418c1-Bq2wSX3k.js} +4 -4
- package/app/out/renderer/assets/{layout-5MwFTPs7.js → layout-BvkumzoT.js} +2 -2
- package/app/out/renderer/assets/{line-D0U74KO0.js → line-eU4el-G4.js} +1 -1
- package/app/out/renderer/assets/{linear-BclyBoiT.js → linear-DlBjMBEa.js} +1 -1
- package/app/out/renderer/assets/{mindmap-definition-8da855dc-un1bPKBj.js → mindmap-definition-8da855dc-CzLBu7ao.js} +3 -3
- package/app/out/renderer/assets/{pieDiagram-a8764435-B7KM3duv.js → pieDiagram-a8764435--olrXFr_.js} +3 -3
- package/app/out/renderer/assets/{quadrantDiagram-1e28029f-C8i5m3Os.js → quadrantDiagram-1e28029f-BnpnBBgc.js} +3 -3
- package/app/out/renderer/assets/{requirementDiagram-08caed73-FjqENNN5.js → requirementDiagram-08caed73-6O9WS7hn.js} +5 -5
- package/app/out/renderer/assets/{sankeyDiagram-a04cb91d-BKV22yuJ.js → sankeyDiagram-a04cb91d-D-iJnK91.js} +2 -2
- package/app/out/renderer/assets/{sequenceDiagram-c5b8d532-DWO-Z2i3.js → sequenceDiagram-c5b8d532-DBlK15cV.js} +3 -3
- package/app/out/renderer/assets/{stateDiagram-1ecb1508-BqohgALA.js → stateDiagram-1ecb1508-DKXKPYuk.js} +6 -6
- package/app/out/renderer/assets/{stateDiagram-v2-c2b004d7-B3sEkrB8.js → stateDiagram-v2-c2b004d7-DY288Eo5.js} +10 -10
- package/app/out/renderer/assets/{styles-b4e223ce-BGytHk8n.js → styles-b4e223ce-CRJ_xgJ-.js} +1 -1
- package/app/out/renderer/assets/{styles-ca3715f6-B0PvBknL.js → styles-ca3715f6-Bp_k5KLD.js} +1 -1
- package/app/out/renderer/assets/{styles-d45a18b0-C6F384ai.js → styles-d45a18b0-DLA8Gg6D.js} +4 -4
- package/app/out/renderer/assets/{svgDrawCommon-b86b1483-BXgThwM_.js → svgDrawCommon-b86b1483-Dm5CK2gQ.js} +1 -1
- package/app/out/renderer/assets/{timeline-definition-faaaa080-iNn5igPR.js → timeline-definition-faaaa080-D-m9BHUg.js} +3 -3
- package/app/out/renderer/assets/{xychartDiagram-f5964ef8-oF_gxlk1.js → xychartDiagram-f5964ef8-Drn4Rqev.js} +5 -5
- package/app/out/renderer/index.html +1 -1
- package/lib/skills/builtin/academic-marp-slides/SKILL.md +933 -0
- package/lib/skills/builtin/research-grants/SKILL.md +15 -11
- package/lib/skills/builtin/scholar-evaluation/SKILL.md +12 -11
- package/lib/skills/builtin/scientific-schematics/SKILL.md +463 -560
- package/lib/skills/builtin/teaching-marp-slides/SKILL.md +1218 -0
- package/package.json +1 -1
- package/scripts/audit-diagram-prompts.mjs +67 -0
- package/scripts/test-skill-routing.mjs +238 -0
- package/lib/skills/builtin/marp-slides/SKILL.md +0 -642
- package/lib/skills/builtin/scientific-schematics/references/QUICK_REFERENCE.md +0 -182
- package/lib/skills/builtin/scientific-schematics/references/README.md +0 -292
- package/lib/skills/builtin/scientific-schematics/scripts/__pycache__/generate_schematic.cpython-312.pyc +0 -0
- package/lib/skills/builtin/scientific-schematics/scripts/__pycache__/generate_schematic_ai.cpython-312.pyc +0 -0
- package/lib/skills/builtin/scientific-schematics/scripts/example_usage.sh +0 -85
- package/lib/skills/builtin/scientific-schematics/scripts/generate_schematic.py +0 -141
- package/lib/skills/builtin/scientific-schematics/scripts/generate_schematic_ai.py +0 -910
package/package.json
CHANGED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { readdirSync, readFileSync, statSync } from 'node:fs'
|
|
3
|
+
import { join, resolve } from 'node:path'
|
|
4
|
+
|
|
5
|
+
// Terms that the audit report flags when they appear in a logged prompt.
// The order below is the order in which hits are listed in the report.
const SUSPECT_TERMS = [
  // data and splits
  'dataset',
  'datasets',
  'training data',
  'test set',
  'validation set',
  // experimental setup
  'experiment',
  'experiments',
  'experimental setup',
  'hyperparameter',
  'hyperparameters',
  // metrics
  'accuracy',
  'precision',
  'recall',
  'F1',
  'AUC',
  'PSNR',
  'BLEU',
  'ROUGE',
  // ablation studies
  'ablation',
  'ablations',
  // comparisons
  'baseline',
  'baselines',
  'SOTA',
  'state-of-the-art',
  'benchmark',
  // hardware and training knobs
  'GPU',
  'A100',
  'V100',
  'epochs',
  'batch size',
  'learning rate',
  // references to tables and figures
  'Table 1',
  'Table 2',
  'Figure 1',
  'Figure 2',
  // result claims
  'outperforms',
  'achieves',
  '%',
]
|
|
15
|
+
|
|
16
|
+
/**
 * Recursively collect every file whose name ends in `_review_log.json`.
 *
 * Directories named `node_modules` and directories whose name starts with a
 * dot are not descended into. A directory that cannot be listed contributes
 * no results instead of aborting the scan.
 *
 * @param {string} root - Directory to start scanning from.
 * @returns {string[]} Paths of matching files, in traversal order.
 */
function findLogs(root) {
  const collect = (dir) => {
    let listing
    try {
      listing = readdirSync(dir, { withFileTypes: true })
    } catch {
      // Unreadable or missing directory: best-effort scan, just skip it.
      return []
    }
    return listing.flatMap((entry) => {
      const fullPath = join(dir, entry.name)
      if (entry.isDirectory()) {
        const skipped = entry.name === 'node_modules' || entry.name.startsWith('.')
        return skipped ? [] : collect(fullPath)
      }
      const isLog = entry.isFile() && entry.name.endsWith('_review_log.json')
      return isLog ? [fullPath] : []
    })
  }
  return collect(root)
}
|
|
34
|
+
|
|
35
|
+
// ---------------------------------------------------------------------------
// Entry point: find review logs under the target directory and print a
// Markdown audit section for each one, most recently modified first.
// ---------------------------------------------------------------------------

// Directory to scan: first CLI argument, or the current working directory.
const root = resolve(process.argv[2] || process.cwd())

// Stat every file exactly once (the comparator would otherwise re-stat on each
// comparison) and tolerate a file disappearing between discovery and stat.
const mtimeOf = (file) => {
  try {
    return statSync(file).mtimeMs
  } catch {
    return 0
  }
}
const logs = findLogs(root)
  .map((file) => ({ file, mtimeMs: mtimeOf(file) }))
  .sort((a, b) => b.mtimeMs - a.mtimeMs)
  .map(({ file }) => file)

if (logs.length === 0) {
  console.error(`No *_review_log.json under ${root}`)
  process.exit(1)
}

console.log(`# Prompt audit — ${logs.length} log file(s) under ${root}\n`)

// Compile one case-insensitive matcher per suspect term, once, up front.
// - Regex metacharacters in the term are escaped so the term matches literally.
// - A `\b` is only added on a side where the term starts/ends with a word
//   character; `\b` next to a symbol such as '%' would demand an adjacent word
//   character and miss ordinary text like "95% accuracy".
const suspectMatchers = SUSPECT_TERMS.map((term) => {
  const needle = term.toLowerCase()
  const escaped = needle.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  const head = /^\w/.test(needle) ? '\\b' : ''
  const tail = /\w$/.test(needle) ? '\\b' : ''
  return { term, re: new RegExp(head + escaped + tail) }
})

for (const file of logs) {
  let data
  try {
    data = JSON.parse(readFileSync(file, 'utf-8'))
  } catch (e) {
    console.log(`## ${file}\nPARSE ERROR: ${e.message}\n`)
    continue
  }
  // Valid JSON that is not an object (null, number, string) has no fields to audit.
  if (data === null || typeof data !== 'object') {
    console.log(`## ${file}\nPARSE ERROR: top-level JSON value is not an object\n`)
    continue
  }

  const prompt = String(data.prompt ?? '')
  const len = prompt.length
  const wordCount = prompt.split(/\s+/).filter(Boolean).length
  const lower = prompt.toLowerCase()
  const hits = suspectMatchers.filter(({ re }) => re.test(lower)).map(({ term }) => term)

  // Only the last iteration's review is reported; guard against a malformed
  // `iterations` field that is not an array.
  const lastReview = Array.isArray(data.iterations) ? data.iterations.at(-1)?.review : undefined
  const finalScore = lastReview?.score ?? '—'
  const verdict = lastReview?.verdict ?? '—'

  console.log(`## ${file.replace(root + '/', '')}`)
  console.log(`- length: ${len} chars / ${wordCount} words`)
  console.log(`- docType: ${data.docType} | diagramType: ${data.diagramType} | aspect: ${data.aspect}`)
  console.log(`- final: score=${finalScore} verdict=${verdict} stoppedReason=${data.stoppedReason ?? '—'}`)
  console.log(`- suspect terms (${hits.length}): ${hits.length ? hits.join(', ') : '—'}`)
  console.log(`\n### prompt verbatim\n\n\`\`\`\n${prompt}\n\`\`\`\n`)
}
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Smoke test for the coordinator's LLM-based skill router.
|
|
3
|
+
//
|
|
4
|
+
// Reproduces `matchSkillsWithLLM` in lib/agents/coordinator.ts verbatim:
|
|
5
|
+
// - Same system prompt
|
|
6
|
+
// - Same `- <name>: <description>` skill list
|
|
7
|
+
// - Same router-model class (Haiku / GPT-5-nano / Gemini-2.0-flash-lite)
|
|
8
|
+
// - Same JSON-array parse
|
|
9
|
+
//
|
|
10
|
+
// Usage:
|
|
11
|
+
// node scripts/test-skill-routing.mjs # anthropic (default), needs ANTHROPIC_API_KEY
|
|
12
|
+
// node scripts/test-skill-routing.mjs --provider=openai # needs OPENAI_API_KEY
|
|
13
|
+
// node scripts/test-skill-routing.mjs --provider=google # needs GEMINI_API_KEY
|
|
14
|
+
// node scripts/test-skill-routing.mjs --cases=path/to/cases.json
|
|
15
|
+
//
|
|
16
|
+
// Exits non-zero if any case fails (expected skill missing, or forbidden skill present).
|
|
17
|
+
|
|
18
|
+
import { readdirSync, readFileSync, existsSync } from 'fs'
|
|
19
|
+
import { join, dirname } from 'path'
|
|
20
|
+
import { fileURLToPath } from 'url'
|
|
21
|
+
import { getModel, getEnvApiKey, completeSimple } from '@mariozechner/pi-ai'
|
|
22
|
+
|
|
23
|
+
const __dirname = dirname(fileURLToPath(import.meta.url))
|
|
24
|
+
const REPO_ROOT = join(__dirname, '..')
|
|
25
|
+
const BUILTIN_SKILLS_DIR = join(REPO_ROOT, 'lib', 'skills', 'builtin')
|
|
26
|
+
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
// CLI args
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
/**
 * Turn `--key=value` / `--flag` style arguments into a plain object.
 *
 * A leading `--` is stripped. An argument without `=` maps to the string
 * 'true'. The split happens on the FIRST `=` only, so values may themselves
 * contain `=` (for example a path such as `--cases=dir/a=b.json`).
 *
 * @param {string[]} argv - Raw arguments, without the node/script entries.
 * @returns {Record<string, string>} Map of option name to string value.
 */
function parseCliArgs(argv) {
  return Object.fromEntries(
    argv.map((arg) => {
      const body = arg.replace(/^--/, '')
      const eq = body.indexOf('=')
      return eq < 0 ? [body, 'true'] : [body.slice(0, eq), body.slice(eq + 1)]
    })
  )
}

const args = parseCliArgs(process.argv.slice(2))

// Provider whose router model is exercised; defaults to anthropic.
const PROVIDER = args.provider ?? 'anthropic'
// Optional path to a JSON file of test cases; undefined means use the built-in cases.
const CASES_PATH = args.cases

// Router model id used for each supported provider.
const ROUTER_BY_PROVIDER = {
  anthropic: 'claude-haiku-4-5-20251001',
  openai: 'gpt-5.4-nano',
  google: 'gemini-2.0-flash-lite'
}
|
|
46
|
+
|
|
47
|
+
// ---------------------------------------------------------------------------
|
|
48
|
+
// Skill discovery — mirrors lib/skills/loader.ts frontmatter parse
|
|
49
|
+
// ---------------------------------------------------------------------------
|
|
50
|
+
|
|
51
|
+
/**
 * Minimal parser for the `---`-delimited frontmatter at the top of a document.
 *
 * Only flat `key: value` lines are recognized; anything else inside the
 * frontmatter is ignored. A value wrapped in double quotes has the quotes
 * removed. Both LF and CRLF line endings are accepted.
 *
 * @param {string} md - Full document text.
 * @returns {Record<string, string> | null} Parsed fields, or null when the
 *   text does not start with `---` or has no closing `---` line.
 */
function parseFrontmatter(md) {
  if (!md.startsWith('---')) return null
  const end = md.indexOf('\n---', 3)
  if (end < 0) return null

  const out = {}
  // Split on LF or CRLF. With a plain '\n' split, CRLF input leaves a trailing
  // '\r' on each line, which `.` refuses to match, so the line regex below
  // would silently reject every line except the last.
  for (const line of md.slice(3, end).trim().split(/\r?\n/)) {
    const m = line.match(/^(\w[\w-]*):\s*(.*)$/)
    if (!m) continue
    let v = m[2].trim()
    if (v.startsWith('"') && v.endsWith('"')) v = v.slice(1, -1)
    out[m[1]] = v
  }
  return out
}
|
|
66
|
+
|
|
67
|
+
/**
 * Read the `name` and `description` of every builtin skill.
 *
 * Each entry of BUILTIN_SKILLS_DIR is expected to hold a `SKILL.md`; entries
 * without that file, or whose frontmatter lacks a name or a description, are
 * skipped.
 *
 * @returns {{ name: string, description: string }[]} Skills sorted by name.
 */
function loadBuiltinSkills() {
  const found = readdirSync(BUILTIN_SKILLS_DIR).flatMap((entry) => {
    const manifestPath = join(BUILTIN_SKILLS_DIR, entry, 'SKILL.md')
    if (!existsSync(manifestPath)) return []

    const meta = parseFrontmatter(readFileSync(manifestPath, 'utf-8'))
    const usable = Boolean(meta?.name) && Boolean(meta?.description)
    return usable ? [{ name: meta.name, description: meta.description }] : []
  })
  found.sort((left, right) => left.name.localeCompare(right.name))
  return found
}
|
|
79
|
+
|
|
80
|
+
// ---------------------------------------------------------------------------
|
|
81
|
+
// Exact copy of matchSkillsWithLLM prompt (coordinator.ts:99-148)
|
|
82
|
+
// ---------------------------------------------------------------------------
|
|
83
|
+
|
|
84
|
+
// Upper bound on how many skills the router may select for one message.
const MAX_SKILL_PRELOAD = 5

/**
 * Assemble the router's system prompt: fixed instructions followed by one
 * `- <name>: <description>` line per available skill.
 *
 * @param {{ name: string, description: string }[]} skills - Skills to list.
 * @returns {string} The complete system prompt text.
 */
function buildSystemPrompt(skills) {
  const instructions = [
    'You are a skill router for a research assistant. Given a user message, select which skills should be activated.',
    'Return ONLY a JSON array of skill names. Return [] if none are relevant.',
    '',
    'Rules:',
    '- Only select skills directly relevant to the user\'s request',
    '- Do not select skills speculatively',
    `- Maximum ${MAX_SKILL_PRELOAD} skills`,
    '- Consider both English and Chinese messages',
    '',
    'Available skills:'
  ]

  const catalogue = []
  for (const skill of skills) {
    catalogue.push(`- ${skill.name}: ${skill.description}`)
  }

  // The skill list always follows the heading on a new line, even when empty.
  return `${instructions.join('\n')}\n${catalogue.join('\n')}`
}
|
|
102
|
+
|
|
103
|
+
/**
 * Send one user message to the router model and extract the skill names it
 * selected.
 *
 * The reply's first text part is expected to contain a JSON array, either
 * inside a fenced code block or inline. Anything that cannot be parsed as an
 * array yields an empty selection instead of an error.
 *
 * @param {*} model - Model handle passed through to completeSimple.
 * @param {string} apiKey - API key for the model's provider.
 * @param {string} systemPrompt - Router instructions and skill list.
 * @param {string} message - The user message to route.
 * @returns {Promise<{ raw: string, picked: string[] }>} The trimmed reply text
 *   and up to MAX_SKILL_PRELOAD selected skill names.
 */
async function routeOnce(model, apiKey, systemPrompt, message) {
  const request = {
    systemPrompt,
    messages: [{ role: 'user', content: message, timestamp: Date.now() }]
  }
  const reply = await completeSimple(model, request, { maxTokens: 100, apiKey })

  const firstText = reply.content.find(part => part.type === 'text')
  const raw = firstText?.text?.trim() ?? ''
  if (!raw) return { raw: '', picked: [] }

  // Prefer a fenced block; otherwise take the first bracketed span; otherwise
  // try the whole reply.
  const fenced = raw.match(/```(?:json)?\s*([\s\S]*?)```/)
  const located = fenced ?? raw.match(/(\[[\s\S]*?\])/)
  const candidate = located?.[1]?.trim() ?? raw

  const nothingPicked = { raw, picked: [] }
  try {
    const decoded = JSON.parse(candidate)
    if (!Array.isArray(decoded)) return nothingPicked
    const names = decoded.filter(item => typeof item === 'string')
    return { raw, picked: names.slice(0, MAX_SKILL_PRELOAD) }
  } catch {
    return nothingPicked
  }
}
|
|
126
|
+
|
|
127
|
+
// ---------------------------------------------------------------------------
|
|
128
|
+
// Test cases — focused on the two new marp skills
|
|
129
|
+
// ---------------------------------------------------------------------------
|
|
130
|
+
|
|
131
|
+
// Built-in routing cases. Each case has a user message, the skills that must
// be selected (`expect`) and the skills that must not be selected (`forbid`).
const DEFAULT_CASES = (() => {
  const TEACHING = 'teaching-marp-slides'
  const ACADEMIC = 'academic-marp-slides'

  // Message that should route to the teaching deck skill only.
  const teaching = (msg) => ({ msg, expect: [TEACHING], forbid: [ACADEMIC] })
  // Message that should route to the academic deck skill only.
  const academic = (msg) => ({ msg, expect: [ACADEMIC], forbid: [TEACHING] })

  return [
    teaching("let's work in week10_dist folder, create a new week10B_storage_design_slides_v2.md for teaching next week. You can check other docs in the week10_dist folder as your references and write high quality teaching slides."),
    teaching('Make lecture slides for CS101 on backpropagation'),
    academic('我要做个论文答辩 PPT'),
    teaching('下周课件，讲 LR 和 logistic regression'),
    academic('slides for my group meeting'),
    academic('revise slide 7 of my conference talk'),
    teaching('add a worked example to my lecture slides'),
    // Not a slide request at all: neither deck skill may be selected.
    {
      msg: 'help me draft a NeurIPS paper intro',
      expect: ['paper-writing'],
      forbid: [ACADEMIC, TEACHING]
    }
  ]
})()
|
|
173
|
+
|
|
174
|
+
// ---------------------------------------------------------------------------
|
|
175
|
+
// Main
|
|
176
|
+
// ---------------------------------------------------------------------------
|
|
177
|
+
|
|
178
|
+
/**
 * Run every routing case against the configured router model and report the
 * outcome of each.
 *
 * Exits with status 2 when the provider is unknown or has no API key in the
 * environment, 1 when at least one case fails or errors, and 0 otherwise.
 */
async function main() {
  const skills = loadBuiltinSkills()
  console.log(`Loaded ${skills.length} builtin skills from ${BUILTIN_SKILLS_DIR}`)
  skills
    .filter(({ name }) => name.includes('marp'))
    .forEach(({ name }) => console.log(` • ${name}`))
  console.log()

  const routerModelId = ROUTER_BY_PROVIDER[PROVIDER]
  if (!routerModelId) {
    const known = Object.keys(ROUTER_BY_PROVIDER).join(', ')
    console.error(`Unknown provider "${PROVIDER}". Pick one of: ${known}`)
    process.exit(2)
  }

  const apiKey = getEnvApiKey(PROVIDER)
  if (!apiKey) {
    console.error(`No API key for provider "${PROVIDER}" in env. Set the provider's standard env var (e.g., ANTHROPIC_API_KEY / OPENAI_API_KEY / GEMINI_API_KEY).`)
    process.exit(2)
  }

  const model = getModel(PROVIDER, routerModelId)
  console.log(`Using router: ${PROVIDER}/${routerModelId}\n`)

  // Cases come from the --cases file when given, otherwise from the built-ins.
  let cases = DEFAULT_CASES
  if (CASES_PATH) {
    cases = JSON.parse(readFileSync(CASES_PATH, 'utf-8'))
  }

  const systemPrompt = buildSystemPrompt(skills)

  let failures = 0
  for (const [index, testCase] of cases.entries()) {
    process.stdout.write(`[${index + 1}/${cases.length}] "${testCase.msg}" ... `)

    let outcome
    try {
      outcome = await routeOnce(model, apiKey, systemPrompt, testCase.msg)
    } catch (err) {
      // A request that throws counts as a failed case; keep going with the rest.
      console.log('ERROR')
      console.log(` ${err?.message ?? err}`)
      failures += 1
      continue
    }

    const chosen = new Set(outcome.picked)
    const missing = (testCase.expect ?? []).filter(name => !chosen.has(name))
    const unwanted = (testCase.forbid ?? []).filter(name => chosen.has(name))
    const passed = missing.length === 0 && unwanted.length === 0

    console.log(passed ? 'PASS' : 'FAIL')

    // [should print, label, names] — the picked line is always shown.
    const details = [
      [true, ' picked: ', outcome.picked],
      [Boolean(testCase.expect?.length), ' expect: ', testCase.expect],
      [Boolean(testCase.forbid?.length), ' forbid: ', testCase.forbid],
      [missing.length > 0, ' MISSING: ', missing],
      [unwanted.length > 0, ' UNWANTED: ', unwanted]
    ]
    for (const [shown, label, names] of details) {
      if (shown) console.log(`${label}[${names.join(', ')}]`)
    }

    if (!passed) failures += 1
  }

  console.log(`\n${cases.length - failures}/${cases.length} passed`)
  process.exit(failures === 0 ? 0 : 1)
}

// Any error that escapes main() is printed and reported as a failing run.
main().catch((error) => {
  console.error(error)
  process.exit(1)
})
|