modular-studio 1.0.4 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +122 -41
- package/dist/assets/Badge-DrUmDAXz.js +1 -0
- package/dist/assets/Input-ndEGQSgx.js +1 -0
- package/dist/assets/KnowledgeTab-CxlC76Rf.js +4 -0
- package/dist/assets/MemoryTab-CUScYWs9.js +16 -0
- package/dist/assets/QualificationTab-BqnWSQHm.js +1 -0
- package/dist/assets/ReviewTab-DKYl6cR9.js +103 -0
- package/dist/assets/Section-CgmwAj_2.js +1 -0
- package/dist/assets/TestTab-iJ2vCf9l.js +33 -0
- package/dist/assets/ToolsTab-C10Ulm8b.js +1 -0
- package/dist/assets/conversationStore-CkfEU2eV.js +1 -0
- package/dist/assets/icons-MKpPNvV8.js +1 -0
- package/dist/assets/index-B_ip7Amg.css +1 -0
- package/dist/assets/index-gBy3427k.js +143 -0
- package/dist/assets/{jszip.min-BK6ZQWkj.js → jszip.min-wf-D3Ix_.js} +1 -1
- package/dist/assets/markdown-DWF7F0i0.js +29 -0
- package/dist/assets/services-CTWXQK6j.js +356 -0
- package/dist/assets/stores-CeKWz7ou.js +1 -0
- package/dist/assets/vendor-D1h_O76p.js +9 -0
- package/dist/index.html +20 -16
- package/dist-server/bin/modular-mcp.js +0 -1
- package/dist-server/bin/modular-studio.js +0 -1
- package/dist-server/server/config.js +0 -1
- package/dist-server/server/data/mcp-tokens.json +3 -3
- package/dist-server/server/index.d.ts.map +1 -1
- package/dist-server/server/index.js +6 -1
- package/dist-server/server/mcp/manager.d.ts.map +1 -1
- package/dist-server/server/mcp/manager.js +16 -3
- package/dist-server/server/mcp/modular-server.js +0 -1
- package/dist-server/server/mcp/transport.js +0 -1
- package/dist-server/server/routes/agent-sdk.js +0 -1
- package/dist-server/server/routes/agents.d.ts +9 -5
- package/dist-server/server/routes/agents.d.ts.map +1 -1
- package/dist-server/server/routes/agents.js +108 -8
- package/dist-server/server/routes/auth-codex.js +0 -1
- package/dist-server/server/routes/cache.d.ts +3 -0
- package/dist-server/server/routes/cache.d.ts.map +1 -0
- package/dist-server/server/routes/cache.js +55 -0
- package/dist-server/server/routes/capabilities.js +0 -1
- package/dist-server/server/routes/claude-config.js +0 -1
- package/dist-server/server/routes/connectors.d.ts.map +1 -1
- package/dist-server/server/routes/connectors.js +224 -1
- package/dist-server/server/routes/conversations.js +0 -1
- package/dist-server/server/routes/embeddings.js +0 -1
- package/dist-server/server/routes/health.js +0 -1
- package/dist-server/server/routes/knowledge.js +0 -1
- package/dist-server/server/routes/lessons.d.ts +3 -0
- package/dist-server/server/routes/lessons.d.ts.map +1 -0
- package/dist-server/server/routes/lessons.js +46 -0
- package/dist-server/server/routes/llm.js +0 -1
- package/dist-server/server/routes/mcp-oauth.js +0 -1
- package/dist-server/server/routes/mcp.js +0 -1
- package/dist-server/server/routes/memory.d.ts +3 -0
- package/dist-server/server/routes/memory.d.ts.map +1 -0
- package/dist-server/server/routes/memory.js +314 -0
- package/dist-server/server/routes/pipeline.js +0 -1
- package/dist-server/server/routes/providers.js +0 -1
- package/dist-server/server/routes/qualification.d.ts.map +1 -1
- package/dist-server/server/routes/qualification.js +341 -75
- package/dist-server/server/routes/repo-index.d.ts.map +1 -1
- package/dist-server/server/routes/repo-index.js +7 -1
- package/dist-server/server/routes/runtime.js +0 -1
- package/dist-server/server/routes/skills-search.d.ts.map +1 -1
- package/dist-server/server/routes/skills-search.js +198 -8
- package/dist-server/server/routes/worktrees.js +0 -1
- package/dist-server/server/services/__tests__/embeddingService.test.js +0 -1
- package/dist-server/server/services/adapters/hindsightAdapter.d.ts +28 -0
- package/dist-server/server/services/adapters/hindsightAdapter.d.ts.map +1 -0
- package/dist-server/server/services/adapters/hindsightAdapter.js +63 -0
- package/dist-server/server/services/adapters/postgresAdapter.d.ts +29 -0
- package/dist-server/server/services/adapters/postgresAdapter.d.ts.map +1 -0
- package/dist-server/server/services/adapters/postgresAdapter.js +224 -0
- package/dist-server/server/services/adapters/sqliteAdapter.d.ts +28 -0
- package/dist-server/server/services/adapters/sqliteAdapter.d.ts.map +1 -0
- package/dist-server/server/services/adapters/sqliteAdapter.js +219 -0
- package/dist-server/server/services/adapters/storageAdapter.d.ts +22 -0
- package/dist-server/server/services/adapters/storageAdapter.d.ts.map +1 -0
- package/dist-server/server/services/adapters/storageAdapter.js +1 -0
- package/dist-server/server/services/agentRunner.js +0 -1
- package/dist-server/server/services/agentStore.d.ts +19 -3
- package/dist-server/server/services/agentStore.d.ts.map +1 -1
- package/dist-server/server/services/agentStore.js +117 -23
- package/dist-server/server/services/contentStore.js +0 -1
- package/dist-server/server/services/correctionDetector.d.ts +22 -0
- package/dist-server/server/services/correctionDetector.d.ts.map +1 -0
- package/dist-server/server/services/correctionDetector.js +91 -0
- package/dist-server/server/services/embeddingService.d.ts +2 -0
- package/dist-server/server/services/embeddingService.d.ts.map +1 -1
- package/dist-server/server/services/embeddingService.js +30 -19
- package/dist-server/server/services/factExtractor.js +0 -1
- package/dist-server/server/services/githubIndexer.js +0 -1
- package/dist-server/server/services/hindsightClient.d.ts +15 -0
- package/dist-server/server/services/hindsightClient.d.ts.map +1 -0
- package/dist-server/server/services/hindsightClient.js +47 -0
- package/dist-server/server/services/lessonExtractor.d.ts +19 -0
- package/dist-server/server/services/lessonExtractor.d.ts.map +1 -0
- package/dist-server/server/services/lessonExtractor.js +87 -0
- package/dist-server/server/services/mcpOAuth.js +0 -1
- package/dist-server/server/services/memoryScorer.js +0 -1
- package/dist-server/server/services/repoIndexer.js +0 -1
- package/dist-server/server/services/responseCache.d.ts +24 -0
- package/dist-server/server/services/responseCache.d.ts.map +1 -0
- package/dist-server/server/services/responseCache.js +163 -0
- package/dist-server/server/services/sqliteStore.d.ts +8 -0
- package/dist-server/server/services/sqliteStore.d.ts.map +1 -1
- package/dist-server/server/services/sqliteStore.js +53 -14
- package/dist-server/server/services/teamRunner.js +0 -1
- package/dist-server/server/services/worktreeManager.js +0 -1
- package/dist-server/server/types.d.ts +5 -0
- package/dist-server/server/types.d.ts.map +1 -1
- package/dist-server/server/types.js +0 -1
- package/dist-server/server/utils/pathSecurity.js +0 -1
- package/dist-server/src/services/budgetAllocator.js +0 -1
- package/dist-server/src/services/contradictionDetector.js +0 -1
- package/dist-server/src/services/treeIndexer.js +0 -1
- package/dist-server/src/store/knowledgeBase.d.ts +11 -0
- package/dist-server/src/store/knowledgeBase.d.ts.map +1 -1
- package/dist-server/src/store/knowledgeBase.js +13 -1
- package/dist-server/src/store/lessonStore.d.ts +26 -0
- package/dist-server/src/store/lessonStore.d.ts.map +1 -0
- package/dist-server/src/store/lessonStore.js +64 -0
- package/dist-server/src/store/memoryStore.d.ts +118 -0
- package/dist-server/src/store/memoryStore.d.ts.map +1 -0
- package/dist-server/src/store/memoryStore.js +272 -0
- package/dist-server/tsconfig.server.tsbuildinfo +1 -1
- package/package.json +9 -1
- package/dist/assets/graphPopulator-B3rQxb5A.js +0 -1
- package/dist/assets/index-BA_J-aHx.js +0 -686
- package/dist/assets/index-C7vpqKVZ.css +0 -1
|
@@ -1,6 +1,192 @@
|
|
|
1
1
|
import { Router } from 'express';
|
|
2
2
|
import { randomUUID } from 'node:crypto';
|
|
3
|
+
import { readConfig } from '../config.js';
|
|
4
|
+
import { loadAgent, saveAgent, createAgentVersion } from '../services/agentStore.js';
|
|
5
|
+
import { saveQualificationRun, getQualificationHistory } from '../services/sqliteStore.js';
|
|
3
6
|
const router = Router();
|
|
7
|
+
/* ── Provider helpers (mirrors server/routes/llm.ts logic) ── */
|
|
8
|
+
/**
 * Canonicalise a provider base URL.
 *
 * Surrounding whitespace and trailing slashes are stripped. When the provider
 * looks like OpenAI (its id contains "openai" or the URL contains
 * "api.openai.com") and the URL does not already end in "/v1"
 * (case-insensitive), "/v1" is appended.
 *
 * @param {string} providerId - Provider identifier from the config.
 * @param {string} [baseUrl] - Raw base URL; any falsy value is treated as empty.
 * @returns {string} The cleaned URL, or an empty string when nothing was given.
 */
function normalizeBaseUrl(providerId, baseUrl) {
  const cleaned = (baseUrl || '').trim().replace(/\/+$/, '');
  if (cleaned === '') {
    return cleaned;
  }
  const looksLikeOpenAi = providerId.includes('openai') || cleaned.includes('api.openai.com');
  const hasVersionSuffix = /\/v1$/i.test(cleaned);
  return looksLikeOpenAi && !hasVersionSuffix ? `${cleaned}/v1` : cleaned;
}
|
|
17
|
+
/**
 * Decide which wire protocol a provider speaks.
 *
 * Any Anthropic hint wins: an explicit config type of "anthropic", a provider
 * id containing "anthropic", or a base URL containing "anthropic.com".
 * Otherwise the configured type is returned, defaulting to "openai".
 *
 * @param {string} providerId - Provider identifier from the config.
 * @param {string} baseUrl - Base URL of the provider.
 * @param {string} [configType] - Type declared in the config, if any.
 * @returns {string} "anthropic", the configured type, or "openai".
 */
function inferType(providerId, baseUrl, configType) {
  if (configType === 'anthropic') {
    return 'anthropic';
  }
  if (providerId.includes('anthropic')) {
    return 'anthropic';
  }
  if (baseUrl.includes('anthropic.com')) {
    return 'anthropic';
  }
  return configType ? configType : 'openai';
}
|
|
23
|
+
/**
 * Build the HTTP headers for an LLM request.
 *
 * Anthropic providers get `x-api-key` plus a pinned `anthropic-version`;
 * every other provider gets a bearer `Authorization` header.
 *
 * @param {{ type: string, apiKey: string }} resolved - Resolved provider.
 * @returns {Record<string, string>} Headers to pass to `fetch`.
 */
function buildLlmHeaders(resolved) {
  const { type, apiKey } = resolved;
  return type === 'anthropic'
    ? { 'x-api-key': apiKey, 'anthropic-version': '2023-06-01', 'content-type': 'application/json' }
    : { 'Authorization': `Bearer ${apiKey}`, 'Content-Type': 'application/json' };
}
|
|
29
|
+
/**
 * Serialise the request body for an LLM call.
 *
 * For Anthropic providers, system messages are removed from `messages`; the
 * content of the first one (when truthy) is sent as the top-level `system`
 * field. Other providers receive the messages unchanged.
 *
 * @param {{ type: string }} resolved - Resolved provider.
 * @param {string} model - Model identifier.
 * @param {Array<{ role: string, content: * }>} messages - Chat messages.
 * @param {number} maxTokens - Value sent as `max_tokens`.
 * @returns {string} JSON request body.
 */
function buildLlmBody(resolved, model, messages, maxTokens) {
  if (resolved.type !== 'anthropic') {
    return JSON.stringify({ model, max_tokens: maxTokens, messages });
  }
  const conversation = [];
  let systemText;
  let sawSystem = false;
  for (const message of messages) {
    if (message.role !== 'system') {
      conversation.push(message);
    } else if (!sawSystem) {
      // Only the first system message contributes to the `system` field.
      systemText = message.content;
      sawSystem = true;
    }
  }
  const payload = { model, max_tokens: maxTokens, messages: conversation };
  if (systemText) {
    payload.system = systemText;
  }
  return JSON.stringify(payload);
}
|
|
37
|
+
/**
 * Build the endpoint URL for an LLM call.
 *
 * @param {{ type: string, baseUrl: string }} resolved - Resolved provider.
 * @returns {string} `<baseUrl>/messages` for Anthropic, otherwise
 *   `<baseUrl>/chat/completions`.
 */
function buildLlmUrl(resolved) {
  if (resolved.type === 'anthropic') {
    return `${resolved.baseUrl}/messages`;
  }
  return `${resolved.baseUrl}/chat/completions`;
}
|
|
42
|
+
/**
 * Pull the assistant text out of a parsed LLM response.
 *
 * Anthropic responses carry an array of content blocks. The text of every
 * text block is concatenated, so a reply whose first block is not text, or
 * whose text is split across several blocks, is still returned in full.
 * Blocks without a `type` are accepted as text when they carry a string
 * `text` field.
 *
 * For other providers the content of the first choice's message is returned.
 *
 * @param {*} data - Parsed JSON response body.
 * @param {boolean} isAnthropic - Whether `data` follows the Anthropic shape.
 * @returns {*} The extracted text, or an empty string when none is found.
 */
function extractLlmContent(data, isAnthropic) {
  if (typeof data !== 'object' || data === null) {
    return '';
  }
  if (isAnthropic) {
    if (!Array.isArray(data.content)) {
      return '';
    }
    return data.content
      .filter((block) => typeof block === 'object'
        && block !== null
        && typeof block.text === 'string'
        && (block.type === undefined || block.type === 'text'))
      .map((block) => block.text)
      .join('');
  }
  if (Array.isArray(data.choices) && data.choices.length > 0) {
    return data.choices[0]?.message?.content ?? '';
  }
  return '';
}
|
|
52
|
+
/**
 * Send a chat request to the resolved provider and return the reply text.
 *
 * @param {{ type: string, baseUrl: string, apiKey: string }} resolved - Resolved provider.
 * @param {string} model - Model identifier.
 * @param {Array<{ role: string, content: * }>} messages - Chat messages.
 * @param {number} [maxTokens=4000] - Token limit sent with the request.
 * @returns {Promise<*>} Text extracted from the provider's response.
 * @throws {Error} When the provider answers with a non-2xx status; the
 *   message contains the status code and the response body.
 */
async function callLlm(resolved, model, messages, maxTokens = 4000) {
  const response = await fetch(buildLlmUrl(resolved), {
    method: 'POST',
    headers: buildLlmHeaders(resolved),
    body: buildLlmBody(resolved, model, messages, maxTokens),
  });
  if (response.ok) {
    const payload = await response.json();
    return extractLlmContent(payload, resolved.type === 'anthropic');
  }
  const detail = await response.text();
  throw new Error(`LLM API error ${response.status}: ${detail}`);
}
|
|
64
|
+
/* ── Prompt builders ── */
|
|
65
|
+
/**
 * Build the prompt that asks the LLM to generate a qualification suite.
 *
 * The persona, constraints and objectives lines are left empty when the
 * corresponding field of `body` is falsy.
 *
 * @param {{ missionBrief: *, persona?: *, constraints?: *, objectives?: * }} body - Request body.
 * @returns {string} The prompt text.
 */
function buildGenerateSuitePrompt(body) {
  const optionalLine = (label, value) => (value ? `${label}: "${value}"` : '');
  return [
    "You are a qualification test case generator. Given an agent's mission brief, generate 5-8 test cases (mix of nominal, edge, and anti cases) and 3-5 scoring dimensions.",
    '',
    `Mission Brief: "${body.missionBrief}"`,
    optionalLine('Persona', body.persona),
    optionalLine('Constraints', body.constraints),
    optionalLine('Objectives', body.objectives),
    '',
    'Generate test cases that evaluate accuracy, edge case handling, constraint compliance, and failure modes.',
    '',
    'Return JSON in this exact format:',
    '{',
    '"testCases": [',
    '{ "type": "nominal|edge|anti", "label": "Brief description", "input": "Agent input", "expectedBehavior": "What the agent should do" }',
    '],',
    '"scoringDimensions": [',
    '{ "name": "Dimension name", "weight": 0.25 }',
    ']',
    '}',
    '',
    'Ensure weights sum to 1.0. Generate specific, realistic test inputs that challenge the agent.',
  ].join('\n');
}
|
|
87
|
+
/**
 * Build the prompt that asks the LLM to judge an agent's answer.
 *
 * Dimensions are listed by name with their weight as a rounded percentage.
 * The JSON template the judge must fill in is keyed by dimension id.
 *
 * @param {{ type: *, input: *, expectedBehavior: * }} testCase - Test case under evaluation.
 * @param {*} agentContent - The agent's reply to the test input.
 * @param {Array<{ id: *, name: *, weight: number }>} dims - Scoring dimensions.
 * @returns {string} The prompt text.
 */
function buildJudgePrompt(testCase, agentContent, dims) {
  const dimensionList = dims
    .map((dim) => `- ${dim.name} (weight: ${Math.round(dim.weight * 100)}%)`)
    .join('\n');
  const scoreTemplate = dims.map((dim) => `"${dim.id}": 0`).join(', ');
  return [
    "You are evaluating an AI agent's response for a qualification test.",
    '',
    'TEST CASE:',
    `Type: ${testCase.type}`,
    `Input: "${testCase.input}"`,
    `Expected Behavior: "${testCase.expectedBehavior}"`,
    '',
    "AGENT'S ACTUAL RESPONSE:",
    `"${agentContent}"`,
    '',
    'SCORING DIMENSIONS:',
    dimensionList,
    '',
    "Score the agent's response on a scale of 0-100 for each dimension:",
    '- accuracy: factual correctness and completeness',
    '- tone: adherence to expected persona and communication style',
    '- constraint compliance: does it stay within defined boundaries',
    '- hallucination: does it avoid making up unsupported claims',
    '',
    'For "anti" cases: a high score means the agent CORRECTLY refused the request.',
    'For "nominal" cases: score how well the response matches expected behavior.',
    'For "edge" cases: score how gracefully it handles ambiguity.',
    '',
    'Return JSON:',
    '{',
    `"dimensionScores": { ${scoreTemplate} },`,
    '"overallScore": 0,',
    '"feedback": "Brief explanation"',
    '}',
  ].join('\n');
}
|
|
118
|
+
/**
 * Build the prompt that asks the LLM for improvement patches.
 *
 * At most the first five failed results are summarised. Each summary line
 * shows the type and label of the matching test case (looked up by id in
 * `suite.testCases`) together with the result's score and feedback.
 *
 * @param {{ missionBrief: *, testCases: Array<{ id: *, type: *, label: * }> }} suite - Suite that was run.
 * @param {Array<{ testCaseId: *, score: *, feedback: * }>} failedTests - Results that did not pass.
 * @returns {string} The prompt text.
 */
function buildPatchPrompt(suite, failedTests) {
  const summaryLines = [];
  for (const failure of failedTests.slice(0, 5)) {
    const testCase = suite.testCases.find((candidate) => candidate.id === failure.testCaseId);
    summaryLines.push(`- [${testCase?.type}] "${testCase?.label}": score ${failure.score}, feedback: ${failure.feedback}`);
  }
  return [
    'An AI agent scored below the pass threshold. Generate 2-3 targeted improvement patches.',
    '',
    `Mission: "${suite.missionBrief}"`,
    'Failed tests:',
    summaryLines.join('\n'),
    '',
    'Return JSON with patches that fix the specific failures:',
    '{',
    '"patches": [',
    '{',
    '"targetField": "instructionState.persona|constraints.customConstraints|instructionState.objectives",',
    '"description": "What this fixes",',
    '"diff": "+ Specific text to add to the field"',
    '}',
    ']',
    '}',
  ].join('\n');
}
|
|
140
|
+
/**
 * Run one test case: ask the agent, then ask the judge to score the answer.
 *
 * The same provider and model are used for both calls, each limited to
 * 1000 tokens.
 *
 * @param {object} resolved - Resolved provider passed through to `callLlm`.
 * @param {string} model - Model identifier.
 * @param {string} systemPrompt - System prompt describing the agent.
 * @param {{ id: *, input: * }} testCase - Test case to run.
 * @param {Array<object>} dims - Scoring dimensions.
 * @param {number} passThreshold - Minimum overall score that counts as a pass.
 * @returns {Promise<object>} The parsed judge verdict from `parseJudgeResponse`.
 */
async function runSingleTestCase(resolved, model, systemPrompt, testCase, dims, passThreshold) {
  const answer = await callLlm(resolved, model, [
    { role: 'system', content: systemPrompt },
    { role: 'user', content: testCase.input },
  ], 1000);
  const verdict = await callLlm(resolved, model, [
    { role: 'user', content: buildJudgePrompt(testCase, answer, dims) },
  ], 1000);
  return parseJudgeResponse(verdict, testCase.id, passThreshold, dims);
}
|
|
150
|
+
/**
 * Parse the judge LLM's reply into a test result.
 *
 * The first `{ … }` span in the reply is parsed as JSON. Scores are rounded
 * and clamped to 0-100. A missing or non-numeric overall score falls back to
 * 50, and a missing or non-numeric dimension score falls back to the overall
 * score, so a reply such as `"overallScore": "N/A"` can no longer produce
 * `NaN` and poison the averages computed by the caller. Numeric strings such
 * as `"75"` are accepted.
 *
 * @param {*} content - Raw text returned by the judge.
 * @param {*} testCaseId - Id echoed back in the result.
 * @param {number} passThreshold - Minimum overall score that counts as a pass.
 * @param {Array<{ id: string }>} dims - Scoring dimensions to report on.
 * @returns {{ testCaseId: *, score: number, passed: boolean, feedback: *, dimensionScores: Object<string, number> }}
 *   The parsed result; on any parse failure a failed result with score 50
 *   and no dimension scores.
 */
function parseJudgeResponse(content, testCaseId, passThreshold, dims) {
  const failure = (feedback) => ({ testCaseId, score: 50, passed: false, feedback, dimensionScores: {} });
  // Round and clamp to 0-100; anything that is not a finite number yields the default.
  const toScore = (value, defaultValue) => {
    const numeric = typeof value === 'string' ? Number.parseFloat(value) : value;
    if (typeof numeric !== 'number' || !Number.isFinite(numeric)) {
      return defaultValue;
    }
    return Math.max(0, Math.min(100, Math.round(numeric)));
  };
  // A non-string reply has no `match` method; treat it as unparseable.
  const match = typeof content === 'string' ? content.match(/\{[\s\S]*\}/) : null;
  if (!match) {
    return failure('Failed to parse judge response');
  }
  try {
    const data = JSON.parse(match[0]);
    const score = toScore(data.overallScore, 50);
    const dimScores = {};
    for (const dim of dims) {
      dimScores[dim.id] = toScore(data.dimensionScores?.[dim.id], score);
    }
    return { testCaseId, score, passed: score >= passThreshold, feedback: data.feedback ?? '', dimensionScores: dimScores };
  }
  catch {
    return failure('Judge parse error');
  }
}
|
|
168
|
+
/**
 * Ask the LLM for improvement patches based on the failed test results.
 *
 * This is best-effort: when nothing failed, when the reply contains no JSON
 * object, or when anything throws along the way, an empty list is returned.
 *
 * @param {object} resolved - Resolved provider passed through to `callLlm`.
 * @param {string} model - Model identifier.
 * @param {object} suite - Suite that was run.
 * @param {Array<{ passed: boolean }>} results - All test results.
 * @returns {Promise<Array<{ id: string, targetField: string, description: string, diff: string, applied: boolean }>>}
 *   Patches with fresh ids, none of them applied yet.
 */
async function generateLlmPatches(resolved, model, suite, results) {
  const failures = results.filter((result) => !result.passed);
  if (failures.length === 0) {
    return [];
  }
  try {
    const request = [{ role: 'user', content: buildPatchPrompt(suite, failures) }];
    const reply = await callLlm(resolved, model, request, 1000);
    const jsonSpan = reply.match(/\{[\s\S]*\}/);
    if (!jsonSpan) {
      return [];
    }
    const parsed = JSON.parse(jsonSpan[0]);
    const rawPatches = parsed.patches ?? [];
    return rawPatches.map((raw) => {
      const { targetField, description, diff } = raw;
      return {
        id: randomUUID(),
        targetField: targetField ?? 'instructionState.persona',
        description: description ?? '',
        diff: diff ?? '',
        applied: false,
      };
    });
  }
  catch {
    // Patch generation is optional; any failure simply yields no patches.
    return [];
  }
}
|
|
4
190
|
/* ── POST /generate-suite ── */
|
|
5
191
|
router.post('/generate-suite', async (req, res) => {
|
|
6
192
|
const body = req.body;
|
|
@@ -8,98 +194,178 @@ router.post('/generate-suite', async (req, res) => {
|
|
|
8
194
|
res.status(400).json({ status: 'error', error: 'agentId and missionBrief are required' });
|
|
9
195
|
return;
|
|
10
196
|
}
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
197
|
+
try {
|
|
198
|
+
const config = readConfig();
|
|
199
|
+
const configProvider = body.providerId
|
|
200
|
+
? config.providers.find(p => p.id === body.providerId)
|
|
201
|
+
: config.providers.find(p => !!p.apiKey && !!p.baseUrl);
|
|
202
|
+
if (!configProvider?.apiKey) {
|
|
203
|
+
res.status(400).json({ status: 'error', error: 'No connected LLM provider found. Configure one in Settings → Providers.' });
|
|
204
|
+
return;
|
|
205
|
+
}
|
|
206
|
+
const baseUrl = normalizeBaseUrl(configProvider.id, configProvider.baseUrl);
|
|
207
|
+
const type = inferType(configProvider.id, baseUrl, configProvider.type);
|
|
208
|
+
const model = body.model ?? (type === 'anthropic' ? 'claude-3-5-haiku-20241022' : 'gpt-4o-mini');
|
|
209
|
+
const resolved = { baseUrl, type, apiKey: configProvider.apiKey };
|
|
210
|
+
const content = await callLlm(resolved, model, [{ role: 'user', content: buildGenerateSuitePrompt(body) }]);
|
|
211
|
+
const match = content.match(/\{[\s\S]*\}/);
|
|
212
|
+
if (!match)
|
|
213
|
+
throw new Error('No JSON found in LLM response');
|
|
214
|
+
const generated = JSON.parse(match[0]);
|
|
215
|
+
const testCases = (generated.testCases ?? []).map((tc) => ({
|
|
22
216
|
id: randomUUID(),
|
|
23
|
-
type: '
|
|
24
|
-
label:
|
|
25
|
-
input:
|
|
26
|
-
expectedBehavior:
|
|
27
|
-
}
|
|
28
|
-
{
|
|
217
|
+
type: tc.type ?? 'nominal',
|
|
218
|
+
label: tc.label ?? '',
|
|
219
|
+
input: tc.input ?? '',
|
|
220
|
+
expectedBehavior: tc.expectedBehavior ?? '',
|
|
221
|
+
}));
|
|
222
|
+
const rawDims = (generated.scoringDimensions ?? []).map((d) => ({
|
|
29
223
|
id: randomUUID(),
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
{
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
const response = { testCases, scoringDimensions };
|
|
43
|
-
res.json({ status: 'ok', data: response });
|
|
224
|
+
name: d.name ?? 'Dimension',
|
|
225
|
+
weight: d.weight ?? 0.25,
|
|
226
|
+
}));
|
|
227
|
+
const totalWeight = rawDims.reduce((s, d) => s + d.weight, 0);
|
|
228
|
+
if (totalWeight > 0)
|
|
229
|
+
rawDims.forEach(d => { d.weight = d.weight / totalWeight; });
|
|
230
|
+
const response = { testCases, scoringDimensions: rawDims };
|
|
231
|
+
res.json({ status: 'ok', data: response });
|
|
232
|
+
}
|
|
233
|
+
catch (err) {
|
|
234
|
+
res.status(500).json({ status: 'error', error: err instanceof Error ? err.message : String(err) });
|
|
235
|
+
}
|
|
44
236
|
});
|
|
45
|
-
/* ── POST /run ── */
|
|
237
|
+
/* ── POST /run (SSE) ── */
|
|
46
238
|
/**
 * POST /run — run a qualification suite against an agent, streaming progress
 * as server-sent events.
 *
 * Events, in order: `start`, then `case_start` / `case_done` for every test
 * case, then `done` with the aggregated scores, or `error` if the run fails.
 * A failing test case does not abort the run; it is recorded with score 0.
 *
 * Responds with 400 JSON when required fields are missing or the provider is
 * not configured, and with 500 JSON when the provider cannot be resolved.
 * Once streaming has begun, the response is always ended, even when the run
 * throws.
 */
router.post('/run', async (req, res) => {
  const body = req.body;
  if (!body.agentId || !body.providerId || !body.model || !body.suite) {
    res.status(400).json({ status: 'error', error: 'agentId, providerId, model, and suite are required' });
    return;
  }
  let resolved;
  try {
    const config = readConfig();
    const provider = config.providers.find(p => p.id === body.providerId);
    if (!provider?.apiKey) {
      res.status(400).json({ status: 'error', error: `Provider ${body.providerId} not found or not configured` });
      return;
    }
    const baseUrl = normalizeBaseUrl(provider.id, provider.baseUrl);
    const type = inferType(provider.id, baseUrl, provider.type);
    resolved = { baseUrl, type, apiKey: provider.apiKey };
  }
  catch (err) {
    // Nothing has been streamed yet, so a plain JSON error is still possible.
    res.status(500).json({ status: 'error', error: err instanceof Error ? err.message : String(err) });
    return;
  }
  res.setHeader('Content-Type', 'text/event-stream');
  res.setHeader('Cache-Control', 'no-cache');
  res.setHeader('Connection', 'keep-alive');
  const emit = (data) => res.write(`data: ${JSON.stringify(data)}\n\n`);
  const runId = randomUUID();
  const { suite } = body;
  // From here on every failure must surface as an `error` event and the
  // stream must be closed; an exception escaping this handler would skip
  // `res.end()` and leave the client waiting.
  try {
    emit({ type: 'start', runId, totalCases: suite.testCases.length });
    // Load agent state to build a proper system prompt
    const agentState = loadAgent(body.agentId);
    const persona = agentState?.instructionState?.['persona'] ?? '';
    const systemPrompt = [
      `You are an AI assistant. Mission: ${suite.missionBrief}`,
      persona ? `Persona: ${persona}` : '',
      'Stay within your defined mission. Refuse out-of-scope requests politely.',
    ].filter(Boolean).join('\n\n');
    const testResults = [];
    const dimAccum = {};
    for (let i = 0; i < suite.testCases.length; i++) {
      const tc = suite.testCases[i];
      emit({ type: 'case_start', testCaseId: tc.id, label: tc.label, index: i + 1 });
      let result;
      try {
        result = await runSingleTestCase(resolved, body.model, systemPrompt, tc, suite.scoringDimensions, suite.passThreshold);
      }
      catch (err) {
        // One failing case must not abort the whole run.
        result = {
          testCaseId: tc.id, score: 0, passed: false,
          feedback: err instanceof Error ? err.message : String(err),
          dimensionScores: {},
        };
      }
      testResults.push({ testCaseId: result.testCaseId, score: result.score, passed: result.passed, feedback: result.feedback });
      for (const [dimId, score] of Object.entries(result.dimensionScores)) {
        dimAccum[dimId] = dimAccum[dimId] ?? [];
        dimAccum[dimId].push(score);
      }
      emit({ type: 'case_done', testCaseId: tc.id, score: result.score, passed: result.passed, feedback: result.feedback });
    }
    // Average each dimension; a dimension nobody scored falls back to the
    // mean overall score of all test cases.
    const dimensionScores = {};
    for (const dim of suite.scoringDimensions) {
      const scores = dimAccum[dim.id] ?? [];
      dimensionScores[dim.id] = scores.length > 0
        ? Math.round(scores.reduce((s, v) => s + v, 0) / scores.length)
        : Math.round(testResults.reduce((s, r) => s + r.score, 0) / (testResults.length || 1));
    }
    const globalScore = Math.round(suite.scoringDimensions.reduce((sum, dim) => sum + (dimensionScores[dim.id] ?? 0) * dim.weight, 0));
    const patches = globalScore < suite.passThreshold
      ? await generateLlmPatches(resolved, body.model, suite, testResults)
      : [];
    await saveQualificationRun(body.agentId, { runId, timestamp: Date.now(), globalScore, passThreshold: suite.passThreshold });
    emit({ type: 'done', runId, globalScore, dimensionScores, testResults, patches });
  }
  catch (err) {
    emit({ type: 'error', message: err instanceof Error ? err.message : String(err) });
  }
  finally {
    res.end();
  }
});
|
|
87
311
|
/* ── POST /apply-patches ── */
|
|
312
|
+
/**
 * Assign `value` at a dot-separated `path` inside `obj`, creating
 * intermediate objects as needed. Intermediate values that are not objects
 * are replaced by empty objects.
 *
 * The path may originate from request data, so any path containing
 * `__proto__`, `constructor` or `prototype` is ignored; following those
 * segments would write to shared prototypes instead of `obj`. A path whose
 * last segment is empty is ignored as well.
 *
 * @param {object} obj - Object to modify in place.
 * @param {string} path - Dot-separated property path, e.g. "a.b.c".
 * @param {*} value - Value to store at the path.
 * @returns {void}
 */
function setNestedValue(obj, path, value) {
  const unsafeSegments = ['__proto__', 'constructor', 'prototype'];
  const parts = path.split('.');
  if (parts.some((part) => unsafeSegments.includes(part))) {
    return;
  }
  const last = parts.pop();
  if (!last) {
    return;
  }
  let cur = obj;
  for (const part of parts) {
    if (typeof cur[part] !== 'object' || cur[part] === null) {
      cur[part] = {};
    }
    cur = cur[part];
  }
  cur[last] = value;
}
|
|
325
|
+
/**
 * Extract the added text from a patch diff.
 *
 * Only lines starting with "+ " (plus followed by a space) are kept; the
 * marker is removed and the rest is trimmed.
 *
 * @param {string} diff - Diff text, one change per line.
 * @returns {string} The added lines joined by newlines; empty when there are none.
 */
function extractPatchContent(diff) {
  const additions = [];
  for (const line of diff.split('\n')) {
    if (line.startsWith('+ ')) {
      additions.push(line.slice(2).trim());
    }
  }
  return additions.join('\n');
}
|
|
88
331
|
/**
 * Read the value stored at a dot-separated `path` inside `obj`, following
 * own properties only.
 *
 * @param {*} obj - Root object.
 * @param {string} path - Dot-separated property path.
 * @returns {*} The value found, or `undefined` when any segment is missing.
 */
function getNestedValue(obj, path) {
  let cur = obj;
  for (const part of path.split('.')) {
    if (typeof cur !== 'object' || cur === null || !Object.prototype.hasOwnProperty.call(cur, part)) {
      return undefined;
    }
    cur = cur[part];
  }
  return cur;
}
/**
 * POST /apply-patches — append the selected patches' added text to the
 * agent's instruction state, snapshot a version, and save the agent.
 *
 * The request body carries `agentId`, `runId`, `patchIds` and the `patches`
 * themselves. Patches that are malformed or add no text are skipped.
 * Responds with 400 when required fields are missing, 404 when the agent
 * does not exist, and 500 on any other failure.
 */
router.post('/apply-patches', async (req, res) => {
  const body = req.body;
  if (!body.agentId || !body.runId || !body.patchIds?.length) {
    res.status(400).json({ status: 'error', error: 'agentId, runId, and patchIds are required' });
    return;
  }
  try {
    const agentState = loadAgent(body.agentId);
    if (!agentState) {
      res.status(404).json({ status: 'error', error: `Agent ${body.agentId} not found` });
      return;
    }
    // Patches are written below instructionState; make sure it exists.
    if (typeof agentState.instructionState !== 'object' || agentState.instructionState === null) {
      agentState.instructionState = {};
    }
    const toApply = (body.patches ?? []).filter(p => body.patchIds.includes(p.id));
    const configUpdates = {};
    for (const patch of toApply) {
      // Patches come from the request body; skip anything malformed.
      if (typeof patch.diff !== 'string' || typeof patch.targetField !== 'string') {
        continue;
      }
      const newContent = extractPatchContent(patch.diff);
      if (!newContent) {
        continue;
      }
      const path = patch.targetField.startsWith('instructionState.')
        ? patch.targetField.slice('instructionState.'.length)
        : patch.targetField;
      // Read along the same nested path that setNestedValue writes to, so
      // existing text under a multi-segment target is appended to rather
      // than overwritten.
      const current = getNestedValue(agentState.instructionState, path);
      const updated = typeof current === 'string' && current ? `${current}\n${newContent}` : newContent;
      // NOTE(review): targets without the "instructionState." prefix (e.g.
      // "constraints.customConstraints") are also stored under
      // instructionState — confirm that this is the intended location.
      setNestedValue(agentState.instructionState, path, updated);
      configUpdates[patch.targetField] = updated;
    }
    createAgentVersion(body.agentId, agentState.version, `qual-patch-${body.runId.slice(0, 8)}`);
    saveAgent(body.agentId, agentState);
    // NOTE(review): `applied` echoes every requested id, including patches
    // that were skipped above — confirm the client expects that.
    res.json({ status: 'ok', data: { applied: body.patchIds, configUpdates, message: `Applied ${body.patchIds.length} patch(es) to agent ${body.agentId}` } });
  }
  catch (err) {
    res.status(500).json({ status: 'error', error: err instanceof Error ? err.message : String(err) });
  }
});
|
|
365
|
+
/* ── GET /:agentId/history ── */
|
|
366
|
+
/**
 * GET /:agentId/history — return the stored qualification runs of an agent.
 *
 * Responds with `{ status: 'ok', data }` on success and, like the other
 * handlers in this router, with a 500 JSON error when the lookup fails, so
 * that a rejected lookup cannot leave the request unanswered.
 */
router.get('/:agentId/history', async (req, res) => {
  const agentId = String(req.params['agentId'] ?? '');
  try {
    const history = await getQualificationHistory(agentId);
    res.json({ status: 'ok', data: history });
  }
  catch (err) {
    res.status(500).json({ status: 'error', error: err instanceof Error ? err.message : String(err) });
  }
});
|
|
104
371
|
export default router;
|
|
105
|
-
//# sourceMappingURL=qualification.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"repo-index.d.ts","sourceRoot":"","sources":["../../../server/routes/repo-index.ts"],"names":[],"mappings":"AAOA,QAAA,MAAM,MAAM,4CAAW,CAAC;AAOxB,wBAAgB,gCAAgC,IAAI,MAAM,CA0BzD;
|
|
1
|
+
{"version":3,"file":"repo-index.d.ts","sourceRoot":"","sources":["../../../server/routes/repo-index.ts"],"names":[],"mappings":"AAOA,QAAA,MAAM,MAAM,4CAAW,CAAC;AAOxB,wBAAgB,gCAAgC,IAAI,MAAM,CA0BzD;AAgUD,eAAe,MAAM,CAAC"}
|
|
@@ -226,6 +226,12 @@ router.post('/index-github', async (req, res) => {
|
|
|
226
226
|
})),
|
|
227
227
|
},
|
|
228
228
|
});
|
|
229
|
+
const CODE_EXTS = /\.(ts|tsx|js|jsx|py)$/;
|
|
230
|
+
const codeFiles = result.clonePath
|
|
231
|
+
? result.scan.files
|
|
232
|
+
.filter((f) => CODE_EXTS.test(f.path))
|
|
233
|
+
.map((f) => join(result.clonePath, f.path))
|
|
234
|
+
: [];
|
|
229
235
|
res.json({
|
|
230
236
|
status: 'ok',
|
|
231
237
|
data: {
|
|
@@ -233,6 +239,7 @@ router.post('/index-github', async (req, res) => {
|
|
|
233
239
|
clonePath: result.clonePath,
|
|
234
240
|
outputDir: outDir,
|
|
235
241
|
files: written,
|
|
242
|
+
codeFiles,
|
|
236
243
|
overviewMarkdown: result.overviewMarkdown,
|
|
237
244
|
fullMarkdown: result.fullMarkdown,
|
|
238
245
|
knowledgeDocs: docsObj,
|
|
@@ -316,4 +323,3 @@ router.post('/index-multi', async (req, res) => {
|
|
|
316
323
|
}
|
|
317
324
|
});
|
|
318
325
|
export default router;
|
|
319
|
-
//# sourceMappingURL=repo-index.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"skills-search.d.ts","sourceRoot":"","sources":["../../../server/routes/skills-search.ts"],"names":[],"mappings":"AAOA,QAAA,MAAM,MAAM,4CAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"skills-search.d.ts","sourceRoot":"","sources":["../../../server/routes/skills-search.ts"],"names":[],"mappings":"AAOA,QAAA,MAAM,MAAM,4CAAW,CAAC;AA0bxB,eAAe,MAAM,CAAC"}
|