modular-studio 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +101 -20
- package/dist/assets/Badge-22Ai0eyi.js +1 -0
- package/dist/assets/Input-Bgp734xs.js +1 -0
- package/dist/assets/KnowledgeTab-DABxirZh.js +4 -0
- package/dist/assets/MemoryTab-DZeYElIT.js +16 -0
- package/dist/assets/QualificationTab-Dfpy3J30.js +1 -0
- package/dist/assets/ReviewTab-SD8lQuCc.js +103 -0
- package/dist/assets/Section-DoJrmytO.js +1 -0
- package/dist/assets/TestTab-PDyMF8Fw.js +33 -0
- package/dist/assets/ToolsTab-B83qGCmG.js +1 -0
- package/dist/assets/conversationStore-CkfEU2eV.js +1 -0
- package/dist/assets/icons-C2EV-le6.js +1 -0
- package/dist/assets/index-DkpMAxX7.css +1 -0
- package/dist/assets/index-q24ug5Qs.js +143 -0
- package/dist/assets/{jszip.min-BK6ZQWkj.js → jszip.min-wf-D3Ix_.js} +1 -1
- package/dist/assets/markdown-DWF7F0i0.js +29 -0
- package/dist/assets/services-BaKotDf0.js +343 -0
- package/dist/assets/stores-CeKWz7ou.js +1 -0
- package/dist/assets/vendor-D1h_O76p.js +9 -0
- package/dist/index.html +8 -4
- package/dist-server/bin/modular-mcp.js +0 -1
- package/dist-server/bin/modular-studio.js +0 -1
- package/dist-server/server/config.js +0 -1
- package/dist-server/server/data/mcp-tokens.json +3 -3
- package/dist-server/server/index.d.ts.map +1 -1
- package/dist-server/server/index.js +2 -1
- package/dist-server/server/mcp/manager.js +0 -1
- package/dist-server/server/mcp/modular-server.js +0 -1
- package/dist-server/server/mcp/transport.js +0 -1
- package/dist-server/server/routes/agent-sdk.js +0 -1
- package/dist-server/server/routes/agents.d.ts +9 -5
- package/dist-server/server/routes/agents.d.ts.map +1 -1
- package/dist-server/server/routes/agents.js +81 -8
- package/dist-server/server/routes/auth-codex.js +0 -1
- package/dist-server/server/routes/capabilities.js +0 -1
- package/dist-server/server/routes/claude-config.js +0 -1
- package/dist-server/server/routes/connectors.d.ts.map +1 -1
- package/dist-server/server/routes/connectors.js +194 -1
- package/dist-server/server/routes/conversations.js +0 -1
- package/dist-server/server/routes/embeddings.js +0 -1
- package/dist-server/server/routes/health.js +0 -1
- package/dist-server/server/routes/knowledge.js +0 -1
- package/dist-server/server/routes/llm.js +0 -1
- package/dist-server/server/routes/mcp-oauth.js +0 -1
- package/dist-server/server/routes/mcp.js +0 -1
- package/dist-server/server/routes/memory.d.ts +3 -0
- package/dist-server/server/routes/memory.d.ts.map +1 -0
- package/dist-server/server/routes/memory.js +283 -0
- package/dist-server/server/routes/pipeline.js +0 -1
- package/dist-server/server/routes/providers.js +0 -1
- package/dist-server/server/routes/qualification.d.ts.map +1 -1
- package/dist-server/server/routes/qualification.js +382 -74
- package/dist-server/server/routes/repo-index.js +0 -1
- package/dist-server/server/routes/runtime.js +0 -1
- package/dist-server/server/routes/skills-search.d.ts.map +1 -1
- package/dist-server/server/routes/skills-search.js +39 -5
- package/dist-server/server/routes/worktrees.js +0 -1
- package/dist-server/server/services/__tests__/embeddingService.test.js +0 -1
- package/dist-server/server/services/adapters/postgresAdapter.d.ts +29 -0
- package/dist-server/server/services/adapters/postgresAdapter.d.ts.map +1 -0
- package/dist-server/server/services/adapters/postgresAdapter.js +224 -0
- package/dist-server/server/services/adapters/sqliteAdapter.d.ts +28 -0
- package/dist-server/server/services/adapters/sqliteAdapter.d.ts.map +1 -0
- package/dist-server/server/services/adapters/sqliteAdapter.js +219 -0
- package/dist-server/server/services/adapters/storageAdapter.d.ts +22 -0
- package/dist-server/server/services/adapters/storageAdapter.d.ts.map +1 -0
- package/dist-server/server/services/adapters/storageAdapter.js +1 -0
- package/dist-server/server/services/agentRunner.js +0 -1
- package/dist-server/server/services/agentStore.d.ts +18 -3
- package/dist-server/server/services/agentStore.d.ts.map +1 -1
- package/dist-server/server/services/agentStore.js +116 -23
- package/dist-server/server/services/contentStore.js +0 -1
- package/dist-server/server/services/embeddingService.d.ts +2 -0
- package/dist-server/server/services/embeddingService.d.ts.map +1 -1
- package/dist-server/server/services/embeddingService.js +30 -19
- package/dist-server/server/services/factExtractor.js +0 -1
- package/dist-server/server/services/githubIndexer.js +0 -1
- package/dist-server/server/services/mcpOAuth.js +0 -1
- package/dist-server/server/services/memoryScorer.js +0 -1
- package/dist-server/server/services/repoIndexer.js +0 -1
- package/dist-server/server/services/sqliteStore.js +0 -1
- package/dist-server/server/services/teamRunner.js +0 -1
- package/dist-server/server/services/worktreeManager.js +0 -1
- package/dist-server/server/types.d.ts +5 -0
- package/dist-server/server/types.d.ts.map +1 -1
- package/dist-server/server/types.js +0 -1
- package/dist-server/server/utils/pathSecurity.js +0 -1
- package/dist-server/src/services/budgetAllocator.js +0 -1
- package/dist-server/src/services/contradictionDetector.js +0 -1
- package/dist-server/src/services/treeIndexer.js +0 -1
- package/dist-server/src/store/knowledgeBase.d.ts +10 -0
- package/dist-server/src/store/knowledgeBase.d.ts.map +1 -1
- package/dist-server/src/store/knowledgeBase.js +13 -1
- package/dist-server/src/store/memoryStore.d.ts +107 -0
- package/dist-server/src/store/memoryStore.d.ts.map +1 -0
- package/dist-server/src/store/memoryStore.js +263 -0
- package/dist-server/tsconfig.server.tsbuildinfo +1 -1
- package/package.json +104 -97
- package/dist/assets/graphPopulator-B3rQxb5A.js +0 -1
- package/dist/assets/index-BA_J-aHx.js +0 -686
- package/dist/assets/index-C7vpqKVZ.css +0 -1
|
@@ -1,6 +1,25 @@
|
|
|
1
1
|
import { Router } from 'express';
|
|
2
2
|
import { randomUUID } from 'node:crypto';
|
|
3
|
+
import { readConfig } from '../config.js';
|
|
3
4
|
const router = Router();
|
|
5
|
+
/**
 * In-memory record of qualification runs, keyed by agent id.
 * Each value is the list of run summaries for that agent in insertion order.
 * Nothing in this view persists or evicts entries.
 */
const runHistory = new Map();
/**
 * Append one run summary to an agent's history, creating the list on first use.
 *
 * @param agentId - key under which the run is recorded
 * @param entry - run summary to append
 */
function pushHistory(agentId, entry) {
    const previousRuns = runHistory.get(agentId);
    if (previousRuns == null) {
        // First run recorded for this agent.
        runHistory.set(agentId, [entry]);
        return;
    }
    previousRuns.push(entry);
}
|
|
11
|
+
/**
 * Pull the assistant's text out of a parsed LLM HTTP response body.
 *
 * Two response shapes are understood:
 *  - Anthropic Messages: `{ content: [{ text }, ...] }`
 *  - OpenAI-style Chat Completions: `{ choices: [{ message: { content } }] }`
 *
 * @param data - parsed JSON body of the provider response (any value is tolerated)
 * @param isAnthropic - true to read the Anthropic shape, false for the OpenAI-style shape
 * @returns the extracted text, or `''` when the body does not carry a string in
 *          the expected place. The result is always a string, so callers can
 *          safely call string methods on it.
 */
function extractLlmContent(data, isAnthropic) {
    if (typeof data !== 'object' || data === null)
        return '';
    if (isAnthropic) {
        if (!Array.isArray(data.content))
            return '';
        // The first content block is not necessarily a text block (the Messages
        // API can also return other block types), so take the first block that
        // actually carries a string `text` instead of blindly reading index 0.
        const textBlock = data.content.find(block => typeof block?.text === 'string');
        return textBlock ? textBlock.text : '';
    }
    if (!Array.isArray(data.choices) || data.choices.length === 0)
        return '';
    const text = data.choices[0]?.message?.content;
    // `content` may be null (e.g. tool calls) or a non-string; normalise to ''.
    return typeof text === 'string' ? text : '';
}
|
|
4
23
|
/* ── POST /generate-suite ── */
|
|
5
24
|
router.post('/generate-suite', async (req, res) => {
|
|
6
25
|
const body = req.body;
|
|
@@ -8,39 +27,126 @@ router.post('/generate-suite', async (req, res) => {
|
|
|
8
27
|
res.status(400).json({ status: 'error', error: 'agentId and missionBrief are required' });
|
|
9
28
|
return;
|
|
10
29
|
}
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
30
|
+
try {
|
|
31
|
+
const config = readConfig();
|
|
32
|
+
// Find a provider with an API key configured
|
|
33
|
+
const connectedProvider = config.providers.find(p => !!p.apiKey && !!p.baseUrl);
|
|
34
|
+
if (!connectedProvider) {
|
|
35
|
+
res.status(400).json({
|
|
36
|
+
status: 'error',
|
|
37
|
+
error: 'No connected LLM provider found. Please configure a provider first.'
|
|
38
|
+
});
|
|
39
|
+
return;
|
|
40
|
+
}
|
|
41
|
+
// Build LLM prompt for test case generation
|
|
42
|
+
const prompt = `You are a qualification test case generator. Given an agent's mission brief, generate 5-10 test cases (mix of nominal, edge, and anti cases) and 3-5 scoring dimensions.
|
|
43
|
+
|
|
44
|
+
Mission Brief: "${body.missionBrief}"
|
|
45
|
+
${body.persona ? `Persona: "${body.persona}"` : ''}
|
|
46
|
+
${body.constraints ? `Constraints: "${body.constraints}"` : ''}
|
|
47
|
+
${body.objectives ? `Objectives: "${body.objectives}"` : ''}
|
|
48
|
+
|
|
49
|
+
Generate test cases that thoroughly evaluate this agent's capabilities, edge cases, and failure modes.
|
|
50
|
+
|
|
51
|
+
Return JSON in this exact format:
|
|
52
|
+
{
|
|
53
|
+
"testCases": [
|
|
54
|
+
{
|
|
55
|
+
"type": "nominal|edge|anti",
|
|
56
|
+
"label": "Brief description of test",
|
|
57
|
+
"input": "Input to send to the agent",
|
|
58
|
+
"expectedBehavior": "What the agent should do"
|
|
59
|
+
}
|
|
60
|
+
],
|
|
61
|
+
"scoringDimensions": [
|
|
62
|
+
{
|
|
63
|
+
"name": "Dimension name",
|
|
64
|
+
"weight": 0.25
|
|
65
|
+
}
|
|
66
|
+
]
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
Ensure weights sum to 1.0. Generate realistic, specific test inputs that would actually challenge the agent.`;
|
|
70
|
+
// Call LLM
|
|
71
|
+
const baseUrl = connectedProvider.baseUrl.replace(/\/+$/, '');
|
|
72
|
+
const isAnthropic = connectedProvider.id.includes('anthropic') || baseUrl.includes('anthropic.com');
|
|
73
|
+
const messages = [
|
|
74
|
+
{ role: 'user', content: prompt }
|
|
75
|
+
];
|
|
76
|
+
const requestBody = isAnthropic ? {
|
|
77
|
+
model: 'claude-3-5-sonnet-20241022',
|
|
78
|
+
max_tokens: 4000,
|
|
79
|
+
messages
|
|
80
|
+
} : {
|
|
81
|
+
model: 'gpt-4o',
|
|
82
|
+
max_tokens: 4000,
|
|
83
|
+
messages
|
|
84
|
+
};
|
|
85
|
+
const headers = {
|
|
86
|
+
'Content-Type': 'application/json'
|
|
87
|
+
};
|
|
88
|
+
if (isAnthropic) {
|
|
89
|
+
headers['x-api-key'] = connectedProvider.apiKey || '';
|
|
90
|
+
headers['anthropic-version'] = '2023-06-01';
|
|
91
|
+
}
|
|
92
|
+
else {
|
|
93
|
+
headers['Authorization'] = `Bearer ${connectedProvider.apiKey || ''}`;
|
|
94
|
+
}
|
|
95
|
+
const llmResponse = await fetch(`${baseUrl}/messages`, {
|
|
96
|
+
method: 'POST',
|
|
97
|
+
headers,
|
|
98
|
+
body: JSON.stringify(requestBody)
|
|
99
|
+
});
|
|
100
|
+
if (!llmResponse.ok) {
|
|
101
|
+
const errorText = await llmResponse.text();
|
|
102
|
+
res.status(502).json({
|
|
103
|
+
status: 'error',
|
|
104
|
+
error: `LLM API error: ${llmResponse.status} ${errorText}`
|
|
105
|
+
});
|
|
106
|
+
return;
|
|
107
|
+
}
|
|
108
|
+
const llmData = await llmResponse.json();
|
|
109
|
+
// Extract content from response
|
|
110
|
+
const content = extractLlmContent(llmData, isAnthropic);
|
|
111
|
+
if (!content) {
|
|
112
|
+
throw new Error('Could not extract content from LLM response');
|
|
113
|
+
}
|
|
114
|
+
// Parse JSON from LLM response
|
|
115
|
+
const jsonMatch = content.match(/\{[\s\S]*\}/);
|
|
116
|
+
if (!jsonMatch) {
|
|
117
|
+
throw new Error('No JSON found in LLM response');
|
|
118
|
+
}
|
|
119
|
+
const generatedData = JSON.parse(jsonMatch[0]);
|
|
120
|
+
// Transform and validate the generated data
|
|
121
|
+
const testCases = (generatedData.testCases || []).map((tc) => ({
|
|
22
122
|
id: randomUUID(),
|
|
23
|
-
type: '
|
|
24
|
-
label: '
|
|
25
|
-
input:
|
|
26
|
-
expectedBehavior:
|
|
27
|
-
}
|
|
28
|
-
{
|
|
123
|
+
type: tc.type || 'nominal',
|
|
124
|
+
label: tc.label || 'Generated test case',
|
|
125
|
+
input: tc.input || '',
|
|
126
|
+
expectedBehavior: tc.expectedBehavior || '',
|
|
127
|
+
}));
|
|
128
|
+
const scoringDimensions = (generatedData.scoringDimensions || []).map((dim) => ({
|
|
29
129
|
id: randomUUID(),
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
130
|
+
name: dim.name || 'Dimension',
|
|
131
|
+
weight: dim.weight || 0.25,
|
|
132
|
+
}));
|
|
133
|
+
// Normalize weights to sum to 1.0
|
|
134
|
+
const totalWeight = scoringDimensions.reduce((sum, dim) => sum + dim.weight, 0);
|
|
135
|
+
if (totalWeight > 0) {
|
|
136
|
+
scoringDimensions.forEach(dim => {
|
|
137
|
+
dim.weight = dim.weight / totalWeight;
|
|
138
|
+
});
|
|
139
|
+
}
|
|
140
|
+
const response = { testCases, scoringDimensions };
|
|
141
|
+
res.json({ status: 'ok', data: response });
|
|
142
|
+
}
|
|
143
|
+
catch (err) {
|
|
144
|
+
console.error('Error generating test suite:', err);
|
|
145
|
+
res.status(500).json({
|
|
146
|
+
status: 'error',
|
|
147
|
+
error: err instanceof Error ? err.message : String(err)
|
|
148
|
+
});
|
|
149
|
+
}
|
|
44
150
|
});
|
|
45
151
|
/* ── POST /run ── */
|
|
46
152
|
router.post('/run', async (req, res) => {
|
|
@@ -49,40 +155,207 @@ router.post('/run', async (req, res) => {
|
|
|
49
155
|
res.status(400).json({ status: 'error', error: 'agentId, providerId, model, and suite are required' });
|
|
50
156
|
return;
|
|
51
157
|
}
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
158
|
+
try {
|
|
159
|
+
const config = readConfig();
|
|
160
|
+
// Find the provider
|
|
161
|
+
const provider = config.providers.find(p => p.id === body.providerId);
|
|
162
|
+
if (!provider || !provider.apiKey) {
|
|
163
|
+
res.status(400).json({
|
|
164
|
+
status: 'error',
|
|
165
|
+
error: `Provider ${body.providerId} not found or not configured`
|
|
166
|
+
});
|
|
167
|
+
return;
|
|
168
|
+
}
|
|
169
|
+
const runId = randomUUID();
|
|
170
|
+
// Build the agent's system prompt from mission brief
|
|
171
|
+
const systemPrompt = `You are an AI assistant. Your mission: ${body.suite.missionBrief}
|
|
172
|
+
|
|
173
|
+
You must stay within the scope of this mission and follow these guidelines:
|
|
174
|
+
- Be helpful and accurate
|
|
175
|
+
- Stay within your defined role
|
|
176
|
+
- If asked to do something outside your mission, politely decline
|
|
177
|
+
- Be consistent with your persona and constraints`;
|
|
178
|
+
const baseUrl = provider.baseUrl.replace(/\/+$/, '');
|
|
179
|
+
const isAnthropic = provider.id.includes('anthropic') || baseUrl.includes('anthropic.com');
|
|
180
|
+
// Process each test case
|
|
181
|
+
const testResults = [];
|
|
182
|
+
for (const testCase of body.suite.testCases) {
|
|
183
|
+
try {
|
|
184
|
+
// 1. Run the test case input against the agent
|
|
185
|
+
const agentMessages = [
|
|
186
|
+
{ role: 'system', content: systemPrompt },
|
|
187
|
+
{ role: 'user', content: testCase.input }
|
|
188
|
+
];
|
|
189
|
+
const agentRequestBody = isAnthropic ? {
|
|
190
|
+
model: body.model,
|
|
191
|
+
max_tokens: 1000,
|
|
192
|
+
messages: agentMessages.filter(m => m.role !== 'system'),
|
|
193
|
+
system: systemPrompt
|
|
194
|
+
} : {
|
|
195
|
+
model: body.model,
|
|
196
|
+
max_tokens: 1000,
|
|
197
|
+
messages: agentMessages
|
|
198
|
+
};
|
|
199
|
+
const headers = {
|
|
200
|
+
'Content-Type': 'application/json'
|
|
201
|
+
};
|
|
202
|
+
if (isAnthropic) {
|
|
203
|
+
headers['x-api-key'] = provider.apiKey;
|
|
204
|
+
headers['anthropic-version'] = '2023-06-01';
|
|
205
|
+
}
|
|
206
|
+
else {
|
|
207
|
+
headers['Authorization'] = `Bearer ${provider.apiKey}`;
|
|
208
|
+
}
|
|
209
|
+
const agentResponse = await fetch(`${baseUrl}/messages`, {
|
|
210
|
+
method: 'POST',
|
|
211
|
+
headers,
|
|
212
|
+
body: JSON.stringify(agentRequestBody)
|
|
213
|
+
});
|
|
214
|
+
if (!agentResponse.ok) {
|
|
215
|
+
testResults.push({
|
|
216
|
+
testCaseId: testCase.id,
|
|
217
|
+
score: 0,
|
|
218
|
+
passed: false,
|
|
219
|
+
feedback: `Failed to get agent response: ${agentResponse.status}`
|
|
220
|
+
});
|
|
221
|
+
continue;
|
|
222
|
+
}
|
|
223
|
+
const agentData = await agentResponse.json();
|
|
224
|
+
// Extract agent's response content
|
|
225
|
+
const agentContent = extractLlmContent(agentData, isAnthropic);
|
|
226
|
+
// 2. Use LLM as judge to score the response
|
|
227
|
+
const judgePrompt = `You are evaluating an AI agent's response for a qualification test.
|
|
228
|
+
|
|
229
|
+
TEST CASE:
|
|
230
|
+
Type: ${testCase.type}
|
|
231
|
+
Input: "${testCase.input}"
|
|
232
|
+
Expected Behavior: "${testCase.expectedBehavior}"
|
|
233
|
+
|
|
234
|
+
AGENT'S ACTUAL RESPONSE:
|
|
235
|
+
"${agentContent}"
|
|
236
|
+
|
|
237
|
+
SCORING DIMENSIONS:
|
|
238
|
+
${body.suite.scoringDimensions.map(d => `- ${d.name} (${Math.round(d.weight * 100)}%)`).join('\n')}
|
|
239
|
+
|
|
240
|
+
Rate the agent's response on a scale of 0-100 for each dimension. Consider:
|
|
241
|
+
- How well did it match the expected behavior?
|
|
242
|
+
- For "nominal" cases: Is it accurate and helpful?
|
|
243
|
+
- For "edge" cases: Does it handle ambiguity appropriately?
|
|
244
|
+
- For "anti" cases: Does it refuse inappropriate requests and stay in scope?
|
|
245
|
+
|
|
246
|
+
Return JSON in this exact format:
|
|
247
|
+
{
|
|
248
|
+
"dimensionScores": {
|
|
249
|
+
${body.suite.scoringDimensions.map(d => `"${d.id}": <score 0-100>`).join(',\n ')}
|
|
250
|
+
},
|
|
251
|
+
"overallScore": <weighted average 0-100>,
|
|
252
|
+
"feedback": "<brief explanation of the score>"
|
|
253
|
+
}`;
|
|
254
|
+
const judgeMessages = [
|
|
255
|
+
{ role: 'user', content: judgePrompt }
|
|
256
|
+
];
|
|
257
|
+
const judgeRequestBody = isAnthropic ? {
|
|
258
|
+
model: body.model,
|
|
259
|
+
max_tokens: 1000,
|
|
260
|
+
messages: judgeMessages
|
|
261
|
+
} : {
|
|
262
|
+
model: body.model,
|
|
263
|
+
max_tokens: 1000,
|
|
264
|
+
messages: judgeMessages
|
|
265
|
+
};
|
|
266
|
+
const judgeResponse = await fetch(`${baseUrl}/messages`, {
|
|
267
|
+
method: 'POST',
|
|
268
|
+
headers,
|
|
269
|
+
body: JSON.stringify(judgeRequestBody)
|
|
270
|
+
});
|
|
271
|
+
if (!judgeResponse.ok) {
|
|
272
|
+
testResults.push({
|
|
273
|
+
testCaseId: testCase.id,
|
|
274
|
+
score: 50,
|
|
275
|
+
passed: false,
|
|
276
|
+
feedback: `Failed to score response: ${judgeResponse.status}`
|
|
277
|
+
});
|
|
278
|
+
continue;
|
|
279
|
+
}
|
|
280
|
+
const judgeData = await judgeResponse.json();
|
|
281
|
+
// Extract judge's scoring
|
|
282
|
+
const judgeContent = extractLlmContent(judgeData, isAnthropic);
|
|
283
|
+
// Parse scoring JSON
|
|
284
|
+
const jsonMatch = judgeContent.match(/\{[\s\S]*\}/);
|
|
285
|
+
let score = 50;
|
|
286
|
+
let feedback = 'Default scoring due to parsing error';
|
|
287
|
+
if (jsonMatch) {
|
|
288
|
+
try {
|
|
289
|
+
const scoreData = JSON.parse(jsonMatch[0]);
|
|
290
|
+
score = Math.round(scoreData.overallScore || 50);
|
|
291
|
+
feedback = scoreData.feedback || 'No feedback provided';
|
|
292
|
+
}
|
|
293
|
+
catch {
|
|
294
|
+
// Use default values
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
testResults.push({
|
|
298
|
+
testCaseId: testCase.id,
|
|
299
|
+
score: Math.max(0, Math.min(100, score)),
|
|
300
|
+
passed: score >= body.suite.passThreshold,
|
|
301
|
+
feedback
|
|
302
|
+
});
|
|
303
|
+
}
|
|
304
|
+
catch (err) {
|
|
305
|
+
console.error(`Error processing test case ${testCase.id}:`, err);
|
|
306
|
+
testResults.push({
|
|
307
|
+
testCaseId: testCase.id,
|
|
308
|
+
score: 0,
|
|
309
|
+
passed: false,
|
|
310
|
+
feedback: `Error: ${err instanceof Error ? err.message : String(err)}`
|
|
311
|
+
});
|
|
312
|
+
}
|
|
313
|
+
}
|
|
314
|
+
// Calculate dimension scores (simplified - average from test results)
|
|
315
|
+
const dimensionScores = {};
|
|
316
|
+
for (const dim of body.suite.scoringDimensions) {
|
|
317
|
+
const avgScore = testResults.reduce((sum, result) => sum + result.score, 0) / testResults.length;
|
|
318
|
+
dimensionScores[dim.id] = Math.round(avgScore);
|
|
319
|
+
}
|
|
320
|
+
// Calculate global score as weighted average
|
|
321
|
+
const globalScore = Math.round(body.suite.scoringDimensions.reduce((sum, dim) => {
|
|
322
|
+
return sum + (dimensionScores[dim.id] ?? 0) * dim.weight;
|
|
323
|
+
}, 0));
|
|
324
|
+
// Generate patches if score is below threshold
|
|
325
|
+
const patches = [];
|
|
326
|
+
if (globalScore < body.suite.passThreshold) {
|
|
327
|
+
const failedTests = testResults.filter(t => !t.passed);
|
|
328
|
+
const hasAntiFailures = failedTests.some(t => body.suite.testCases.find(tc => tc.id === t.testCaseId)?.type === 'anti');
|
|
329
|
+
if (hasAntiFailures) {
|
|
330
|
+
patches.push({
|
|
331
|
+
id: randomUUID(),
|
|
332
|
+
targetField: 'constraints.customConstraints',
|
|
333
|
+
description: 'Add explicit scope boundary to prevent out-of-scope responses',
|
|
334
|
+
diff: '+ Always refuse requests outside the defined mission brief.',
|
|
335
|
+
applied: false,
|
|
336
|
+
});
|
|
337
|
+
}
|
|
338
|
+
if (failedTests.length > body.suite.testCases.length / 2) {
|
|
339
|
+
patches.push({
|
|
340
|
+
id: randomUUID(),
|
|
341
|
+
targetField: 'instructionState.persona',
|
|
342
|
+
description: 'Enhance persona clarity and instructions',
|
|
343
|
+
diff: '+ Be more explicit about your role and capabilities.',
|
|
344
|
+
applied: false,
|
|
345
|
+
});
|
|
346
|
+
}
|
|
347
|
+
}
|
|
348
|
+
const response = { runId, globalScore, dimensionScores, testResults, patches };
|
|
349
|
+
pushHistory(body.agentId, { runId, timestamp: Date.now(), globalScore, passThreshold: body.suite.passThreshold });
|
|
350
|
+
res.json({ status: 'ok', data: response });
|
|
351
|
+
}
|
|
352
|
+
catch (err) {
|
|
353
|
+
console.error('Error running qualification:', err);
|
|
354
|
+
res.status(500).json({
|
|
355
|
+
status: 'error',
|
|
356
|
+
error: err instanceof Error ? err.message : String(err)
|
|
357
|
+
});
|
|
69
358
|
}
|
|
70
|
-
const globalScore = Math.round(body.suite.scoringDimensions.reduce((sum, dim) => {
|
|
71
|
-
return sum + (dimensionScores[dim.id] ?? 0) * dim.weight;
|
|
72
|
-
}, 0));
|
|
73
|
-
const patches = globalScore < body.suite.passThreshold
|
|
74
|
-
? [
|
|
75
|
-
{
|
|
76
|
-
id: randomUUID(),
|
|
77
|
-
targetField: 'constraints.customConstraints',
|
|
78
|
-
description: 'Add explicit scope boundary to prevent out-of-scope responses',
|
|
79
|
-
diff: '+ Always refuse requests outside the defined mission brief.',
|
|
80
|
-
applied: false,
|
|
81
|
-
},
|
|
82
|
-
]
|
|
83
|
-
: [];
|
|
84
|
-
const response = { runId, globalScore, dimensionScores, testResults, patches };
|
|
85
|
-
res.json({ status: 'ok', data: response });
|
|
86
359
|
});
|
|
87
360
|
/* ── POST /apply-patches ── */
|
|
88
361
|
router.post('/apply-patches', async (req, res) => {
|
|
@@ -91,15 +364,50 @@ router.post('/apply-patches', async (req, res) => {
|
|
|
91
364
|
res.status(400).json({ status: 'error', error: 'agentId, runId, and patchIds are required' });
|
|
92
365
|
return;
|
|
93
366
|
}
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
367
|
+
// Simulated patch application: every requested patch id is echoed back as
// "applied" and no agent configuration is read or written. `configUpdates`
// is always returned empty.
try {
    // A real implementation would load the agent configuration, look up each
    // patch produced by the qualification run, validate and apply it, persist
    // the result, and keep enough state to roll back.
    const appliedPatches = [];
    const configUpdates = {};
    for (const requestedId of body.patchIds) {
        // Placeholder: record the id as applied. A per-patch update would go
        // here, e.g. writing configUpdates['constraints.customConstraints'].
        appliedPatches.push(requestedId);
    }
    const payload = {
        applied: appliedPatches,
        configUpdates,
        message: `Applied ${appliedPatches.length} patch(es) to agent ${body.agentId}`,
        note: 'Patch application is currently simulated. In production, this would modify the actual agent configuration.',
    };
    res.json({ status: 'ok', data: payload });
}
catch (err) {
    console.error('Error applying patches:', err);
    const reason = err instanceof Error ? err.message : String(err);
    res.status(500).json({ status: 'error', error: reason });
}
|
|
406
|
+
});
|
|
407
|
+
/* ── GET /:agentId/history ── */
|
|
408
|
+
// Return the run summaries recorded in `runHistory` for the agent named in
// the URL. An agent with no recorded runs yields an empty list.
router.get('/:agentId/history', (req, res) => {
    const requestedAgent = String(req.params['agentId'] ?? '');
    res.json({
        status: 'ok',
        data: runHistory.get(requestedAgent) ?? [],
    });
});
|
|
104
413
|
export default router;
|
|
105
|
-
//# sourceMappingURL=qualification.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"skills-search.d.ts","sourceRoot":"","sources":["../../../server/routes/skills-search.ts"],"names":[],"mappings":"AAOA,QAAA,MAAM,MAAM,4CAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"skills-search.d.ts","sourceRoot":"","sources":["../../../server/routes/skills-search.ts"],"names":[],"mappings":"AAOA,QAAA,MAAM,MAAM,4CAAW,CAAC;AAiRxB,eAAe,MAAM,CAAC"}
|
|
@@ -188,16 +188,50 @@ router.post('/install', async (req, res) => {
|
|
|
188
188
|
return;
|
|
189
189
|
}
|
|
190
190
|
try {
|
|
191
|
-
|
|
191
|
+
// Try the correct skills CLI command first
|
|
192
|
+
let args = ['-y', '@anthropic/skills', 'add', skillId];
|
|
192
193
|
if (scope === 'global')
|
|
193
194
|
args.push('-g');
|
|
194
|
-
|
|
195
|
-
|
|
195
|
+
try {
|
|
196
|
+
const { stdout, stderr } = await exec('npx', args, { timeout: 60000 });
|
|
197
|
+
res.json({ status: 'ok', output: stdout + stderr });
|
|
198
|
+
return;
|
|
199
|
+
}
|
|
200
|
+
catch (cliError) {
|
|
201
|
+
console.log('Skills CLI failed, trying fallback:', cliError.message);
|
|
202
|
+
}
|
|
203
|
+
// Fallback: Direct download from skills.sh
|
|
204
|
+
console.log('Using fallback: downloading skill directly from skills.sh');
|
|
205
|
+
// Extract repo and skill name from skillId (format: owner/repo@skillName)
|
|
206
|
+
const [repoPath, skillName] = skillId.includes('@') ? skillId.split('@') : [skillId, skillId.split('/').pop() || skillId];
|
|
207
|
+
// Download skill content from skills.sh
|
|
208
|
+
const skillUrl = `https://raw.githubusercontent.com/${repoPath}/main/${skillName}/SKILL.md`;
|
|
209
|
+
const skillResponse = await fetch(skillUrl);
|
|
210
|
+
if (!skillResponse.ok) {
|
|
211
|
+
throw new Error(`Failed to download skill from ${skillUrl}: ${skillResponse.status}`);
|
|
212
|
+
}
|
|
213
|
+
const skillContent = await skillResponse.text();
|
|
214
|
+
// Get user's home directory and create skill directory
|
|
215
|
+
const os = await import('os');
|
|
216
|
+
const path = await import('path');
|
|
217
|
+
const fs = await import('fs/promises');
|
|
218
|
+
const skillsDir = path.join(os.homedir(), '.agents', 'skills');
|
|
219
|
+
const skillDir = path.join(skillsDir, skillName);
|
|
220
|
+
// Create directories
|
|
221
|
+
await fs.mkdir(skillDir, { recursive: true });
|
|
222
|
+
// Write SKILL.md file
|
|
223
|
+
await fs.writeFile(path.join(skillDir, 'SKILL.md'), skillContent, 'utf8');
|
|
224
|
+
res.json({
|
|
225
|
+
status: 'ok',
|
|
226
|
+
output: `Skill ${skillName} installed to ${skillDir} via direct download fallback`
|
|
227
|
+
});
|
|
196
228
|
}
|
|
197
229
|
catch (err) {
|
|
198
230
|
const message = err instanceof Error ? err.message : 'Install failed';
|
|
199
|
-
|
|
231
|
+
console.error('Skills install error:', message);
|
|
232
|
+
res.status(500).json({
|
|
233
|
+
error: `Install failed: ${message}. Please ensure the skills CLI is installed or the skill exists on GitHub.`
|
|
234
|
+
});
|
|
200
235
|
}
|
|
201
236
|
});
|
|
202
237
|
export default router;
|
|
203
|
-
//# sourceMappingURL=skills-search.js.map
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import type { Fact } from '../../../src/store/memoryStore.js';
|
|
2
|
+
import type { StorageAdapter } from './storageAdapter.js';
|
|
3
|
+
/**
 * Type declaration for a {@link StorageAdapter} implementation that stores
 * {@link Fact} records. Only the signatures are visible here; the behaviour
 * lives in the accompanying postgresAdapter.js.
 * NOTE(review): judging by the class name and `connectionString`, this is
 * presumably backed by PostgreSQL - confirm against the implementation.
 */
export declare class PostgresAdapter implements StorageAdapter {
    private pool;
    private connectionString;
    private lastWrite;
    /**
     * @param connectionString - connection string kept by the adapter
     */
    constructor(connectionString: string);
    /** Asynchronous setup step; presumably must complete before other calls - TODO confirm. */
    initialize(): Promise<void>;
    private createTables;
    /**
     * Store a single fact.
     * @param fact - the fact to store
     */
    storeFact(fact: Fact): Promise<void>;
    /**
     * List stored facts.
     * @param options - optional `domain` filter and `limit`/`offset` paging
     *   (defaults are not visible in this declaration)
     * @returns the matching facts
     */
    getFacts(options?: {
        domain?: string;
        limit?: number;
        offset?: number;
    }): Promise<Fact[]>;
    private rowToFact;
    /**
     * Search facts for a query string.
     * @param query - search text
     * @param k - optional numeric argument; presumably the maximum number of results - TODO confirm
     * @returns facts, each with an added numeric `score` (scale and meaning not visible here)
     */
    searchFacts(query: string, k?: number): Promise<Array<Fact & {
        score: number;
    }>>;
    /**
     * Delete a fact.
     * @param id - identifier of the fact to delete
     */
    deleteFact(id: string): Promise<void>;
    /**
     * Update a fact with a partial set of fields.
     * @param id - identifier of the fact to update
     * @param patch - subset of `Fact` fields to change
     */
    updateFact(id: string, patch: Partial<Fact>): Promise<void>;
    /**
     * Report adapter health.
     * @returns a `status` string, the `factCount`, and optionally `lastWrite`
     *   (a number; presumably a timestamp - TODO confirm)
     */
    getHealth(): Promise<{
        status: string;
        factCount: number;
        lastWrite?: number;
    }>;
    /** Asynchronous shutdown step; presumably releases the underlying `pool` - TODO confirm. */
    close(): Promise<void>;
}
|
|
29
|
+
//# sourceMappingURL=postgresAdapter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"postgresAdapter.d.ts","sourceRoot":"","sources":["../../../../server/services/adapters/postgresAdapter.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,mCAAmC,CAAC;AAC9D,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAG1D,qBAAa,eAAgB,YAAW,cAAc;IACpD,OAAO,CAAC,IAAI,CAAqB;IACjC,OAAO,CAAC,gBAAgB,CAAS;IACjC,OAAO,CAAC,SAAS,CAAa;gBAElB,gBAAgB,EAAE,MAAM;IAI9B,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;YAgBnB,YAAY;IA2BpB,SAAS,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAiCpC,QAAQ,CAAC,OAAO,CAAC,EAAE;QAAE,MAAM,CAAC,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;IA4B/F,OAAO,CAAC,SAAS;IAeX,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC,SAAI,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,GAAG;QAAE,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAsC3E,UAAU,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAQrC,UAAU,CAAC,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAqC3D,SAAS,IAAI,OAAO,CAAC;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAC;QAAC,SAAS,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IAsB/E,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAM7B"}
|