@amodalai/amodal 0.3.35 → 0.3.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +7 -7
- package/src/e2e-subprocess.test.ts +62 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@amodalai/amodal",
|
|
3
|
-
"version": "0.3.
|
|
3
|
+
"version": "0.3.37",
|
|
4
4
|
"description": "Amodal CLI",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": {
|
|
@@ -26,12 +26,12 @@
|
|
|
26
26
|
"react": "^19.2.4",
|
|
27
27
|
"yargs": "^17.7.2",
|
|
28
28
|
"zod": "^4.3.6",
|
|
29
|
-
"@amodalai/types": "0.3.
|
|
30
|
-
"@amodalai/core": "0.3.
|
|
31
|
-
"@amodalai/db": "0.3.
|
|
32
|
-
"@amodalai/runtime": "0.3.
|
|
33
|
-
"@amodalai/studio": "0.3.
|
|
34
|
-
"@amodalai/runtime-app": "0.3.
|
|
29
|
+
"@amodalai/types": "0.3.37",
|
|
30
|
+
"@amodalai/core": "0.3.37",
|
|
31
|
+
"@amodalai/db": "0.3.37",
|
|
32
|
+
"@amodalai/runtime": "0.3.37",
|
|
33
|
+
"@amodalai/studio": "0.3.37",
|
|
34
|
+
"@amodalai/runtime-app": "0.3.37"
|
|
35
35
|
},
|
|
36
36
|
"devDependencies": {
|
|
37
37
|
"@types/node": "^20.11.24",
|
|
@@ -87,6 +87,21 @@ describe.skipIf(!!skipReason)('subprocess smoke tests', () => {
|
|
|
87
87
|
mkdirSync(knowledgeDir, {recursive: true});
|
|
88
88
|
writeFileSync(resolve(knowledgeDir, 'test-doc.md'), '# Test\n\nSENTINEL_FILE_TOOLS_9923\n');
|
|
89
89
|
|
|
90
|
+
// Create a test eval for eval/arena tests
|
|
91
|
+
const evalsDir = resolve(agentDir, 'evals');
|
|
92
|
+
mkdirSync(evalsDir, {recursive: true});
|
|
93
|
+
writeFileSync(resolve(evalsDir, 'math-check.md'), [
|
|
94
|
+
'# Eval: Math Check',
|
|
95
|
+
'',
|
|
96
|
+
'## Query',
|
|
97
|
+
'What is 2 + 2? Reply with just the number.',
|
|
98
|
+
'',
|
|
99
|
+
'## Assertions',
|
|
100
|
+
'- Should contain the number 4',
|
|
101
|
+
'- contains: 4',
|
|
102
|
+
'- Should NOT contain the word elephant',
|
|
103
|
+
].join('\n'));
|
|
104
|
+
|
|
90
105
|
const cliEntry = resolve(__dir, '../dist/src/main.js');
|
|
91
106
|
if (!existsSync(cliEntry)) {
|
|
92
107
|
throw new Error(`CLI not built — run pnpm --filter @amodalai/amodal run build first`);
|
|
@@ -188,4 +203,51 @@ describe.skipIf(!!skipReason)('subprocess smoke tests', () => {
|
|
|
188
203
|
expect(text).toContain('tool_call_start');
|
|
189
204
|
expect(text).toContain('SENTINEL_FILE_TOOLS_9923');
|
|
190
205
|
}, 45_000);
|
|
206
|
+
|
|
207
|
+
it('runtime runs eval and returns results with assertions', async () => {
|
|
208
|
+
const res = await fetch(`http://localhost:${RUNTIME_PORT}/api/evals/run`, {
|
|
209
|
+
method: 'POST',
|
|
210
|
+
headers: {'Content-Type': 'application/json'},
|
|
211
|
+
body: JSON.stringify({evalNames: ['math-check']}),
|
|
212
|
+
signal: AbortSignal.timeout(30_000),
|
|
213
|
+
});
|
|
214
|
+
expect(res.status).toBe(200);
|
|
215
|
+
const text = await res.text();
|
|
216
|
+
// Parse the eval_complete event
|
|
217
|
+
const evalLine = text.split('\n').find((l) => l.includes('eval_complete'));
|
|
218
|
+
expect(evalLine).toBeDefined();
|
|
219
|
+
const event = JSON.parse(evalLine!.replace('data: ', '')) as Record<string, unknown>;
|
|
220
|
+
expect(event['evalName']).toBe('math-check');
|
|
221
|
+
expect(typeof event['passed']).toBe('boolean');
|
|
222
|
+
const result = event['result'] as Record<string, unknown>;
|
|
223
|
+
expect(result['response']).toBeDefined();
|
|
224
|
+
expect(Array.isArray(result['assertions'])).toBe(true);
|
|
225
|
+
// Verify deterministic assertions were evaluated (3 total: 1 LLM-judged + 1 deterministic + 1 negated)
|
|
226
|
+
const assertions = result['assertions'] as Array<Record<string, unknown>>;
|
|
227
|
+
expect(assertions.length).toBe(3);
|
|
228
|
+
// The deterministic "contains: 4" assertion should have a reason mentioning "contains"
|
|
229
|
+
const containsAssertion = assertions.find((a) => a['text'] === 'contains: 4');
|
|
230
|
+
expect(containsAssertion).toBeDefined();
|
|
231
|
+
expect(containsAssertion!['reason']).toBeDefined();
|
|
232
|
+
expect(result['durationMs']).toBeDefined();
|
|
233
|
+
}, 45_000);
|
|
234
|
+
|
|
235
|
+
it('runtime runs arena eval with specified model', async () => {
|
|
236
|
+
const res = await fetch(`http://localhost:${RUNTIME_PORT}/api/evals/run`, {
|
|
237
|
+
method: 'POST',
|
|
238
|
+
headers: {'Content-Type': 'application/json'},
|
|
239
|
+
body: JSON.stringify({
|
|
240
|
+
evalNames: ['math-check'],
|
|
241
|
+
model: {provider: 'google', model: 'gemini-2.0-flash'},
|
|
242
|
+
}),
|
|
243
|
+
signal: AbortSignal.timeout(30_000),
|
|
244
|
+
});
|
|
245
|
+
expect(res.status).toBe(200);
|
|
246
|
+
const text = await res.text();
|
|
247
|
+
const evalLine = text.split('\n').find((l) => l.includes('eval_complete'));
|
|
248
|
+
expect(evalLine).toBeDefined();
|
|
249
|
+
const event = JSON.parse(evalLine!.replace('data: ', '')) as Record<string, unknown>;
|
|
250
|
+
expect(event['evalName']).toBe('math-check');
|
|
251
|
+
expect(typeof event['passed']).toBe('boolean');
|
|
252
|
+
}, 45_000);
|
|
191
253
|
});
|