@amodalai/amodal 0.3.35 → 0.3.37

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@amodalai/amodal",
3
- "version": "0.3.35",
3
+ "version": "0.3.37",
4
4
  "description": "Amodal CLI",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -26,12 +26,12 @@
26
26
  "react": "^19.2.4",
27
27
  "yargs": "^17.7.2",
28
28
  "zod": "^4.3.6",
29
- "@amodalai/types": "0.3.35",
30
- "@amodalai/core": "0.3.35",
31
- "@amodalai/db": "0.3.35",
32
- "@amodalai/runtime": "0.3.35",
33
- "@amodalai/studio": "0.3.35",
34
- "@amodalai/runtime-app": "0.3.35"
29
+ "@amodalai/types": "0.3.37",
30
+ "@amodalai/core": "0.3.37",
31
+ "@amodalai/db": "0.3.37",
32
+ "@amodalai/runtime": "0.3.37",
33
+ "@amodalai/studio": "0.3.37",
34
+ "@amodalai/runtime-app": "0.3.37"
35
35
  },
36
36
  "devDependencies": {
37
37
  "@types/node": "^20.11.24",
@@ -87,6 +87,21 @@ describe.skipIf(!!skipReason)('subprocess smoke tests', () => {
87
87
  mkdirSync(knowledgeDir, {recursive: true});
88
88
  writeFileSync(resolve(knowledgeDir, 'test-doc.md'), '# Test\n\nSENTINEL_FILE_TOOLS_9923\n');
89
89
 
90
+ // Create a test eval for eval/arena tests
91
+ const evalsDir = resolve(agentDir, 'evals');
92
+ mkdirSync(evalsDir, {recursive: true});
93
+ writeFileSync(resolve(evalsDir, 'math-check.md'), [
94
+ '# Eval: Math Check',
95
+ '',
96
+ '## Query',
97
+ 'What is 2 + 2? Reply with just the number.',
98
+ '',
99
+ '## Assertions',
100
+ '- Should contain the number 4',
101
+ '- contains: 4',
102
+ '- Should NOT contain the word elephant',
103
+ ].join('\n'));
104
+
90
105
  const cliEntry = resolve(__dir, '../dist/src/main.js');
91
106
  if (!existsSync(cliEntry)) {
92
107
  throw new Error(`CLI not built — run pnpm --filter @amodalai/amodal run build first`);
@@ -188,4 +203,51 @@ describe.skipIf(!!skipReason)('subprocess smoke tests', () => {
188
203
  expect(text).toContain('tool_call_start');
189
204
  expect(text).toContain('SENTINEL_FILE_TOOLS_9923');
190
205
  }, 45_000);
206
+
207
+ it('runtime runs eval and returns results with assertions', async () => {
208
+ const res = await fetch(`http://localhost:${RUNTIME_PORT}/api/evals/run`, {
209
+ method: 'POST',
210
+ headers: {'Content-Type': 'application/json'},
211
+ body: JSON.stringify({evalNames: ['math-check']}),
212
+ signal: AbortSignal.timeout(30_000),
213
+ });
214
+ expect(res.status).toBe(200);
215
+ const text = await res.text();
216
+ // Parse the eval_complete event
217
+ const evalLine = text.split('\n').find((l) => l.includes('eval_complete'));
218
+ expect(evalLine).toBeDefined();
219
+ const event = JSON.parse(evalLine!.replace('data: ', '')) as Record<string, unknown>;
220
+ expect(event['evalName']).toBe('math-check');
221
+ expect(typeof event['passed']).toBe('boolean');
222
+ const result = event['result'] as Record<string, unknown>;
223
+ expect(result['response']).toBeDefined();
224
+ expect(Array.isArray(result['assertions'])).toBe(true);
225
+ // Verify deterministic assertions were evaluated (3 total: 1 LLM-judged + 1 deterministic + 1 negated)
226
+ const assertions = result['assertions'] as Array<Record<string, unknown>>;
227
+ expect(assertions.length).toBe(3);
228
+ // The deterministic "contains: 4" assertion should have a reason mentioning "contains"
229
+ const containsAssertion = assertions.find((a) => a['text'] === 'contains: 4');
230
+ expect(containsAssertion).toBeDefined();
231
+ expect(containsAssertion!['reason']).toBeDefined();
232
+ expect(result['durationMs']).toBeDefined();
233
+ }, 45_000);
234
+
235
+ it('runtime runs arena eval with specified model', async () => {
236
+ const res = await fetch(`http://localhost:${RUNTIME_PORT}/api/evals/run`, {
237
+ method: 'POST',
238
+ headers: {'Content-Type': 'application/json'},
239
+ body: JSON.stringify({
240
+ evalNames: ['math-check'],
241
+ model: {provider: 'google', model: 'gemini-2.0-flash'},
242
+ }),
243
+ signal: AbortSignal.timeout(30_000),
244
+ });
245
+ expect(res.status).toBe(200);
246
+ const text = await res.text();
247
+ const evalLine = text.split('\n').find((l) => l.includes('eval_complete'));
248
+ expect(evalLine).toBeDefined();
249
+ const event = JSON.parse(evalLine!.replace('data: ', '')) as Record<string, unknown>;
250
+ expect(event['evalName']).toBe('math-check');
251
+ expect(typeof event['passed']).toBe('boolean');
252
+ }, 45_000);
191
253
  });