@amodalai/amodal 0.3.35 → 0.3.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # @amodalai/amodal
2
2
 
3
+ ## 0.3.36
4
+
5
+ ### Patch Changes
6
+
7
+ - Updated dependencies [701b5d0]
8
+ - @amodalai/runtime@0.3.36
9
+ - @amodalai/studio@0.3.36
10
+ - @amodalai/types@0.3.36
11
+ - @amodalai/core@0.3.36
12
+ - @amodalai/runtime-app@0.3.36
13
+ - @amodalai/db@0.3.36
14
+
3
15
  ## 0.3.35
4
16
 
5
17
  ### Patch Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@amodalai/amodal",
3
- "version": "0.3.35",
3
+ "version": "0.3.36",
4
4
  "description": "Amodal CLI",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -26,12 +26,12 @@
26
26
  "react": "^19.2.4",
27
27
  "yargs": "^17.7.2",
28
28
  "zod": "^4.3.6",
29
- "@amodalai/types": "0.3.35",
30
- "@amodalai/core": "0.3.35",
31
- "@amodalai/db": "0.3.35",
32
- "@amodalai/runtime": "0.3.35",
33
- "@amodalai/studio": "0.3.35",
34
- "@amodalai/runtime-app": "0.3.35"
29
+ "@amodalai/types": "0.3.36",
30
+ "@amodalai/core": "0.3.36",
31
+ "@amodalai/db": "0.3.36",
32
+ "@amodalai/runtime": "0.3.36",
33
+ "@amodalai/studio": "0.3.36",
34
+ "@amodalai/runtime-app": "0.3.36"
35
35
  },
36
36
  "devDependencies": {
37
37
  "@types/node": "^20.11.24",
@@ -87,6 +87,19 @@ describe.skipIf(!!skipReason)('subprocess smoke tests', () => {
87
87
  mkdirSync(knowledgeDir, {recursive: true});
88
88
  writeFileSync(resolve(knowledgeDir, 'test-doc.md'), '# Test\n\nSENTINEL_FILE_TOOLS_9923\n');
89
89
 
90
+ // Create a test eval for eval/arena tests
91
+ const evalsDir = resolve(agentDir, 'evals');
92
+ mkdirSync(evalsDir, {recursive: true});
93
+ writeFileSync(resolve(evalsDir, 'math-check.md'), [
94
+ '# Eval: Math Check',
95
+ '',
96
+ '## Query',
97
+ 'What is 2 + 2? Reply with just the number.',
98
+ '',
99
+ '## Assertions',
100
+ '- Should contain the number 4',
101
+ ].join('\n'));
102
+
90
103
  const cliEntry = resolve(__dir, '../dist/src/main.js');
91
104
  if (!existsSync(cliEntry)) {
92
105
  throw new Error(`CLI not built — run pnpm --filter @amodalai/amodal run build first`);
@@ -188,4 +201,44 @@ describe.skipIf(!!skipReason)('subprocess smoke tests', () => {
188
201
  expect(text).toContain('tool_call_start');
189
202
  expect(text).toContain('SENTINEL_FILE_TOOLS_9923');
190
203
  }, 45_000);
204
+
205
+ it('runtime runs eval and returns results with assertions', async () => {
206
+ const res = await fetch(`http://localhost:${RUNTIME_PORT}/api/evals/run`, {
207
+ method: 'POST',
208
+ headers: {'Content-Type': 'application/json'},
209
+ body: JSON.stringify({evalNames: ['math-check']}),
210
+ signal: AbortSignal.timeout(30_000),
211
+ });
212
+ expect(res.status).toBe(200);
213
+ const text = await res.text();
214
+ // Parse the eval_complete event
215
+ const evalLine = text.split('\n').find((l) => l.includes('eval_complete'));
216
+ expect(evalLine).toBeDefined();
217
+ const event = JSON.parse(evalLine!.replace('data: ', '')) as Record<string, unknown>;
218
+ expect(event['evalName']).toBe('math-check');
219
+ expect(typeof event['passed']).toBe('boolean');
220
+ const result = event['result'] as Record<string, unknown>;
221
+ expect(result['response']).toBeDefined();
222
+ expect(Array.isArray(result['assertions'])).toBe(true);
223
+ expect(result['durationMs']).toBeDefined();
224
+ }, 45_000);
225
+
226
+ it('runtime runs arena eval with specified model', async () => {
227
+ const res = await fetch(`http://localhost:${RUNTIME_PORT}/api/evals/run`, {
228
+ method: 'POST',
229
+ headers: {'Content-Type': 'application/json'},
230
+ body: JSON.stringify({
231
+ evalNames: ['math-check'],
232
+ model: {provider: 'google', model: 'gemini-2.0-flash'},
233
+ }),
234
+ signal: AbortSignal.timeout(30_000),
235
+ });
236
+ expect(res.status).toBe(200);
237
+ const text = await res.text();
238
+ const evalLine = text.split('\n').find((l) => l.includes('eval_complete'));
239
+ expect(evalLine).toBeDefined();
240
+ const event = JSON.parse(evalLine!.replace('data: ', '')) as Record<string, unknown>;
241
+ expect(event['evalName']).toBe('math-check');
242
+ expect(typeof event['passed']).toBe('boolean');
243
+ }, 45_000);
191
244
  });