dialectic 0.5.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,10 +4,10 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  const fs_1 = __importDefault(require("fs"));
7
- const path_1 = __importDefault(require("path"));
8
7
  const os_1 = __importDefault(require("os"));
9
- const index_1 = require("../index");
8
+ const path_1 = __importDefault(require("path"));
10
9
  const dialectic_core_1 = require("dialectic-core");
10
+ const index_1 = require("../index");
11
11
  const TEST_PROBLEM = 'Test problem';
12
12
  const TEST_SOLUTION = 'Test solution';
13
13
  const TEST_PROBLEM_SHORT = 'Test';
@@ -29,14 +29,14 @@ const MOCK_LATENCY_MS = 100;
29
29
  const TEMP_DIR_PREFIX = 'eval-test-';
30
30
  const mockCreateProvider = jest.fn();
31
31
  jest.mock('dialectic-core/providers/provider-factory', () => ({
32
- createProvider: (...args) => mockCreateProvider(...args)
32
+ createProvider: (..._args) => mockCreateProvider(..._args)
33
33
  }));
34
34
  jest.mock('dialectic-core', () => {
35
35
  const actual = jest.requireActual('dialectic-core');
36
36
  return {
37
37
  ...actual,
38
38
  loadEnvironmentFile: jest.fn(),
39
- createProvider: (...args) => mockCreateProvider(...args)
39
+ createProvider: (..._args) => mockCreateProvider(..._args)
40
40
  };
41
41
  });
42
42
  const mockedLoadEnvironmentFile = dialectic_core_1.loadEnvironmentFile;
@@ -104,7 +104,8 @@ describe('CLI eval command', () => {
104
104
  try {
105
105
  fs_1.default.rmSync(tmpDir, { recursive: true, force: true });
106
106
  }
107
- catch { }
107
+ catch {
108
+ }
108
109
  });
109
110
  describe('Required flags validation', () => {
110
111
  it('should reject when --config flag is missing', async () => {
@@ -376,6 +377,24 @@ describe('CLI eval command', () => {
376
377
  await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
377
378
  expect(stderrSpy).toHaveBeenCalledWith(expect.stringContaining('[e1] Skipped due to error'));
378
379
  });
380
+ it('should handle evaluation result with empty rawText', async () => {
381
+ jest.spyOn(dialectic_core_1.EvaluatorAgent.prototype, 'evaluate').mockResolvedValue({
382
+ id: 'e1',
383
+ rawText: '',
384
+ latencyMs: 100
385
+ });
386
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
387
+ expect(stderrSpy).toHaveBeenCalledWith(expect.stringContaining('[e1] Invalid JSON output; skipping agent'));
388
+ });
389
+ it('should handle evaluation result with null rawText', async () => {
390
+ jest.spyOn(dialectic_core_1.EvaluatorAgent.prototype, 'evaluate').mockResolvedValue({
391
+ id: 'e1',
392
+ rawText: null,
393
+ latencyMs: 100
394
+ });
395
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
396
+ expect(stderrSpy).toHaveBeenCalledWith(expect.stringContaining('[e1] Invalid JSON output; skipping agent'));
397
+ });
379
398
  });
380
399
  describe('Score validation and clamping', () => {
381
400
  const SCORE_BELOW_MIN_1 = -5;
@@ -521,6 +540,50 @@ describe('CLI eval command', () => {
521
540
  const output = stdoutSpy.mock.calls.join('');
522
541
  expect(output).toContain('7.00');
523
542
  });
543
+ it('should handle missing evaluation object in parsed result', async () => {
544
+ jest.spyOn(dialectic_core_1.EvaluatorAgent.prototype, 'evaluate').mockResolvedValue({
545
+ id: 'e1',
546
+ rawText: JSON.stringify({
547
+ overall_summary: { overall_score: 8 }
548
+ }),
549
+ latencyMs: 100
550
+ });
551
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
552
+ const output = stdoutSpy.mock.calls.join('');
553
+ expect(output).toContain('8.00');
554
+ expect(output).toContain('N/A');
555
+ });
556
+ it('should handle missing non_functional object in parsed result', async () => {
557
+ jest.spyOn(dialectic_core_1.EvaluatorAgent.prototype, 'evaluate').mockResolvedValue({
558
+ id: 'e1',
559
+ rawText: JSON.stringify({
560
+ evaluation: {
561
+ functional_completeness: { score: 8 }
562
+ },
563
+ overall_summary: { overall_score: 8 }
564
+ }),
565
+ latencyMs: 100
566
+ });
567
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
568
+ const output = stdoutSpy.mock.calls.join('');
569
+ expect(output).toContain('8.00');
570
+ expect(output).toContain('N/A');
571
+ });
572
+ it('should handle missing overall_summary object in parsed result', async () => {
573
+ jest.spyOn(dialectic_core_1.EvaluatorAgent.prototype, 'evaluate').mockResolvedValue({
574
+ id: 'e1',
575
+ rawText: JSON.stringify({
576
+ evaluation: {
577
+ functional_completeness: { score: 8 }
578
+ }
579
+ }),
580
+ latencyMs: 100
581
+ });
582
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
583
+ const output = stdoutSpy.mock.calls.join('');
584
+ expect(output).toContain('8.00');
585
+ expect(output).toContain('N/A');
586
+ });
524
587
  it('should round to 2 decimal places', async () => {
525
588
  const evalSpy = jest.spyOn(dialectic_core_1.EvaluatorAgent.prototype, 'evaluate');
526
589
  evalSpy.mockResolvedValueOnce({
@@ -1115,6 +1178,38 @@ describe('CLI eval command', () => {
1115
1178
  expect(stderrSpy).toHaveBeenCalledWith(expect.stringMatching(/systemPrompt=.*built-in default/));
1116
1179
  expect(stderrSpy).toHaveBeenCalledWith(expect.stringMatching(/userPrompt=.*built-in default/));
1117
1180
  });
1181
+ it('should log prompt sources in verbose mode (from file)', async () => {
1182
+ const promptsDir = path_1.default.join(tmpDir, 'prompts');
1183
+ fs_1.default.mkdirSync(promptsDir);
1184
+ const systemPromptPath = path_1.default.join(promptsDir, 'system.md');
1185
+ const userPromptPath = path_1.default.join(promptsDir, 'user.md');
1186
+ fs_1.default.writeFileSync(systemPromptPath, 'Custom system prompt');
1187
+ fs_1.default.writeFileSync(userPromptPath, 'Custom user prompt');
1188
+ const configPathWithPrompts = path_1.default.join(tmpDir, 'config-with-prompts.json');
1189
+ fs_1.default.writeFileSync(configPathWithPrompts, JSON.stringify({
1190
+ agents: [{
1191
+ id: 'e1',
1192
+ name: 'E1',
1193
+ model: 'gpt-4',
1194
+ provider: 'openai',
1195
+ systemPromptPath: './prompts/system.md',
1196
+ userPromptPath: './prompts/user.md'
1197
+ }]
1198
+ }));
1199
+ jest.spyOn(dialectic_core_1.EvaluatorAgent.prototype, 'evaluate').mockResolvedValue({
1200
+ id: 'e1',
1201
+ rawText: JSON.stringify({
1202
+ evaluation: { functional_completeness: { score: 8 } },
1203
+ overall_summary: { overall_score: 8 }
1204
+ }),
1205
+ latencyMs: 100
1206
+ });
1207
+ await (0, index_1.runCli)(['eval', '--config', configPathWithPrompts, '--debate', debatePath, '--verbose']);
1208
+ const stderrCalls = stderrSpy.mock.calls.map(c => String(c[0])).join('');
1209
+ expect(stderrCalls).toContain('systemPrompt=');
1210
+ expect(stderrCalls).toContain('userPrompt=');
1211
+ expect(stderrCalls).not.toContain('built-in default');
1212
+ });
1118
1213
  it('should not log verbose info when verbose flag is absent', async () => {
1119
1214
  jest.spyOn(dialectic_core_1.EvaluatorAgent.prototype, 'evaluate').mockResolvedValue({
1120
1215
  id: 'e1',
@@ -1168,7 +1263,7 @@ describe('CLI eval command', () => {
1168
1263
  }),
1169
1264
  resolvedSystemPrompt: ''
1170
1265
  };
1171
- fromConfigSpy.mockImplementation((_cfg, sysPrompt, _userPrompt) => {
1266
+ fromConfigSpy.mockImplementation((_cfg, sysPrompt) => {
1172
1267
  mockAgent.resolvedSystemPrompt = sysPrompt;
1173
1268
  return mockAgent;
1174
1269
  });
@@ -1264,6 +1359,517 @@ describe('CLI eval command', () => {
1264
1359
  expect(output).toContain('5.00');
1265
1360
  expect(output).toContain('4.00');
1266
1361
  });
1362
+ it('should handle requirements_fulfillment score', async () => {
1363
+ jest.spyOn(dialectic_core_1.EvaluatorAgent.prototype, 'evaluate').mockResolvedValue({
1364
+ id: 'e1',
1365
+ rawText: JSON.stringify({
1366
+ evaluation: {
1367
+ functional_completeness: { score: 8 },
1368
+ non_functional: {
1369
+ requirements_fulfillment: { score: 9 }
1370
+ }
1371
+ },
1372
+ overall_summary: { overall_score: 8 }
1373
+ }),
1374
+ latencyMs: 100
1375
+ });
1376
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
1377
+ const output = stdoutSpy.mock.calls.join('');
1378
+ expect(output).toContain('9.00');
1379
+ });
1380
+ });
1381
+ describe('Clarification formatting edge cases', () => {
1382
+ let configPath;
1383
+ let debatePath;
1384
+ beforeEach(() => {
1385
+ configPath = path_1.default.join(tmpDir, CONFIG_FILE_NAME);
1386
+ debatePath = path_1.default.join(tmpDir, DEBATE_FILE_NAME);
1387
+ fs_1.default.writeFileSync(configPath, JSON.stringify({
1388
+ agents: [createBasicAgentConfig()]
1389
+ }));
1390
+ setupMockProviderAndEvaluator();
1391
+ });
1392
+ it('should handle clarification item with missing id', async () => {
1393
+ fs_1.default.writeFileSync(debatePath, JSON.stringify({
1394
+ ...createBasicDebateData(TEST_PROBLEM_SHORT, TEST_SOLUTION_SHORT),
1395
+ clarifications: [
1396
+ {
1397
+ agentId: 'architect-1',
1398
+ agentName: 'Architect',
1399
+ role: 'architect',
1400
+ items: [
1401
+ { question: 'What is the scale?', answer: '1M users' }
1402
+ ]
1403
+ }
1404
+ ]
1405
+ }));
1406
+ const evaluateSpy = mockSuccessfulEvaluation();
1407
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
1408
+ expect(evaluateSpy).toHaveBeenCalled();
1409
+ const call = evaluateSpy.mock.calls[0]?.[0];
1410
+ expect(call).toBeDefined();
1411
+ expect(call?.clarificationsMarkdown).not.toContain('What is the scale?');
1412
+ });
1413
+ it('should handle clarification item with missing question', async () => {
1414
+ fs_1.default.writeFileSync(debatePath, JSON.stringify({
1415
+ ...createBasicDebateData(TEST_PROBLEM_SHORT, TEST_SOLUTION_SHORT),
1416
+ clarifications: [
1417
+ {
1418
+ agentId: 'architect-1',
1419
+ agentName: 'Architect',
1420
+ role: 'architect',
1421
+ items: [
1422
+ { id: 'q1', answer: '1M users' }
1423
+ ]
1424
+ }
1425
+ ]
1426
+ }));
1427
+ const evaluateSpy = mockSuccessfulEvaluation();
1428
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
1429
+ expect(evaluateSpy).toHaveBeenCalled();
1430
+ const call = evaluateSpy.mock.calls[0]?.[0];
1431
+ expect(call).toBeDefined();
1432
+ expect(call?.clarificationsMarkdown).not.toContain('q1');
1433
+ });
1434
+ it('should handle clarification item with missing answer', async () => {
1435
+ fs_1.default.writeFileSync(debatePath, JSON.stringify({
1436
+ ...createBasicDebateData(TEST_PROBLEM_SHORT, TEST_SOLUTION_SHORT),
1437
+ clarifications: [
1438
+ {
1439
+ agentId: 'architect-1',
1440
+ agentName: 'Architect',
1441
+ role: 'architect',
1442
+ items: [
1443
+ { id: 'q1', question: 'What is the scale?' }
1444
+ ]
1445
+ }
1446
+ ]
1447
+ }));
1448
+ const evaluateSpy = mockSuccessfulEvaluation();
1449
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
1450
+ expect(evaluateSpy).toHaveBeenCalled();
1451
+ const call = evaluateSpy.mock.calls[0]?.[0];
1452
+ expect(call).toBeDefined();
1453
+ expect(call?.clarificationsMarkdown).not.toContain('What is the scale?');
1454
+ });
1455
+ it('should handle clarification group with missing agentName', async () => {
1456
+ fs_1.default.writeFileSync(debatePath, JSON.stringify({
1457
+ ...createBasicDebateData(TEST_PROBLEM_SHORT, TEST_SOLUTION_SHORT),
1458
+ clarifications: [
1459
+ {
1460
+ agentId: 'architect-1',
1461
+ role: 'architect',
1462
+ items: [
1463
+ { id: 'q1', question: 'What is the scale?', answer: '1M users' }
1464
+ ]
1465
+ }
1466
+ ]
1467
+ }));
1468
+ const evaluateSpy = mockSuccessfulEvaluation();
1469
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
1470
+ expect(evaluateSpy).toHaveBeenCalled();
1471
+ const call = evaluateSpy.mock.calls[0]?.[0];
1472
+ expect(call).toBeDefined();
1473
+ expect(call?.clarificationsMarkdown).not.toContain('What is the scale?');
1474
+ });
1475
+ it('should handle clarification group with missing role', async () => {
1476
+ fs_1.default.writeFileSync(debatePath, JSON.stringify({
1477
+ ...createBasicDebateData(TEST_PROBLEM_SHORT, TEST_SOLUTION_SHORT),
1478
+ clarifications: [
1479
+ {
1480
+ agentId: 'architect-1',
1481
+ agentName: 'Architect',
1482
+ items: [
1483
+ { id: 'q1', question: 'What is the scale?', answer: '1M users' }
1484
+ ]
1485
+ }
1486
+ ]
1487
+ }));
1488
+ const evaluateSpy = mockSuccessfulEvaluation();
1489
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
1490
+ expect(evaluateSpy).toHaveBeenCalled();
1491
+ const call = evaluateSpy.mock.calls[0]?.[0];
1492
+ expect(call).toBeDefined();
1493
+ expect(call?.clarificationsMarkdown).not.toContain('What is the scale?');
1494
+ });
1495
+ it('should handle clarification group with empty items array', async () => {
1496
+ fs_1.default.writeFileSync(debatePath, JSON.stringify({
1497
+ ...createBasicDebateData(TEST_PROBLEM_SHORT, TEST_SOLUTION_SHORT),
1498
+ clarifications: [
1499
+ {
1500
+ agentId: 'architect-1',
1501
+ agentName: 'Architect',
1502
+ role: 'architect',
1503
+ items: []
1504
+ }
1505
+ ]
1506
+ }));
1507
+ const evaluateSpy = mockSuccessfulEvaluation();
1508
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
1509
+ expect(evaluateSpy).toHaveBeenCalled();
1510
+ const call = evaluateSpy.mock.calls[0]?.[0];
1511
+ expect(call).toBeDefined();
1512
+ expect(call?.clarificationsMarkdown).not.toContain('Architect');
1513
+ });
1514
+ it('should handle clarification group that becomes empty after filtering invalid items', async () => {
1515
+ fs_1.default.writeFileSync(debatePath, JSON.stringify({
1516
+ ...createBasicDebateData(TEST_PROBLEM_SHORT, TEST_SOLUTION_SHORT),
1517
+ clarifications: [
1518
+ {
1519
+ agentId: 'architect-1',
1520
+ agentName: 'Architect',
1521
+ role: 'architect',
1522
+ items: [
1523
+ { question: 'Invalid item without id', answer: 'answer' }
1524
+ ]
1525
+ }
1526
+ ]
1527
+ }));
1528
+ const evaluateSpy = mockSuccessfulEvaluation();
1529
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
1530
+ expect(evaluateSpy).toHaveBeenCalled();
1531
+ const call = evaluateSpy.mock.calls[0]?.[0];
1532
+ expect(call).toBeDefined();
1533
+ const markdown = call?.clarificationsMarkdown || '';
1534
+ expect(markdown).toContain('### Architect (architect)');
1535
+ expect(markdown).not.toContain('Invalid item without id');
1536
+ });
1537
+ });
1538
+ describe('Config validation edge cases', () => {
1539
+ let debatePath;
1540
+ beforeEach(() => {
1541
+ debatePath = path_1.default.join(tmpDir, DEBATE_FILE_NAME);
1542
+ fs_1.default.writeFileSync(debatePath, JSON.stringify(createBasicDebateData()));
1543
+ });
1544
+ it('should reject config with agent that is null', async () => {
1545
+ const configPath = path_1.default.join(tmpDir, CONFIG_FILE_NAME);
1546
+ fs_1.default.writeFileSync(configPath, JSON.stringify({
1547
+ agents: [null]
1548
+ }));
1549
+ await expect((0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]))
1550
+ .rejects.toHaveProperty('code', dialectic_core_1.EXIT_INVALID_ARGS);
1551
+ expect(stderrSpy).toHaveBeenCalledWith(expect.stringContaining('agent must be an object'));
1552
+ });
1553
+ it('should reject config with agent that is a string', async () => {
1554
+ const configPath = path_1.default.join(tmpDir, CONFIG_FILE_NAME);
1555
+ fs_1.default.writeFileSync(configPath, JSON.stringify({
1556
+ agents: ['invalid-agent']
1557
+ }));
1558
+ await expect((0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]))
1559
+ .rejects.toHaveProperty('code', dialectic_core_1.EXIT_INVALID_ARGS);
1560
+ expect(stderrSpy).toHaveBeenCalledWith(expect.stringContaining('agent must be an object'));
1561
+ });
1562
+ it('should reject config with agent that is a number', async () => {
1563
+ const configPath = path_1.default.join(tmpDir, CONFIG_FILE_NAME);
1564
+ fs_1.default.writeFileSync(configPath, JSON.stringify({
1565
+ agents: [123]
1566
+ }));
1567
+ await expect((0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]))
1568
+ .rejects.toHaveProperty('code', dialectic_core_1.EXIT_INVALID_ARGS);
1569
+ });
1570
+ it('should default provider to openai when provider is not a string', async () => {
1571
+ const configPath = path_1.default.join(tmpDir, CONFIG_FILE_NAME);
1572
+ fs_1.default.writeFileSync(configPath, JSON.stringify({
1573
+ agents: [{
1574
+ id: 'e1',
1575
+ name: 'E1',
1576
+ model: 'gpt-4',
1577
+ provider: 123
1578
+ }]
1579
+ }));
1580
+ setupMockProviderAndEvaluator();
1581
+ mockSuccessfulEvaluation();
1582
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
1583
+ expect(mockedCreateProvider).toHaveBeenCalledWith(PROVIDER_OPENAI);
1584
+ });
1585
+ it('should handle agent config with null/undefined id, name, model', async () => {
1586
+ const configPath = path_1.default.join(tmpDir, CONFIG_FILE_NAME);
1587
+ fs_1.default.writeFileSync(configPath, JSON.stringify({
1588
+ agents: [{
1589
+ id: null,
1590
+ name: undefined,
1591
+ model: null,
1592
+ provider: 'openai'
1593
+ }]
1594
+ }));
1595
+ setupMockProviderAndEvaluator();
1596
+ mockSuccessfulEvaluation();
1597
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
1598
+ expect(stdoutSpy).toHaveBeenCalled();
1599
+ });
1600
+ it('should handle agent config with non-number timeout', async () => {
1601
+ const configPath = path_1.default.join(tmpDir, CONFIG_FILE_NAME);
1602
+ fs_1.default.writeFileSync(configPath, JSON.stringify({
1603
+ agents: [{
1604
+ id: 'e1',
1605
+ name: 'E1',
1606
+ model: 'gpt-4',
1607
+ provider: 'openai',
1608
+ timeout: 'invalid'
1609
+ }]
1610
+ }));
1611
+ setupMockProviderAndEvaluator();
1612
+ mockSuccessfulEvaluation();
1613
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
1614
+ expect(stdoutSpy).toHaveBeenCalled();
1615
+ });
1616
+ });
1617
+ describe('Error handling edge cases', () => {
1618
+ let configPath;
1619
+ let debatePath;
1620
+ beforeEach(() => {
1621
+ configPath = path_1.default.join(tmpDir, CONFIG_FILE_NAME);
1622
+ debatePath = path_1.default.join(tmpDir, DEBATE_FILE_NAME);
1623
+ fs_1.default.writeFileSync(configPath, JSON.stringify({
1624
+ agents: [createBasicAgentConfig()]
1625
+ }));
1626
+ fs_1.default.writeFileSync(debatePath, JSON.stringify(createBasicDebateData()));
1627
+ setupMockProviderAndEvaluator();
1628
+ });
1629
+ it('should handle error without code property', async () => {
1630
+ jest.spyOn(require('dialectic-core'), 'readJsonFile').mockImplementationOnce(() => {
1631
+ throw new Error('Test error without code');
1632
+ });
1633
+ let caughtError;
1634
+ try {
1635
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
1636
+ }
1637
+ catch (err) {
1638
+ caughtError = err;
1639
+ expect(err).toHaveProperty('code', dialectic_core_1.EXIT_GENERAL_ERROR);
1640
+ }
1641
+ expect(caughtError).toBeDefined();
1642
+ expect(caughtError.message).toBe('Test error without code');
1643
+ const stderrCalls = stderrSpy.mock.calls.map(c => String(c[0])).join('');
1644
+ expect(stderrCalls).toContain('Test error without code');
1645
+ jest.restoreAllMocks();
1646
+ });
1647
+ it('should handle error that is not an Error object', async () => {
1648
+ jest.spyOn(require('dialectic-core'), 'readJsonFile').mockImplementationOnce(() => {
1649
+ throw 'String error';
1650
+ });
1651
+ let caughtError;
1652
+ try {
1653
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
1654
+ }
1655
+ catch (err) {
1656
+ caughtError = err;
1657
+ expect(err).toHaveProperty('code', dialectic_core_1.EXIT_GENERAL_ERROR);
1658
+ }
1659
+ expect(caughtError).toBeDefined();
1660
+ expect(caughtError.message).toBe('Unknown error');
1661
+ jest.restoreAllMocks();
1662
+ });
1663
+ it('should handle error when writeStderr throws', async () => {
1664
+ jest.spyOn(require('dialectic-core'), 'writeStderr').mockImplementation(() => {
1665
+ throw new Error('writeStderr failed');
1666
+ });
1667
+ jest.spyOn(dialectic_core_1.EvaluatorAgent.prototype, 'evaluate').mockRejectedValue(new Error('Test error'));
1668
+ await expect((0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]))
1669
+ .rejects.toThrow();
1670
+ jest.restoreAllMocks();
1671
+ });
1672
+ it('should handle error with null message', async () => {
1673
+ jest.spyOn(require('dialectic-core'), 'readJsonFile').mockImplementationOnce(() => {
1674
+ const err = { code: dialectic_core_1.EXIT_INVALID_ARGS };
1675
+ throw err;
1676
+ });
1677
+ try {
1678
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
1679
+ }
1680
+ catch {
1681
+ }
1682
+ const stderrCalls = stderrSpy.mock.calls.map(c => String(c[0])).join('');
1683
+ expect(stderrCalls).toContain('Unknown error');
1684
+ jest.restoreAllMocks();
1685
+ });
1686
+ });
1687
+ describe('Path resolution', () => {
1688
+ let configPath;
1689
+ let debatePath;
1690
+ const originalInitCwd = process.env.INIT_CWD;
1691
+ beforeEach(() => {
1692
+ configPath = path_1.default.join(tmpDir, CONFIG_FILE_NAME);
1693
+ debatePath = path_1.default.join(tmpDir, DEBATE_FILE_NAME);
1694
+ fs_1.default.writeFileSync(configPath, JSON.stringify({
1695
+ agents: [createBasicAgentConfig()]
1696
+ }));
1697
+ fs_1.default.writeFileSync(debatePath, JSON.stringify(createBasicDebateData()));
1698
+ setupMockProviderAndEvaluator();
1699
+ mockSuccessfulEvaluation();
1700
+ });
1701
+ afterEach(() => {
1702
+ if (originalInitCwd) {
1703
+ process.env.INIT_CWD = originalInitCwd;
1704
+ }
1705
+ else {
1706
+ delete process.env.INIT_CWD;
1707
+ }
1708
+ });
1709
+ it('should use INIT_CWD when available for relative paths', async () => {
1710
+ const customInitCwd = path_1.default.join(os_1.default.tmpdir(), 'custom-init-cwd');
1711
+ process.env.INIT_CWD = customInitCwd;
1712
+ const customConfigPath = path_1.default.join(customInitCwd, CONFIG_FILE_NAME);
1713
+ const customDebatePath = path_1.default.join(customInitCwd, DEBATE_FILE_NAME);
1714
+ fs_1.default.mkdirSync(customInitCwd, { recursive: true });
1715
+ fs_1.default.writeFileSync(customConfigPath, JSON.stringify({
1716
+ agents: [createBasicAgentConfig()]
1717
+ }));
1718
+ fs_1.default.writeFileSync(customDebatePath, JSON.stringify(createBasicDebateData()));
1719
+ await (0, index_1.runCli)(['eval', '--config', CONFIG_FILE_NAME, '--debate', DEBATE_FILE_NAME]);
1720
+ fs_1.default.rmSync(customInitCwd, { recursive: true, force: true });
1721
+ });
1722
+ it('should use process.cwd() when INIT_CWD is not set', async () => {
1723
+ delete process.env.INIT_CWD;
1724
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
1725
+ expect(stdoutSpy).toHaveBeenCalled();
1726
+ });
1727
+ it('should handle absolute paths regardless of INIT_CWD', async () => {
1728
+ process.env.INIT_CWD = '/some/other/path';
1729
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
1730
+ expect(stdoutSpy).toHaveBeenCalled();
1731
+ });
1732
+ });
1733
+ describe('CSV escaping edge cases', () => {
1734
+ let configPath;
1735
+ let debatePath;
1736
+ beforeEach(() => {
1737
+ configPath = path_1.default.join(tmpDir, CONFIG_FILE_NAME);
1738
+ debatePath = path_1.default.join(tmpDir, DEBATE_FILE_NAME);
1739
+ fs_1.default.writeFileSync(configPath, JSON.stringify({
1740
+ agents: [createBasicAgentConfig()]
1741
+ }));
1742
+ fs_1.default.writeFileSync(debatePath, JSON.stringify(createBasicDebateData()));
1743
+ setupMockProviderAndEvaluator();
1744
+ });
1745
+ it('should escape CSV values containing double quotes', async () => {
1746
+ const outputPath = path_1.default.join(tmpDir, 'results.csv');
1747
+ jest.spyOn(dialectic_core_1.EvaluatorAgent.prototype, 'evaluate').mockResolvedValue({
1748
+ id: 'e1',
1749
+ rawText: JSON.stringify({
1750
+ evaluation: { functional_completeness: { score: 8 } },
1751
+ overall_summary: { overall_score: 8 }
1752
+ }),
1753
+ latencyMs: 100
1754
+ });
1755
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath, '--output', outputPath]);
1756
+ const content = fs_1.default.readFileSync(outputPath, 'utf-8');
1757
+ expect(content).toContain('debate');
1758
+ });
1759
+ it('should escape CSV values containing commas', async () => {
1760
+ const outputPath = path_1.default.join(tmpDir, 'results.csv');
1761
+ const debatePathWithComma = path_1.default.join(tmpDir, 'debate,with,commas.json');
1762
+ try {
1763
+ fs_1.default.writeFileSync(debatePathWithComma, JSON.stringify({
1764
+ problem: 'Test',
1765
+ finalSolution: { description: 'Solution' }
1766
+ }));
1767
+ jest.spyOn(dialectic_core_1.EvaluatorAgent.prototype, 'evaluate').mockResolvedValue({
1768
+ id: 'e1',
1769
+ rawText: JSON.stringify({
1770
+ evaluation: { functional_completeness: { score: 8 } },
1771
+ overall_summary: { overall_score: 8 }
1772
+ }),
1773
+ latencyMs: 100
1774
+ });
1775
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePathWithComma, '--output', outputPath]);
1776
+ const content = fs_1.default.readFileSync(outputPath, 'utf-8');
1777
+ expect(content).toMatch(/^"debate,with,commas",/);
1778
+ }
1779
+ catch {
1780
+ }
1781
+ });
1782
+ });
1783
+ describe('JSON parsing edge cases', () => {
1784
+ let configPath;
1785
+ let debatePath;
1786
+ beforeEach(() => {
1787
+ configPath = path_1.default.join(tmpDir, CONFIG_FILE_NAME);
1788
+ debatePath = path_1.default.join(tmpDir, DEBATE_FILE_NAME);
1789
+ fs_1.default.writeFileSync(configPath, JSON.stringify({
1790
+ agents: [createBasicAgentConfig()]
1791
+ }));
1792
+ fs_1.default.writeFileSync(debatePath, JSON.stringify(createBasicDebateData()));
1793
+ setupMockProviderAndEvaluator();
1794
+ });
1795
+ it('should handle JSON parsing failure gracefully', async () => {
1796
+ jest.spyOn(dialectic_core_1.EvaluatorAgent.prototype, 'evaluate').mockResolvedValue({
1797
+ id: 'e1',
1798
+ rawText: '{ invalid json }',
1799
+ latencyMs: 100
1800
+ });
1801
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
1802
+ expect(stderrSpy).toHaveBeenCalledWith(expect.stringContaining('[e1] Invalid JSON output; skipping agent'));
1803
+ });
1804
+ it('should handle text without JSON object', async () => {
1805
+ jest.spyOn(dialectic_core_1.EvaluatorAgent.prototype, 'evaluate').mockResolvedValue({
1806
+ id: 'e1',
1807
+ rawText: 'This is just plain text with no JSON',
1808
+ latencyMs: 100
1809
+ });
1810
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
1811
+ expect(stderrSpy).toHaveBeenCalledWith(expect.stringContaining('[e1] Invalid JSON output; skipping agent'));
1812
+ });
1813
+ it('should extract JSON from text with multiple JSON objects', async () => {
1814
+ const responseWithMultipleJson = 'First: {"foo": "bar"}\n' +
1815
+ 'Second: {"evaluation":{"functional_completeness":{"score":8}},"overall_summary":{"overall_score":8}}\n' +
1816
+ 'Third: {"baz": "qux"}';
1817
+ jest.spyOn(dialectic_core_1.EvaluatorAgent.prototype, 'evaluate').mockResolvedValue({
1818
+ id: 'e1',
1819
+ rawText: responseWithMultipleJson,
1820
+ latencyMs: 100
1821
+ });
1822
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
1823
+ const output = stdoutSpy.mock.calls.join('');
1824
+ expect(output).toContain('N/A');
1825
+ });
1826
+ });
1827
+ describe('Empty clarifications edge cases', () => {
1828
+ let configPath;
1829
+ let debatePath;
1830
+ beforeEach(() => {
1831
+ configPath = path_1.default.join(tmpDir, CONFIG_FILE_NAME);
1832
+ debatePath = path_1.default.join(tmpDir, DEBATE_FILE_NAME);
1833
+ fs_1.default.writeFileSync(configPath, JSON.stringify({
1834
+ agents: [createBasicAgentConfig()]
1835
+ }));
1836
+ setupMockProviderAndEvaluator();
1837
+ mockSuccessfulEvaluation();
1838
+ });
1839
+ it('should handle clarifications array that exists but is empty', async () => {
1840
+ fs_1.default.writeFileSync(debatePath, JSON.stringify({
1841
+ ...createBasicDebateData(TEST_PROBLEM_SHORT, TEST_SOLUTION_SHORT),
1842
+ clarifications: []
1843
+ }));
1844
+ const evaluateSpy = mockSuccessfulEvaluation();
1845
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
1846
+ expect(evaluateSpy).toHaveBeenCalled();
1847
+ const call = evaluateSpy.mock.calls[0]?.[0];
1848
+ expect(call).toBeDefined();
1849
+ expect(call?.clarificationsMarkdown).toMatch(/```.*```/);
1850
+ });
1851
+ it('should handle clarifications with whitespace-only content', async () => {
1852
+ fs_1.default.writeFileSync(debatePath, JSON.stringify({
1853
+ ...createBasicDebateData(TEST_PROBLEM_SHORT, TEST_SOLUTION_SHORT),
1854
+ clarifications: [
1855
+ {
1856
+ agentId: 'architect-1',
1857
+ agentName: 'Architect',
1858
+ role: 'architect',
1859
+ items: [
1860
+ { id: 'q1', question: ' ', answer: ' ' }
1861
+ ]
1862
+ }
1863
+ ]
1864
+ }));
1865
+ const evaluateSpy = mockSuccessfulEvaluation();
1866
+ await (0, index_1.runCli)(['eval', '--config', configPath, '--debate', debatePath]);
1867
+ expect(evaluateSpy).toHaveBeenCalled();
1868
+ const call = evaluateSpy.mock.calls[0]?.[0];
1869
+ expect(call).toBeDefined();
1870
+ expect(call?.clarificationsMarkdown).toContain('### Architect (architect)');
1871
+ expect(call?.clarificationsMarkdown).toContain('q1');
1872
+ });
1267
1873
  });
1268
1874
  });
1269
1875
  //# sourceMappingURL=eval.spec.js.map