rippletide 1.0.17 → 1.0.20
This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- package/.claude/settings.local.json +13 -0
- package/dist/App.js +36 -19
- package/dist/api/evaluation.js +13 -7
- package/dist/components/Header.js +2 -0
- package/dist/index.js +2 -2
- package/package.json +1 -1
- package/src/App.tsx +35 -18
- package/src/api/evaluation.ts +18 -11
- package/src/components/Header.tsx +2 -0
- package/src/index.tsx +2 -2
package/dist/App.js
CHANGED
|
@@ -13,7 +13,7 @@ import { BaseError, ValidationError } from './errors/types.js';
|
|
|
13
13
|
import { logger } from './utils/logger.js';
|
|
14
14
|
import { analytics } from './utils/analytics.js';
|
|
15
15
|
const knowledgeSources = [
|
|
16
|
-
{ label: 'Local Files (qanda.json)', value: 'files', description: 'Use qanda.json from current directory' },
|
|
16
|
+
{ label: 'Local Files (qanda.json)', value: 'files', description: 'Use a question-answer file (qanda.json) from the current directory' },
|
|
17
17
|
{ label: 'PDF Document', value: 'pdf', description: 'Upload and extract knowledge from a PDF file' },
|
|
18
18
|
{ label: 'Pinecone', value: 'pinecone', description: 'Fetch Q&A from Pinecone database' },
|
|
19
19
|
{ label: 'PostgreSQL Database', value: 'postgresql', description: 'Connect to PostgreSQL database' },
|
|
@@ -190,7 +190,7 @@ export const App = ({ backendUrl, dashboardUrl, nonInteractive, agentEndpoint: i
|
|
|
190
190
|
passed: 0,
|
|
191
191
|
failed: 0,
|
|
192
192
|
duration: 'Failed',
|
|
193
|
-
evaluationUrl: dashboardUrl || 'https://
|
|
193
|
+
evaluationUrl: dashboardUrl || 'https://app.rippletide.com/eval',
|
|
194
194
|
error: errorMessage,
|
|
195
195
|
});
|
|
196
196
|
setStep('complete');
|
|
@@ -235,7 +235,7 @@ export const App = ({ backendUrl, dashboardUrl, nonInteractive, agentEndpoint: i
|
|
|
235
235
|
passed: 0,
|
|
236
236
|
failed: 0,
|
|
237
237
|
duration: 'Failed',
|
|
238
|
-
evaluationUrl: dashboardUrl || 'https://
|
|
238
|
+
evaluationUrl: dashboardUrl || 'https://app.rippletide.com/eval',
|
|
239
239
|
error: errorMessage,
|
|
240
240
|
});
|
|
241
241
|
setStep('complete');
|
|
@@ -273,7 +273,7 @@ export const App = ({ backendUrl, dashboardUrl, nonInteractive, agentEndpoint: i
|
|
|
273
273
|
passed: 0,
|
|
274
274
|
failed: 0,
|
|
275
275
|
duration: 'Failed',
|
|
276
|
-
evaluationUrl: dashboardUrl || 'https://
|
|
276
|
+
evaluationUrl: dashboardUrl || 'https://app.rippletide.com/eval',
|
|
277
277
|
error: errorMessage,
|
|
278
278
|
});
|
|
279
279
|
setStep('complete');
|
|
@@ -299,8 +299,9 @@ export const App = ({ backendUrl, dashboardUrl, nonInteractive, agentEndpoint: i
|
|
|
299
299
|
setCurrentAgentId(agentId);
|
|
300
300
|
}
|
|
301
301
|
setEvaluationProgress(30);
|
|
302
|
+
// Import knowledge based on source
|
|
303
|
+
let knowledgeData = null;
|
|
302
304
|
if (knowledgeSource === 'files') {
|
|
303
|
-
let knowledgeData = null;
|
|
304
305
|
if (templatePath) {
|
|
305
306
|
try {
|
|
306
307
|
if (isRemoteTemplate) {
|
|
@@ -334,17 +335,29 @@ export const App = ({ backendUrl, dashboardUrl, nonInteractive, agentEndpoint: i
|
|
|
334
335
|
}
|
|
335
336
|
}
|
|
336
337
|
}
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
338
|
+
}
|
|
339
|
+
else if (knowledgeSource === 'pinecone' && pineconeQAndA.length > 0) {
|
|
340
|
+
// Import Pinecone Q&A as knowledge
|
|
341
|
+
knowledgeData = pineconeQAndA;
|
|
342
|
+
}
|
|
343
|
+
else if (knowledgeSource === 'postgresql' && postgresqlQAndA.length > 0) {
|
|
344
|
+
// Import PostgreSQL Q&A as knowledge
|
|
345
|
+
knowledgeData = postgresqlQAndA;
|
|
346
|
+
}
|
|
347
|
+
else if (knowledgeSource === 'pdf' && pdfQAndA.length > 0) {
|
|
348
|
+
// Import PDF Q&A as knowledge
|
|
349
|
+
knowledgeData = pdfQAndA;
|
|
350
|
+
}
|
|
351
|
+
if (knowledgeData && Array.isArray(knowledgeData) && knowledgeData.length > 0) {
|
|
352
|
+
setEvaluationProgress(35);
|
|
353
|
+
try {
|
|
354
|
+
const importResult = await api.importKnowledge(agentId, knowledgeData);
|
|
355
|
+
logger.debug('Knowledge import result:', importResult);
|
|
356
|
+
await new Promise(resolve => setTimeout(resolve, 1000));
|
|
357
|
+
}
|
|
358
|
+
catch (error) {
|
|
359
|
+
logger.error('Failed to import knowledge:', error?.message || error);
|
|
360
|
+
logger.debug('Import error details:', error?.response?.data);
|
|
348
361
|
}
|
|
349
362
|
}
|
|
350
363
|
setEvaluationProgress(40);
|
|
@@ -458,7 +471,7 @@ export const App = ({ backendUrl, dashboardUrl, nonInteractive, agentEndpoint: i
|
|
|
458
471
|
passed,
|
|
459
472
|
failed,
|
|
460
473
|
duration: durationStr,
|
|
461
|
-
evaluationUrl: `${dashboardUrl || 'https://
|
|
474
|
+
evaluationUrl: `${dashboardUrl || 'https://app.rippletide.com/eval'}/eval/${agentId}`,
|
|
462
475
|
agentId,
|
|
463
476
|
};
|
|
464
477
|
analytics.track('evaluation_completed', {
|
|
@@ -480,7 +493,7 @@ export const App = ({ backendUrl, dashboardUrl, nonInteractive, agentEndpoint: i
|
|
|
480
493
|
passed: 0,
|
|
481
494
|
failed: 0,
|
|
482
495
|
duration: 'Failed',
|
|
483
|
-
evaluationUrl: dashboardUrl || 'https://
|
|
496
|
+
evaluationUrl: dashboardUrl || 'https://app.rippletide.com/eval',
|
|
484
497
|
error: errorMessage,
|
|
485
498
|
});
|
|
486
499
|
setStep('complete');
|
|
@@ -674,7 +687,11 @@ export const App = ({ backendUrl, dashboardUrl, nonInteractive, agentEndpoint: i
|
|
|
674
687
|
React.createElement(Text, { bold: true, color: "#eba1b5" }, "Choose your data source:")),
|
|
675
688
|
knowledgeFound && (React.createElement(Box, { marginBottom: 1 },
|
|
676
689
|
React.createElement(Text, { color: "white" }, "qanda.json found in current directory"))),
|
|
677
|
-
React.createElement(SelectMenu, { title: "Data Source", options: knowledgeSources, onSelect: handleSourceSelect })
|
|
690
|
+
React.createElement(SelectMenu, { title: "Data Source", options: knowledgeSources, onSelect: handleSourceSelect }),
|
|
691
|
+
React.createElement(Box, { marginTop: 1, flexDirection: "column" },
|
|
692
|
+
React.createElement(Text, { dimColor: true }, "Example qanda.json format:"),
|
|
693
|
+
React.createElement(Box, { paddingLeft: 2, flexDirection: "column" },
|
|
694
|
+
React.createElement(Text, { dimColor: true }, `[ { "question": "What are your hours?", "answer": "Mon-Fri 9am-6pm" } ]`))))),
|
|
678
695
|
step === 'pinecone-url' && (React.createElement(Box, { flexDirection: "column" },
|
|
679
696
|
React.createElement(TextInput, { label: "Pinecone database URL", placeholder: "https://sample-movies-02j22s8.svc.aped-4627-b74a.pinecone.io", onSubmit: handlePineconeUrlSubmit }))),
|
|
680
697
|
step === 'pinecone-api-key' && (React.createElement(Box, { flexDirection: "column" },
|
package/dist/api/evaluation.js
CHANGED
|
@@ -193,12 +193,13 @@ export async function runPromptEvaluation(agentId, promptId, promptText, agentEn
|
|
|
193
193
|
if (hallucinationResult.hallucinationLabel && hallucinationResult.hallucinationLabel !== '') {
|
|
194
194
|
payload.hallucinationLabel = hallucinationResult.hallucinationLabel;
|
|
195
195
|
}
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
196
|
+
// Always include hallucinationFindings, even if empty
|
|
197
|
+
payload.hallucinationFindings = hallucinationResult.hallucinationFindings || [];
|
|
198
|
+
if (payload.hallucinationFindings.length > 0) {
|
|
199
|
+
logger.debug(`Including ${payload.hallucinationFindings.length} hallucination findings`);
|
|
199
200
|
}
|
|
200
201
|
else {
|
|
201
|
-
logger.debug('No hallucination findings to include');
|
|
202
|
+
logger.debug('No hallucination findings to include (empty array)');
|
|
202
203
|
}
|
|
203
204
|
await client.post(`/api/agents/${agentId}/test-results/${promptId}`, payload);
|
|
204
205
|
logger.debug(`Stored test result for prompt ${promptId} with status ${status}`);
|
|
@@ -214,6 +215,8 @@ export async function runPromptEvaluation(agentId, promptId, promptText, agentEn
|
|
|
214
215
|
if (hallucinationResult.hallucinationLabel) {
|
|
215
216
|
minimalPayload.hallucinationLabel = hallucinationResult.hallucinationLabel;
|
|
216
217
|
}
|
|
218
|
+
// Always include hallucinationFindings, even if empty
|
|
219
|
+
minimalPayload.hallucinationFindings = hallucinationResult.hallucinationFindings || [];
|
|
217
220
|
await client.post(`/api/agents/${agentId}/test-results/${promptId}`, minimalPayload);
|
|
218
221
|
logger.debug(`Stored minimal test result for prompt ${promptId}`);
|
|
219
222
|
}
|
|
@@ -231,7 +234,8 @@ export async function runPromptEvaluation(agentId, promptId, promptText, agentEn
|
|
|
231
234
|
await client.post(`/api/agents/${agentId}/test-results/${promptId}`, {
|
|
232
235
|
status: 'passed',
|
|
233
236
|
response: llmResponse,
|
|
234
|
-
expectedAnswer: expectedAnswer || null
|
|
237
|
+
expectedAnswer: expectedAnswer || null,
|
|
238
|
+
hallucinationFindings: []
|
|
235
239
|
});
|
|
236
240
|
}
|
|
237
241
|
catch (storeError) {
|
|
@@ -265,7 +269,8 @@ export async function runPromptEvaluation(agentId, promptId, promptText, agentEn
|
|
|
265
269
|
await client.post(`/api/agents/${agentId}/test-results/${promptId}`, {
|
|
266
270
|
status: 'failed',
|
|
267
271
|
response: errorResponse,
|
|
268
|
-
expectedAnswer: expectedAnswer || null
|
|
272
|
+
expectedAnswer: expectedAnswer || null,
|
|
273
|
+
hallucinationFindings: []
|
|
269
274
|
});
|
|
270
275
|
logger.debug(`Stored failed result for prompt ${promptId}`);
|
|
271
276
|
}
|
|
@@ -274,7 +279,8 @@ export async function runPromptEvaluation(agentId, promptId, promptText, agentEn
|
|
|
274
279
|
logger.debug('Error details:', e?.response?.data);
|
|
275
280
|
try {
|
|
276
281
|
await client.post(`/api/agents/${agentId}/test-results/${promptId}`, {
|
|
277
|
-
status: 'failed'
|
|
282
|
+
status: 'failed',
|
|
283
|
+
hallucinationFindings: []
|
|
278
284
|
});
|
|
279
285
|
logger.debug(`Stored minimal failed result for prompt ${promptId}`);
|
|
280
286
|
}
|
package/dist/components/Header.js
CHANGED
|
@@ -3,5 +3,7 @@ import { Box, Text } from 'ink';
|
|
|
3
3
|
export const Header = () => {
|
|
4
4
|
return (React.createElement(Box, { flexDirection: "column", marginBottom: 2 },
|
|
5
5
|
React.createElement(Text, { bold: true, color: "#eba1b5" }, "Rippletide Evaluation"),
|
|
6
|
+
React.createElement(Text, { bold: true, color: "#eba1b5" }, "How It Works"),
|
|
7
|
+
React.createElement(Text, { dimColor: true }, "1. Connect your endpoint 2. Add your knowledge source 3. Run the evaluation"),
|
|
6
8
|
React.createElement(Text, { color: "gray" }, "\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501")));
|
|
7
9
|
};
|
package/dist/index.js
CHANGED
|
@@ -31,7 +31,7 @@ const parseArgs = async () => {
|
|
|
31
31
|
}
|
|
32
32
|
const options = {
|
|
33
33
|
backendUrl: 'https://agent-evalserver-production.up.railway.app',
|
|
34
|
-
dashboardUrl: 'https://
|
|
34
|
+
dashboardUrl: 'https://app.rippletide.com/eval',
|
|
35
35
|
debug: false,
|
|
36
36
|
nonInteractive: false
|
|
37
37
|
};
|
|
@@ -127,7 +127,7 @@ Options:
|
|
|
127
127
|
-a, --agent <url> Agent endpoint URL (e.g., localhost:8000)
|
|
128
128
|
-k, --knowledge <source> Knowledge source: files, pinecone, postgresql, or pdf (default: files)
|
|
129
129
|
-b, --backend-url <url> Backend API URL (default: https://agent-evalserver-production.up.railway.app)
|
|
130
|
-
-d, --dashboard-url <url> Dashboard URL (default: https://
|
|
130
|
+
-d, --dashboard-url <url> Dashboard URL (default: https://app.rippletide.com/eval)
|
|
131
131
|
|
|
132
132
|
Pinecone options:
|
|
133
133
|
-pu, --pinecone-url <url> Pinecone database URL
|
package/package.json
CHANGED
package/src/App.tsx
CHANGED
|
@@ -34,7 +34,7 @@ type Step =
|
|
|
34
34
|
| 'complete';
|
|
35
35
|
|
|
36
36
|
const knowledgeSources = [
|
|
37
|
-
{ label: 'Local Files (qanda.json)', value: 'files', description: 'Use qanda.json from current directory' },
|
|
37
|
+
{ label: 'Local Files (qanda.json)', value: 'files', description: 'Use a question-answer file (qanda.json) from the current directory' },
|
|
38
38
|
{ label: 'PDF Document', value: 'pdf', description: 'Upload and extract knowledge from a PDF file' },
|
|
39
39
|
{ label: 'Pinecone', value: 'pinecone', description: 'Fetch Q&A from Pinecone database' },
|
|
40
40
|
{ label: 'PostgreSQL Database', value: 'postgresql', description: 'Connect to PostgreSQL database' },
|
|
@@ -239,7 +239,7 @@ export const App: React.FC<AppProps> = ({
|
|
|
239
239
|
passed: 0,
|
|
240
240
|
failed: 0,
|
|
241
241
|
duration: 'Failed',
|
|
242
|
-
evaluationUrl: dashboardUrl || 'https://
|
|
242
|
+
evaluationUrl: dashboardUrl || 'https://app.rippletide.com/eval',
|
|
243
243
|
error: errorMessage,
|
|
244
244
|
});
|
|
245
245
|
setStep('complete');
|
|
@@ -293,7 +293,7 @@ export const App: React.FC<AppProps> = ({
|
|
|
293
293
|
passed: 0,
|
|
294
294
|
failed: 0,
|
|
295
295
|
duration: 'Failed',
|
|
296
|
-
evaluationUrl: dashboardUrl || 'https://
|
|
296
|
+
evaluationUrl: dashboardUrl || 'https://app.rippletide.com/eval',
|
|
297
297
|
error: errorMessage,
|
|
298
298
|
});
|
|
299
299
|
setStep('complete');
|
|
@@ -339,7 +339,7 @@ export const App: React.FC<AppProps> = ({
|
|
|
339
339
|
passed: 0,
|
|
340
340
|
failed: 0,
|
|
341
341
|
duration: 'Failed',
|
|
342
|
-
evaluationUrl: dashboardUrl || 'https://
|
|
342
|
+
evaluationUrl: dashboardUrl || 'https://app.rippletide.com/eval',
|
|
343
343
|
error: errorMessage,
|
|
344
344
|
});
|
|
345
345
|
setStep('complete');
|
|
@@ -370,8 +370,10 @@ export const App: React.FC<AppProps> = ({
|
|
|
370
370
|
|
|
371
371
|
setEvaluationProgress(30);
|
|
372
372
|
|
|
373
|
+
// Import knowledge based on source
|
|
374
|
+
let knowledgeData: any = null;
|
|
375
|
+
|
|
373
376
|
if (knowledgeSource === 'files') {
|
|
374
|
-
let knowledgeData: any = null;
|
|
375
377
|
if (templatePath) {
|
|
376
378
|
try {
|
|
377
379
|
if (isRemoteTemplate) {
|
|
@@ -401,17 +403,26 @@ export const App: React.FC<AppProps> = ({
|
|
|
401
403
|
}
|
|
402
404
|
}
|
|
403
405
|
}
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
406
|
+
} else if (knowledgeSource === 'pinecone' && pineconeQAndA.length > 0) {
|
|
407
|
+
// Import Pinecone Q&A as knowledge
|
|
408
|
+
knowledgeData = pineconeQAndA;
|
|
409
|
+
} else if (knowledgeSource === 'postgresql' && postgresqlQAndA.length > 0) {
|
|
410
|
+
// Import PostgreSQL Q&A as knowledge
|
|
411
|
+
knowledgeData = postgresqlQAndA;
|
|
412
|
+
} else if (knowledgeSource === 'pdf' && pdfQAndA.length > 0) {
|
|
413
|
+
// Import PDF Q&A as knowledge
|
|
414
|
+
knowledgeData = pdfQAndA;
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
if (knowledgeData && Array.isArray(knowledgeData) && knowledgeData.length > 0) {
|
|
418
|
+
setEvaluationProgress(35);
|
|
419
|
+
try {
|
|
420
|
+
const importResult = await api.importKnowledge(agentId, knowledgeData);
|
|
421
|
+
logger.debug('Knowledge import result:', importResult);
|
|
422
|
+
await new Promise(resolve => setTimeout(resolve, 1000));
|
|
423
|
+
} catch (error: any) {
|
|
424
|
+
logger.error('Failed to import knowledge:', error?.message || error);
|
|
425
|
+
logger.debug('Import error details:', error?.response?.data);
|
|
415
426
|
}
|
|
416
427
|
}
|
|
417
428
|
|
|
@@ -532,7 +543,7 @@ export const App: React.FC<AppProps> = ({
|
|
|
532
543
|
passed,
|
|
533
544
|
failed,
|
|
534
545
|
duration: durationStr,
|
|
535
|
-
evaluationUrl: `${dashboardUrl || 'https://
|
|
546
|
+
evaluationUrl: `${dashboardUrl || 'https://app.rippletide.com/eval'}/eval/${agentId}`,
|
|
536
547
|
agentId,
|
|
537
548
|
};
|
|
538
549
|
|
|
@@ -555,7 +566,7 @@ export const App: React.FC<AppProps> = ({
|
|
|
555
566
|
passed: 0,
|
|
556
567
|
failed: 0,
|
|
557
568
|
duration: 'Failed',
|
|
558
|
-
evaluationUrl: dashboardUrl || 'https://
|
|
569
|
+
evaluationUrl: dashboardUrl || 'https://app.rippletide.com/eval',
|
|
559
570
|
error: errorMessage,
|
|
560
571
|
});
|
|
561
572
|
setStep('complete');
|
|
@@ -833,6 +844,12 @@ export const App: React.FC<AppProps> = ({
|
|
|
833
844
|
options={knowledgeSources}
|
|
834
845
|
onSelect={handleSourceSelect}
|
|
835
846
|
/>
|
|
847
|
+
<Box marginTop={1} flexDirection="column">
|
|
848
|
+
<Text dimColor>Example qanda.json format:</Text>
|
|
849
|
+
<Box paddingLeft={2} flexDirection="column">
|
|
850
|
+
<Text dimColor>{`[ { "question": "What are your hours?", "answer": "Mon-Fri 9am-6pm" } ]`}</Text>
|
|
851
|
+
</Box>
|
|
852
|
+
</Box>
|
|
836
853
|
</Box>
|
|
837
854
|
)}
|
|
838
855
|
|
package/src/api/evaluation.ts
CHANGED
|
@@ -260,12 +260,13 @@ export async function runPromptEvaluation(
|
|
|
260
260
|
if (hallucinationResult.hallucinationLabel && hallucinationResult.hallucinationLabel !== '') {
|
|
261
261
|
payload.hallucinationLabel = hallucinationResult.hallucinationLabel;
|
|
262
262
|
}
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
263
|
+
|
|
264
|
+
// Always include hallucinationFindings, even if empty
|
|
265
|
+
payload.hallucinationFindings = hallucinationResult.hallucinationFindings || [];
|
|
266
|
+
if (payload.hallucinationFindings.length > 0) {
|
|
267
|
+
logger.debug(`Including ${payload.hallucinationFindings.length} hallucination findings`);
|
|
267
268
|
} else {
|
|
268
|
-
logger.debug('No hallucination findings to include');
|
|
269
|
+
logger.debug('No hallucination findings to include (empty array)');
|
|
269
270
|
}
|
|
270
271
|
|
|
271
272
|
await client.post(`/api/agents/${agentId}/test-results/${promptId}`, payload);
|
|
@@ -279,11 +280,14 @@ export async function runPromptEvaluation(
|
|
|
279
280
|
response: llmResponse,
|
|
280
281
|
expectedAnswer: expectedAnswer || null
|
|
281
282
|
};
|
|
282
|
-
|
|
283
|
+
|
|
283
284
|
if (hallucinationResult.hallucinationLabel) {
|
|
284
285
|
minimalPayload.hallucinationLabel = hallucinationResult.hallucinationLabel;
|
|
285
286
|
}
|
|
286
|
-
|
|
287
|
+
|
|
288
|
+
// Always include hallucinationFindings, even if empty
|
|
289
|
+
minimalPayload.hallucinationFindings = hallucinationResult.hallucinationFindings || [];
|
|
290
|
+
|
|
287
291
|
await client.post(`/api/agents/${agentId}/test-results/${promptId}`, minimalPayload);
|
|
288
292
|
logger.debug(`Stored minimal test result for prompt ${promptId}`);
|
|
289
293
|
}
|
|
@@ -301,7 +305,8 @@ export async function runPromptEvaluation(
|
|
|
301
305
|
await client.post(`/api/agents/${agentId}/test-results/${promptId}`, {
|
|
302
306
|
status: 'passed',
|
|
303
307
|
response: llmResponse,
|
|
304
|
-
expectedAnswer: expectedAnswer || null
|
|
308
|
+
expectedAnswer: expectedAnswer || null,
|
|
309
|
+
hallucinationFindings: []
|
|
305
310
|
});
|
|
306
311
|
} catch (storeError) {
|
|
307
312
|
logger.error('Could not store passed result:', storeError);
|
|
@@ -339,16 +344,18 @@ export async function runPromptEvaluation(
|
|
|
339
344
|
await client.post(`/api/agents/${agentId}/test-results/${promptId}`, {
|
|
340
345
|
status: 'failed',
|
|
341
346
|
response: errorResponse,
|
|
342
|
-
expectedAnswer: expectedAnswer || null
|
|
347
|
+
expectedAnswer: expectedAnswer || null,
|
|
348
|
+
hallucinationFindings: []
|
|
343
349
|
});
|
|
344
350
|
logger.debug(`Stored failed result for prompt ${promptId}`);
|
|
345
351
|
} catch (e: any) {
|
|
346
352
|
logger.error('Failed to store failed result:', e?.message || e);
|
|
347
353
|
logger.debug('Error details:', e?.response?.data);
|
|
348
|
-
|
|
354
|
+
|
|
349
355
|
try {
|
|
350
356
|
await client.post(`/api/agents/${agentId}/test-results/${promptId}`, {
|
|
351
|
-
status: 'failed'
|
|
357
|
+
status: 'failed',
|
|
358
|
+
hallucinationFindings: []
|
|
352
359
|
});
|
|
353
360
|
logger.debug(`Stored minimal failed result for prompt ${promptId}`);
|
|
354
361
|
} catch (fallbackError) {
|
package/src/components/Header.tsx
CHANGED
|
@@ -5,6 +5,8 @@ export const Header: React.FC = () => {
|
|
|
5
5
|
return (
|
|
6
6
|
<Box flexDirection="column" marginBottom={2}>
|
|
7
7
|
<Text bold color="#eba1b5">Rippletide Evaluation</Text>
|
|
8
|
+
<Text bold color="#eba1b5">How It Works</Text>
|
|
9
|
+
<Text dimColor>1. Connect your endpoint 2. Add your knowledge source 3. Run the evaluation</Text>
|
|
8
10
|
<Text color="gray">━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━</Text>
|
|
9
11
|
</Box>
|
|
10
12
|
);
|
package/src/index.tsx
CHANGED
|
@@ -33,7 +33,7 @@ const parseArgs = async () => {
|
|
|
33
33
|
|
|
34
34
|
const options: any = {
|
|
35
35
|
backendUrl: 'https://agent-evalserver-production.up.railway.app',
|
|
36
|
-
dashboardUrl: 'https://
|
|
36
|
+
dashboardUrl: 'https://app.rippletide.com/eval',
|
|
37
37
|
debug: false,
|
|
38
38
|
nonInteractive: false
|
|
39
39
|
};
|
|
@@ -115,7 +115,7 @@ Options:
|
|
|
115
115
|
-a, --agent <url> Agent endpoint URL (e.g., localhost:8000)
|
|
116
116
|
-k, --knowledge <source> Knowledge source: files, pinecone, postgresql, or pdf (default: files)
|
|
117
117
|
-b, --backend-url <url> Backend API URL (default: https://agent-evalserver-production.up.railway.app)
|
|
118
|
-
-d, --dashboard-url <url> Dashboard URL (default: https://
|
|
118
|
+
-d, --dashboard-url <url> Dashboard URL (default: https://app.rippletide.com/eval)
|
|
119
119
|
|
|
120
120
|
Pinecone options:
|
|
121
121
|
-pu, --pinecone-url <url> Pinecone database URL
|