rippletide 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/babel.config.json +6 -0
- package/bin/rippletide +37 -0
- package/dist/App.d.ts +2 -0
- package/dist/App.js +174 -0
- package/dist/api/client.d.ts +58 -0
- package/dist/api/client.js +316 -0
- package/dist/components/Header.d.ts +2 -0
- package/dist/components/Header.js +7 -0
- package/dist/components/InputPrompt.d.ts +8 -0
- package/dist/components/InputPrompt.js +12 -0
- package/dist/components/ProgressBar.d.ts +7 -0
- package/dist/components/ProgressBar.js +17 -0
- package/dist/components/SelectMenu.d.ts +14 -0
- package/dist/components/SelectMenu.js +45 -0
- package/dist/components/Spinner.d.ts +6 -0
- package/dist/components/Spinner.js +19 -0
- package/dist/components/Summary.d.ts +10 -0
- package/dist/components/Summary.js +34 -0
- package/dist/components/TextInput.d.ts +8 -0
- package/dist/components/TextInput.js +12 -0
- package/dist/demo.d.ts +2 -0
- package/dist/demo.js +97 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +6 -0
- package/dist/utils/logger.d.ts +5 -0
- package/dist/utils/logger.js +18 -0
- package/package.json +38 -0
- package/qanda.json +22 -0
- package/src/App.tsx +266 -0
- package/src/api/client.ts +403 -0
- package/src/components/Header.tsx +11 -0
- package/src/components/ProgressBar.tsx +29 -0
- package/src/components/SelectMenu.tsx +81 -0
- package/src/components/Spinner.tsx +29 -0
- package/src/components/Summary.tsx +70 -0
- package/src/components/TextInput.tsx +30 -0
- package/src/index.tsx +8 -0
- package/src/utils/logger.ts +21 -0
- package/tsconfig.json +20 -0
package/bin/rippletide
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// Simple CLI dispatcher: `rippletide eval` (or just `rippletide`) runs the evaluator.
|
|
4
|
+
// Other commands can be added later if needed.
|
|
5
|
+
|
|
6
|
+
// The first positional argument selects the sub-command; any further arguments are not read.
const args = process.argv.slice(2);
const cmd = args[0];

/**
 * Dispatches a CLI invocation.
 *
 * - no command, or `eval`: loads the compiled Ink app from `../dist/index.js`
 * - `--help` / `-h`: prints the usage text to stdout
 * - anything else: reports the unknown command on stderr and exits with status 1
 *
 * @returns {Promise<void>} settles once the selected branch has been handled
 */
async function main() {
  if (!cmd || cmd === 'eval') {
    // Lazy-load compiled Ink app: the module is only imported on this branch.
    await import('../dist/index.js');
    return;
  }

  if (cmd === '--help' || cmd === '-h') {
    console.log(`
Rippletide CLI

Usage:
rippletide eval Run the Rippletide evaluation UI

Options:
-h, --help Show this help message
`);
    return;
  }

  // Both lines are diagnostics for a failed invocation, so both go to stderr;
  // stdout stays clean for callers that pipe or capture it.
  console.error(`Unknown command: ${cmd}`);
  console.error(`Run "rippletide --help" for usage.`);
  process.exit(1);
}

// Any rejection from main() (for example a failing dynamic import) ends the process with status 1.
main().catch((err) => {
  console.error('Error running rippletide:', err?.message || err);
  process.exit(1);
});
|
package/dist/App.d.ts
ADDED
package/dist/App.js
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
import React, { useState, useEffect } from 'react';
|
|
2
|
+
import { Box, Text } from 'ink';
|
|
3
|
+
import { Header } from './components/Header.js';
|
|
4
|
+
import { TextInput } from './components/TextInput.js';
|
|
5
|
+
import { SelectMenu } from './components/SelectMenu.js';
|
|
6
|
+
import { Spinner } from './components/Spinner.js';
|
|
7
|
+
import { ProgressBar } from './components/ProgressBar.js';
|
|
8
|
+
import { Summary } from './components/Summary.js';
|
|
9
|
+
import { api } from './api/client.js';
|
|
10
|
+
const knowledgeSources = [
|
|
11
|
+
{ label: 'Local Files (qanda.json)', value: 'files', description: 'Use qanda.json from current directory' },
|
|
12
|
+
{ label: 'Current Repository', value: 'repo', description: 'Scan current git repository', disabled: true },
|
|
13
|
+
{ label: 'Database', value: 'database', description: 'Connect to a database', disabled: true },
|
|
14
|
+
{ label: 'API Endpoint', value: 'api', description: 'Fetch from REST API', disabled: true },
|
|
15
|
+
{ label: 'GitHub Repository', value: 'github', description: 'Import from GitHub repo', disabled: true },
|
|
16
|
+
{ label: 'Skip (No Knowledge)', value: 'skip', description: 'Run tests without knowledge base', disabled: true },
|
|
17
|
+
];
|
|
18
|
+
/** Formats a whole number of seconds as `Ns`, or as `Mm Ns` from one full minute upwards. */
const formatDuration = (totalSeconds) => (totalSeconds >= 60
    ? `${Math.floor(totalSeconds / 60)}m ${totalSeconds % 60}s`
    : `${totalSeconds}s`);

/**
 * Root Ink component of the evaluation wizard.
 *
 * The `step` state drives both rendering and the two effects:
 * `agent-endpoint` -> `checking-knowledge` -> `select-source` ->
 * `running-evaluation` -> `complete`.
 */
export const App = () => {
    const [step, setStep] = useState('agent-endpoint');
    const [agentEndpoint, setAgentEndpoint] = useState('');
    const [knowledgeSource, setKnowledgeSource] = useState('');
    const [knowledgeFound, setKnowledgeFound] = useState(false);
    const [evaluationProgress, setEvaluationProgress] = useState(0);
    const [evaluationResult, setEvaluationResult] = useState(null);
    const [currentQuestion, setCurrentQuestion] = useState('');
    const [currentLLMResponse, setCurrentLLMResponse] = useState('');
    const [evaluationLogs, setEvaluationLogs] = useState([]);

    // Step "checking-knowledge": look for a knowledge file, then always move on to source selection.
    useEffect(() => {
        if (step !== 'checking-knowledge') {
            return;
        }
        (async () => {
            try {
                const result = await api.checkKnowledge();
                setKnowledgeFound(result.found);
            }
            catch (error) {
                console.error('Error checking knowledge:', error);
                setKnowledgeFound(false);
            }
            setStep('select-source');
        })();
    }, [step]);

    // Step "running-evaluation": API key -> agent -> test prompts -> per-prompt evaluation -> summary.
    useEffect(() => {
        if (step !== 'running-evaluation') {
            return;
        }
        (async () => {
            try {
                const startTime = Date.now();
                const logs = [];
                setEvaluationProgress(5);
                await api.generateApiKey('CLI Evaluation');
                setEvaluationProgress(10);
                const agent = await api.createAgent(agentEndpoint);
                const agentId = agent.id;
                setEvaluationProgress(40);

                let testPrompts = [];
                if (knowledgeSource === 'files') {
                    const knowledgeResult = await api.checkKnowledge();
                    if (knowledgeResult.found && knowledgeResult.path) {
                        try {
                            const fs = await import('fs');
                            const knowledgeData = JSON.parse(fs.readFileSync(knowledgeResult.path, 'utf-8'));
                            if (Array.isArray(knowledgeData)) {
                                // Only the first five entries are used; several key spellings are accepted.
                                testPrompts = knowledgeData.slice(0, 5).map((item) => ({
                                    question: item.question || item.prompt || item.input || 'Test question',
                                    answer: item.answer || item.response || item.expectedAnswer
                                }));
                            }
                        }
                        catch (error) {
                            // Best effort: an unreadable or malformed file falls back to an empty
                            // prompt list, but the reason is reported instead of being dropped.
                            console.error('Error loading prompts from knowledge file:', error);
                            testPrompts = [];
                        }
                    }
                }

                const createdPrompts = await api.addTestPrompts(agentId, testPrompts);
                setEvaluationProgress(50);

                const evaluationResults = await api.runAllPromptEvaluations(agentId, createdPrompts, agentEndpoint, (current, total, question, llmResponse) => {
                    // Prompt evaluation covers the 50%..90% band of the progress bar.
                    const progress = 50 + Math.round((current / total) * 40);
                    setEvaluationProgress(progress);
                    if (question) {
                        setCurrentQuestion(question);
                    }
                    if (llmResponse) {
                        setCurrentLLMResponse(llmResponse);
                        logs.push({ question: question || '', response: llmResponse });
                        setEvaluationLogs([...logs]);
                    }
                });
                setEvaluationProgress(100);

                let passed = 0;
                let failed = 0;
                evaluationResults.forEach((result) => {
                    if (result.success) {
                        passed++;
                    }
                    else {
                        failed++;
                    }
                });

                const duration = Math.round((Date.now() - startTime) / 1000);
                const result = {
                    totalTests: createdPrompts.length,
                    passed,
                    failed,
                    duration: formatDuration(duration),
                    evaluationUrl: `http://localhost:5173/eval/${agentId}`,
                    agentId,
                };
                setEvaluationResult(result);
                setStep('complete');
            }
            catch (error) {
                // Any failing step still ends on the summary screen, with zeroed counters.
                console.error('Error running evaluation:', error);
                setEvaluationResult({
                    totalTests: 0,
                    passed: 0,
                    failed: 0,
                    duration: 'Failed',
                    evaluationUrl: 'http://localhost:5173',
                });
                setStep('complete');
            }
        })();
    }, [step, agentEndpoint, knowledgeSource]);

    const handleAgentEndpointSubmit = (value) => {
        setAgentEndpoint(value);
        setStep('checking-knowledge');
    };
    const handleSourceSelect = (value) => {
        setKnowledgeSource(value);
        setStep('running-evaluation');
    };

    return (React.createElement(Box, { flexDirection: "column", padding: 1 },
        React.createElement(Header, null),
        step === 'agent-endpoint' && (React.createElement(Box, { flexDirection: "column" },
            React.createElement(TextInput, { label: "Agent endpoint", placeholder: "http://localhost:8000", onSubmit: handleAgentEndpointSubmit }))),
        step === 'checking-knowledge' && (React.createElement(Box, { flexDirection: "column" },
            React.createElement(Spinner, { label: "Checking for knowledge base in current folder..." }))),
        step === 'select-source' && (React.createElement(Box, { flexDirection: "column" },
            React.createElement(Box, { marginBottom: 1 },
                React.createElement(Text, { bold: true, color: "#eba1b5" }, "Choose your data source:")),
            knowledgeFound && (React.createElement(Box, { marginBottom: 1 },
                React.createElement(Text, { color: "white" }, "qanda.json found in current directory"))),
            React.createElement(SelectMenu, { title: "Data Source", options: knowledgeSources, onSelect: handleSourceSelect }))),
        step === 'running-evaluation' && (React.createElement(Box, { flexDirection: "column" },
            React.createElement(Box, { marginBottom: 2 },
                React.createElement(Spinner, { label: "Running evaluation" })),
            React.createElement(Box, { flexDirection: "column", marginBottom: 1 },
                React.createElement(Box, null,
                    React.createElement(Box, { width: 12 },
                        React.createElement(Text, { dimColor: true }, "Endpoint:")),
                    React.createElement(Text, null, agentEndpoint || 'http://localhost:8000')),
                React.createElement(Box, null,
                    React.createElement(Box, { width: 12 },
                        React.createElement(Text, { dimColor: true }, "Data Source:")),
                    React.createElement(Text, null, knowledgeSources.find(s => s.value === knowledgeSource)?.label || 'None'))),
            React.createElement(ProgressBar, { progress: evaluationProgress }),
            currentQuestion && (React.createElement(Box, { flexDirection: "column", marginTop: 1, marginBottom: 1 },
                React.createElement(Box, null,
                    React.createElement(Text, { bold: true, color: "#eba1b5" }, "Current Question:")),
                React.createElement(Box, { paddingLeft: 2 },
                    React.createElement(Text, { color: "white" }, currentQuestion)))),
            currentLLMResponse && (React.createElement(Box, { flexDirection: "column", marginTop: 1 },
                React.createElement(Box, null,
                    React.createElement(Text, { bold: true, color: "green" }, "LLM Response:")),
                React.createElement(Box, { paddingLeft: 2 },
                    React.createElement(Text, null, currentLLMResponse.length > 200 ? currentLLMResponse.substring(0, 200) + '...' : currentLLMResponse)))))),
        step === 'complete' && evaluationResult && (React.createElement(Box, { flexDirection: "column" },
            React.createElement(Summary, { totalTests: evaluationResult.totalTests, passed: evaluationResult.passed, failed: evaluationResult.failed, duration: evaluationResult.duration, evaluationUrl: evaluationResult.evaluationUrl })))));
};
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/** Input accepted by `api.runEvaluation`. */
export interface EvaluationConfig {
    /** URL of the agent under test. */
    agentEndpoint: string;

    /** Optional knowledge source identifier. */
    knowledgeSource?: string;
}
|
|
5
|
+
/** Summary of one evaluation run. */
export interface EvaluationResult {
    /** Number of test prompts in the run. */
    totalTests: number;

    /** Number of prompts counted as passed. */
    passed: number;

    /** Number of prompts counted as failed. */
    failed: number;

    /** Human-readable run duration. */
    duration: string;

    /** URL associated with the run's results. */
    evaluationUrl: string;

    /** Identifier of the agent the run was executed against, when available. */
    agentId?: string;
}
|
|
13
|
+
/** Result type of `api.checkHallucination`. */
export interface HallucinationCheckResult {
    /** Question that was evaluated. */
    question: string;

    /** Answer text that was checked. */
    llmResponse: string;

    /** Summary text; presumably produced by the checking service (not verifiable from this declaration). */
    summary: string;

    /** List of fact strings; exact meaning is defined by the checking service. */
    facts: string[];

    /** Outcome of the check. */
    status: 'passed' | 'failed' | 'ambiguous';

    /** Label describing the hallucination classification. */
    hallucinationLabel: string;

    /** Findings attached to the check; element shape is not declared here. */
    hallucinationFindings: any[];
}
|
|
22
|
+
/** Outcome of evaluating a single prompt. */
export interface PromptEvaluationResult {
    /** Whether the prompt counted as passed. */
    success: boolean;

    /** The prompt text that was evaluated. */
    question: string;

    /** The agent's answer, when one was obtained. */
    llmResponse?: string;

    /** The hallucination check result, when the check was performed. */
    hallucinationResult?: HallucinationCheckResult;

    /** The error associated with a failed evaluation, when there is one. */
    error?: any;
}
|
|
29
|
+
/** Client for the evaluation backend plus helpers for running an evaluation end to end. */
export declare const api: {
    /** Requests an API key for the CLI; `name` is an optional label for the key. */
    generateApiKey(name?: string): Promise<any>;

    /** Queries the backend health endpoint and resolves to its response body. */
    healthCheck(): Promise<any>;

    /**
     * Looks for a knowledge file under `folderPath`.
     * Resolves to `{ found: true, path }` when one exists, otherwise to `{ found: false }`.
     */
    checkKnowledge(folderPath?: string): Promise<{
        found: boolean;
        path: string;
    } | {
        found: boolean;
        path?: undefined;
    }>;

    /** Registers an agent whose public URL is `publicUrl` and resolves to the created record. */
    createAgent(publicUrl: string): Promise<any>;

    /** Uploads `knowledgeData` for the given agent. */
    importKnowledge(agentId: string, knowledgeData: any): Promise<any>;

    /**
     * Registers test prompts for the agent. Prompts may be plain question strings
     * or `{ question, answer? }` pairs.
     */
    addTestPrompts(agentId: string, prompts?: string[] | Array<{
        question: string;
        answer?: string;
    }>): Promise<any>;

    /** Checks `llmResponse` to `question` for hallucinations, optionally against `expectedAnswer`. */
    checkHallucination(agentId: string, question: string, llmResponse: string, expectedAnswer?: string): Promise<HallucinationCheckResult>;

    /** Sends `question` to the agent endpoint and resolves to the answer text. */
    callLLMEndpoint(agentEndpoint: string, question: string): Promise<string>;

    /**
     * Evaluates one prompt. `onLLMResponse`, when given, receives the agent's answer.
     */
    runPromptEvaluation(agentId: string, promptId: number, promptText: string, agentEndpoint: string, expectedAnswer?: string, onLLMResponse?: (response: string) => void): Promise<PromptEvaluationResult>;

    /**
     * Evaluates every prompt in `prompts`, reporting progress through `onProgress`
     * as `(current, total, question?, llmResponse?)`.
     */
    runAllPromptEvaluations(agentId: string, prompts: any[], agentEndpoint: string, onProgress?: (current: number, total: number, question?: string, llmResponse?: string) => void): Promise<PromptEvaluationResult[]>;

    /** Fetches the stored test results of the agent. */
    getTestResults(agentId: string): Promise<any>;

    /**
     * Runs a complete evaluation for `config`, reporting coarse progress (a percentage)
     * through `onProgress`, and resolves to a run summary.
     */
    runEvaluation(config: EvaluationConfig, onProgress?: (progress: number) => void): Promise<{
        totalTests: any;
        passed: number;
        failed: number;
        duration: string;
        evaluationUrl: string;
        agentId: any;
    }>;
};
|
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import * as fs from 'fs';
|
|
3
|
+
import * as path from 'path';
|
|
4
|
+
import { logger } from '../utils/logger.js';
|
|
5
|
+
/** Base URL that every request made through `client` is resolved against. */
const BASE_URL = 'http://localhost:3001';

/** API key attached to outgoing requests once set; `null` until then. */
let API_KEY = null;

/** Shared axios instance that sends JSON to `BASE_URL`. */
const client = axios.create({
    baseURL: BASE_URL,
    headers: { 'Content-Type': 'application/json' },
});

/** Request interceptor: adds the `x-api-key` header whenever a key is currently set. */
function attachApiKey(config) {
    if (!API_KEY) {
        return config;
    }
    config.headers['x-api-key'] = API_KEY;
    return config;
}

client.interceptors.request.use(attachApiKey);
|
|
19
|
+
export const api = {
|
|
20
|
+
async generateApiKey(name) {
|
|
21
|
+
try {
|
|
22
|
+
const response = await client.post('/api/api-keys/generate-cli', {
|
|
23
|
+
name: name || 'CLI Evaluation Key'
|
|
24
|
+
});
|
|
25
|
+
API_KEY = response.data.apiKey;
|
|
26
|
+
logger.info('API key generated successfully');
|
|
27
|
+
logger.debug('API Key:', API_KEY?.substring(0, 12) + '...');
|
|
28
|
+
return response.data;
|
|
29
|
+
}
|
|
30
|
+
catch (error) {
|
|
31
|
+
logger.error('Error generating API key:', error);
|
|
32
|
+
throw error;
|
|
33
|
+
}
|
|
34
|
+
},
|
|
35
|
+
async healthCheck() {
|
|
36
|
+
const response = await client.get('/health');
|
|
37
|
+
return response.data;
|
|
38
|
+
},
|
|
39
|
+
async checkKnowledge(folderPath = '.') {
|
|
40
|
+
try {
|
|
41
|
+
const knowledgeFiles = [
|
|
42
|
+
'knowledge-base/qanda.json',
|
|
43
|
+
'qanda.json',
|
|
44
|
+
'knowledge.json',
|
|
45
|
+
];
|
|
46
|
+
for (const file of knowledgeFiles) {
|
|
47
|
+
const filePath = path.join(folderPath, file);
|
|
48
|
+
if (fs.existsSync(filePath)) {
|
|
49
|
+
return { found: true, path: filePath };
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
return { found: false };
|
|
53
|
+
}
|
|
54
|
+
catch (error) {
|
|
55
|
+
logger.error('Error checking knowledge:', error);
|
|
56
|
+
return { found: false };
|
|
57
|
+
}
|
|
58
|
+
},
|
|
59
|
+
async createAgent(publicUrl) {
|
|
60
|
+
try {
|
|
61
|
+
const response = await client.post('/api/agents', {
|
|
62
|
+
name: `Agent Eval ${Date.now()}`,
|
|
63
|
+
seed: Math.floor(Math.random() * 1000),
|
|
64
|
+
numNodes: 100,
|
|
65
|
+
publicUrl: publicUrl,
|
|
66
|
+
label: 'eval',
|
|
67
|
+
});
|
|
68
|
+
return response.data;
|
|
69
|
+
}
|
|
70
|
+
catch (error) {
|
|
71
|
+
logger.error('Error creating agent:', error);
|
|
72
|
+
throw error;
|
|
73
|
+
}
|
|
74
|
+
},
|
|
75
|
+
async importKnowledge(agentId, knowledgeData) {
|
|
76
|
+
try {
|
|
77
|
+
const response = await client.post(`/api/agents/${agentId}/knowledge/import`, {
|
|
78
|
+
data: knowledgeData,
|
|
79
|
+
});
|
|
80
|
+
return response.data;
|
|
81
|
+
}
|
|
82
|
+
catch (error) {
|
|
83
|
+
logger.error('Error importing knowledge:', error);
|
|
84
|
+
return null;
|
|
85
|
+
}
|
|
86
|
+
},
|
|
87
|
+
async addTestPrompts(agentId, prompts) {
|
|
88
|
+
try {
|
|
89
|
+
const defaultPrompts = [
|
|
90
|
+
'What can you help me with?',
|
|
91
|
+
'Tell me about your capabilities',
|
|
92
|
+
'How do I get started?',
|
|
93
|
+
'What features do you support?',
|
|
94
|
+
'Can you explain your main functionality?',
|
|
95
|
+
];
|
|
96
|
+
let promptsArray;
|
|
97
|
+
if (!prompts || prompts.length === 0) {
|
|
98
|
+
promptsArray = defaultPrompts.map(p => ({ prompt: p, expectedAnswer: null }));
|
|
99
|
+
}
|
|
100
|
+
else if (typeof prompts[0] === 'string') {
|
|
101
|
+
promptsArray = prompts.map(p => ({ prompt: p, expectedAnswer: null }));
|
|
102
|
+
}
|
|
103
|
+
else {
|
|
104
|
+
promptsArray = prompts.map(p => ({
|
|
105
|
+
prompt: p.question,
|
|
106
|
+
expectedAnswer: p.answer || null,
|
|
107
|
+
}));
|
|
108
|
+
}
|
|
109
|
+
const response = await client.post(`/api/agents/${agentId}/test-prompts`, {
|
|
110
|
+
prompts: promptsArray,
|
|
111
|
+
});
|
|
112
|
+
return response.data;
|
|
113
|
+
}
|
|
114
|
+
catch (error) {
|
|
115
|
+
logger.error('Error adding test prompts:', error);
|
|
116
|
+
if (error.response) {
|
|
117
|
+
logger.debug('Response data:', error.response.data);
|
|
118
|
+
logger.debug('Response status:', error.response.status);
|
|
119
|
+
}
|
|
120
|
+
throw error;
|
|
121
|
+
}
|
|
122
|
+
},
|
|
123
|
+
async checkHallucination(agentId, question, llmResponse, expectedAnswer) {
|
|
124
|
+
const response = await client.post(`/api/agents/${agentId}/hallucination`, {
|
|
125
|
+
question,
|
|
126
|
+
llmResponse,
|
|
127
|
+
expectedAnswer
|
|
128
|
+
});
|
|
129
|
+
return response.data;
|
|
130
|
+
},
|
|
131
|
+
async callLLMEndpoint(agentEndpoint, question) {
|
|
132
|
+
try {
|
|
133
|
+
const llmClient = axios.create({
|
|
134
|
+
timeout: 60000,
|
|
135
|
+
});
|
|
136
|
+
const response = await llmClient.post(agentEndpoint, {
|
|
137
|
+
message: question,
|
|
138
|
+
query: question,
|
|
139
|
+
question: question,
|
|
140
|
+
prompt: question,
|
|
141
|
+
});
|
|
142
|
+
let llmResponse = '';
|
|
143
|
+
if (typeof response.data === 'string') {
|
|
144
|
+
llmResponse = response.data;
|
|
145
|
+
}
|
|
146
|
+
else if (response.data.response) {
|
|
147
|
+
llmResponse = response.data.response;
|
|
148
|
+
}
|
|
149
|
+
else if (response.data.message) {
|
|
150
|
+
llmResponse = response.data.message;
|
|
151
|
+
}
|
|
152
|
+
else if (response.data.answer) {
|
|
153
|
+
llmResponse = response.data.answer;
|
|
154
|
+
}
|
|
155
|
+
else if (response.data.text) {
|
|
156
|
+
llmResponse = response.data.text;
|
|
157
|
+
}
|
|
158
|
+
else {
|
|
159
|
+
llmResponse = JSON.stringify(response.data);
|
|
160
|
+
}
|
|
161
|
+
return llmResponse;
|
|
162
|
+
}
|
|
163
|
+
catch (error) {
|
|
164
|
+
logger.error('Error calling LLM endpoint:', error?.message || error);
|
|
165
|
+
throw new Error(`Failed to call LLM endpoint: ${error?.message || 'Unknown error'}`);
|
|
166
|
+
}
|
|
167
|
+
},
|
|
168
|
+
async runPromptEvaluation(agentId, promptId, promptText, agentEndpoint, expectedAnswer, onLLMResponse) {
|
|
169
|
+
try {
|
|
170
|
+
logger.info(`Calling LLM for question: ${promptText}`);
|
|
171
|
+
const llmResponse = await api.callLLMEndpoint(agentEndpoint, promptText);
|
|
172
|
+
if (onLLMResponse) {
|
|
173
|
+
onLLMResponse(llmResponse);
|
|
174
|
+
}
|
|
175
|
+
logger.info(`LLM Response: ${llmResponse.substring(0, 100)}...`);
|
|
176
|
+
const hallucinationResult = await api.checkHallucination(agentId, promptText, llmResponse, expectedAnswer);
|
|
177
|
+
const status = hallucinationResult.status === 'passed' ? 'passed' : 'failed';
|
|
178
|
+
await client.post(`/api/agents/${agentId}/test-results/${promptId}`, {
|
|
179
|
+
status,
|
|
180
|
+
response: llmResponse,
|
|
181
|
+
hallucinationLabel: hallucinationResult.hallucinationLabel,
|
|
182
|
+
hallucinationFindings: hallucinationResult.hallucinationFindings
|
|
183
|
+
});
|
|
184
|
+
return {
|
|
185
|
+
success: status === 'passed',
|
|
186
|
+
question: promptText,
|
|
187
|
+
llmResponse,
|
|
188
|
+
hallucinationResult
|
|
189
|
+
};
|
|
190
|
+
}
|
|
191
|
+
catch (error) {
|
|
192
|
+
logger.debug(`Error running prompt ${promptId}:`, error?.response?.data || error.message);
|
|
193
|
+
try {
|
|
194
|
+
await client.post(`/api/agents/${agentId}/test-results/${promptId}`, {
|
|
195
|
+
status: 'failed'
|
|
196
|
+
});
|
|
197
|
+
}
|
|
198
|
+
catch (e) {
|
|
199
|
+
logger.debug('Failed to store failed result:', e);
|
|
200
|
+
}
|
|
201
|
+
return {
|
|
202
|
+
success: false,
|
|
203
|
+
question: promptText,
|
|
204
|
+
error
|
|
205
|
+
};
|
|
206
|
+
}
|
|
207
|
+
},
|
|
208
|
+
async runAllPromptEvaluations(agentId, prompts, agentEndpoint, onProgress) {
|
|
209
|
+
const results = [];
|
|
210
|
+
try {
|
|
211
|
+
for (let i = 0; i < prompts.length; i++) {
|
|
212
|
+
const prompt = prompts[i];
|
|
213
|
+
if (onProgress) {
|
|
214
|
+
onProgress(i + 1, prompts.length, prompt.prompt);
|
|
215
|
+
}
|
|
216
|
+
const result = await api.runPromptEvaluation(agentId, prompt.id, prompt.prompt, agentEndpoint, prompt.expectedAnswer, (llmResponse) => {
|
|
217
|
+
if (onProgress) {
|
|
218
|
+
onProgress(i + 1, prompts.length, prompt.prompt, llmResponse);
|
|
219
|
+
}
|
|
220
|
+
});
|
|
221
|
+
results.push(result);
|
|
222
|
+
await new Promise(resolve => setTimeout(resolve, 500));
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
catch (error) {
|
|
226
|
+
logger.error('Error running evaluations:', error);
|
|
227
|
+
}
|
|
228
|
+
return results;
|
|
229
|
+
},
|
|
230
|
+
async getTestResults(agentId) {
|
|
231
|
+
try {
|
|
232
|
+
const response = await client.get(`/api/agents/${agentId}/test-results`);
|
|
233
|
+
return response.data;
|
|
234
|
+
}
|
|
235
|
+
catch (error) {
|
|
236
|
+
console.error('Error getting test results:', error);
|
|
237
|
+
return [];
|
|
238
|
+
}
|
|
239
|
+
},
|
|
240
|
+
async runEvaluation(config, onProgress) {
|
|
241
|
+
try {
|
|
242
|
+
const startTime = Date.now();
|
|
243
|
+
if (onProgress)
|
|
244
|
+
onProgress(10);
|
|
245
|
+
const agent = await api.createAgent(config.agentEndpoint);
|
|
246
|
+
const agentId = agent.id;
|
|
247
|
+
if (onProgress)
|
|
248
|
+
onProgress(30);
|
|
249
|
+
if (config.knowledgeSource === 'files') {
|
|
250
|
+
const knowledgeResult = await api.checkKnowledge();
|
|
251
|
+
if (knowledgeResult.found && knowledgeResult.path) {
|
|
252
|
+
const knowledgeData = JSON.parse(fs.readFileSync(knowledgeResult.path, 'utf-8'));
|
|
253
|
+
await api.importKnowledge(agentId, knowledgeData);
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
if (onProgress)
|
|
257
|
+
onProgress(40);
|
|
258
|
+
let testPrompts = [];
|
|
259
|
+
if (config.knowledgeSource === 'files') {
|
|
260
|
+
const knowledgeResult = await api.checkKnowledge();
|
|
261
|
+
if (knowledgeResult.found && knowledgeResult.path) {
|
|
262
|
+
try {
|
|
263
|
+
const knowledgeData = JSON.parse(fs.readFileSync(knowledgeResult.path, 'utf-8'));
|
|
264
|
+
if (Array.isArray(knowledgeData)) {
|
|
265
|
+
testPrompts = knowledgeData.slice(0, 5).map((item) => item.question || item.prompt || item.input || 'Test question');
|
|
266
|
+
}
|
|
267
|
+
else if (knowledgeData.questions) {
|
|
268
|
+
testPrompts = knowledgeData.questions.slice(0, 5);
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
catch (error) {
|
|
272
|
+
logger.error('Error loading prompts from knowledge:', error);
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
const createdPrompts = await api.addTestPrompts(agentId, testPrompts);
|
|
277
|
+
const promptIds = createdPrompts.map((p) => p.id);
|
|
278
|
+
const promptCount = promptIds.length;
|
|
279
|
+
if (onProgress)
|
|
280
|
+
onProgress(50);
|
|
281
|
+
const evaluationResults = await api.runAllPromptEvaluations(agentId, createdPrompts, config.agentEndpoint, (current, total) => {
|
|
282
|
+
const progress = 50 + Math.round((current / total) * 40);
|
|
283
|
+
if (onProgress)
|
|
284
|
+
onProgress(progress);
|
|
285
|
+
});
|
|
286
|
+
if (onProgress)
|
|
287
|
+
onProgress(100);
|
|
288
|
+
let passed = 0;
|
|
289
|
+
let failed = 0;
|
|
290
|
+
evaluationResults.forEach((result) => {
|
|
291
|
+
if (result.success) {
|
|
292
|
+
passed++;
|
|
293
|
+
}
|
|
294
|
+
else {
|
|
295
|
+
failed++;
|
|
296
|
+
}
|
|
297
|
+
});
|
|
298
|
+
const duration = Math.round((Date.now() - startTime) / 1000);
|
|
299
|
+
const durationStr = duration > 60
|
|
300
|
+
? `${Math.floor(duration / 60)}m ${duration % 60}s`
|
|
301
|
+
: `${duration}s`;
|
|
302
|
+
return {
|
|
303
|
+
totalTests: promptCount || 5,
|
|
304
|
+
passed,
|
|
305
|
+
failed,
|
|
306
|
+
duration: durationStr,
|
|
307
|
+
evaluationUrl: `http://localhost:5173/eval/${agentId}`,
|
|
308
|
+
agentId,
|
|
309
|
+
};
|
|
310
|
+
}
|
|
311
|
+
catch (error) {
|
|
312
|
+
logger.error('Evaluation error:', error);
|
|
313
|
+
throw error;
|
|
314
|
+
}
|
|
315
|
+
},
|
|
316
|
+
};
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import React from 'react';
|
|
2
|
+
import { Box, Text } from 'ink';
|
|
3
|
+
export const Header = () => {
|
|
4
|
+
return (React.createElement(Box, { flexDirection: "column", marginBottom: 2 },
|
|
5
|
+
React.createElement(Text, { bold: true, color: "#eba1b5" }, "Rippletide Evaluation"),
|
|
6
|
+
React.createElement(Text, { color: "gray" }, "\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501")));
|
|
7
|
+
};
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import React, { useState } from 'react';
|
|
2
|
+
import { Box, Text } from 'ink';
|
|
3
|
+
/**
 * Renders a labelled prompt: the label, the placeholder text (or
 * 'Type your answer...' when none is given) and a "Press Enter to submit" hint.
 *
 * NOTE(review): this component only renders text. It registers no input
 * handling and never calls `onSubmit`; the prop is accepted so existing call
 * sites keep working. The former `useState('')` value was never read or
 * updated and has been removed.
 */
export const InputPrompt = ({ label, onSubmit, placeholder }) => {
    return (React.createElement(Box, { flexDirection: "column" },
        React.createElement(Box, { marginBottom: 1 },
            React.createElement(Text, { bold: true, color: "cyan" }, label)),
        React.createElement(Box, null,
            React.createElement(Text, { color: "gray" }, placeholder || 'Type your answer...')),
        React.createElement(Box, { marginTop: 1 },
            React.createElement(Text, { dimColor: true }, "Press Enter to submit"))));
};
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import React from 'react';
|
|
2
|
+
import { Box, Text } from 'ink';
|
|
3
|
+
/**
 * Renders a 40-character text progress bar followed by the percentage.
 *
 * @param progress percentage to display; values outside 0..100 are clamped and
 *                 non-finite values are treated as 0, so the bar never asks
 *                 `String.prototype.repeat` for a negative count
 * @param label optional dimmed caption shown above the bar
 */
export const ProgressBar = ({ progress, label }) => {
    const width = 40;
    const percent = Number.isFinite(progress) ? Math.min(100, Math.max(0, progress)) : 0;
    const filled = Math.round((percent / 100) * width);
    const empty = width - filled;
    return (React.createElement(Box, { flexDirection: "column", marginY: 1 },
        label && (React.createElement(Box, { marginBottom: 1 },
            React.createElement(Text, { dimColor: true }, label))),
        React.createElement(Box, null,
            React.createElement(Text, { color: "#eba1b5" }, '='.repeat(filled)),
            React.createElement(Text, { color: "gray" }, '-'.repeat(empty)),
            React.createElement(Text, null, " "),
            React.createElement(Text, { color: "#eba1b5" },
                percent.toFixed(0),
                "%"))));
};
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import React from 'react';
|
|
2
|
+
/** One entry of a select menu. */
interface SelectOption {
    /** Text shown for the entry. */
    label: string;

    /** Value associated with the entry. */
    value: string;

    /** Optional longer explanation of the entry. */
    description?: string;

    /** Optional flag marking the entry as disabled. */
    disabled?: boolean;
}
|
|
8
|
+
/** Props accepted by the `SelectMenu` component. */
interface SelectMenuProps {
    /** Title of the menu. */
    title: string;

    /** Entries to choose from. */
    options: SelectOption[];

    /** Callback receiving the `value` of the chosen entry. */
    onSelect: (value: string) => void;
}
|
|
13
|
+
/** Select menu React function component; see `SelectMenuProps` for its props. */
export declare const SelectMenu: React.FC<SelectMenuProps>;
|
|
14
|
+
export {};
|