rippletide 1.0.3 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/rippletide +8 -6
- package/dist/App.d.ts +6 -1
- package/dist/App.js +132 -6
- package/dist/api/client.d.ts +1 -0
- package/dist/api/client.js +237 -39
- package/dist/index.js +38 -1
- package/dist/scripts/test-postgresql.d.ts +2 -0
- package/dist/scripts/test-postgresql.js +53 -0
- package/dist/utils/logger.d.ts +1 -0
- package/dist/utils/logger.js +5 -0
- package/dist/utils/pinecone.d.ts +5 -0
- package/dist/utils/pinecone.js +156 -0
- package/dist/utils/postgresql-qa-generator.d.ts +7 -0
- package/dist/utils/postgresql-qa-generator.js +265 -0
- package/dist/utils/postgresql.d.ts +16 -0
- package/dist/utils/postgresql.js +239 -0
- package/package.json +9 -2
- package/src/App.tsx +186 -6
- package/src/api/client.ts +260 -39
- package/src/index.tsx +40 -1
- package/src/utils/logger.ts +6 -0
- package/src/utils/pinecone.ts +190 -0
- package/src/utils/postgresql.ts +298 -0
package/bin/rippletide
CHANGED
|
@@ -1,14 +1,10 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
|
-
// Simple CLI dispatcher: `rippletide eval` (or just `rippletide`) runs the evaluator.
|
|
4
|
-
// Other commands can be added later if needed.
|
|
5
|
-
|
|
6
3
|
const args = process.argv.slice(2);
|
|
7
4
|
const cmd = args[0];
|
|
8
5
|
|
|
9
6
|
async function main() {
|
|
10
7
|
if (!cmd || cmd === 'eval') {
|
|
11
|
-
// Lazy-load compiled Ink app
|
|
12
8
|
await import('../dist/index.js');
|
|
13
9
|
return;
|
|
14
10
|
}
|
|
@@ -18,10 +14,16 @@ async function main() {
|
|
|
18
14
|
Rippletide CLI
|
|
19
15
|
|
|
20
16
|
Usage:
|
|
21
|
-
rippletide eval Run the Rippletide evaluation UI
|
|
17
|
+
rippletide eval [options] Run the Rippletide evaluation UI
|
|
22
18
|
|
|
23
19
|
Options:
|
|
24
|
-
-
|
|
20
|
+
-b, --backend-url <url> Backend API URL (default: http://rippletide-backend.azurewebsites.net)
|
|
21
|
+
-d, --dashboard-url <url> Dashboard URL (default: https://eval.rippletide.com)
|
|
22
|
+
-h, --help Show this help message
|
|
23
|
+
|
|
24
|
+
Examples:
|
|
25
|
+
rippletide eval
|
|
26
|
+
rippletide eval -b http://localhost:3001 -d http://localhost:5173
|
|
25
27
|
`);
|
|
26
28
|
return;
|
|
27
29
|
}
|
package/dist/App.d.ts
CHANGED
package/dist/App.js
CHANGED
|
@@ -7,24 +7,39 @@ import { Spinner } from './components/Spinner.js';
|
|
|
7
7
|
import { ProgressBar } from './components/ProgressBar.js';
|
|
8
8
|
import { Summary } from './components/Summary.js';
|
|
9
9
|
import { api } from './api/client.js';
|
|
10
|
+
import { getPineconeQAndA } from './utils/pinecone.js';
|
|
11
|
+
import { getPostgreSQLQAndA, parsePostgreSQLConnectionString } from './utils/postgresql.js';
|
|
10
12
|
const knowledgeSources = [
|
|
11
13
|
{ label: 'Local Files (qanda.json)', value: 'files', description: 'Use qanda.json from current directory' },
|
|
14
|
+
{ label: 'Pinecone', value: 'pinecone', description: 'Fetch Q&A from Pinecone database' },
|
|
15
|
+
{ label: 'PostgreSQL Database', value: 'postgresql', description: 'Connect to PostgreSQL database' },
|
|
12
16
|
{ label: 'Current Repository', value: 'repo', description: 'Scan current git repository', disabled: true },
|
|
13
|
-
{ label: 'Database', value: 'database', description: 'Connect to a database', disabled: true },
|
|
14
17
|
{ label: 'API Endpoint', value: 'api', description: 'Fetch from REST API', disabled: true },
|
|
15
18
|
{ label: 'GitHub Repository', value: 'github', description: 'Import from GitHub repo', disabled: true },
|
|
16
19
|
{ label: 'Skip (No Knowledge)', value: 'skip', description: 'Run tests without knowledge base', disabled: true },
|
|
17
20
|
];
|
|
18
|
-
export const App = () => {
|
|
21
|
+
export const App = ({ backendUrl, dashboardUrl }) => {
|
|
19
22
|
const [step, setStep] = useState('agent-endpoint');
|
|
20
23
|
const [agentEndpoint, setAgentEndpoint] = useState('');
|
|
21
24
|
const [knowledgeSource, setKnowledgeSource] = useState('');
|
|
22
25
|
const [knowledgeFound, setKnowledgeFound] = useState(false);
|
|
26
|
+
const [pineconeUrl, setPineconeUrl] = useState('');
|
|
27
|
+
const [pineconeApiKey, setPineconeApiKey] = useState('');
|
|
28
|
+
const [pineconeQAndA, setPineconeQAndA] = useState([]);
|
|
29
|
+
const [pineconeProgress, setPineconeProgress] = useState('');
|
|
30
|
+
const [postgresqlConnectionString, setPostgresqlConnectionString] = useState('');
|
|
31
|
+
const [postgresqlQAndA, setPostgresqlQAndA] = useState([]);
|
|
32
|
+
const [postgresqlProgress, setPostgresqlProgress] = useState('');
|
|
23
33
|
const [evaluationProgress, setEvaluationProgress] = useState(0);
|
|
24
34
|
const [evaluationResult, setEvaluationResult] = useState(null);
|
|
25
35
|
const [currentQuestion, setCurrentQuestion] = useState('');
|
|
26
36
|
const [currentLLMResponse, setCurrentLLMResponse] = useState('');
|
|
27
37
|
const [evaluationLogs, setEvaluationLogs] = useState([]);
|
|
38
|
+
useEffect(() => {
|
|
39
|
+
if (backendUrl) {
|
|
40
|
+
api.setBaseUrl(backendUrl);
|
|
41
|
+
}
|
|
42
|
+
}, [backendUrl]);
|
|
28
43
|
useEffect(() => {
|
|
29
44
|
if (step === 'checking-knowledge') {
|
|
30
45
|
(async () => {
|
|
@@ -40,6 +55,69 @@ export const App = () => {
|
|
|
40
55
|
})();
|
|
41
56
|
}
|
|
42
57
|
}, [step]);
|
|
58
|
+
useEffect(() => {
|
|
59
|
+
if (step === 'fetching-pinecone') {
|
|
60
|
+
(async () => {
|
|
61
|
+
try {
|
|
62
|
+
const qaPairs = await getPineconeQAndA(pineconeUrl, pineconeApiKey, (message) => setPineconeProgress(message));
|
|
63
|
+
setPineconeQAndA(qaPairs);
|
|
64
|
+
setStep('running-evaluation');
|
|
65
|
+
}
|
|
66
|
+
catch (error) {
|
|
67
|
+
console.error('Error fetching Q&A from Pinecone:', error);
|
|
68
|
+
setEvaluationResult({
|
|
69
|
+
totalTests: 0,
|
|
70
|
+
passed: 0,
|
|
71
|
+
failed: 0,
|
|
72
|
+
duration: 'Failed',
|
|
73
|
+
evaluationUrl: dashboardUrl || 'https://eval.rippletide.com',
|
|
74
|
+
error: error.message,
|
|
75
|
+
});
|
|
76
|
+
setStep('complete');
|
|
77
|
+
}
|
|
78
|
+
})();
|
|
79
|
+
}
|
|
80
|
+
}, [step, pineconeUrl, pineconeApiKey]);
|
|
81
|
+
useEffect(() => {
|
|
82
|
+
if (step === 'fetching-postgresql') {
|
|
83
|
+
(async () => {
|
|
84
|
+
try {
|
|
85
|
+
let config;
|
|
86
|
+
if (postgresqlConnectionString.startsWith('postgresql://') || postgresqlConnectionString.startsWith('postgres://')) {
|
|
87
|
+
config = parsePostgreSQLConnectionString(postgresqlConnectionString);
|
|
88
|
+
}
|
|
89
|
+
else {
|
|
90
|
+
const parts = postgresqlConnectionString.split(',');
|
|
91
|
+
if (parts.length !== 5) {
|
|
92
|
+
throw new Error('Invalid connection format. Expected: host,port,database,user,password or postgresql://...');
|
|
93
|
+
}
|
|
94
|
+
config = {
|
|
95
|
+
host: parts[0].trim(),
|
|
96
|
+
port: parseInt(parts[1].trim()),
|
|
97
|
+
database: parts[2].trim(),
|
|
98
|
+
user: parts[3].trim(),
|
|
99
|
+
password: parts[4].trim()
|
|
100
|
+
};
|
|
101
|
+
}
|
|
102
|
+
const qaPairs = await getPostgreSQLQAndA(config, backendUrl || 'http://rippletide-backend.azurewebsites.net', (message) => setPostgresqlProgress(message));
|
|
103
|
+
setPostgresqlQAndA(qaPairs);
|
|
104
|
+
setStep('running-evaluation');
|
|
105
|
+
}
|
|
106
|
+
catch (error) {
|
|
107
|
+
console.error('Error fetching Q&A from PostgreSQL:', error);
|
|
108
|
+
setEvaluationResult({
|
|
109
|
+
totalTests: 0,
|
|
110
|
+
passed: 0,
|
|
111
|
+
failed: 0,
|
|
112
|
+
duration: 'Failed',
|
|
113
|
+
evaluationUrl: dashboardUrl || 'https://eval.rippletide.com',
|
|
114
|
+
error: error.message,
|
|
115
|
+
});
|
|
116
|
+
setStep('complete');
|
|
117
|
+
}
|
|
118
|
+
})();
|
|
119
|
+
}
|
|
120
|
+
}, [step, postgresqlConnectionString, backendUrl]);
|
|
43
121
|
useEffect(() => {
|
|
44
122
|
if (step === 'running-evaluation') {
|
|
45
123
|
(async () => {
|
|
@@ -72,6 +150,18 @@ export const App = () => {
|
|
|
72
150
|
}
|
|
73
151
|
}
|
|
74
152
|
}
|
|
153
|
+
else if (knowledgeSource === 'pinecone' && pineconeQAndA.length > 0) {
|
|
154
|
+
testPrompts = pineconeQAndA.slice(0, 5).map((item) => ({
|
|
155
|
+
question: item.question,
|
|
156
|
+
answer: item.answer
|
|
157
|
+
}));
|
|
158
|
+
}
|
|
159
|
+
else if (knowledgeSource === 'postgresql' && postgresqlQAndA.length > 0) {
|
|
160
|
+
testPrompts = postgresqlQAndA.slice(0, 5).map((item) => ({
|
|
161
|
+
question: item.question,
|
|
162
|
+
answer: item.answer
|
|
163
|
+
}));
|
|
164
|
+
}
|
|
75
165
|
const createdPrompts = await api.addTestPrompts(agentId, testPrompts);
|
|
76
166
|
setEvaluationProgress(50);
|
|
77
167
|
const evaluationResults = await api.runAllPromptEvaluations(agentId, createdPrompts, agentEndpoint, (current, total, question, llmResponse) => {
|
|
@@ -106,7 +196,7 @@ export const App = () => {
|
|
|
106
196
|
passed,
|
|
107
197
|
failed,
|
|
108
198
|
duration: durationStr,
|
|
109
|
-
evaluationUrl:
|
|
199
|
+
evaluationUrl: `${dashboardUrl || 'https://eval.rippletide.com'}/eval/${agentId}`,
|
|
110
200
|
agentId,
|
|
111
201
|
};
|
|
112
202
|
setEvaluationResult(result);
|
|
@@ -119,20 +209,40 @@ export const App = () => {
|
|
|
119
209
|
passed: 0,
|
|
120
210
|
failed: 0,
|
|
121
211
|
duration: 'Failed',
|
|
122
|
-
evaluationUrl: '
|
|
212
|
+
evaluationUrl: dashboardUrl || 'https://eval.rippletide.com',
|
|
123
213
|
});
|
|
124
214
|
setStep('complete');
|
|
125
215
|
}
|
|
126
216
|
})();
|
|
127
217
|
}
|
|
128
|
-
}, [step, agentEndpoint, knowledgeSource]);
|
|
218
|
+
}, [step, agentEndpoint, knowledgeSource, pineconeQAndA, postgresqlQAndA]);
|
|
129
219
|
const handleAgentEndpointSubmit = (value) => {
|
|
130
220
|
setAgentEndpoint(value);
|
|
131
221
|
setStep('checking-knowledge');
|
|
132
222
|
};
|
|
133
223
|
const handleSourceSelect = (value) => {
|
|
134
224
|
setKnowledgeSource(value);
|
|
135
|
-
|
|
225
|
+
if (value === 'pinecone') {
|
|
226
|
+
setStep('pinecone-url');
|
|
227
|
+
}
|
|
228
|
+
else if (value === 'postgresql') {
|
|
229
|
+
setStep('postgresql-config');
|
|
230
|
+
}
|
|
231
|
+
else {
|
|
232
|
+
setStep('running-evaluation');
|
|
233
|
+
}
|
|
234
|
+
};
|
|
235
|
+
const handlePineconeUrlSubmit = (value) => {
|
|
236
|
+
setPineconeUrl(value);
|
|
237
|
+
setStep('pinecone-api-key');
|
|
238
|
+
};
|
|
239
|
+
const handlePineconeApiKeySubmit = (value) => {
|
|
240
|
+
setPineconeApiKey(value);
|
|
241
|
+
setStep('fetching-pinecone');
|
|
242
|
+
};
|
|
243
|
+
const handlePostgresqlConnectionSubmit = (value) => {
|
|
244
|
+
setPostgresqlConnectionString(value);
|
|
245
|
+
setStep('fetching-postgresql');
|
|
136
246
|
};
|
|
137
247
|
return (React.createElement(Box, { flexDirection: "column", padding: 1 },
|
|
138
248
|
React.createElement(Header, null),
|
|
@@ -146,6 +256,22 @@ export const App = () => {
|
|
|
146
256
|
knowledgeFound && (React.createElement(Box, { marginBottom: 1 },
|
|
147
257
|
React.createElement(Text, { color: "white" }, "qanda.json found in current directory"))),
|
|
148
258
|
React.createElement(SelectMenu, { title: "Data Source", options: knowledgeSources, onSelect: handleSourceSelect }))),
|
|
259
|
+
step === 'pinecone-url' && (React.createElement(Box, { flexDirection: "column" },
|
|
260
|
+
React.createElement(TextInput, { label: "Pinecone database URL", placeholder: "https://sample-movies-02j22s8.svc.aped-4627-b74a.pinecone.io", onSubmit: handlePineconeUrlSubmit }))),
|
|
261
|
+
step === 'pinecone-api-key' && (React.createElement(Box, { flexDirection: "column" },
|
|
262
|
+
React.createElement(TextInput, { label: "Pinecone API key", placeholder: "pcsk_...", onSubmit: handlePineconeApiKeySubmit }))),
|
|
263
|
+
step === 'fetching-pinecone' && (React.createElement(Box, { flexDirection: "column" },
|
|
264
|
+
React.createElement(Spinner, { label: pineconeProgress || "Fetching Q&A from Pinecone..." }))),
|
|
265
|
+
step === 'postgresql-config' && (React.createElement(Box, { flexDirection: "column" },
|
|
266
|
+
React.createElement(Box, { marginBottom: 1 },
|
|
267
|
+
React.createElement(Text, { color: "#eba1b5" }, "Enter PostgreSQL connection details")),
|
|
268
|
+
React.createElement(Box, { marginBottom: 1 },
|
|
269
|
+
React.createElement(Text, { dimColor: true }, "Format 1: postgresql://user:password@host:port/database")),
|
|
270
|
+
React.createElement(Box, { marginBottom: 1 },
|
|
271
|
+
React.createElement(Text, { dimColor: true }, "Format 2: host,port,database,user,password")),
|
|
272
|
+
React.createElement(TextInput, { label: "PostgreSQL connection", placeholder: "postgresql://postgres:password@localhost:5432/mydb", onSubmit: handlePostgresqlConnectionSubmit }))),
|
|
273
|
+
step === 'fetching-postgresql' && (React.createElement(Box, { flexDirection: "column" },
|
|
274
|
+
React.createElement(Spinner, { label: postgresqlProgress || "Analyzing PostgreSQL database..." }))),
|
|
149
275
|
step === 'running-evaluation' && (React.createElement(Box, { flexDirection: "column" },
|
|
150
276
|
React.createElement(Box, { marginBottom: 2 },
|
|
151
277
|
React.createElement(Spinner, { label: "Running evaluation" })),
|
package/dist/api/client.d.ts
CHANGED
package/dist/api/client.js
CHANGED
|
@@ -2,21 +2,35 @@ import axios from 'axios';
|
|
|
2
2
|
import * as fs from 'fs';
|
|
3
3
|
import * as path from 'path';
|
|
4
4
|
import { logger } from '../utils/logger.js';
|
|
5
|
-
|
|
5
|
+
let BASE_URL = 'http://rippletide-backend.azurewebsites.net';
|
|
6
6
|
let API_KEY = null;
|
|
7
|
-
|
|
7
|
+
let client = axios.create({
|
|
8
8
|
baseURL: BASE_URL,
|
|
9
9
|
headers: {
|
|
10
10
|
'Content-Type': 'application/json',
|
|
11
11
|
},
|
|
12
12
|
});
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
13
|
+
const setupInterceptor = () => {
|
|
14
|
+
client.interceptors.request.use((config) => {
|
|
15
|
+
if (API_KEY) {
|
|
16
|
+
config.headers['x-api-key'] = API_KEY;
|
|
17
|
+
}
|
|
18
|
+
return config;
|
|
19
|
+
});
|
|
20
|
+
};
|
|
21
|
+
setupInterceptor();
|
|
19
22
|
export const api = {
|
|
23
|
+
setBaseUrl(url) {
|
|
24
|
+
BASE_URL = url;
|
|
25
|
+
client = axios.create({
|
|
26
|
+
baseURL: BASE_URL,
|
|
27
|
+
headers: {
|
|
28
|
+
'Content-Type': 'application/json',
|
|
29
|
+
},
|
|
30
|
+
});
|
|
31
|
+
setupInterceptor();
|
|
32
|
+
logger.debug('Backend URL set to:', BASE_URL);
|
|
33
|
+
},
|
|
20
34
|
async generateApiKey(name) {
|
|
21
35
|
try {
|
|
22
36
|
const response = await client.post('/api/api-keys/generate-cli', {
|
|
@@ -106,81 +120,213 @@ export const api = {
|
|
|
106
120
|
expectedAnswer: p.answer || null,
|
|
107
121
|
}));
|
|
108
122
|
}
|
|
123
|
+
logger.info(`Adding ${promptsArray.length} test prompts to agent ${agentId}`);
|
|
124
|
+
logger.debug('Prompts:', promptsArray);
|
|
109
125
|
const response = await client.post(`/api/agents/${agentId}/test-prompts`, {
|
|
110
126
|
prompts: promptsArray,
|
|
111
127
|
});
|
|
128
|
+
logger.info(`Successfully added ${response.data.length} test prompts`);
|
|
112
129
|
return response.data;
|
|
113
130
|
}
|
|
114
131
|
catch (error) {
|
|
115
|
-
logger.error('Error adding test prompts:', error);
|
|
132
|
+
logger.error('Error adding test prompts:', error?.message || error);
|
|
116
133
|
if (error.response) {
|
|
117
|
-
logger.
|
|
118
|
-
logger.
|
|
134
|
+
logger.error('Response data:', error.response.data);
|
|
135
|
+
logger.error('Response status:', error.response.status);
|
|
119
136
|
}
|
|
120
137
|
throw error;
|
|
121
138
|
}
|
|
122
139
|
},
|
|
123
140
|
async checkHallucination(agentId, question, llmResponse, expectedAnswer) {
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
141
|
+
try {
|
|
142
|
+
if (!llmResponse || llmResponse.startsWith('Error calling LLM endpoint:')) {
|
|
143
|
+
return {
|
|
144
|
+
question,
|
|
145
|
+
llmResponse,
|
|
146
|
+
summary: 'LLM endpoint error',
|
|
147
|
+
facts: [],
|
|
148
|
+
status: 'failed',
|
|
149
|
+
hallucinationLabel: '',
|
|
150
|
+
hallucinationFindings: []
|
|
151
|
+
};
|
|
152
|
+
}
|
|
153
|
+
logger.debug('Checking hallucination for question:', question);
|
|
154
|
+
logger.debug('LLM Response length:', llmResponse.length);
|
|
155
|
+
logger.debug('Expected answer:', expectedAnswer || 'None provided');
|
|
156
|
+
const response = await client.post(`/api/agents/${agentId}/check-hallucination-response`, {
|
|
157
|
+
question,
|
|
158
|
+
llmResponse,
|
|
159
|
+
expectedAnswer
|
|
160
|
+
});
|
|
161
|
+
logger.debug('Hallucination check result:', response.data);
|
|
162
|
+
return {
|
|
163
|
+
question: response.data.question,
|
|
164
|
+
llmResponse: response.data.llmResponse,
|
|
165
|
+
summary: response.data.summary || '',
|
|
166
|
+
facts: response.data.facts || [],
|
|
167
|
+
status: response.data.status || 'passed',
|
|
168
|
+
hallucinationLabel: response.data.hallucinationLabel || 'FactIsPresent',
|
|
169
|
+
hallucinationFindings: response.data.hallucinationFindings || []
|
|
170
|
+
};
|
|
171
|
+
}
|
|
172
|
+
catch (error) {
|
|
173
|
+
if (error?.response?.status === 404) {
|
|
174
|
+
logger.warn('Hallucination check endpoint not found, using fallback');
|
|
175
|
+
// Fallback for old backend version
|
|
176
|
+
return {
|
|
177
|
+
question,
|
|
178
|
+
llmResponse,
|
|
179
|
+
summary: 'Hallucination check not available',
|
|
180
|
+
facts: [],
|
|
181
|
+
status: 'passed',
|
|
182
|
+
hallucinationLabel: 'FactIsPresent',
|
|
183
|
+
hallucinationFindings: []
|
|
184
|
+
};
|
|
185
|
+
}
|
|
186
|
+
logger.error('Error in hallucination check:', error?.message || error);
|
|
187
|
+
logger.debug('Error details:', error?.response?.data);
|
|
188
|
+
return {
|
|
189
|
+
question,
|
|
190
|
+
llmResponse,
|
|
191
|
+
summary: 'Check failed',
|
|
192
|
+
facts: [],
|
|
193
|
+
status: 'passed',
|
|
194
|
+
hallucinationLabel: 'FactIsPresent',
|
|
195
|
+
hallucinationFindings: []
|
|
196
|
+
};
|
|
197
|
+
}
|
|
130
198
|
},
|
|
131
199
|
async callLLMEndpoint(agentEndpoint, question) {
|
|
132
200
|
try {
|
|
201
|
+
logger.debug(`Calling LLM endpoint: ${agentEndpoint}`);
|
|
202
|
+
logger.debug(`Question: ${question}`);
|
|
133
203
|
const llmClient = axios.create({
|
|
134
204
|
timeout: 60000,
|
|
205
|
+
validateStatus: () => true
|
|
135
206
|
});
|
|
136
|
-
const
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
207
|
+
const payload = { message: question };
|
|
208
|
+
if (agentEndpoint.includes('vercel.app') || agentEndpoint.includes('naive-cosmetic')) {
|
|
209
|
+
logger.debug('Using Vercel app format - message only');
|
|
210
|
+
}
|
|
211
|
+
else {
|
|
212
|
+
payload.query = question;
|
|
213
|
+
payload.question = question;
|
|
214
|
+
payload.prompt = question;
|
|
215
|
+
}
|
|
216
|
+
logger.debug('Request payload:', payload);
|
|
217
|
+
const response = await llmClient.post(agentEndpoint, payload);
|
|
218
|
+
logger.debug(`Response status: ${response.status}`);
|
|
219
|
+
logger.debug('Response headers:', response.headers);
|
|
220
|
+
if (response.status >= 400) {
|
|
221
|
+
const errorMsg = `LLM endpoint returned error: HTTP ${response.status} - ${response.statusText}`;
|
|
222
|
+
logger.error(errorMsg);
|
|
223
|
+
logger.debug('Response data:', response.data);
|
|
224
|
+
throw new Error(errorMsg);
|
|
225
|
+
}
|
|
142
226
|
let llmResponse = '';
|
|
143
227
|
if (typeof response.data === 'string') {
|
|
144
228
|
llmResponse = response.data;
|
|
145
229
|
}
|
|
230
|
+
else if (response.data.answer) {
|
|
231
|
+
llmResponse = response.data.answer;
|
|
232
|
+
}
|
|
146
233
|
else if (response.data.response) {
|
|
147
234
|
llmResponse = response.data.response;
|
|
148
235
|
}
|
|
149
236
|
else if (response.data.message) {
|
|
150
237
|
llmResponse = response.data.message;
|
|
151
238
|
}
|
|
152
|
-
else if (response.data.answer) {
|
|
153
|
-
llmResponse = response.data.answer;
|
|
154
|
-
}
|
|
155
239
|
else if (response.data.text) {
|
|
156
240
|
llmResponse = response.data.text;
|
|
157
241
|
}
|
|
242
|
+
else if (response.data.result) {
|
|
243
|
+
llmResponse = response.data.result;
|
|
244
|
+
}
|
|
245
|
+
else if (response.data.output) {
|
|
246
|
+
llmResponse = response.data.output;
|
|
247
|
+
}
|
|
248
|
+
else if (response.data.content) {
|
|
249
|
+
llmResponse = response.data.content;
|
|
250
|
+
}
|
|
251
|
+
else if (response.data.reply) {
|
|
252
|
+
llmResponse = response.data.reply;
|
|
253
|
+
}
|
|
158
254
|
else {
|
|
255
|
+
logger.debug('No standard field found, stringifying response');
|
|
159
256
|
llmResponse = JSON.stringify(response.data);
|
|
160
257
|
}
|
|
258
|
+
if (!llmResponse || llmResponse === '{}') {
|
|
259
|
+
logger.warn('Empty or invalid response from LLM endpoint');
|
|
260
|
+
logger.debug('Full response:', response.data);
|
|
261
|
+
}
|
|
262
|
+
logger.debug(`Extracted response: ${llmResponse.substring(0, 100)}...`);
|
|
161
263
|
return llmResponse;
|
|
162
264
|
}
|
|
163
265
|
catch (error) {
|
|
164
|
-
|
|
165
|
-
|
|
266
|
+
const errorDetails = {
|
|
267
|
+
message: error?.message || 'Unknown error',
|
|
268
|
+
code: error?.code,
|
|
269
|
+
endpoint: agentEndpoint,
|
|
270
|
+
response: error?.response?.data,
|
|
271
|
+
status: error?.response?.status
|
|
272
|
+
};
|
|
273
|
+
logger.error('Error calling LLM endpoint:', errorDetails);
|
|
274
|
+
if (error.code === 'ECONNREFUSED') {
|
|
275
|
+
throw new Error(`Cannot connect to LLM endpoint at ${agentEndpoint} - Connection refused`);
|
|
276
|
+
}
|
|
277
|
+
else if (error.code === 'ETIMEDOUT') {
|
|
278
|
+
throw new Error(`LLM endpoint timeout after 60 seconds`);
|
|
279
|
+
}
|
|
280
|
+
else if (error.code === 'ENOTFOUND') {
|
|
281
|
+
throw new Error(`LLM endpoint not found: ${agentEndpoint}`);
|
|
282
|
+
}
|
|
283
|
+
throw error;
|
|
166
284
|
}
|
|
167
285
|
},
|
|
168
286
|
async runPromptEvaluation(agentId, promptId, promptText, agentEndpoint, expectedAnswer, onLLMResponse) {
|
|
287
|
+
let llmResponse = null;
|
|
169
288
|
try {
|
|
170
289
|
logger.info(`Calling LLM for question: ${promptText}`);
|
|
171
|
-
|
|
290
|
+
llmResponse = await api.callLLMEndpoint(agentEndpoint, promptText);
|
|
172
291
|
if (onLLMResponse) {
|
|
173
292
|
onLLMResponse(llmResponse);
|
|
174
293
|
}
|
|
175
294
|
logger.info(`LLM Response: ${llmResponse.substring(0, 100)}...`);
|
|
176
295
|
const hallucinationResult = await api.checkHallucination(agentId, promptText, llmResponse, expectedAnswer);
|
|
177
296
|
const status = hallucinationResult.status === 'passed' ? 'passed' : 'failed';
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
297
|
+
try {
|
|
298
|
+
const payload = {
|
|
299
|
+
status,
|
|
300
|
+
response: llmResponse,
|
|
301
|
+
expectedAnswer: expectedAnswer || null
|
|
302
|
+
};
|
|
303
|
+
if (hallucinationResult.hallucinationLabel && hallucinationResult.hallucinationLabel !== '') {
|
|
304
|
+
payload.hallucinationLabel = hallucinationResult.hallucinationLabel;
|
|
305
|
+
}
|
|
306
|
+
if (hallucinationResult.hallucinationFindings && hallucinationResult.hallucinationFindings.length > 0) {
|
|
307
|
+
payload.hallucinationFindings = hallucinationResult.hallucinationFindings;
|
|
308
|
+
logger.debug(`Including ${hallucinationResult.hallucinationFindings.length} hallucination findings`);
|
|
309
|
+
}
|
|
310
|
+
else {
|
|
311
|
+
logger.debug('No hallucination findings to include');
|
|
312
|
+
}
|
|
313
|
+
await client.post(`/api/agents/${agentId}/test-results/${promptId}`, payload);
|
|
314
|
+
logger.debug(`Stored test result for prompt ${promptId} with status ${status}`);
|
|
315
|
+
}
|
|
316
|
+
catch (storeError) {
|
|
317
|
+
logger.warn('Could not store test result, trying minimal payload:', storeError?.message);
|
|
318
|
+
logger.debug('Store error details:', storeError?.response?.data);
|
|
319
|
+
const minimalPayload = {
|
|
320
|
+
status,
|
|
321
|
+
response: llmResponse,
|
|
322
|
+
expectedAnswer: expectedAnswer || null
|
|
323
|
+
};
|
|
324
|
+
if (hallucinationResult.hallucinationLabel) {
|
|
325
|
+
minimalPayload.hallucinationLabel = hallucinationResult.hallucinationLabel;
|
|
326
|
+
}
|
|
327
|
+
await client.post(`/api/agents/${agentId}/test-results/${promptId}`, minimalPayload);
|
|
328
|
+
logger.debug(`Stored minimal test result for prompt ${promptId}`);
|
|
329
|
+
}
|
|
184
330
|
return {
|
|
185
331
|
success: status === 'passed',
|
|
186
332
|
question: promptText,
|
|
@@ -189,19 +335,71 @@ export const api = {
|
|
|
189
335
|
};
|
|
190
336
|
}
|
|
191
337
|
catch (error) {
|
|
192
|
-
|
|
338
|
+
if (llmResponse) {
|
|
339
|
+
logger.warn('LLM responded successfully but evaluation failed, marking as passed');
|
|
340
|
+
try {
|
|
341
|
+
await client.post(`/api/agents/${agentId}/test-results/${promptId}`, {
|
|
342
|
+
status: 'passed',
|
|
343
|
+
response: llmResponse,
|
|
344
|
+
expectedAnswer: expectedAnswer || null
|
|
345
|
+
});
|
|
346
|
+
}
|
|
347
|
+
catch (storeError) {
|
|
348
|
+
logger.error('Could not store passed result:', storeError);
|
|
349
|
+
}
|
|
350
|
+
return {
|
|
351
|
+
success: true,
|
|
352
|
+
question: promptText,
|
|
353
|
+
llmResponse,
|
|
354
|
+
hallucinationResult: {
|
|
355
|
+
question: promptText,
|
|
356
|
+
llmResponse,
|
|
357
|
+
summary: 'Evaluation skipped (LLM responded successfully)',
|
|
358
|
+
facts: [],
|
|
359
|
+
status: 'passed',
|
|
360
|
+
hallucinationLabel: 'NO_HALLUCINATION',
|
|
361
|
+
hallucinationFindings: []
|
|
362
|
+
}
|
|
363
|
+
};
|
|
364
|
+
}
|
|
365
|
+
const errorMessage = error?.response?.data?.message || error?.message || 'Unknown error';
|
|
366
|
+
const errorDetails = {
|
|
367
|
+
message: errorMessage,
|
|
368
|
+
endpoint: agentEndpoint,
|
|
369
|
+
statusCode: error?.response?.status,
|
|
370
|
+
data: error?.response?.data
|
|
371
|
+
};
|
|
372
|
+
logger.error(`Error running prompt ${promptId}:`, errorDetails);
|
|
373
|
+
const errorResponse = `Error calling LLM endpoint: ${errorMessage}`;
|
|
193
374
|
try {
|
|
194
375
|
await client.post(`/api/agents/${agentId}/test-results/${promptId}`, {
|
|
195
|
-
status: 'failed'
|
|
376
|
+
status: 'failed',
|
|
377
|
+
response: errorResponse,
|
|
378
|
+
expectedAnswer: expectedAnswer || null
|
|
196
379
|
});
|
|
380
|
+
logger.debug(`Stored failed result for prompt ${promptId}`);
|
|
197
381
|
}
|
|
198
382
|
catch (e) {
|
|
199
|
-
logger.
|
|
383
|
+
logger.error('Failed to store failed result:', e?.message || e);
|
|
384
|
+
logger.debug('Error details:', e?.response?.data);
|
|
385
|
+
try {
|
|
386
|
+
await client.post(`/api/agents/${agentId}/test-results/${promptId}`, {
|
|
387
|
+
status: 'failed'
|
|
388
|
+
});
|
|
389
|
+
logger.debug(`Stored minimal failed result for prompt ${promptId}`);
|
|
390
|
+
}
|
|
391
|
+
catch (fallbackError) {
|
|
392
|
+
logger.error('Fallback storage also failed:', fallbackError);
|
|
393
|
+
}
|
|
394
|
+
}
|
|
395
|
+
if (onLLMResponse && !llmResponse) {
|
|
396
|
+
onLLMResponse(errorResponse);
|
|
200
397
|
}
|
|
201
398
|
return {
|
|
202
399
|
success: false,
|
|
203
400
|
question: promptText,
|
|
204
|
-
|
|
401
|
+
llmResponse: errorResponse,
|
|
402
|
+
error: errorDetails
|
|
205
403
|
};
|
|
206
404
|
}
|
|
207
405
|
},
|
package/dist/index.js
CHANGED
|
@@ -2,5 +2,42 @@
|
|
|
2
2
|
import React from 'react';
|
|
3
3
|
import { render } from 'ink';
|
|
4
4
|
import { App } from './App.js';
|
|
5
|
+
const parseArgs = () => {
|
|
6
|
+
const args = process.argv.slice(2);
|
|
7
|
+
const options = {
|
|
8
|
+
backendUrl: 'http://rippletide-backend.azurewebsites.net',
|
|
9
|
+
dashboardUrl: 'https://eval.rippletide.com'
|
|
10
|
+
};
|
|
11
|
+
for (let i = 0; i < args.length; i++) {
|
|
12
|
+
if ((args[i] === '--backend-url' || args[i] === '-b') && args[i + 1]) {
|
|
13
|
+
options.backendUrl = args[i + 1];
|
|
14
|
+
i++;
|
|
15
|
+
}
|
|
16
|
+
else if ((args[i] === '--dashboard-url' || args[i] === '-d') && args[i + 1]) {
|
|
17
|
+
options.dashboardUrl = args[i + 1];
|
|
18
|
+
i++;
|
|
19
|
+
}
|
|
20
|
+
else if (args[i] === '--help' || args[i] === '-h') {
|
|
21
|
+
console.log(`
|
|
22
|
+
Rippletide CLI
|
|
23
|
+
|
|
24
|
+
Usage:
|
|
25
|
+
rippletide eval [options]
|
|
26
|
+
|
|
27
|
+
Options:
|
|
28
|
+
-b, --backend-url <url> Backend API URL (default: http://rippletide-backend.azurewebsites.net)
|
|
29
|
+
-d, --dashboard-url <url> Dashboard URL (default: https://eval.rippletide.com)
|
|
30
|
+
-h, --help Show this help message
|
|
31
|
+
|
|
32
|
+
Examples:
|
|
33
|
+
rippletide eval
|
|
34
|
+
rippletide eval -b http://localhost:3001 -d http://localhost:5173
|
|
35
|
+
`);
|
|
36
|
+
process.exit(0);
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
return options;
|
|
40
|
+
};
|
|
41
|
+
const options = parseArgs();
|
|
5
42
|
process.stdout.write('\x1Bc');
|
|
6
|
-
render(React.createElement(App,
|
|
43
|
+
render(React.createElement(App, { backendUrl: options.backendUrl, dashboardUrl: options.dashboardUrl }));
|