rippletide 1.0.3 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/rippletide CHANGED
@@ -1,14 +1,10 @@
1
1
  #!/usr/bin/env node
2
2
 
3
- // Simple CLI dispatcher: `rippletide eval` (or just `rippletide`) runs the evaluator.
4
- // Other commands can be added later if needed.
5
-
6
3
  const args = process.argv.slice(2);
7
4
  const cmd = args[0];
8
5
 
9
6
  async function main() {
10
7
  if (!cmd || cmd === 'eval') {
11
- // Lazy-load compiled Ink app
12
8
  await import('../dist/index.js');
13
9
  return;
14
10
  }
@@ -18,10 +14,16 @@ async function main() {
18
14
  Rippletide CLI
19
15
 
20
16
  Usage:
21
- rippletide eval Run the Rippletide evaluation UI
17
+ rippletide eval [options] Run the Rippletide evaluation UI
22
18
 
23
19
  Options:
24
- -h, --help Show this help message
20
+ -b, --backend-url <url> Backend API URL (default: http://rippletide-backend.azurewebsites.net)
21
+ -d, --dashboard-url <url> Dashboard URL (default: https://eval.rippletide.com)
22
+ -h, --help Show this help message
23
+
24
+ Examples:
25
+ rippletide eval
26
+ rippletide eval -b http://localhost:3001 -d http://localhost:5173
25
27
  `);
26
28
  return;
27
29
  }
package/dist/App.d.ts CHANGED
@@ -1,2 +1,7 @@
1
1
  import React from 'react';
2
- export declare const App: React.FC;
2
+ interface AppProps {
3
+ backendUrl?: string;
4
+ dashboardUrl?: string;
5
+ }
6
+ export declare const App: React.FC<AppProps>;
7
+ export {};
package/dist/App.js CHANGED
@@ -7,24 +7,39 @@ import { Spinner } from './components/Spinner.js';
7
7
  import { ProgressBar } from './components/ProgressBar.js';
8
8
  import { Summary } from './components/Summary.js';
9
9
  import { api } from './api/client.js';
10
+ import { getPineconeQAndA } from './utils/pinecone.js';
11
+ import { getPostgreSQLQAndA, parsePostgreSQLConnectionString } from './utils/postgresql.js';
10
12
  const knowledgeSources = [
11
13
  { label: 'Local Files (qanda.json)', value: 'files', description: 'Use qanda.json from current directory' },
14
+ { label: 'Pinecone', value: 'pinecone', description: 'Fetch Q&A from Pinecone database' },
15
+ { label: 'PostgreSQL Database', value: 'postgresql', description: 'Connect to PostgreSQL database' },
12
16
  { label: 'Current Repository', value: 'repo', description: 'Scan current git repository', disabled: true },
13
- { label: 'Database', value: 'database', description: 'Connect to a database', disabled: true },
14
17
  { label: 'API Endpoint', value: 'api', description: 'Fetch from REST API', disabled: true },
15
18
  { label: 'GitHub Repository', value: 'github', description: 'Import from GitHub repo', disabled: true },
16
19
  { label: 'Skip (No Knowledge)', value: 'skip', description: 'Run tests without knowledge base', disabled: true },
17
20
  ];
18
- export const App = () => {
21
+ export const App = ({ backendUrl, dashboardUrl }) => {
19
22
  const [step, setStep] = useState('agent-endpoint');
20
23
  const [agentEndpoint, setAgentEndpoint] = useState('');
21
24
  const [knowledgeSource, setKnowledgeSource] = useState('');
22
25
  const [knowledgeFound, setKnowledgeFound] = useState(false);
26
+ const [pineconeUrl, setPineconeUrl] = useState('');
27
+ const [pineconeApiKey, setPineconeApiKey] = useState('');
28
+ const [pineconeQAndA, setPineconeQAndA] = useState([]);
29
+ const [pineconeProgress, setPineconeProgress] = useState('');
30
+ const [postgresqlConnectionString, setPostgresqlConnectionString] = useState('');
31
+ const [postgresqlQAndA, setPostgresqlQAndA] = useState([]);
32
+ const [postgresqlProgress, setPostgresqlProgress] = useState('');
23
33
  const [evaluationProgress, setEvaluationProgress] = useState(0);
24
34
  const [evaluationResult, setEvaluationResult] = useState(null);
25
35
  const [currentQuestion, setCurrentQuestion] = useState('');
26
36
  const [currentLLMResponse, setCurrentLLMResponse] = useState('');
27
37
  const [evaluationLogs, setEvaluationLogs] = useState([]);
38
+ useEffect(() => {
39
+ if (backendUrl) {
40
+ api.setBaseUrl(backendUrl);
41
+ }
42
+ }, [backendUrl]);
28
43
  useEffect(() => {
29
44
  if (step === 'checking-knowledge') {
30
45
  (async () => {
@@ -40,6 +55,69 @@ export const App = () => {
40
55
  })();
41
56
  }
42
57
  }, [step]);
58
+ useEffect(() => {
59
+ if (step === 'fetching-pinecone') {
60
+ (async () => {
61
+ try {
62
+ const qaPairs = await getPineconeQAndA(pineconeUrl, pineconeApiKey, (message) => setPineconeProgress(message));
63
+ setPineconeQAndA(qaPairs);
64
+ setStep('running-evaluation');
65
+ }
66
+ catch (error) {
67
+ console.error('Error fetching Q&A from Pinecone:', error);
68
+ setEvaluationResult({
69
+ totalTests: 0,
70
+ passed: 0,
71
+ failed: 0,
72
+ duration: 'Failed',
73
+ evaluationUrl: dashboardUrl || 'https://eval.rippletide.com',
74
+ error: error.message,
75
+ });
76
+ setStep('complete');
77
+ }
78
+ })();
79
+ }
80
+ }, [step, pineconeUrl, pineconeApiKey]);
81
+ useEffect(() => {
82
+ if (step === 'fetching-postgresql') {
83
+ (async () => {
84
+ try {
85
+ let config;
86
+ if (postgresqlConnectionString.startsWith('postgresql://') || postgresqlConnectionString.startsWith('postgres://')) {
87
+ config = parsePostgreSQLConnectionString(postgresqlConnectionString);
88
+ }
89
+ else {
90
+ const parts = postgresqlConnectionString.split(',');
91
+ if (parts.length !== 5) {
92
+ throw new Error('Invalid connection format. Expected: host,port,database,user,password or postgresql://...');
93
+ }
94
+ config = {
95
+ host: parts[0].trim(),
96
+ port: parseInt(parts[1].trim()),
97
+ database: parts[2].trim(),
98
+ user: parts[3].trim(),
99
+ password: parts[4].trim()
100
+ };
101
+ }
102
+ const qaPairs = await getPostgreSQLQAndA(config, backendUrl || 'http://rippletide-backend.azurewebsites.net', (message) => setPostgresqlProgress(message));
103
+ setPostgresqlQAndA(qaPairs);
104
+ setStep('running-evaluation');
105
+ }
106
+ catch (error) {
107
+ console.error('Error fetching Q&A from PostgreSQL:', error);
108
+ setEvaluationResult({
109
+ totalTests: 0,
110
+ passed: 0,
111
+ failed: 0,
112
+ duration: 'Failed',
113
+ evaluationUrl: dashboardUrl || 'https://eval.rippletide.com',
114
+ error: error.message,
115
+ });
116
+ setStep('complete');
117
+ }
118
+ })();
119
+ }
120
+ }, [step, postgresqlConnectionString, backendUrl]);
43
121
  useEffect(() => {
44
122
  if (step === 'running-evaluation') {
45
123
  (async () => {
@@ -72,6 +150,18 @@ export const App = () => {
72
150
  }
73
151
  }
74
152
  }
153
+ else if (knowledgeSource === 'pinecone' && pineconeQAndA.length > 0) {
154
+ testPrompts = pineconeQAndA.slice(0, 5).map((item) => ({
155
+ question: item.question,
156
+ answer: item.answer
157
+ }));
158
+ }
159
+ else if (knowledgeSource === 'postgresql' && postgresqlQAndA.length > 0) {
160
+ testPrompts = postgresqlQAndA.slice(0, 5).map((item) => ({
161
+ question: item.question,
162
+ answer: item.answer
163
+ }));
164
+ }
75
165
  const createdPrompts = await api.addTestPrompts(agentId, testPrompts);
76
166
  setEvaluationProgress(50);
77
167
  const evaluationResults = await api.runAllPromptEvaluations(agentId, createdPrompts, agentEndpoint, (current, total, question, llmResponse) => {
@@ -106,7 +196,7 @@ export const App = () => {
106
196
  passed,
107
197
  failed,
108
198
  duration: durationStr,
109
- evaluationUrl: `http://localhost:5173/eval/${agentId}`,
199
+ evaluationUrl: `${dashboardUrl || 'https://eval.rippletide.com'}/eval/${agentId}`,
110
200
  agentId,
111
201
  };
112
202
  setEvaluationResult(result);
@@ -119,20 +209,40 @@ export const App = () => {
119
209
  passed: 0,
120
210
  failed: 0,
121
211
  duration: 'Failed',
122
- evaluationUrl: 'http://localhost:5173',
212
+ evaluationUrl: dashboardUrl || 'https://eval.rippletide.com',
123
213
  });
124
214
  setStep('complete');
125
215
  }
126
216
  })();
127
217
  }
128
- }, [step, agentEndpoint, knowledgeSource]);
218
+ }, [step, agentEndpoint, knowledgeSource, pineconeQAndA, postgresqlQAndA]);
129
219
  const handleAgentEndpointSubmit = (value) => {
130
220
  setAgentEndpoint(value);
131
221
  setStep('checking-knowledge');
132
222
  };
133
223
  const handleSourceSelect = (value) => {
134
224
  setKnowledgeSource(value);
135
- setStep('running-evaluation');
225
+ if (value === 'pinecone') {
226
+ setStep('pinecone-url');
227
+ }
228
+ else if (value === 'postgresql') {
229
+ setStep('postgresql-config');
230
+ }
231
+ else {
232
+ setStep('running-evaluation');
233
+ }
234
+ };
235
+ const handlePineconeUrlSubmit = (value) => {
236
+ setPineconeUrl(value);
237
+ setStep('pinecone-api-key');
238
+ };
239
+ const handlePineconeApiKeySubmit = (value) => {
240
+ setPineconeApiKey(value);
241
+ setStep('fetching-pinecone');
242
+ };
243
+ const handlePostgresqlConnectionSubmit = (value) => {
244
+ setPostgresqlConnectionString(value);
245
+ setStep('fetching-postgresql');
136
246
  };
137
247
  return (React.createElement(Box, { flexDirection: "column", padding: 1 },
138
248
  React.createElement(Header, null),
@@ -146,6 +256,22 @@ export const App = () => {
146
256
  knowledgeFound && (React.createElement(Box, { marginBottom: 1 },
147
257
  React.createElement(Text, { color: "white" }, "qanda.json found in current directory"))),
148
258
  React.createElement(SelectMenu, { title: "Data Source", options: knowledgeSources, onSelect: handleSourceSelect }))),
259
+ step === 'pinecone-url' && (React.createElement(Box, { flexDirection: "column" },
260
+ React.createElement(TextInput, { label: "Pinecone database URL", placeholder: "https://sample-movies-02j22s8.svc.aped-4627-b74a.pinecone.io", onSubmit: handlePineconeUrlSubmit }))),
261
+ step === 'pinecone-api-key' && (React.createElement(Box, { flexDirection: "column" },
262
+ React.createElement(TextInput, { label: "Pinecone API key", placeholder: "pcsk_...", onSubmit: handlePineconeApiKeySubmit }))),
263
+ step === 'fetching-pinecone' && (React.createElement(Box, { flexDirection: "column" },
264
+ React.createElement(Spinner, { label: pineconeProgress || "Fetching Q&A from Pinecone..." }))),
265
+ step === 'postgresql-config' && (React.createElement(Box, { flexDirection: "column" },
266
+ React.createElement(Box, { marginBottom: 1 },
267
+ React.createElement(Text, { color: "#eba1b5" }, "Enter PostgreSQL connection details")),
268
+ React.createElement(Box, { marginBottom: 1 },
269
+ React.createElement(Text, { dimColor: true }, "Format 1: postgresql://user:password@host:port/database")),
270
+ React.createElement(Box, { marginBottom: 1 },
271
+ React.createElement(Text, { dimColor: true }, "Format 2: host,port,database,user,password")),
272
+ React.createElement(TextInput, { label: "PostgreSQL connection", placeholder: "postgresql://postgres:password@localhost:5432/mydb", onSubmit: handlePostgresqlConnectionSubmit }))),
273
+ step === 'fetching-postgresql' && (React.createElement(Box, { flexDirection: "column" },
274
+ React.createElement(Spinner, { label: postgresqlProgress || "Analyzing PostgreSQL database..." }))),
149
275
  step === 'running-evaluation' && (React.createElement(Box, { flexDirection: "column" },
150
276
  React.createElement(Box, { marginBottom: 2 },
151
277
  React.createElement(Spinner, { label: "Running evaluation" })),
@@ -27,6 +27,7 @@ export interface PromptEvaluationResult {
27
27
  error?: any;
28
28
  }
29
29
  export declare const api: {
30
+ setBaseUrl(url: string): void;
30
31
  generateApiKey(name?: string): Promise<any>;
31
32
  healthCheck(): Promise<any>;
32
33
  checkKnowledge(folderPath?: string): Promise<{
@@ -2,21 +2,35 @@ import axios from 'axios';
2
2
  import * as fs from 'fs';
3
3
  import * as path from 'path';
4
4
  import { logger } from '../utils/logger.js';
5
- const BASE_URL = 'http://localhost:3001';
5
+ let BASE_URL = 'http://rippletide-backend.azurewebsites.net';
6
6
  let API_KEY = null;
7
- const client = axios.create({
7
+ let client = axios.create({
8
8
  baseURL: BASE_URL,
9
9
  headers: {
10
10
  'Content-Type': 'application/json',
11
11
  },
12
12
  });
13
- client.interceptors.request.use((config) => {
14
- if (API_KEY) {
15
- config.headers['x-api-key'] = API_KEY;
16
- }
17
- return config;
18
- });
13
+ const setupInterceptor = () => {
14
+ client.interceptors.request.use((config) => {
15
+ if (API_KEY) {
16
+ config.headers['x-api-key'] = API_KEY;
17
+ }
18
+ return config;
19
+ });
20
+ };
21
+ setupInterceptor();
19
22
  export const api = {
23
+ setBaseUrl(url) {
24
+ BASE_URL = url;
25
+ client = axios.create({
26
+ baseURL: BASE_URL,
27
+ headers: {
28
+ 'Content-Type': 'application/json',
29
+ },
30
+ });
31
+ setupInterceptor();
32
+ logger.debug('Backend URL set to:', BASE_URL);
33
+ },
20
34
  async generateApiKey(name) {
21
35
  try {
22
36
  const response = await client.post('/api/api-keys/generate-cli', {
@@ -106,81 +120,213 @@ export const api = {
106
120
  expectedAnswer: p.answer || null,
107
121
  }));
108
122
  }
123
+ logger.info(`Adding ${promptsArray.length} test prompts to agent ${agentId}`);
124
+ logger.debug('Prompts:', promptsArray);
109
125
  const response = await client.post(`/api/agents/${agentId}/test-prompts`, {
110
126
  prompts: promptsArray,
111
127
  });
128
+ logger.info(`Successfully added ${response.data.length} test prompts`);
112
129
  return response.data;
113
130
  }
114
131
  catch (error) {
115
- logger.error('Error adding test prompts:', error);
132
+ logger.error('Error adding test prompts:', error?.message || error);
116
133
  if (error.response) {
117
- logger.debug('Response data:', error.response.data);
118
- logger.debug('Response status:', error.response.status);
134
+ logger.error('Response data:', error.response.data);
135
+ logger.error('Response status:', error.response.status);
119
136
  }
120
137
  throw error;
121
138
  }
122
139
  },
123
140
  async checkHallucination(agentId, question, llmResponse, expectedAnswer) {
124
- const response = await client.post(`/api/agents/${agentId}/hallucination`, {
125
- question,
126
- llmResponse,
127
- expectedAnswer
128
- });
129
- return response.data;
141
+ try {
142
+ if (!llmResponse || llmResponse.startsWith('Error calling LLM endpoint:')) {
143
+ return {
144
+ question,
145
+ llmResponse,
146
+ summary: 'LLM endpoint error',
147
+ facts: [],
148
+ status: 'failed',
149
+ hallucinationLabel: '',
150
+ hallucinationFindings: []
151
+ };
152
+ }
153
+ logger.debug('Checking hallucination for question:', question);
154
+ logger.debug('LLM Response length:', llmResponse.length);
155
+ logger.debug('Expected answer:', expectedAnswer || 'None provided');
156
+ const response = await client.post(`/api/agents/${agentId}/check-hallucination-response`, {
157
+ question,
158
+ llmResponse,
159
+ expectedAnswer
160
+ });
161
+ logger.debug('Hallucination check result:', response.data);
162
+ return {
163
+ question: response.data.question,
164
+ llmResponse: response.data.llmResponse,
165
+ summary: response.data.summary || '',
166
+ facts: response.data.facts || [],
167
+ status: response.data.status || 'passed',
168
+ hallucinationLabel: response.data.hallucinationLabel || 'FactIsPresent',
169
+ hallucinationFindings: response.data.hallucinationFindings || []
170
+ };
171
+ }
172
+ catch (error) {
173
+ if (error?.response?.status === 404) {
174
+ logger.warn('Hallucination check endpoint not found, using fallback');
175
+ // Fallback for old backend version
176
+ return {
177
+ question,
178
+ llmResponse,
179
+ summary: 'Hallucination check not available',
180
+ facts: [],
181
+ status: 'passed',
182
+ hallucinationLabel: 'FactIsPresent',
183
+ hallucinationFindings: []
184
+ };
185
+ }
186
+ logger.error('Error in hallucination check:', error?.message || error);
187
+ logger.debug('Error details:', error?.response?.data);
188
+ return {
189
+ question,
190
+ llmResponse,
191
+ summary: 'Check failed',
192
+ facts: [],
193
+ status: 'passed',
194
+ hallucinationLabel: 'FactIsPresent',
195
+ hallucinationFindings: []
196
+ };
197
+ }
130
198
  },
131
199
  async callLLMEndpoint(agentEndpoint, question) {
132
200
  try {
201
+ logger.debug(`Calling LLM endpoint: ${agentEndpoint}`);
202
+ logger.debug(`Question: ${question}`);
133
203
  const llmClient = axios.create({
134
204
  timeout: 60000,
205
+ validateStatus: () => true
135
206
  });
136
- const response = await llmClient.post(agentEndpoint, {
137
- message: question,
138
- query: question,
139
- question: question,
140
- prompt: question,
141
- });
207
+ const payload = { message: question };
208
+ if (agentEndpoint.includes('vercel.app') || agentEndpoint.includes('naive-cosmetic')) {
209
+ logger.debug('Using Vercel app format - message only');
210
+ }
211
+ else {
212
+ payload.query = question;
213
+ payload.question = question;
214
+ payload.prompt = question;
215
+ }
216
+ logger.debug('Request payload:', payload);
217
+ const response = await llmClient.post(agentEndpoint, payload);
218
+ logger.debug(`Response status: ${response.status}`);
219
+ logger.debug('Response headers:', response.headers);
220
+ if (response.status >= 400) {
221
+ const errorMsg = `LLM endpoint returned error: HTTP ${response.status} - ${response.statusText}`;
222
+ logger.error(errorMsg);
223
+ logger.debug('Response data:', response.data);
224
+ throw new Error(errorMsg);
225
+ }
142
226
  let llmResponse = '';
143
227
  if (typeof response.data === 'string') {
144
228
  llmResponse = response.data;
145
229
  }
230
+ else if (response.data.answer) {
231
+ llmResponse = response.data.answer;
232
+ }
146
233
  else if (response.data.response) {
147
234
  llmResponse = response.data.response;
148
235
  }
149
236
  else if (response.data.message) {
150
237
  llmResponse = response.data.message;
151
238
  }
152
- else if (response.data.answer) {
153
- llmResponse = response.data.answer;
154
- }
155
239
  else if (response.data.text) {
156
240
  llmResponse = response.data.text;
157
241
  }
242
+ else if (response.data.result) {
243
+ llmResponse = response.data.result;
244
+ }
245
+ else if (response.data.output) {
246
+ llmResponse = response.data.output;
247
+ }
248
+ else if (response.data.content) {
249
+ llmResponse = response.data.content;
250
+ }
251
+ else if (response.data.reply) {
252
+ llmResponse = response.data.reply;
253
+ }
158
254
  else {
255
+ logger.debug('No standard field found, stringifying response');
159
256
  llmResponse = JSON.stringify(response.data);
160
257
  }
258
+ if (!llmResponse || llmResponse === '{}') {
259
+ logger.warn('Empty or invalid response from LLM endpoint');
260
+ logger.debug('Full response:', response.data);
261
+ }
262
+ logger.debug(`Extracted response: ${llmResponse.substring(0, 100)}...`);
161
263
  return llmResponse;
162
264
  }
163
265
  catch (error) {
164
- logger.error('Error calling LLM endpoint:', error?.message || error);
165
- throw new Error(`Failed to call LLM endpoint: ${error?.message || 'Unknown error'}`);
266
+ const errorDetails = {
267
+ message: error?.message || 'Unknown error',
268
+ code: error?.code,
269
+ endpoint: agentEndpoint,
270
+ response: error?.response?.data,
271
+ status: error?.response?.status
272
+ };
273
+ logger.error('Error calling LLM endpoint:', errorDetails);
274
+ if (error.code === 'ECONNREFUSED') {
275
+ throw new Error(`Cannot connect to LLM endpoint at ${agentEndpoint} - Connection refused`);
276
+ }
277
+ else if (error.code === 'ETIMEDOUT') {
278
+ throw new Error(`LLM endpoint timeout after 60 seconds`);
279
+ }
280
+ else if (error.code === 'ENOTFOUND') {
281
+ throw new Error(`LLM endpoint not found: ${agentEndpoint}`);
282
+ }
283
+ throw error;
166
284
  }
167
285
  },
168
286
  async runPromptEvaluation(agentId, promptId, promptText, agentEndpoint, expectedAnswer, onLLMResponse) {
287
+ let llmResponse = null;
169
288
  try {
170
289
  logger.info(`Calling LLM for question: ${promptText}`);
171
- const llmResponse = await api.callLLMEndpoint(agentEndpoint, promptText);
290
+ llmResponse = await api.callLLMEndpoint(agentEndpoint, promptText);
172
291
  if (onLLMResponse) {
173
292
  onLLMResponse(llmResponse);
174
293
  }
175
294
  logger.info(`LLM Response: ${llmResponse.substring(0, 100)}...`);
176
295
  const hallucinationResult = await api.checkHallucination(agentId, promptText, llmResponse, expectedAnswer);
177
296
  const status = hallucinationResult.status === 'passed' ? 'passed' : 'failed';
178
- await client.post(`/api/agents/${agentId}/test-results/${promptId}`, {
179
- status,
180
- response: llmResponse,
181
- hallucinationLabel: hallucinationResult.hallucinationLabel,
182
- hallucinationFindings: hallucinationResult.hallucinationFindings
183
- });
297
+ try {
298
+ const payload = {
299
+ status,
300
+ response: llmResponse,
301
+ expectedAnswer: expectedAnswer || null
302
+ };
303
+ if (hallucinationResult.hallucinationLabel && hallucinationResult.hallucinationLabel !== '') {
304
+ payload.hallucinationLabel = hallucinationResult.hallucinationLabel;
305
+ }
306
+ if (hallucinationResult.hallucinationFindings && hallucinationResult.hallucinationFindings.length > 0) {
307
+ payload.hallucinationFindings = hallucinationResult.hallucinationFindings;
308
+ logger.debug(`Including ${hallucinationResult.hallucinationFindings.length} hallucination findings`);
309
+ }
310
+ else {
311
+ logger.debug('No hallucination findings to include');
312
+ }
313
+ await client.post(`/api/agents/${agentId}/test-results/${promptId}`, payload);
314
+ logger.debug(`Stored test result for prompt ${promptId} with status ${status}`);
315
+ }
316
+ catch (storeError) {
317
+ logger.warn('Could not store test result, trying minimal payload:', storeError?.message);
318
+ logger.debug('Store error details:', storeError?.response?.data);
319
+ const minimalPayload = {
320
+ status,
321
+ response: llmResponse,
322
+ expectedAnswer: expectedAnswer || null
323
+ };
324
+ if (hallucinationResult.hallucinationLabel) {
325
+ minimalPayload.hallucinationLabel = hallucinationResult.hallucinationLabel;
326
+ }
327
+ await client.post(`/api/agents/${agentId}/test-results/${promptId}`, minimalPayload);
328
+ logger.debug(`Stored minimal test result for prompt ${promptId}`);
329
+ }
184
330
  return {
185
331
  success: status === 'passed',
186
332
  question: promptText,
@@ -189,19 +335,71 @@ export const api = {
189
335
  };
190
336
  }
191
337
  catch (error) {
192
- logger.debug(`Error running prompt ${promptId}:`, error?.response?.data || error.message);
338
+ if (llmResponse) {
339
+ logger.warn('LLM responded successfully but evaluation failed, marking as passed');
340
+ try {
341
+ await client.post(`/api/agents/${agentId}/test-results/${promptId}`, {
342
+ status: 'passed',
343
+ response: llmResponse,
344
+ expectedAnswer: expectedAnswer || null
345
+ });
346
+ }
347
+ catch (storeError) {
348
+ logger.error('Could not store passed result:', storeError);
349
+ }
350
+ return {
351
+ success: true,
352
+ question: promptText,
353
+ llmResponse,
354
+ hallucinationResult: {
355
+ question: promptText,
356
+ llmResponse,
357
+ summary: 'Evaluation skipped (LLM responded successfully)',
358
+ facts: [],
359
+ status: 'passed',
360
+ hallucinationLabel: 'NO_HALLUCINATION',
361
+ hallucinationFindings: []
362
+ }
363
+ };
364
+ }
365
+ const errorMessage = error?.response?.data?.message || error?.message || 'Unknown error';
366
+ const errorDetails = {
367
+ message: errorMessage,
368
+ endpoint: agentEndpoint,
369
+ statusCode: error?.response?.status,
370
+ data: error?.response?.data
371
+ };
372
+ logger.error(`Error running prompt ${promptId}:`, errorDetails);
373
+ const errorResponse = `Error calling LLM endpoint: ${errorMessage}`;
193
374
  try {
194
375
  await client.post(`/api/agents/${agentId}/test-results/${promptId}`, {
195
- status: 'failed'
376
+ status: 'failed',
377
+ response: errorResponse,
378
+ expectedAnswer: expectedAnswer || null
196
379
  });
380
+ logger.debug(`Stored failed result for prompt ${promptId}`);
197
381
  }
198
382
  catch (e) {
199
- logger.debug('Failed to store failed result:', e);
383
+ logger.error('Failed to store failed result:', e?.message || e);
384
+ logger.debug('Error details:', e?.response?.data);
385
+ try {
386
+ await client.post(`/api/agents/${agentId}/test-results/${promptId}`, {
387
+ status: 'failed'
388
+ });
389
+ logger.debug(`Stored minimal failed result for prompt ${promptId}`);
390
+ }
391
+ catch (fallbackError) {
392
+ logger.error('Fallback storage also failed:', fallbackError);
393
+ }
394
+ }
395
+ if (onLLMResponse && !llmResponse) {
396
+ onLLMResponse(errorResponse);
200
397
  }
201
398
  return {
202
399
  success: false,
203
400
  question: promptText,
204
- error
401
+ llmResponse: errorResponse,
402
+ error: errorDetails
205
403
  };
206
404
  }
207
405
  },
package/dist/index.js CHANGED
@@ -2,5 +2,42 @@
2
2
  import React from 'react';
3
3
  import { render } from 'ink';
4
4
  import { App } from './App.js';
5
+ const parseArgs = () => {
6
+ const args = process.argv.slice(2);
7
+ const options = {
8
+ backendUrl: 'http://rippletide-backend.azurewebsites.net',
9
+ dashboardUrl: 'https://eval.rippletide.com'
10
+ };
11
+ for (let i = 0; i < args.length; i++) {
12
+ if ((args[i] === '--backend-url' || args[i] === '-b') && args[i + 1]) {
13
+ options.backendUrl = args[i + 1];
14
+ i++;
15
+ }
16
+ else if ((args[i] === '--dashboard-url' || args[i] === '-d') && args[i + 1]) {
17
+ options.dashboardUrl = args[i + 1];
18
+ i++;
19
+ }
20
+ else if (args[i] === '--help' || args[i] === '-h') {
21
+ console.log(`
22
+ Rippletide CLI
23
+
24
+ Usage:
25
+ rippletide eval [options]
26
+
27
+ Options:
28
+ -b, --backend-url <url> Backend API URL (default: http://rippletide-backend.azurewebsites.net)
29
+ -d, --dashboard-url <url> Dashboard URL (default: https://eval.rippletide.com)
30
+ -h, --help Show this help message
31
+
32
+ Examples:
33
+ rippletide eval
34
+ rippletide eval -b http://localhost:3001 -d http://localhost:5173
35
+ `);
36
+ process.exit(0);
37
+ }
38
+ }
39
+ return options;
40
+ };
41
+ const options = parseArgs();
5
42
  process.stdout.write('\x1Bc');
6
- render(React.createElement(App, null));
43
+ render(React.createElement(App, { backendUrl: options.backendUrl, dashboardUrl: options.dashboardUrl }));
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ export {};