opik-mcp 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +203 -0
- package/README.md +203 -0
- package/build/cli.js +72 -0
- package/build/client/index.html +323 -0
- package/build/config.js +205 -0
- package/build/debug-log.js +64 -0
- package/build/index.js +1847 -0
- package/build/mcp-server.js +96 -0
- package/build/test-client.js +436 -0
- package/build/transports/sse-transport.js +169 -0
- package/build/transports/types.js +4 -0
- package/build/types.js +4 -0
- package/build/utils/capabilities.js +303 -0
- package/build/utils/env.js +52 -0
- package/build/utils/examples.js +414 -0
- package/build/utils/metrics-info.js +263 -0
- package/build/utils/tracing-info.js +119 -0
- package/package.json +79 -0
|
@@ -0,0 +1,414 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Examples module for Opik Comet
|
|
3
|
+
* Provides usage examples for common tasks with the Opik API
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Static catalogue of usage examples, keyed by a hyphenated task id
 * (e.g. 'create-prompt').
 *
 * Each entry has the same shape:
 *   - title:       human-readable task name (returned by getAllExampleTasks)
 *   - description: one-sentence summary of the task
 *   - steps:       ordered list of plain-English steps
 *   - codeExample: a single template literal holding a Python snippet followed
 *                  by a JavaScript/TypeScript snippet
 *
 * Inside codeExample, backticks and `${` are backslash-escaped so that they
 * end up as literal text in the string instead of being evaluated here.
 *
 * NOTE(review): the snippets reference `opik.Client(api_key=...)` and
 * `@opik/sdk`; nothing in this file verifies them against a published SDK.
 */
const examples = {
    // Creating a prompt by name.
    'create-prompt': {
        title: 'Create Prompt',
        description: 'Create a new prompt in Opik to use with your LLM applications.',
        steps: [
            'Initialize the Opik client with your API key',
            'Define a name for your prompt',
            'Call the createPrompt API endpoint',
            'Store the returned promptId for future reference',
        ],
        codeExample: `
// Python Example
import opik

# Initialize the client
client = opik.Client(api_key="YOUR_API_KEY")

# Create a new prompt
prompt = client.create_prompt(name="My Customer Support Prompt")

# Store the prompt ID for future use
prompt_id = prompt["id"]
print(f"Created prompt with ID: {prompt_id}")

// JavaScript/TypeScript Example
import { OpikClient } from '@opik/sdk';

// Initialize the client
const client = new OpikClient({ apiKey: "YOUR_API_KEY" });

// Create a new prompt
const prompt = await client.createPrompt({ name: "My Customer Support Prompt" });

// Store the prompt ID for future use
const promptId = prompt.id;
console.log(\`Created prompt with ID: \${promptId}\`);
`,
    },
    // Adding a new version (template + commit message) to an existing prompt.
    'version-prompt': {
        title: 'Version Prompt',
        description: 'Create a new version of an existing prompt with updated template content.',
        steps: [
            'Initialize the Opik client with your API key',
            'Retrieve the prompt ID of the prompt you want to version',
            'Define the new template content',
            'Add a commit message describing the changes',
            'Call the createPromptVersion API endpoint',
        ],
        codeExample: `
// Python Example
import opik

# Initialize the client
client = opik.Client(api_key="YOUR_API_KEY")

# Define the prompt ID and new template content
prompt_id = "prompt_123456"
template = """
You are a helpful customer support agent for Acme Inc.
Please respond to the customer's query in a friendly and professional manner.

Customer query: {{query}}
"""
commit_message = "Updated template with more specific instructions"

# Create a new version
version = client.create_prompt_version(
prompt_id=prompt_id,
template=template,
commit_message=commit_message
)

print(f"Created version {version['version']} of prompt {prompt_id}")

// JavaScript/TypeScript Example
import { OpikClient } from '@opik/sdk';

// Initialize the client
const client = new OpikClient({ apiKey: "YOUR_API_KEY" });

// Define the prompt ID and new template content
const promptId = "prompt_123456";
const template = \`
You are a helpful customer support agent for Acme Inc.
Please respond to the customer's query in a friendly and professional manner.

Customer query: {{query}}
\`;
const commitMessage = "Updated template with more specific instructions";

// Create a new version
const version = await client.createPromptVersion({
promptId,
template,
commitMessage
});

console.log(\`Created version \${version.version} of prompt \${promptId}\`);
`,
    },
    // Creating a project with a name and description.
    'create-project': {
        title: 'Create Project',
        description: 'Create a new project in Opik to organize your prompts, traces, and evaluations.',
        steps: [
            'Initialize the Opik client with your API key',
            'Define a name and optional description for your project',
            'Call the createProject API endpoint',
            'Store the returned projectId for future reference',
        ],
        codeExample: `
// Python Example
import opik

# Initialize the client
client = opik.Client(api_key="YOUR_API_KEY")

# Create a new project
project = client.create_project(
name="Customer Support Bot",
description="A project for our customer support chatbot application"
)

# Store the project ID for future use
project_id = project["id"]
print(f"Created project with ID: {project_id}")

// JavaScript/TypeScript Example
import { OpikClient } from '@opik/sdk';

// Initialize the client
const client = new OpikClient({ apiKey: "YOUR_API_KEY" });

// Create a new project
const project = await client.createProject({
name: "Customer Support Bot",
description: "A project for our customer support chatbot application"
});

// Store the project ID for future use
const projectId = project.id;
console.log(\`Created project with ID: \${projectId}\`);
`,
    },
    // Logging one trace made of inputs, outputs and metadata.
    'log-trace': {
        title: 'Log Trace',
        description: 'Log a trace of an LLM interaction to capture inputs, outputs, and metadata for analysis.',
        steps: [
            'Initialize the Opik client with your API key',
            'Prepare the trace data including inputs, outputs, and metadata',
            'Optionally specify a project ID to associate the trace with',
            'Call the logTrace API endpoint',
            'Store the returned traceId for future reference',
        ],
        codeExample: `
// Python Example
import opik
from datetime import datetime

# Initialize the client
client = opik.Client(api_key="YOUR_API_KEY")

# Define the trace data
trace_data = {
"project_id": "project_123456", # Optional
"inputs": {
"prompt": "What is the capital of France?",
"model": "gpt-4",
"temperature": 0.7
},
"outputs": {
"completion": "The capital of France is Paris.",
"tokens": 8,
"finish_reason": "stop"
},
"metadata": {
"user_id": "user_789",
"session_id": "session_456",
"timestamp": datetime.now().isoformat()
}
}

# Log the trace
trace = client.log_trace(trace_data)

# Store the trace ID for future use
trace_id = trace["id"]
print(f"Logged trace with ID: {trace_id}")

// JavaScript/TypeScript Example
import { OpikClient } from '@opik/sdk';

// Initialize the client
const client = new OpikClient({ apiKey: "YOUR_API_KEY" });

// Define the trace data
const traceData = {
projectId: "project_123456", // Optional
inputs: {
prompt: "What is the capital of France?",
model: "gpt-4",
temperature: 0.7
},
outputs: {
completion: "The capital of France is Paris.",
tokens: 8,
finishReason: "stop"
},
metadata: {
userId: "user_789",
sessionId: "session_456",
timestamp: new Date().toISOString()
}
};

// Log the trace
const trace = await client.logTrace(traceData);

// Store the trace ID for future use
const traceId = trace.id;
console.log(\`Logged trace with ID: \${traceId}\`);
`,
    },
    // Searching traces over a 7-day window and reading aggregate statistics.
    'analyze-traces': {
        title: 'Analyze Traces',
        description: 'Search and analyze traces to gain insights into your LLM application performance.',
        steps: [
            'Initialize the Opik client with your API key',
            'Define search criteria such as time range, project ID, or content filters',
            'Call the searchTraces API endpoint',
            'Process the returned traces to extract insights',
            'Optionally use the getTraceStats API for aggregated metrics',
        ],
        codeExample: `
// Python Example
import opik
from datetime import datetime, timedelta

# Initialize the client
client = opik.Client(api_key="YOUR_API_KEY")

# Define search parameters
end_date = datetime.now()
start_date = end_date - timedelta(days=7)

# Search for traces
traces = client.search_traces(
project_id="project_123456",
start_date=start_date.isoformat(),
end_date=end_date.isoformat(),
query="capital of France", # Optional text search
limit=100
)

print(f"Found {len(traces)} traces")

# Get aggregated statistics
stats = client.get_trace_stats(
project_id="project_123456",
start_date=start_date.isoformat(),
end_date=end_date.isoformat()
)

print(f"Average response time: {stats['avg_response_time']}ms")
print(f"Total traces: {stats['total_traces']}")

// JavaScript/TypeScript Example
import { OpikClient } from '@opik/sdk';

// Initialize the client
const client = new OpikClient({ apiKey: "YOUR_API_KEY" });

// Define search parameters
const endDate = new Date();
const startDate = new Date(endDate);
startDate.setDate(startDate.getDate() - 7);

// Search for traces
const traces = await client.searchTraces({
projectId: "project_123456",
startDate: startDate.toISOString(),
endDate: endDate.toISOString(),
query: "capital of France", // Optional text search
limit: 100
});

console.log(\`Found \${traces.length} traces\`);

// Get aggregated statistics
const stats = await client.getTraceStats({
projectId: "project_123456",
startDate: startDate.toISOString(),
endDate: endDate.toISOString()
});

console.log(\`Average response time: \${stats.avgResponseTime}ms\`);
console.log(\`Total traces: \${stats.totalTraces}\`);
`,
    },
    // Scoring responses with the "hallucination" and "answerrelevance" metrics.
    'evaluate-response': {
        title: 'Evaluate Response',
        description: "Evaluate LLM responses using Opik's evaluation metrics to measure quality and performance.",
        steps: [
            'Initialize the Opik client with your API key',
            'Select the appropriate evaluation metric',
            'Prepare the parameters for the evaluation',
            'Call the evaluateMetric API endpoint',
            'Process the evaluation results',
        ],
        codeExample: `
// Python Example
import opik

# Initialize the client
client = opik.Client(api_key="YOUR_API_KEY")

# Evaluate for hallucination
hallucination_result = client.evaluate_metric(
metric="hallucination",
parameters={
"answer": "Einstein was born in 1879 in Germany and developed the theory of relativity.",
"context": "Albert Einstein was born on March 14, 1879, in Ulm, Germany."
}
)

print(f"Hallucination score: {hallucination_result['score']}")

# Evaluate for answer relevance
relevance_result = client.evaluate_metric(
metric="answerrelevance",
parameters={
"question": "What are the main causes of climate change?",
"answer": "Climate change is primarily caused by greenhouse gas emissions from human activities."
}
)

print(f"Relevance score: {relevance_result['score']}")

// JavaScript/TypeScript Example
import { OpikClient } from '@opik/sdk';

// Initialize the client
const client = new OpikClient({ apiKey: "YOUR_API_KEY" });

// Evaluate for hallucination
const hallucinationResult = await client.evaluateMetric({
metric: "hallucination",
parameters: {
answer: "Einstein was born in 1879 in Germany and developed the theory of relativity.",
context: "Albert Einstein was born on March 14, 1879, in Ulm, Germany."
}
});

console.log(\`Hallucination score: \${hallucinationResult.score}\`);

// Evaluate for answer relevance
const relevanceResult = await client.evaluateMetric({
metric: "answerrelevance",
parameters: {
question: "What are the main causes of climate change?",
answer: "Climate change is primarily caused by greenhouse gas emissions from human activities."
}
});

console.log(\`Relevance score: \${relevanceResult.score}\`);
`,
    },
};
|
|
373
|
+
/**
 * Get an example for a specific task.
 *
 * The query is lower-cased and trimmed, then matched against the keys of
 * `examples` in three passes; the first hit wins:
 *   1. exact match on the key, either hyphenated ('create-prompt') or with
 *      hyphens replaced by spaces ('create prompt');
 *   2. word match: every word of the key is matched by some word of the query;
 *   3. phrase containment: the query contains the spaced key, or the spaced
 *      key contains the query.
 *
 * @param {string} task The task to get an example for
 * @returns {object|null} Example data for the specified task, or null when
 *   the input is not a string, is empty/whitespace-only, or matches nothing
 */
export function getExampleForTask(task) {
    // Anything that is not a string (null, undefined, numbers, ...) cannot
    // name a task; return null instead of throwing on .toLowerCase().
    if (typeof task !== 'string')
        return null;
    const normalizedTask = task.toLowerCase().trim();
    // The empty string is a substring of every key, so without this guard a
    // whitespace-only query would "match" the first example in passes 2 and 3.
    if (normalizedTask === '')
        return null;
    // Query words shorter than this are not allowed to match *inside* a key
    // word; otherwise filler words such as "a" or "i" match "create",
    // "analyze", ... and the wrong example is returned.
    const minPartialWordLength = 3;
    const taskWords = normalizedTask.split(/\s+/);
    // Pre-compute the word list and spaced phrase of every key once.
    // split/join replaces every hyphen, not just the first one.
    const candidates = Object.entries(examples).map(([key, example]) => {
        const keyWords = key.split('-');
        return { key, keyWords, phrase: keyWords.join(' '), example };
    });
    // Pass 1: direct match
    for (const { key, phrase, example } of candidates) {
        if (phrase === normalizedTask || key === normalizedTask) {
            return example;
        }
    }
    // Pass 2: every key word must be matched by some query word. A query word
    // matches when it contains the key word ("prompts" ~ "prompt") or, if it
    // is long enough, when the key word contains it ("trace" ~ "traces").
    const wordsMatch = (keyWord, taskWord) => taskWord.includes(keyWord) ||
        (taskWord.length >= minPartialWordLength && keyWord.includes(taskWord));
    for (const { keyWords, example } of candidates) {
        const allWordsMatch = keyWords.every(keyWord => taskWords.some(taskWord => wordsMatch(keyWord, taskWord)));
        if (allWordsMatch) {
            return example;
        }
    }
    // Pass 3: whole-phrase containment in either direction
    for (const { phrase, example } of candidates) {
        if (normalizedTask.includes(phrase) || phrase.includes(normalizedTask)) {
            return example;
        }
    }
    return null;
}
|
|
408
|
+
/**
 * List the titles of every registered example.
 * @returns Array of task titles, in the insertion order of the `examples` table
 */
export function getAllExampleTasks() {
    const titles = [];
    for (const taskKey of Object.keys(examples)) {
        titles.push(examples[taskKey].title);
    }
    return titles;
}
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metrics information module for Opik Comet
|
|
3
|
+
* Provides detailed information about Opik's evaluation metrics
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Static catalogue of evaluation metrics, keyed by the lower-cased metric id
 * (e.g. 'answerrelevance').
 *
 * Each entry has the same shape:
 *   - name:        display name of the metric
 *   - description: one-sentence summary
 *   - type:        'AI-based' or 'Rule-based'
 *   - use_cases:   list of typical applications
 *   - parameters:  map of parameter name -> plain-English description
 *   - example:     template literal with a sample `opik.evaluateMetric` call
 *                  and a trailing comment describing the returned score
 */
const metricsInfo = {
    // NOTE(review): the example below states "0 indicates high hallucination
    // and 1 indicates no hallucination" — confirm this direction against the
    // Opik Hallucination metric documentation.
    hallucination: {
        name: 'Hallucination',
        description: 'Detects unsupported or factually incorrect information generated by LLMs.',
        type: 'AI-based',
        use_cases: [
            'Fact-checking LLM outputs',
            'Ensuring responses are grounded in provided context',
            'Identifying fabricated information',
            'Quality control for knowledge-intensive applications',
        ],
        parameters: {
            answer: 'The LLM-generated text to evaluate',
            context: 'Optional reference text to check against (if provided)',
        },
        example: `
// Example API call
const result = await opik.evaluateMetric({
metric: "hallucination",
parameters: {
answer: "Einstein was born in 1879 in Germany and developed the theory of relativity.",
context: "Albert Einstein was born on March 14, 1879, in Ulm, Germany."
}
});
// Returns a score between 0-1, where 0 indicates high hallucination and 1 indicates no hallucination
`,
    },
    // Relevance of an answer to its question.
    answerrelevance: {
        name: 'AnswerRelevance',
        description: 'Evaluates how relevant an answer is to a given question.',
        type: 'AI-based',
        use_cases: [
            'Ensuring LLM responses address the user query',
            'Detecting off-topic or tangential responses',
            'Measuring answer quality for question-answering systems',
            'Filtering irrelevant content from responses',
        ],
        parameters: {
            question: 'The question or prompt given to the LLM',
            answer: 'The LLM-generated response to evaluate',
        },
        example: `
// Example API call
const result = await opik.evaluateMetric({
metric: "answerrelevance",
parameters: {
question: "What are the main causes of climate change?",
answer: "Climate change is primarily caused by greenhouse gas emissions from human activities such as burning fossil fuels, deforestation, and industrial processes."
}
});
// Returns a score between 0-1, where higher values indicate greater relevance
`,
    },
    // Precision of an answer with respect to the supplied context.
    contextprecision: {
        name: 'ContextPrecision',
        description: 'Measures how precisely an answer uses the provided context without including irrelevant information.',
        type: 'AI-based',
        use_cases: [
            'Evaluating RAG system outputs',
            'Ensuring responses stay focused on relevant context',
            'Detecting when models add unnecessary information',
            'Measuring information quality in context-based responses',
        ],
        parameters: {
            answer: 'The LLM-generated response to evaluate',
            context: 'The context provided to the LLM',
        },
        example: `
// Example API call
const result = await opik.evaluateMetric({
metric: "contextprecision",
parameters: {
answer: "The company reported a 15% increase in revenue for Q2 2023, exceeding analyst expectations.",
context: "In its quarterly report, the company announced a 15% year-over-year revenue increase for Q2 2023, with total revenue reaching $2.3 billion."
}
});
// Returns a score between 0-1, where higher values indicate better precision
`,
    },
    // Recall of an answer with respect to the supplied context.
    contextrecall: {
        name: 'ContextRecall',
        description: 'Assesses how completely an answer captures the relevant information from the provided context.',
        type: 'AI-based',
        use_cases: [
            'Ensuring comprehensive use of relevant context',
            'Detecting when important information is omitted',
            'Evaluating information completeness in summaries',
            'Measuring context utilization in RAG systems',
        ],
        parameters: {
            answer: 'The LLM-generated response to evaluate',
            context: 'The context provided to the LLM',
        },
        example: `
// Example API call
const result = await opik.evaluateMetric({
metric: "contextrecall",
parameters: {
answer: "The company reported increased revenue in Q2.",
context: "In its quarterly report, the company announced a 15% year-over-year revenue increase for Q2 2023, with total revenue reaching $2.3 billion."
}
});
// Returns a score between 0-1, where higher values indicate better recall
`,
    },
    // NOTE(review): the example below states "0 indicates harmful content and
    // 1 indicates safe content" — confirm this direction against the Opik
    // Moderation metric documentation.
    moderation: {
        name: 'Moderation',
        description: 'Detects harmful or inappropriate content in LLM outputs, including toxicity, profanity, and unsafe content.',
        type: 'AI-based',
        use_cases: [
            'Content filtering for user-facing applications',
            'Safety checks for generated content',
            'Identifying policy violations in responses',
            'Preventing harmful outputs in production systems',
        ],
        parameters: {
            text: 'The text to evaluate for harmful content',
        },
        example: `
// Example API call
const result = await opik.evaluateMetric({
metric: "moderation",
parameters: {
text: "This is a sample text that will be evaluated for harmful content."
}
});
// Returns a score between 0-1, where 0 indicates harmful content and 1 indicates safe content
`,
    },
    // Exact-match comparison against an expected value.
    equals: {
        name: 'Equals',
        description: 'Simple exact match comparison between the answer and an expected value.',
        type: 'Rule-based',
        use_cases: [
            'Validating exact outputs for deterministic tasks',
            'Testing for specific expected responses',
            'Checking format compliance in structured outputs',
            'Verifying exact matches in critical applications',
        ],
        parameters: {
            answer: 'The text to evaluate',
            expected: 'The expected exact value',
        },
        example: `
// Example API call
const result = await opik.evaluateMetric({
metric: "equals",
parameters: {
answer: "42",
expected: "42"
}
});
// Returns 1 for exact match, 0 otherwise
`,
    },
    // Regular-expression validation. The doubled backslash in the pattern
    // below yields a single backslash in the resulting example string.
    regexmatch: {
        name: 'RegexMatch',
        description: 'Validates answers against regular expression patterns.',
        type: 'Rule-based',
        use_cases: [
            'Validating formatted outputs (emails, phone numbers, etc.)',
            'Checking for pattern compliance',
            'Extracting structured data from responses',
            'Verifying output format consistency',
        ],
        parameters: {
            answer: 'The text to evaluate',
            pattern: 'The regex pattern to match against',
        },
        example: `
// Example API call
const result = await opik.evaluateMetric({
metric: "regexmatch",
parameters: {
answer: "user@example.com",
pattern: "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$"
}
});
// Returns 1 if the pattern matches, 0 otherwise
`,
    },
    // Substring presence check.
    contains: {
        name: 'Contains',
        description: 'Checks if the answer contains specific substrings.',
        type: 'Rule-based',
        use_cases: [
            'Verifying key information is included in responses',
            'Checking for required elements in outputs',
            'Testing for inclusion of critical terms',
            'Basic content validation',
        ],
        parameters: {
            answer: 'The text to evaluate',
            substrings: 'Array of substrings to check for',
        },
        example: `
// Example API call
const result = await opik.evaluateMetric({
metric: "contains",
parameters: {
answer: "The capital of France is Paris, which is known for the Eiffel Tower.",
substrings: ["Paris", "France", "capital"]
}
});
// Returns a score between 0-1 based on the proportion of substrings found
`,
    },
    // Normalised Levenshtein similarity; the sample answer below contains a
    // deliberate misspelling ("Frence") to illustrate a near match.
    levenshteinratio: {
        name: 'LevenshteinRatio',
        description: 'Measures string similarity using Levenshtein distance, normalized to a ratio.',
        type: 'Rule-based',
        use_cases: [
            'Fuzzy matching for approximate answers',
            'Evaluating text similarity with tolerance for minor differences',
            'Spell-check validation',
            'Measuring response closeness to expected outputs',
        ],
        parameters: {
            answer: 'The text to evaluate',
            expected: 'The expected text to compare against',
        },
        example: `
// Example API call
const result = await opik.evaluateMetric({
metric: "levenshteinratio",
parameters: {
answer: "The capital of Frence is Paris.",
expected: "The capital of France is Paris."
}
});
// Returns a score between 0-1, where higher values indicate greater similarity
`,
    },
};
|
|
240
|
+
/**
 * Get information about a specific evaluation metric.
 *
 * The lookup is case-insensitive and ignores whitespace, hyphens and
 * underscores, so 'AnswerRelevance', ' answer relevance ' and
 * 'answer_relevance' all resolve to the 'answerrelevance' entry.
 *
 * @param {string} metric The name of the metric to get information about
 * @returns {object|null} Information about the specified metric, or null when
 *   the input is not a string, is empty after normalisation, or is unknown
 */
export function getMetricInfo(metric) {
    // Non-string input cannot name a metric; return null instead of throwing
    // on .toLowerCase().
    if (typeof metric !== 'string')
        return null;
    // Table keys are the metric names with separators removed, so strip the
    // same separators from the query before comparing.
    const normalizedMetric = metric.toLowerCase().replace(/[\s_-]+/g, '');
    if (normalizedMetric === '')
        return null;
    // Check if the metric exists in our info object
    for (const [key, info] of Object.entries(metricsInfo)) {
        if (key.toLowerCase() === normalizedMetric) {
            return info;
        }
    }
    return null;
}
|
|
257
|
+
/**
 * Collect every entry of the metrics table.
 * @returns Array of metric information objects, in the insertion order of
 *   `metricsInfo`; a new array is built on each call
 */
export function getAllMetricsInfo() {
    const metricKeys = Object.keys(metricsInfo);
    return metricKeys.map(metricKey => metricsInfo[metricKey]);
}
|