judgeval 0.1.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +202 -0
- package/README.md +340 -0
- package/dist/clients.d.ts +7 -0
- package/dist/clients.js +78 -0
- package/dist/clients.js.map +1 -0
- package/dist/common/integrations/langgraph.d.ts +40 -0
- package/dist/common/integrations/langgraph.js +444 -0
- package/dist/common/integrations/langgraph.js.map +1 -0
- package/dist/common/logger-instance.d.ts +3 -0
- package/dist/common/logger-instance.js +64 -0
- package/dist/common/logger-instance.js.map +1 -0
- package/dist/common/logger.d.ts +54 -0
- package/dist/common/logger.js +221 -0
- package/dist/common/logger.js.map +1 -0
- package/dist/common/tracer.d.ts +205 -0
- package/dist/common/tracer.js +1035 -0
- package/dist/common/tracer.js.map +1 -0
- package/dist/constants.d.ts +51 -0
- package/dist/constants.js +344 -0
- package/dist/constants.js.map +1 -0
- package/dist/data/example.d.ts +70 -0
- package/dist/data/example.js +125 -0
- package/dist/data/example.js.map +1 -0
- package/dist/data/result.d.ts +51 -0
- package/dist/data/result.js +83 -0
- package/dist/data/result.js.map +1 -0
- package/dist/evaluation-run.d.ts +44 -0
- package/dist/evaluation-run.js +136 -0
- package/dist/evaluation-run.js.map +1 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.js +73 -0
- package/dist/index.js.map +1 -0
- package/dist/judgment-client.d.ts +179 -0
- package/dist/judgment-client.js +1038 -0
- package/dist/judgment-client.js.map +1 -0
- package/dist/rules.d.ts +120 -0
- package/dist/rules.js +322 -0
- package/dist/rules.js.map +1 -0
- package/dist/run-evaluation.d.ts +78 -0
- package/dist/run-evaluation.js +618 -0
- package/dist/run-evaluation.js.map +1 -0
- package/dist/scorers/api-scorer.d.ts +79 -0
- package/dist/scorers/api-scorer.js +291 -0
- package/dist/scorers/api-scorer.js.map +1 -0
- package/dist/scorers/base-scorer.d.ts +100 -0
- package/dist/scorers/base-scorer.js +190 -0
- package/dist/scorers/base-scorer.js.map +1 -0
- package/dist/scorers/exact-match-scorer.d.ts +10 -0
- package/dist/scorers/exact-match-scorer.js +84 -0
- package/dist/scorers/exact-match-scorer.js.map +1 -0
- package/package.json +88 -0
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Logger utilities and result printing for the JudgEval TypeScript SDK
|
|
4
|
+
*/
|
|
5
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
6
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
7
|
+
};
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
exports.print = exports.printResults = exports.formatEvaluationResults = exports.withExampleContext = exports.clearExampleContext = exports.setExampleContext = exports.error = exports.warn = exports.warning = exports.info = exports.log = exports.debug = void 0;
|
|
10
|
+
// Remove fs and path imports if no longer needed
|
|
11
|
+
// import * as fs from 'fs';
|
|
12
|
+
// import * as path from 'path';
|
|
13
|
+
const logger_instance_1 = __importDefault(require("./logger-instance")); // Import the configured winston logger
|
|
14
|
+
// Track current example info (Keep for potential context integration)
|
|
15
|
+
let currentExampleId = null;
|
|
16
|
+
let currentTimestamp = null;
|
|
17
|
+
/**
 * Emit a debug-level message through the logger imported from ./logger-instance.
 *
 * The current example context (`exampleId`, `timestamp`) is merged in last, so it
 * overrides same-named keys supplied in `meta`.
 *
 * @param message Text to log.
 * @param meta Optional extra metadata to attach to the log record.
 */
function debug(message, meta) {
    const record = Object.assign({}, meta, {
        exampleId: currentExampleId,
        timestamp: currentTimestamp,
    });
    logger_instance_1.default.debug(message, record);
}
|
|
23
|
+
exports.debug = debug;
|
|
24
|
+
/**
 * Log an info-level message, console.log style.
 *
 * Any extra positional arguments are collected under the `args` metadata key
 * (an empty array when none were passed), alongside the current example context.
 *
 * @param message Text to log.
 * @param args Additional values to attach to the log record.
 */
function log(message, ...args) {
    // A rest parameter is always an array, so it can be attached as-is.
    const record = {
        args: args,
        exampleId: currentExampleId,
        timestamp: currentTimestamp,
    };
    logger_instance_1.default.info(message, record);
}
|
|
35
|
+
exports.log = log;
|
|
36
|
+
/**
 * Emit an info-level message through the logger imported from ./logger-instance.
 *
 * The current example context (`exampleId`, `timestamp`) is merged in last, so it
 * overrides same-named keys supplied in `meta`.
 *
 * @param message Text to log.
 * @param meta Optional extra metadata to attach to the log record.
 */
function info(message, meta) {
    const record = Object.assign({}, meta, {
        exampleId: currentExampleId,
        timestamp: currentTimestamp,
    });
    logger_instance_1.default.info(message, record);
}
|
|
42
|
+
exports.info = info;
|
|
43
|
+
/**
 * Emit a warn-level message through the logger imported from ./logger-instance.
 *
 * The current example context (`exampleId`, `timestamp`) is merged in last, so it
 * overrides same-named keys supplied in `meta`.
 *
 * @param message Text to log.
 * @param meta Optional extra metadata to attach to the log record.
 */
function warning(message, meta) {
    const record = Object.assign({}, meta, {
        exampleId: currentExampleId,
        timestamp: currentTimestamp,
    });
    logger_instance_1.default.warn(message, record);
}
|
|
49
|
+
exports.warning = warning;
|
|
50
|
+
/**
 * Short alias for `warning`: forwards both arguments unchanged.
 *
 * @param message Text to log.
 * @param meta Optional extra metadata to attach to the log record.
 */
function warn(message, meta) {
    // Delegate so both names always share one implementation.
    warning(message, meta);
}
|
|
56
|
+
exports.warn = warn;
|
|
57
|
+
/**
 * Emit an error-level message through the logger imported from ./logger-instance.
 *
 * The current example context (`exampleId`, `timestamp`) is merged in last, so it
 * overrides same-named keys supplied in `meta`.
 *
 * @param message Text to log.
 * @param meta Optional extra metadata to attach to the log record.
 */
function error(message, meta) {
    const record = Object.assign({}, meta, {
        exampleId: currentExampleId,
        timestamp: currentTimestamp,
    });
    logger_instance_1.default.error(message, record);
}
|
|
63
|
+
exports.error = error;
|
|
64
|
+
/**
 * Record the example id and timestamp that subsequent log calls in this module
 * attach to their metadata.
 *
 * The values are stored in module-level variables, so they apply to every
 * caller of this module until changed or cleared.
 *
 * @param exampleId Identifier of the example being processed.
 * @param timestamp Timestamp associated with that example.
 */
function setExampleContext(exampleId, timestamp) {
    [currentExampleId, currentTimestamp] = [exampleId, timestamp];
}
|
|
71
|
+
exports.setExampleContext = setExampleContext;
|
|
72
|
+
/**
 * Reset the module-level example context so later log calls carry
 * `exampleId: null` and `timestamp: null`.
 */
function clearExampleContext() {
    currentExampleId = currentTimestamp = null;
}
|
|
79
|
+
exports.clearExampleContext = clearExampleContext;
|
|
80
|
+
/**
 * Run `fn` with the given example context applied to log calls, then put the
 * previous context back.
 *
 * - If `fn` returns synchronously, the previous context is restored before this
 *   function returns (also when `fn` throws).
 * - If `fn` returns a promise/thenable, restoration is deferred until it
 *   settles, so logs emitted after the first `await` still carry the context.
 *   The original thenable is returned unchanged.
 * - Nested calls restore the outer context instead of wiping it.
 *
 * NOTE: the context lives in module-level variables, so overlapping
 * (concurrent) async invocations can still observe each other's context.
 *
 * @param exampleId Identifier of the example being processed.
 * @param timestamp Timestamp associated with that example.
 * @param fn Callback to execute inside the context.
 * @returns Whatever `fn` returns.
 */
function withExampleContext(exampleId, timestamp, fn) {
    const previousExampleId = currentExampleId;
    const previousTimestamp = currentTimestamp;
    const restore = () => {
        currentExampleId = previousExampleId;
        currentTimestamp = previousTimestamp;
    };
    setExampleContext(exampleId, timestamp);
    let result;
    try {
        result = fn();
    }
    catch (err) {
        restore();
        throw err;
    }
    if (result !== null && result !== undefined && typeof result.then === 'function') {
        // Restore on either outcome; the derived promise always resolves, so it
        // cannot cause an unhandled rejection. The caller still receives the
        // original thenable and its rejection (if any).
        result.then(restore, restore);
        return result;
    }
    restore();
    return result;
}
|
|
92
|
+
exports.withExampleContext = withExampleContext;
|
|
93
|
+
/**
 * Build a human-readable summary of evaluation results.
 *
 * The summary contains a header, the success rate, and one section per failed
 * result. A result counts as successful when its `success` flag is truthy or
 * when it has a `scorersData` array whose entries all succeeded; every other
 * result is a failure. The two groups are exact complements, so the success
 * rate and the failure list always agree.
 *
 * @param results Array of evaluation results. A missing, non-array, or empty
 *   value yields an empty string.
 * @param projectName Unused; kept for signature compatibility.
 * @param evalName Unused; kept for signature compatibility.
 * @returns The formatted summary, or `''` when there is nothing to report.
 */
function formatEvaluationResults(results, projectName, evalName) {
    if (!Array.isArray(results) || results.length === 0) {
        return '';
    }
    const isSuccessful = (result) => Boolean(result && (result.success ||
        (Array.isArray(result.scorersData) && result.scorersData.every((s) => s.success))));
    // Render an example input without producing "[object Object]".
    const describeInput = (input) => {
        if (input === undefined || input === null || input === '') {
            return 'N/A';
        }
        if (typeof input === 'string') {
            return input;
        }
        try {
            const json = JSON.stringify(input);
            return json === undefined ? String(input) : json;
        }
        catch (err) {
            // e.g. circular structures
            return String(input);
        }
    };
    const total = results.length;
    const failures = results.filter((r) => !isSuccessful(r));
    const passedCount = total - failures.length;
    const successRate = (passedCount / total) * 100;
    let output = `\n=== Evaluation Results (${total} examples) ===\n\n`;
    output += `Success Rate: ${successRate.toFixed(2)}% (${passedCount}/${total})\n\n`;
    if (failures.length === 0) {
        return output;
    }
    output += `Failures (${failures.length}):\n`;
    failures.forEach((failure, index) => {
        const entry = failure || {};
        output += `\nExample ${index + 1}:\n`;
        output += `Input: ${describeInput(entry.example ? entry.example.input : undefined)}\n`;
        const failedScorers = Array.isArray(entry.scorersData)
            ? entry.scorersData.filter((s) => !s.success)
            : [];
        if (failedScorers.length > 0) {
            output += 'Scorer Failures:\n';
            for (const scorer of failedScorers) {
                output += ` - ${scorer.name}: ${scorer.error || 'Unknown error'}\n`;
            }
        }
        else {
            // No failing scorer to blame: report the result-level error instead.
            output += `Error: ${entry.error || 'Unknown error'}\n`;
        }
    });
    return output;
}
|
|
130
|
+
exports.formatEvaluationResults = formatEvaluationResults;
|
|
131
|
+
/**
 * Print evaluation results to the console.
 *
 * Output order:
 *   1. a "View results" URL, when both `projectName` and `evalName` are given;
 *   2. the summary produced by `formatEvaluationResults`, when non-empty;
 *   3. the raw results as pretty-printed JSON.
 *
 * @param results Array of evaluation results.
 * @param projectName Project name used to build the results URL.
 * @param evalName Evaluation run name used to build the results URL.
 */
function printResults(results, projectName, evalName) {
    if (projectName && evalName) {
        const baseUrl = 'https://app.judgmentlabs.ai/app/experiment';
        // Names may contain spaces, '&', '#', etc.; encode them so the link stays valid.
        const query = `project_name=${encodeURIComponent(projectName)}&eval_run_name=${encodeURIComponent(evalName)}`;
        console.log(`\n🔍 View results: ${baseUrl}?${query}\n`);
    }
    const formattedResults = formatEvaluationResults(results, projectName, evalName);
    if (formattedResults) {
        console.log(formattedResults);
    }
    console.log(JSON.stringify(results, null, 2));
}
|
|
153
|
+
exports.printResults = printResults;
|
|
154
|
+
/**
 * Print a value to the console, choosing a layout from its shape.
 *
 * Checked in this order:
 *   1. Array: treated as evaluation results and passed to `printResults`. The
 *      project and eval names are taken from the first element's `metadata`
 *      (`project_name`, `eval_name`) when present.
 *   2. Object with a truthy `traceId`: trace header, an optional "View trace"
 *      URL (when `projectName` is set), then the object as JSON.
 *   3. Object whose `title` is "Workflow Analysis Results": scorer performance,
 *      areas for improvement, and strengths.
 *   4. Object with truthy `title` and `recommendations`: numbered list.
 *   5. Anything else: pretty-printed JSON.
 *
 * @param data Value to print.
 */
function print(data) {
    const printNumbered = (items) => {
        items.forEach((item, index) => {
            console.log(`${index + 1}. ${item}`);
        });
    };
    const isObject = Boolean(data) && typeof data === 'object';
    if (Array.isArray(data)) {
        let projectName, evalName;
        // Guard the first element itself: it may be null/undefined.
        const first = data.length > 0 ? data[0] : undefined;
        if (first && first.metadata) {
            projectName = first.metadata.project_name;
            evalName = first.metadata.eval_name;
        }
        printResults(data, projectName, evalName);
        return;
    }
    if (isObject && data.traceId) {
        console.log(`\n--- Trace: ${data.name || 'Unnamed'} (ID: ${data.traceId}) ---`);
        if (data.projectName) {
            // Encode each value so names with spaces or '&' do not break the link.
            const query = `project_name=${encodeURIComponent(data.projectName)}` +
                `&trace_id=${encodeURIComponent(data.traceId)}` +
                `&trace_name=${encodeURIComponent(data.name || 'trace')}` +
                `&show_trace=true`;
            const traceUrl = `https://app.judgmentlabs.ai/app/monitor?${query}`;
            console.log(`\n🔍 View trace: ${traceUrl}\n`);
        }
        console.log(JSON.stringify(data, null, 2));
        return;
    }
    if (isObject && data.title === "Workflow Analysis Results") {
        console.log(`\n=== ${data.title} ===\n`);
        console.log('Scorer Performance Summary:');
        console.log('----------------------------');
        if (Array.isArray(data.scorerPerformance)) {
            data.scorerPerformance.forEach((scorer) => {
                const entry = scorer || {};
                // Tolerate missing names / non-numeric scores instead of throwing.
                const name = entry.name === undefined || entry.name === null ? 'Unnamed' : String(entry.name);
                const score = typeof entry.score === 'number' ? entry.score.toFixed(2) : 'N/A';
                console.log(`${name.padEnd(30)} ${score} (${entry.rating})`);
            });
        }
        if (Array.isArray(data.areasForImprovement) && data.areasForImprovement.length > 0) {
            console.log('\nAreas for Improvement:');
            console.log('----------------------');
            printNumbered(data.areasForImprovement);
        }
        if (Array.isArray(data.strengths) && data.strengths.length > 0) {
            console.log('\nStrengths:');
            console.log('----------');
            printNumbered(data.strengths);
        }
        return;
    }
    if (isObject && data.title && data.recommendations) {
        console.log(`\n=== ${data.title} ===`);
        if (Array.isArray(data.recommendations)) {
            printNumbered(data.recommendations);
        }
        return;
    }
    console.log(JSON.stringify(data, null, 2));
}
|
|
220
|
+
exports.print = print;
|
|
221
|
+
//# sourceMappingURL=logger.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"logger.js","sourceRoot":"","sources":["../../src/common/logger.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;AAEH,iDAAiD;AACjD,4BAA4B;AAC5B,gCAAgC;AAChC,wEAAuC,CAAC,uCAAuC;AAE/E,sEAAsE;AACtE,IAAI,gBAAgB,GAAkB,IAAI,CAAC;AAC3C,IAAI,gBAAgB,GAAkB,IAAI,CAAC;AAE3C;;GAEG;AACH,SAAgB,KAAK,CAAC,OAAe,EAAE,IAA0B;IAC/D,yBAAM,CAAC,KAAK,CAAC,OAAO,kCAAO,IAAI,KAAE,SAAS,EAAE,gBAAgB,EAAE,SAAS,EAAE,gBAAgB,IAAG,CAAC;AAC/F,CAAC;AAFD,sBAEC;AAED;;GAEG;AACH,SAAgB,GAAG,CAAC,OAAe,EAAE,GAAG,IAAW;IACjD,yDAAyD;IACzD,IAAI,IAAI,GAAwB,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC;IAC7C,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpB,IAAI,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IACxB,CAAC;IACD,yBAAM,CAAC,IAAI,CAAC,OAAO,kCAAO,IAAI,KAAE,SAAS,EAAE,gBAAgB,EAAE,SAAS,EAAE,gBAAgB,IAAG,CAAC;AAC9F,CAAC;AAPD,kBAOC;AAED;;GAEG;AACH,SAAgB,IAAI,CAAC,OAAe,EAAE,IAA0B;IAC9D,yBAAM,CAAC,IAAI,CAAC,OAAO,kCAAO,IAAI,KAAE,SAAS,EAAE,gBAAgB,EAAE,SAAS,EAAE,gBAAgB,IAAG,CAAC;AAC9F,CAAC;AAFD,oBAEC;AAED;;GAEG;AACH,SAAgB,OAAO,CAAC,OAAe,EAAE,IAA0B;IACjE,yBAAM,CAAC,IAAI,CAAC,OAAO,kCAAO,IAAI,KAAE,SAAS,EAAE,gBAAgB,EAAE,SAAS,EAAE,gBAAgB,IAAG,CAAC;AAC9F,CAAC;AAFD,0BAEC;AAED;;GAEG;AACH,SAAgB,IAAI,CAAC,OAAe,EAAE,IAA0B;IAC9D,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;AACzB,CAAC;AAFD,oBAEC;AAED;;GAEG;AACH,SAAgB,KAAK,CAAC,OAAe,EAAE,IAA0B;IAC/D,yBAAM,CAAC,KAAK,CAAC,OAAO,kCAAO,IAAI,KAAE,SAAS,EAAE,gBAAgB,EAAE,SAAS,EAAE,gBAAgB,IAAG,CAAC;AAC/F,CAAC;AAFD,sBAEC;AAED;;GAEG;AACH,SAAgB,iBAAiB,CAAC,SAAiB,EAAE,SAAiB;IACpE,gBAAgB,GAAG,SAAS,CAAC;IAC7B,gBAAgB,GAAG,SAAS,CAAC;AAC/B,CAAC;AAHD,8CAGC;AAED;;GAEG;AACH,SAAgB,mBAAmB;IACjC,gBAAgB,GAAG,IAAI,CAAC;IACxB,gBAAgB,GAAG,IAAI,CAAC;AAC1B,CAAC;AAHD,kDAGC;AAED;;GAEG;AACH,SAAgB,kBAAkB,CAAI,SAAiB,EAAE,SAAiB,EAAE,EAAW;IACrF,iBAAiB,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;IACxC,IAAI,CAAC;QACH,OAAO,EAAE,EAAE,CAAC;IACd,CAAC;YAAS,CAAC;QACT,mBAAmB,EAAE,CAAC;IACxB,CAAC;AACH,CAAC;AAPD,gDAOC;AAED;;;GAGG;AACH,SAAgB,uBAAuB,CAAC,OAAc,EAAE,WAAoB,EAAE,QAAiB;;IAC7F,IAAI,MAAM,GAAG,EAAE,CAAC;IAEhB,4BAA4B;IAC5B,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;Q
ACvB,MAAM,IAAI,6BAA6B,OAAO,CAAC,MAAM,oBAAoB,CAAC;QAE1E,yBAAyB;QACzB,MAAM,kBAAkB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,eAC5C,OAAA,CAAC,CAAC,OAAO,IAAI,CAAC,MAAA,MAAA,CAAC,CAAC,WAAW,0CAAE,KAAK,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,mCAAI,KAAK,CAAC,CAAA,EAAA,CACpE,CAAC,MAAM,CAAC;QAET,MAAM,WAAW,GAAG,CAAC,kBAAkB,GAAG,OAAO,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC;QAChE,MAAM,IAAI,iBAAiB,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,kBAAkB,IAAI,OAAO,CAAC,MAAM,OAAO,CAAC;QAEnG,wBAAwB;QACxB,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,eAClC,OAAA,CAAC,CAAC,CAAC,OAAO,IAAI,CAAC,MAAA,MAAA,CAAC,CAAC,WAAW,0CAAE,IAAI,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,mCAAI,KAAK,CAAC,CAAA,EAAA,CACrE,CAAC;QAEF,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,MAAM,IAAI,aAAa,QAAQ,CAAC,MAAM,MAAM,CAAC;YAE7C,KAAK,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,IAAI,QAAQ,CAAC,OAAO,EAAE,EAAE,CAAC;gBAClD,MAAM,IAAI,aAAa,KAAK,GAAG,CAAC,KAAK,CAAC;gBACtC,MAAM,IAAI,UAAU,CAAA,MAAA,OAAO,CAAC,OAAO,0CAAE,KAAK,KAAI,KAAK,IAAI,CAAC;gBAExD,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;oBACxB,MAAM,IAAI,oBAAoB,CAAC;oBAE/B,KAAK,MAAM,MAAM,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;wBACzC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;4BACpB,MAAM,IAAI,OAAO,MAAM,CAAC,IAAI,KAAK,MAAM,CAAC,KAAK,IAAI,eAAe,IAAI,CAAC;wBACvE,CAAC;oBACH,CAAC;gBACH,CAAC;qBAAM,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;oBAC5B,MAAM,IAAI,UAAU,OAAO,CAAC,KAAK,IAAI,eAAe,IAAI,CAAC;gBAC3D,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AA3CD,0DA2CC;AAED;;;GAGG;AACH,SAAgB,YAAY,CAAC,OAAc,EAAE,WAAoB,EAAE,QAAiB;IAClF,8DAA8D;IAC9D,IAAI,WAAW,IAAI,QAAQ,EAAE,CAAC;QAC5B,MAAM,OAAO,GAAG,4CAA4C,CAAC;QAC7D,MAAM,SAAS,GAAG,iBAAiB,WAAW,kBAAkB,QAAQ,EAAE,CAAC;QAC3E,MAAM,UAAU,GAAG,GAAG,OAAO,GAAG,SAAS,EAAE,CAAC;QAE5C,gBAAgB;QAChB,OAAO,CAAC,GAAG,CAAC,sBAAsB,UAAU,IAAI,CAAC,CAAC;IACpD,CAAC;IAED,qDAAqD;IACrD,MAAM,gBAAgB,GAAG,uBAAuB,CAAC,OAAO,EAAE,WAAW,EAAE,QAAQ,CAAC,CAAC;IAEjF,4CAA4C;IAC5C,IAAI,gBAAgB,EAAE,CAAC;QACrB,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;IAChC,CAAC;IAED,qDAAqD;IACrD,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC
,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;AAChD,CAAC;AArBD,oCAqBC;AAED;;;GAGG;AACH,SAAgB,KAAK,CAAC,IAAS;IAC7B,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;QACxB,+CAA+C;QAC/C,IAAI,WAAW,EAAE,QAAQ,CAAC;QAE1B,kEAAkE;QAClE,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;YACxC,WAAW,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,YAAY,CAAC;YAC5C,QAAQ,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,SAAS,CAAC;QACxC,CAAC;QAED,YAAY,CAAC,IAAI,EAAE,WAAW,EAAE,QAAQ,CAAC,CAAC;IAC5C,CAAC;SAAM,IAAI,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;QAC5D,sBAAsB;QACtB,OAAO,CAAC,GAAG,CAAC,gBAAgB,IAAI,CAAC,IAAI,IAAI,SAAS,SAAS,IAAI,CAAC,OAAO,OAAO,CAAC,CAAC;QAChF,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACrB,MAAM,QAAQ,GAAG,wDAAwD,IAAI,CAAC,WAAW,aAAa,IAAI,CAAC,OAAO,eAAe,IAAI,CAAC,IAAI,IAAI,OAAO,kBAAkB,CAAC;YACxK,OAAO,CAAC,GAAG,CAAC,oBAAoB,QAAQ,IAAI,CAAC,CAAC;QAChD,CAAC;QACD,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAC7C,CAAC;SAAM,IAAI,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,KAAK,KAAK,2BAA2B,EAAE,CAAC;QAC1F,mCAAmC;QACnC,OAAO,CAAC,GAAG,CAAC,SAAS,IAAI,CAAC,KAAK,QAAQ,CAAC,CAAC;QAEzC,2BAA2B;QAC3B,OAAO,CAAC,GAAG,CAAC,6BAA6B,CAAC,CAAC;QAC3C,OAAO,CAAC,GAAG,CAAC,8BAA8B,CAAC,CAAC;QAC5C,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,iBAAiB,CAAC,EAAE,CAAC;YAC1C,IAAI,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC,MAAW,EAAE,EAAE;gBAC7C,OAAO,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC;YACzF,CAAC,CAAC,CAAC;QACL,CAAC;QAED,8BAA8B;QAC9B,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,mBAAmB,CAAC,IAAI,IAAI,CAAC,mBAAmB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnF,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;YACxC,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;YACtC,IAAI,CAAC,mBAAmB,CAAC,OAAO,CAAC,CAAC,IAAY,EAAE,KAAa,EAAE,EAAE;gBAC/D,OAAO,CAAC,GAAG,CAAC,GAAG,KAAK,GAAG,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC;YACvC,CAAC,CAAC,CAAC;QACL,CAAC;QAED,kBAAkB;QAClB,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,E
AAE,CAAC;YAC/D,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;YAC5B,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;YAC1B,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,QAAgB,EAAE,KAAa,EAAE,EAAE;gBACzD,OAAO,CAAC,GAAG,CAAC,GAAG,KAAK,GAAG,CAAC,KAAK,QAAQ,EAAE,CAAC,CAAC;YAC3C,CAAC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;SAAM,IAAI,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;QAClF,yBAAyB;QACzB,OAAO,CAAC,GAAG,CAAC,SAAS,IAAI,CAAC,KAAK,MAAM,CAAC,CAAC;QAEvC,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,eAAe,CAAC,EAAE,CAAC;YACxC,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC,GAAW,EAAE,KAAa,EAAE,EAAE;gBAC1D,OAAO,CAAC,GAAG,CAAC,GAAG,KAAK,GAAG,CAAC,KAAK,GAAG,EAAE,CAAC,CAAC;YACtC,CAAC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;SAAM,CAAC;QACN,0BAA0B;QAC1B,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAC7C,CAAC;AACH,CAAC;AA/DD,sBA+DC"}
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
/// <reference types="node" />
|
|
2
|
+
import { AsyncLocalStorage } from 'async_hooks';
|
|
3
|
+
import OpenAI from 'openai';
|
|
4
|
+
import Anthropic from '@anthropic-ai/sdk';
|
|
5
|
+
import Together from 'together-ai';
|
|
6
|
+
import { APIJudgmentScorer } from '../scorers/base-scorer';
|
|
7
|
+
/**
 * Optional notification settings that can be attached to a `Rule`.
 * All fields are optional.
 */
interface NotificationConfig {
    /** Whether notifications are turned on. */
    enabled?: boolean;
    /** Names of the channels to notify through (allowed values are not visible here). */
    communication_methods?: string[];
    /** Recipient email addresses. */
    email_addresses?: string[];
    /** Numeric send time — presumably a timestamp; unit to be confirmed. */
    send_at?: number;
}
|
|
13
|
+
/** A single rule condition, expressed as the scorer (metric) it refers to. */
interface Condition {
    metric: APIJudgmentScorer;
}

/** How a rule's conditions are combined: every one ("all") or at least one ("any"). */
type CombineType = "all" | "any";

/**
 * A named rule made of one or more conditions, with optional notification
 * settings.
 */
interface Rule {
    /** Optional identifier for the rule. */
    rule_id?: string;
    /** Rule name. */
    name: string;
    /** Optional free-text description. */
    description?: string;
    /** Conditions the rule is built from. */
    conditions: Condition[];
    /** Whether all or any of `conditions` must hold. */
    combine_type: CombineType;
    /** Optional notification settings for this rule. */
    notification?: NotificationConfig;
}
|
|
25
|
+
/** LLM client types accepted by `wrap`. */
type ApiClient = OpenAI | Anthropic | Together;

/** Span category label; any string is accepted. */
type SpanType = string;

/** One raw event recorded on a trace. */
interface TraceEntry {
    /** Kind of event this entry records. */
    type: 'enter' | 'exit' | 'input' | 'output' | 'error' | 'evaluation';
    /** Name of the function/span the entry belongs to. */
    function: string;
    /** Identifier of the span the entry belongs to. */
    span_id: string;
    /** Nesting depth of the span. */
    depth: number;
    /** Event time (unit not visible here — confirm against tracer.js). */
    timestamp: number;
    /** Span duration, when known. */
    duration?: number;
    /** Recorded output value, when present. */
    output?: any;
    /** Recorded inputs keyed by name, when present. */
    inputs?: Record<string, any>;
    /** Category label of the span. */
    span_type: SpanType;
    /** Identifier of the parent span, when there is one. */
    parent_span_id?: string;
    /** Evaluation run payloads attached to the entry, when present. */
    evaluation_runs?: any[];
}
|
|
40
|
+
/**
 * Payload shape accepted by `TraceManagerClient.saveTrace` and
 * `TraceManagerClient.addTraceToEvalQueue`.
 */
interface TraceSavePayload {
    /** Identifier of the trace. */
    trace_id: string;
    /** Trace name. */
    name: string;
    /** Project the trace belongs to. */
    project_name: string;
    /** Creation time as a string (format not visible here). */
    created_at: string;
    /** Total trace duration. */
    duration: number;
    /** Token usage and cost totals for the trace. */
    token_counts: {
        prompt_tokens: number;
        completion_tokens: number;
        total_tokens: number;
        prompt_tokens_cost_usd: number;
        completion_tokens_cost_usd: number;
        total_cost_usd: number;
    };
    /** Condensed span entries making up the trace. */
    entries: CondensedSpanEntry[];
    /** Optional rules, keyed by string. */
    rules?: Record<string, Rule>;
    /** Flag sent with the payload; see `TraceClient.save(emptySave)`. */
    empty_save: boolean;
    /** Overwrite flag sent with the payload. */
    overwrite: boolean;
    /** Identifier of the parent trace, if any. */
    parent_trace_id?: string | null;
    /** Name of the parent trace, if any. */
    parent_name?: string | null;
}
|
|
61
|
+
/**
 * One span in condensed form, as carried in `TraceSavePayload.entries`.
 * Spans may nest through `children`.
 */
interface CondensedSpanEntry {
    /** Identifier of the span. */
    span_id: string;
    /** Name of the function/span. */
    function: string;
    /** Nesting depth of the span. */
    depth: number;
    /** Span time (unit not visible here — confirm against tracer.js). */
    timestamp: number;
    /** Identifier of the parent span, if any. */
    parent_span_id?: string | null;
    /** Category label of the span. */
    span_type: SpanType;
    /** Recorded inputs keyed by name, or null. */
    inputs: Record<string, any> | null;
    /** Recorded output value, or null. */
    output: any | null;
    /** Evaluation run payloads attached to the span. */
    evaluation_runs: any[];
    /** Span duration, or null when not available. */
    duration: number | null;
    /** Nested child spans, when present. */
    children?: CondensedSpanEntry[];
}
|
|
74
|
+
/**
 * Client for the Judgment trace API endpoints: fetch, save, delete, and queue
 * traces for evaluation. Constructed with an API key and an organization id.
 */
declare class TraceManagerClient {
    private apiKey;
    private organizationId;
    constructor(apiKey: string, organizationId: string);
    private _fetch;
    /** Fetch one trace by id. */
    fetchTrace(traceId: string): Promise<any>;
    /** Save a trace payload; `emptySave` is passed alongside it. */
    saveTrace(traceData: TraceSavePayload, emptySave: boolean): Promise<any>;
    /** Delete one trace by id. */
    deleteTrace(traceId: string): Promise<any>;
    /** Delete several traces by id. */
    deleteTraces(traceIds: string[]): Promise<any>;
    /** Submit a trace payload to the evaluation queue. */
    addTraceToEvalQueue(traceData: TraceSavePayload): Promise<any>;
}
|
|
88
|
+
/** Async-local store holding a `TraceClient` (presumably the active trace — confirm in tracer.js). */
declare const currentTraceAsyncLocalStorage: AsyncLocalStorage<TraceClient>;

/** Async-local store holding a string (presumably the active span id — confirm in tracer.js). */
declare const currentSpanAsyncLocalStorage: AsyncLocalStorage<string>;
|
|
90
|
+
/**
 * Represents an ongoing trace context: holds the trace's identity and
 * configuration, accumulates entries, and can save, print, or delete the trace.
 */
declare class TraceClient {
    readonly traceId: string;
    readonly name: string;
    readonly projectName: string;
    readonly overwrite: boolean;
    readonly rules: Rule[];
    readonly enableMonitoring: boolean;
    readonly enableEvaluations: boolean;
    readonly parentTraceId?: string | null;
    readonly parentName?: string | null;
    /** Entries recorded on this trace so far. */
    entries: Partial<TraceEntry>[];
    /** Numeric depth per string key (presumably span id — confirm in tracer.js). */
    spanDepths: Record<string, number>;
    private startTime;
    private traceManager;
    private apiKey;
    private organizationId;
    private originalName;
    /**
     * @param config Trace settings. `tracer`, `apiKey` and `organizationId` are
     *   required; every other field is optional.
     */
    constructor(config: {
        tracer: Tracer;
        traceId?: string;
        name?: string;
        projectName?: string;
        overwrite?: boolean;
        rules?: Rule[];
        enableMonitoring?: boolean;
        enableEvaluations?: boolean;
        parentTraceId?: string | null;
        parentName?: string | null;
        apiKey: string;
        organizationId: string;
    });
    /** Add an entry to the trace. */
    addEntry(entry: Partial<TraceEntry>): void;
    /** Record input values on the trace. */
    recordInput(inputs: any): void;
    /** Record an output value on the trace. */
    recordOutput(output: any): void;
    /**
     * Run `func` inside a named span and resolve with its result.
     *
     * @param name Span name.
     * @param options Span options; `spanType` is optional.
     * @param func Synchronous or asynchronous callback to run.
     */
    runInSpan<T>(name: string, options: {
        spanType?: SpanType;
    }, func: () => Promise<T> | T): Promise<T>;
    /** Return the trace duration as a number. */
    getDuration(): number;
    private condenseTrace;
    /**
     * Save the trace.
     *
     * @param emptySave Optional flag forwarded with the save.
     * @returns The trace id and the saved payload, or null.
     */
    save(emptySave?: boolean): Promise<{
        traceId: string;
        traceData: TraceSavePayload;
    } | null>;
    /** Print the trace. */
    print(): void;
    /** Delete the trace. */
    delete(): Promise<any>;
    /**
     * Asynchronously evaluate an example using the provided scorers,
     * embedding the evaluation request into the trace data.
     * Ported from the Python SDK's async_evaluate method.
     *
     * @param scorers Array of scorers to use for evaluation (currently assumes APIJudgmentScorer)
     * @param options Evaluation options including input, outputs, and metadata
     * @returns Promise that resolves when the evaluation entry has been added to the trace
     */
    asyncEvaluate(scorers: APIJudgmentScorer[], options?: {
        input?: string;
        actualOutput?: string;
        expectedOutput?: string;
        context?: string[];
        retrievalContext?: string[];
        toolsCalled?: string[];
        expectedTools?: string[];
        additionalMetadata?: Record<string, any>;
        model?: string;
        logResults?: boolean;
    }): Promise<void>;
    /**
     * Private helper to add an evaluation entry to the trace.
     * This mirrors the structure of Python's add_eval_run.
     *
     * @param evalRunPayload The constructed payload for the evaluation.
     * @param startTime The start time (in seconds) of the evaluation process.
     */
    private _addEvalRun;
    /** Return the trace's original name. */
    getOriginalName(): string;
}
|
|
169
|
+
/**
 * Singleton Tracer class. Manages overall tracing configuration and trace
 * creation. The constructor is private; obtain the instance via `getInstance`.
 */
declare class Tracer {
    private static instance;
    readonly apiKey: string;
    readonly organizationId: string;
    readonly projectName: string;
    readonly defaultRules: Rule[];
    readonly enableMonitoring: boolean;
    readonly enableEvaluations: boolean;
    private initialized;
    private constructor();
    /**
     * Return the Tracer instance.
     *
     * @param config Optional settings; every field is optional.
     */
    static getInstance(config?: {
        apiKey?: string;
        organizationId?: string;
        projectName?: string;
        rules?: Rule[];
        enableMonitoring?: boolean;
        enableEvaluations?: boolean;
    }): Tracer;
    /** Return the current trace client, or undefined when there is none. */
    getCurrentTrace(): TraceClient | undefined;
    private _startTraceInternal;
    /**
     * Run `func` with a `TraceClient` and resolve with its result.
     *
     * @param config Trace settings; only `name` is required.
     * @param func Synchronous or asynchronous callback that receives the trace client.
     */
    runInTrace<T>(config: {
        name: string;
        projectName?: string;
        overwrite?: boolean;
        createRootSpan?: boolean;
        rules?: Rule[];
    }, func: (traceClient: TraceClient) => Promise<T> | T): Promise<T>;
    /**
     * Build a decorator-style wrapper: the returned function takes a function
     * and returns one of the same type.
     *
     * @param options Optional span name and span type.
     */
    observe<T extends (...args: any[]) => any>(options?: {
        name?: string;
        spanType?: SpanType;
    }): (func: T) => T;
}
|
|
204
|
+
/**
 * Takes a supported API client (OpenAI, Anthropic, or Together) and returns a
 * value of the same type — presumably the client instrumented for tracing;
 * confirm against tracer.js.
 *
 * @param client The API client to wrap.
 * @returns A client of the same type as `client`.
 */
export declare function wrap<T extends ApiClient>(client: T): T;
|
|
205
|
+
export { Tracer, TraceClient, TraceManagerClient, currentTraceAsyncLocalStorage, currentSpanAsyncLocalStorage, Rule, Condition, NotificationConfig, CombineType, TraceEntry, SpanType, ApiClient, TraceSavePayload, CondensedSpanEntry };
|