@fallom/trace 0.2.21 → 0.2.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-KFD5AQ7V.mjs +308 -0
- package/dist/{chunk-GZ6TE7G4.mjs → chunk-NNVWIZN5.mjs} +101 -10
- package/dist/{core-DUG2SP2V.mjs → core-3MHBKYBC.mjs} +1 -1
- package/dist/index.d.mts +46 -14
- package/dist/index.d.ts +46 -14
- package/dist/index.js +108 -9
- package/dist/index.mjs +6 -2
- package/dist/models-SEFDGZU2.mjs +8 -0
- package/package.json +1 -1
- package/dist/chunk-XBZ3ESNV.mjs +0 -824
- package/dist/core-JLHYFVYS.mjs +0 -21
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
// Bundler runtime helper: cached reference to Object.defineProperty.
var __defProp = Object.defineProperty;

/**
 * Install every enumerable key of `all` on `target` as an enumerable getter.
 *
 * @param {object} target - Object that receives the getter properties.
 * @param {Object<string, Function>} all - Map of property name to getter function.
 */
function __export(target, all) {
  for (const key in all) {
    const getter = all[key];
    __defProp(target, key, { get: getter, enumerable: true });
  }
}
|
|
6
|
+
|
|
7
|
+
// src/models.ts
|
|
8
|
+
// Namespace object for the models module; `get` and `init` are exposed through
// lazy getters so they resolve to the function declarations further down.
var models_exports = {};
__export(models_exports, {
  get() {
    return get;
  },
  init() {
    return init;
  }
});
|
|
13
|
+
import { createHash } from "crypto";
|
|
14
|
+
// --- Module state (written by init() / get(), read by the fetch helpers) ---

// Bearer token sent with every request; stays null until init() finds a key.
let apiKey = null;
// Base URL of the configs service; init() may override it.
let baseUrl = "https://configs.fallom.com";
// Set to true the first time init() runs.
let initialized = false;
// Handle of the periodic background config refresh started by init().
let syncInterval = null;
// When true, log() writes diagnostics to the console; set on every get() call.
let debugMode = false;
// configKey -> { versions: Map<version, config>, latest: version | null }
const configCache = /* @__PURE__ */ new Map();
// Abort timeout (ms) for config fetches.
const SYNC_TIMEOUT = 2e3;
// Abort timeout (ms) for session-assignment recording.
const RECORD_TIMEOUT = 1e3;
|
|
22
|
+
/**
 * Write a `[Fallom]`-prefixed diagnostic line to the console.
 * Does nothing unless the module-level `debugMode` flag is set.
 *
 * @param {string} msg - Text to print.
 */
function log(msg) {
  if (!debugMode) return;
  console.log(`[Fallom] ${msg}`);
}
|
|
27
|
+
/**
 * Resolve a variant index from a config's targeting definition.
 *
 * Individual targets are checked first (exact match on one context field),
 * then rules in order; a rule matches when every one of its conditions holds.
 *
 * @param {object|null|undefined} targeting - Targeting block of a config
 *   (`enabled`, `individualTargets`, `rules`).
 * @param {string|undefined} customerId - Added to the evaluation context as
 *   `customerId` when truthy (it overrides a `customerId` key in `context`).
 * @param {object|undefined} context - Arbitrary field -> value pairs to match against.
 * @returns {number|null} The `variantIndex` of the first matching target/rule,
 *   or null when targeting is absent, disabled, or nothing matched.
 */
function evaluateTargeting(targeting, customerId, context) {
  if (!targeting || targeting.enabled === false) {
    return null;
  }
  const evalContext = {
    ...(context || {}),
    ...(customerId ? { customerId } : {})
  };
  log(`Evaluating targeting with context: ${JSON.stringify(evalContext)}`);
  if (targeting.individualTargets) {
    for (const target of targeting.individualTargets) {
      const fieldValue = evalContext[target.field];
      if (fieldValue === target.value) {
        log(`Individual target matched: ${target.field}=${target.value} -> variant ${target.variantIndex}`);
        return target.variantIndex;
      }
    }
  }
  if (targeting.rules) {
    for (const rule of targeting.rules) {
      // A rule without a conditions array is malformed; skip it rather than
      // letting `.every` throw and abort evaluation of the remaining rules.
      if (!Array.isArray(rule.conditions)) {
        continue;
      }
      const allConditionsMatch = rule.conditions.every((condition) => {
        const fieldValue = evalContext[condition.field];
        if (fieldValue === void 0) return false;
        const expected = condition.value;
        // String operators only apply to string field values; any other type
        // (number, boolean, null, ...) is a non-match instead of a TypeError.
        const fieldIsString = typeof fieldValue === "string";
        switch (condition.operator) {
          case "eq":
            return fieldValue === expected;
          case "neq":
            return fieldValue !== expected;
          case "in":
            return Array.isArray(expected) && expected.includes(fieldValue);
          case "nin":
            return Array.isArray(expected) && !expected.includes(fieldValue);
          case "contains":
            // Arrays are still accepted here: `.includes` then means membership.
            return typeof expected === "string" && (fieldIsString || Array.isArray(fieldValue)) && fieldValue.includes(expected);
          case "startsWith":
            return typeof expected === "string" && fieldIsString && fieldValue.startsWith(expected);
          case "endsWith":
            return typeof expected === "string" && fieldIsString && fieldValue.endsWith(expected);
          default:
            return false;
        }
      });
      if (allConditionsMatch) {
        log(`Rule matched: ${JSON.stringify(rule.conditions)} -> variant ${rule.variantIndex}`);
        return rule.variantIndex;
      }
    }
  }
  log("No targeting rules matched, falling back to weighted random");
  return null;
}
|
|
78
|
+
/**
 * Configure the models module and start background config syncing.
 *
 * Resolves the API key from `options.apiKey` or `FALLOM_API_KEY`, and the base
 * URL from `options.baseUrl`, `FALLOM_CONFIGS_URL`, `FALLOM_BASE_URL`, or the
 * built-in default. When a key is available it kicks off one immediate config
 * fetch and, if none is running yet, a refresh every 30 seconds.
 *
 * @param {{ apiKey?: string, baseUrl?: string }} [options]
 */
function init(options = {}) {
  // Default parameters only cover `undefined`; tolerate an explicit null too.
  const opts = options || {};
  apiKey = opts.apiKey || process.env.FALLOM_API_KEY || null;
  baseUrl = opts.baseUrl || process.env.FALLOM_CONFIGS_URL || process.env.FALLOM_BASE_URL || "https://configs.fallom.com";
  initialized = true;
  if (!apiKey) {
    return;
  }
  // Fire-and-forget: failures here must never break the caller.
  fetchConfigs().catch(() => {
  });
  if (!syncInterval) {
    syncInterval = setInterval(() => {
      fetchConfigs().catch(() => {
      });
    }, 3e4);
    // Node returns a Timeout object whose unref() lets the process exit while
    // the timer is pending; runtimes that return a numeric handle have no such
    // method, so only call it when it exists.
    if (syncInterval && typeof syncInterval.unref === "function") {
      syncInterval.unref();
    }
  }
}
|
|
95
|
+
/**
 * Lazily run init() with default options the first time the module is used.
 * Any error thrown by init() is deliberately ignored.
 */
function ensureInit() {
  if (initialized) return;
  try {
    init();
  } catch {
    // Intentionally ignored: auto-initialisation is best-effort.
  }
}
|
|
103
|
+
/**
 * Download all configs and store them in `configCache`.
 *
 * Each returned config is stored under its key and version (version defaults
 * to 1 when absent), and the key's `latest` pointer is set to that version.
 * Never rejects: missing API key, HTTP errors and exceptions are only logged.
 *
 * @param {number} [timeout=SYNC_TIMEOUT] - Milliseconds before the request is aborted.
 * @returns {Promise<void>}
 */
async function fetchConfigs(timeout = SYNC_TIMEOUT) {
  if (!apiKey) {
    log("_fetchConfigs: No API key, skipping");
    return;
  }
  try {
    log(`Fetching configs from ${baseUrl}/configs`);
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), timeout);
    let resp;
    try {
      resp = await fetch(`${baseUrl}/configs`, {
        headers: { Authorization: `Bearer ${apiKey}` },
        signal: controller.signal
      });
    } finally {
      // Always release the abort timer, including when fetch rejects;
      // otherwise a failed request leaves a live timer behind.
      clearTimeout(timeoutId);
    }
    log(`Response status: ${resp.status}`);
    if (!resp.ok) {
      log(`Fetch failed: ${resp.statusText}`);
      return;
    }
    const data = await resp.json();
    const configs = data.configs || [];
    log(`Got ${configs.length} configs: ${configs.map((c) => c.key)}`);
    for (const c of configs) {
      const key = c.key;
      const version = c.version || 1;
      log(`Config '${key}' v${version}: ${JSON.stringify(c.variants)}`);
      if (!configCache.has(key)) {
        configCache.set(key, { versions: /* @__PURE__ */ new Map(), latest: null });
      }
      const cached = configCache.get(key);
      cached.versions.set(version, c);
      cached.latest = version;
    }
  } catch (e) {
    log(`Fetch exception: ${e}`);
  }
}
|
|
140
|
+
/**
 * Fetch one specific version of a config and cache it under `configKey`.
 *
 * The key's `latest` pointer is not modified. Never rejects.
 *
 * @param {string} configKey - Config identifier.
 * @param {number|string} version - Version to fetch.
 * @param {number} [timeout=SYNC_TIMEOUT] - Milliseconds before the request is aborted.
 * @returns {Promise<object|null>} The parsed config, or null when there is no
 *   API key, the response is not OK, or the request fails.
 */
async function fetchSpecificVersion(configKey, version, timeout = SYNC_TIMEOUT) {
  if (!apiKey) return null;
  let timeoutId;
  try {
    const controller = new AbortController();
    timeoutId = setTimeout(() => controller.abort(), timeout);
    // Encode path segments so keys containing "/", "?", "#" etc. cannot
    // change which endpoint is requested.
    const url = `${baseUrl}/configs/${encodeURIComponent(configKey)}/version/${encodeURIComponent(version)}`;
    let resp;
    try {
      resp = await fetch(url, {
        headers: { Authorization: `Bearer ${apiKey}` },
        signal: controller.signal
      });
    } finally {
      // Release the abort timer on success and on failure alike.
      clearTimeout(timeoutId);
    }
    if (resp.ok) {
      const config = await resp.json();
      if (!configCache.has(configKey)) {
        configCache.set(configKey, { versions: /* @__PURE__ */ new Map(), latest: null });
      }
      configCache.get(configKey).versions.set(version, config);
      return config;
    }
  } catch {
    // Best-effort lookup: callers treat null as "version unavailable".
  }
  return null;
}
|
|
165
|
+
/**
 * Resolve which model a session should use for a config.
 *
 * Resolution order: cached config (fetching once if the key is not cached),
 * then targeting via evaluateTargeting(), then deterministic weighted
 * bucketing on a hash of `sessionId`. Whenever the config, the requested
 * version, or its variants are unavailable, `options.fallback` is returned
 * (with a console warning) if provided; otherwise an Error is thrown.
 *
 * Side effect: sets the module-level `debugMode` flag from `options.debug`.
 *
 * @param {string} configKey - Config identifier.
 * @param {string} sessionId - Session identifier used for sticky bucketing.
 * @param {object} [options]
 * @param {number} [options.version] - Pin a specific config version.
 * @param {string} [options.fallback] - Model to return when resolution fails.
 * @param {string} [options.customerId] - Passed to targeting evaluation.
 * @param {object} [options.context] - Passed to targeting evaluation.
 * @param {boolean} [options.debug=false] - Enable diagnostic logging.
 * @returns {Promise<string>} The assigned model.
 * @throws {Error} When resolution fails and no fallback was supplied.
 */
async function get(configKey, sessionId, options = {}) {
  const { version, fallback, customerId, context, debug = false } = options;
  debugMode = debug;
  ensureInit();
  log(
    `get() called: configKey=${configKey}, sessionId=${sessionId}, fallback=${fallback}`
  );
  try {
    let configData = configCache.get(configKey);
    log(
      `Cache lookup for '${configKey}': ${configData ? "found" : "not found"}`
    );
    if (!configData) {
      log("Not in cache, fetching...");
      await fetchConfigs(SYNC_TIMEOUT);
      configData = configCache.get(configKey);
      log(
        `After fetch, cache lookup: ${configData ? "found" : "still not found"}`
      );
    }
    if (!configData) {
      log(`Config not found, using fallback: ${fallback}`);
      if (fallback) {
        console.warn(
          `[Fallom WARNING] Config '${configKey}' not found, using fallback model: ${fallback}`
        );
        return returnModel(configKey, sessionId, fallback, 0);
      }
      throw new Error(
        `Config '${configKey}' not found. Check that it exists in your Fallom dashboard.`
      );
    }
    let config;
    let targetVersion;
    if (version !== void 0) {
      config = configData.versions.get(version);
      if (!config) {
        config = await fetchSpecificVersion(configKey, version, SYNC_TIMEOUT) || void 0;
      }
      if (!config) {
        if (fallback) {
          console.warn(
            `[Fallom WARNING] Config '${configKey}' version ${version} not found, using fallback: ${fallback}`
          );
          return returnModel(configKey, sessionId, fallback, 0);
        }
        throw new Error(`Config '${configKey}' version ${version} not found.`);
      }
      targetVersion = version;
    } else {
      targetVersion = configData.latest;
      config = configData.versions.get(targetVersion);
      if (!config) {
        if (fallback) {
          console.warn(
            `[Fallom WARNING] Config '${configKey}' has no cached version, using fallback: ${fallback}`
          );
          return returnModel(configKey, sessionId, fallback, 0);
        }
        throw new Error(`Config '${configKey}' has no cached version.`);
      }
    }
    const variantsRaw = config.variants;
    const configVersion = config.version || targetVersion;
    // Variants may arrive as an array or as an object keyed by name.
    const variants = Array.isArray(variantsRaw) ? variantsRaw : Object.values(variantsRaw || {});
    log(
      `Config found! Version: ${configVersion}, Variants: ${JSON.stringify(
        variants
      )}`
    );
    if (variants.length === 0) {
      // Fail with a descriptive message instead of a TypeError from indexing
      // an empty list. The wording avoids "not found" on purpose so the catch
      // block below still honours the fallback.
      throw new Error(`Config '${configKey}' has no variants.`);
    }
    const targetedVariantIndex = evaluateTargeting(config.targeting, customerId, context);
    if (targetedVariantIndex !== null && variants[targetedVariantIndex]) {
      const targetedModel = variants[targetedVariantIndex].model;
      log(`\u2705 Assigned model via targeting: ${targetedModel}`);
      return returnModel(configKey, sessionId, targetedModel, configVersion);
    }
    const assignedModel = pickWeightedModel(variants, sessionId);
    log(`\u2705 Assigned model via weighted random: ${assignedModel}`);
    return returnModel(configKey, sessionId, assignedModel, configVersion);
  } catch (e) {
    // "not found" errors are the explicit no-fallback failures raised above.
    if (e instanceof Error && e.message.includes("not found")) {
      throw e;
    }
    if (fallback) {
      console.warn(
        `[Fallom WARNING] Error getting model for '${configKey}': ${e}. Using fallback: ${fallback}`
      );
      return returnModel(configKey, sessionId, fallback, 0);
    }
    throw e;
  }
}

/**
 * Deterministically pick a variant's model for a session by weight.
 *
 * The session id is hashed (MD5, used only for bucketing, not for security)
 * into a bucket in [0, 1_000_000). Each variant claims `weight * 10_000`
 * consecutive buckets, so weights appear to be percentages (TODO confirm
 * against the API). If the bucket lies beyond the claimed range, the last
 * variant is used.
 *
 * @param {Array<{model: string, weight: number}>} variants - Non-empty list.
 * @param {string} sessionId - Session identifier to hash.
 * @returns {string} The selected variant's model.
 */
function pickWeightedModel(variants, sessionId) {
  const hashBytes = createHash("md5").update(sessionId).digest();
  const hashVal = hashBytes.readUInt32BE(0) % 1e6;
  log(`Session hash: ${hashVal} (out of 1,000,000)`);
  let cumulative = 0;
  for (const v of variants) {
    const oldCumulative = cumulative;
    cumulative += v.weight * 1e4;
    log(
      `Variant ${v.model}: weight=${v.weight}%, range=${oldCumulative}-${cumulative}, hash=${hashVal}, match=${hashVal < cumulative}`
    );
    if (hashVal < cumulative) {
      return v.model;
    }
  }
  return variants[variants.length - 1].model;
}
|
|
272
|
+
/**
 * Return `model`, first kicking off a background session record when the
 * assignment came from a real config version (version > 0).
 *
 * @param {string} configKey - Config identifier.
 * @param {string} sessionId - Session identifier.
 * @param {string} model - Model to hand back to the caller.
 * @param {number} version - Config version; 0 is passed for fallback results.
 * @returns {string} `model`, unchanged.
 */
function returnModel(configKey, sessionId, model, version) {
  const cameFromConfig = version > 0;
  if (cameFromConfig) {
    // Not awaited; rejections are discarded so recording never affects callers.
    recordSession(configKey, version, sessionId, model).catch(() => {
    });
  }
  return model;
}
|
|
279
|
+
/**
 * Report a session's model assignment to the `/sessions` endpoint.
 *
 * Best-effort: does nothing without an API key, aborts after RECORD_TIMEOUT
 * milliseconds, and swallows every failure.
 *
 * @param {string} configKey - Config identifier.
 * @param {number} version - Config version the assignment came from.
 * @param {string} sessionId - Session identifier.
 * @param {string} model - Model that was assigned.
 * @returns {Promise<void>}
 */
async function recordSession(configKey, version, sessionId, model) {
  if (!apiKey) return;
  let timeoutId;
  try {
    const controller = new AbortController();
    timeoutId = setTimeout(() => controller.abort(), RECORD_TIMEOUT);
    await fetch(`${baseUrl}/sessions`, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json"
      },
      body: JSON.stringify({
        config_key: configKey,
        config_version: version,
        session_id: sessionId,
        assigned_model: model
      }),
      signal: controller.signal
    });
  } catch {
    // Recording is telemetry only; failures must not surface to callers.
  } finally {
    // Release the abort timer even when the request failed, so a rejected
    // fetch does not leave a live timer behind.
    clearTimeout(timeoutId);
  }
}
|
|
302
|
+
|
|
303
|
+
export {
|
|
304
|
+
__export,
|
|
305
|
+
init,
|
|
306
|
+
get,
|
|
307
|
+
models_exports
|
|
308
|
+
};
|
|
@@ -120,7 +120,16 @@ Respond in JSON format:
|
|
|
120
120
|
"score": 0.85
|
|
121
121
|
}`;
|
|
122
122
|
}
|
|
123
|
-
async function runGEval(
|
|
123
|
+
async function runGEval(options) {
|
|
124
|
+
const {
|
|
125
|
+
metric,
|
|
126
|
+
inputText,
|
|
127
|
+
outputText,
|
|
128
|
+
systemMessage,
|
|
129
|
+
judgeModel,
|
|
130
|
+
openrouterKey,
|
|
131
|
+
fallomApiKey
|
|
132
|
+
} = options;
|
|
124
133
|
const apiKey = openrouterKey || process.env.OPENROUTER_API_KEY;
|
|
125
134
|
if (!apiKey) {
|
|
126
135
|
throw new Error(
|
|
@@ -131,6 +140,7 @@ async function runGEval(metric, inputText, outputText, systemMessage, judgeModel
|
|
|
131
140
|
if (!config) {
|
|
132
141
|
throw new Error(`Unknown metric: ${metric}`);
|
|
133
142
|
}
|
|
143
|
+
const metricName = typeof metric === "object" ? metric.name : metric;
|
|
134
144
|
const prompt = buildGEvalPrompt(
|
|
135
145
|
config.criteria,
|
|
136
146
|
config.steps,
|
|
@@ -138,6 +148,7 @@ async function runGEval(metric, inputText, outputText, systemMessage, judgeModel
|
|
|
138
148
|
inputText,
|
|
139
149
|
outputText
|
|
140
150
|
);
|
|
151
|
+
const startTime = Date.now();
|
|
141
152
|
const response = await fetch(
|
|
142
153
|
"https://openrouter.ai/api/v1/chat/completions",
|
|
143
154
|
{
|
|
@@ -158,17 +169,89 @@ async function runGEval(metric, inputText, outputText, systemMessage, judgeModel
|
|
|
158
169
|
throw new Error(`G-Eval API error: ${response.statusText}`);
|
|
159
170
|
}
|
|
160
171
|
const data = await response.json();
|
|
172
|
+
const endTime = Date.now();
|
|
161
173
|
try {
|
|
162
174
|
const result = JSON.parse(data.choices[0].message.content);
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
175
|
+
const score = Math.max(0, Math.min(1, result.score));
|
|
176
|
+
const reasoning = result.overall_reasoning || "";
|
|
177
|
+
if (fallomApiKey) {
|
|
178
|
+
sendGEvalTrace({
|
|
179
|
+
fallomApiKey,
|
|
180
|
+
metricName,
|
|
181
|
+
judgeModel,
|
|
182
|
+
prompt,
|
|
183
|
+
response: data.choices[0].message.content,
|
|
184
|
+
score,
|
|
185
|
+
reasoning,
|
|
186
|
+
startTime,
|
|
187
|
+
endTime,
|
|
188
|
+
usage: data.usage
|
|
189
|
+
}).catch(() => {
|
|
190
|
+
});
|
|
191
|
+
}
|
|
192
|
+
return { score, reasoning };
|
|
168
193
|
} catch {
|
|
169
194
|
throw new Error("Failed to parse G-Eval response");
|
|
170
195
|
}
|
|
171
196
|
}
|
|
197
|
+
/**
 * POST a single trace describing one G-Eval judge call to the traces service.
 *
 * The endpoint base is `FALLOM_TRACES_URL` or "https://traces.fallom.com".
 * The response is not inspected; network errors reject the returned promise.
 *
 * @param {object} options
 * @param {string} options.fallomApiKey - Bearer token for the traces service.
 * @param {string} options.metricName - Metric that was evaluated.
 * @param {string} options.judgeModel - Model used as judge.
 * @param {string} options.prompt - Prompt sent to the judge.
 * @param {string} options.response - Raw judge completion text.
 * @param {number} options.score - Score reported for the metric.
 * @param {string} options.reasoning - Judge reasoning text.
 * @param {number} options.startTime - Call start, epoch milliseconds.
 * @param {number} options.endTime - Call end, epoch milliseconds.
 * @param {object} [options.usage] - Token usage (`prompt_tokens`, `completion_tokens`).
 * @returns {Promise<void>}
 */
async function sendGEvalTrace(options) {
  const {
    fallomApiKey: token,
    metricName: metric,
    judgeModel: judge,
    prompt: promptText,
    response: completionText,
    score: metricScore,
    reasoning: rationale,
    startTime: startedAt,
    endTime: finishedAt,
    usage: tokenUsage
  } = options;
  const endpointBase = process.env.FALLOM_TRACES_URL || "https://traces.fallom.com";

  // Key order below is the wire format; keep it stable.
  const payload = {
    config_key: "eval-worker",
    session_id: `geval-${Date.now()}`,
    trace_id: generateHexId(32),
    span_id: generateHexId(16),
    name: `geval.${metric}`,
    kind: "llm",
    model: judge,
    start_time: new Date(startedAt).toISOString(),
    end_time: new Date(finishedAt).toISOString(),
    duration_ms: finishedAt - startedAt,
    status: "OK",
    metadata: {
      metric: metric,
      score: metricScore
    },
    tags: ["eval-worker", "geval", metric],
    attributes: {
      "fallom.sdk_version": "2",
      "fallom.method": "runGEval",
      "geval.metric": metric,
      "geval.score": metricScore,
      "geval.reasoning": rationale,
      "gen_ai.prompt.0.role": "user",
      "gen_ai.prompt.0.content": promptText,
      "gen_ai.completion.0.content": completionText,
      "gen_ai.usage.prompt_tokens": tokenUsage?.prompt_tokens,
      "gen_ai.usage.completion_tokens": tokenUsage?.completion_tokens
    }
  };

  const requestInit = {
    method: "POST",
    headers: {
      Authorization: `Bearer ${token}`,
      "Content-Type": "application/json"
    },
    body: JSON.stringify(payload)
  };
  await fetch(`${endpointBase}/v1/traces`, requestInit);
}
|
|
250
|
+
/**
 * Generate a random lowercase hexadecimal identifier.
 *
 * @param {number} length - Number of hex characters to produce.
 * @returns {string} A string of exactly `length` hex characters.
 */
function generateHexId(length) {
  // Each byte yields two hex characters; round up so odd lengths are not
  // silently shortened, then trim the surplus character.
  const bytes = new Uint8Array(Math.ceil(length / 2));
  crypto.getRandomValues(bytes);
  return Array.from(bytes, (b) => b.toString(16).padStart(2, "0")).join("").slice(0, length);
}
|
|
172
255
|
function calculateAggregateScores(results) {
|
|
173
256
|
const aggregates = {};
|
|
174
257
|
for (const result of results) {
|
|
@@ -333,7 +416,7 @@ function datasetFromTraces(traces) {
|
|
|
333
416
|
return items;
|
|
334
417
|
}
|
|
335
418
|
async function datasetFromFallom(datasetKey, version, config) {
|
|
336
|
-
const { _apiKey: _apiKey2, _baseUrl: _baseUrl2, _initialized: _initialized2 } = await import("./core-
|
|
419
|
+
const { _apiKey: _apiKey2, _baseUrl: _baseUrl2, _initialized: _initialized2 } = await import("./core-3MHBKYBC.mjs").then(
|
|
337
420
|
(m) => ({
|
|
338
421
|
_apiKey: config?._apiKey ?? m._apiKey,
|
|
339
422
|
_baseUrl: config?._baseUrl ?? m._baseUrl,
|
|
@@ -406,7 +489,7 @@ var EvaluationDataset = class {
|
|
|
406
489
|
* @returns Self for chaining
|
|
407
490
|
*/
|
|
408
491
|
async pull(alias, version) {
|
|
409
|
-
const { _apiKey: _apiKey2, _baseUrl: _baseUrl2, _initialized: _initialized2 } = await import("./core-
|
|
492
|
+
const { _apiKey: _apiKey2, _baseUrl: _baseUrl2, _initialized: _initialized2 } = await import("./core-3MHBKYBC.mjs");
|
|
410
493
|
if (!_initialized2) {
|
|
411
494
|
throw new Error("Fallom evals not initialized. Call evals.init() first.");
|
|
412
495
|
}
|
|
@@ -545,7 +628,13 @@ function init(options = {}) {
|
|
|
545
628
|
}
|
|
546
629
|
async function runGEval2(metric, inputText, outputText, systemMessage, judgeModel) {
|
|
547
630
|
const metricArg = isCustomMetric(metric) ? { name: metric.name, criteria: metric.criteria, steps: metric.steps } : metric;
|
|
548
|
-
return runGEval(
|
|
631
|
+
return runGEval({
|
|
632
|
+
metric: metricArg,
|
|
633
|
+
inputText,
|
|
634
|
+
outputText,
|
|
635
|
+
systemMessage,
|
|
636
|
+
judgeModel
|
|
637
|
+
});
|
|
549
638
|
}
|
|
550
639
|
async function resolveDataset(datasetInput) {
|
|
551
640
|
if (typeof datasetInput === "string") {
|
|
@@ -617,7 +706,9 @@ async function evaluate(options) {
|
|
|
617
706
|
for (const m of metrics) {
|
|
618
707
|
if (typeof m === "string" && !AVAILABLE_METRICS.includes(m)) {
|
|
619
708
|
throw new Error(
|
|
620
|
-
`Invalid metric: ${m}. Available: ${AVAILABLE_METRICS.join(
|
|
709
|
+
`Invalid metric: ${m}. Available: ${AVAILABLE_METRICS.join(
|
|
710
|
+
", "
|
|
711
|
+
)}. Or use CustomMetric for custom metrics.`
|
|
621
712
|
);
|
|
622
713
|
}
|
|
623
714
|
}
|
package/dist/index.d.mts
CHANGED
|
@@ -568,22 +568,36 @@ interface GEvalScore {
|
|
|
568
568
|
score: number;
|
|
569
569
|
reasoning: string;
|
|
570
570
|
}
|
|
571
|
+
/**
|
|
572
|
+
* Options for runGEval function.
|
|
573
|
+
*/
|
|
574
|
+
interface RunGEvalOptions {
|
|
575
|
+
/** Built-in metric name or custom metric config */
|
|
576
|
+
metric: string | {
|
|
577
|
+
name: string;
|
|
578
|
+
criteria: string;
|
|
579
|
+
steps: string[];
|
|
580
|
+
};
|
|
581
|
+
/** The user's input/query */
|
|
582
|
+
inputText: string;
|
|
583
|
+
/** The LLM's response to evaluate */
|
|
584
|
+
outputText: string;
|
|
585
|
+
/** Optional system message for context */
|
|
586
|
+
systemMessage?: string;
|
|
587
|
+
/** The model to use as judge (OpenRouter format, e.g., "openai/gpt-4o-mini") */
|
|
588
|
+
judgeModel: string;
|
|
589
|
+
/** OpenRouter API key (defaults to OPENROUTER_API_KEY env var) */
|
|
590
|
+
openrouterKey?: string;
|
|
591
|
+
/** Optional Fallom API key to enable tracing of the judge LLM call */
|
|
592
|
+
fallomApiKey?: string;
|
|
593
|
+
}
|
|
571
594
|
/**
|
|
572
595
|
* Run G-Eval for a single metric using OpenRouter.
|
|
573
596
|
* This is the low-level function used by both the SDK and backend workers.
|
|
574
597
|
*
|
|
575
|
-
*
|
|
576
|
-
* @param inputText - The user's input/query
|
|
577
|
-
* @param outputText - The LLM's response
|
|
578
|
-
* @param systemMessage - Optional system message
|
|
579
|
-
* @param judgeModel - The model to use as judge (OpenRouter format)
|
|
580
|
-
* @param openrouterKey - OpenRouter API key (defaults to env var)
|
|
598
|
+
* If `fallomApiKey` is provided, the judge LLM call will be traced to Fallom.
|
|
581
599
|
*/
|
|
582
|
-
declare function runGEval(
|
|
583
|
-
name: string;
|
|
584
|
-
criteria: string;
|
|
585
|
-
steps: string[];
|
|
586
|
-
}, inputText: string, outputText: string, systemMessage: string | undefined, judgeModel: string, openrouterKey?: string): Promise<GEvalScore>;
|
|
600
|
+
declare function runGEval(options: RunGEvalOptions): Promise<GEvalScore>;
|
|
587
601
|
/**
|
|
588
602
|
* Calculate aggregate scores from a list of results.
|
|
589
603
|
*/
|
|
@@ -614,12 +628,22 @@ declare function detectRegression(currentScores: Record<string, {
|
|
|
614
628
|
};
|
|
615
629
|
|
|
616
630
|
/**
|
|
617
|
-
* Core evaluation functions.
|
|
631
|
+
* Core evaluation functions for Fallom Evals.
|
|
632
|
+
*
|
|
633
|
+
* Provides the main API for running LLM evaluations using G-Eval methodology.
|
|
618
634
|
*/
|
|
619
635
|
|
|
636
|
+
/** Default judge model (via OpenRouter) */
|
|
620
637
|
declare const DEFAULT_JUDGE_MODEL = "openai/gpt-4o-mini";
|
|
621
638
|
/**
|
|
622
639
|
* Initialize Fallom evals.
|
|
640
|
+
*
|
|
641
|
+
* @example
|
|
642
|
+
* ```typescript
|
|
643
|
+
* import fallom from "@fallom/trace";
|
|
644
|
+
*
|
|
645
|
+
* fallom.evals.init({ apiKey: "your-api-key" });
|
|
646
|
+
* ```
|
|
623
647
|
*/
|
|
624
648
|
declare function init$1(options?: InitOptions$1): void;
|
|
625
649
|
/**
|
|
@@ -627,6 +651,13 @@ declare function init$1(options?: InitOptions$1): void;
|
|
|
627
651
|
*
|
|
628
652
|
* Results are automatically uploaded to Fallom dashboard.
|
|
629
653
|
*
|
|
654
|
+
* @example
|
|
655
|
+
* ```typescript
|
|
656
|
+
* const results = await fallom.evals.evaluate({
|
|
657
|
+
* dataset: [{ input: "What is 2+2?", output: "4" }],
|
|
658
|
+
* metrics: ["answer_relevancy", "faithfulness"],
|
|
659
|
+
* });
|
|
660
|
+
* ```
|
|
630
661
|
*/
|
|
631
662
|
declare function evaluate(options: EvaluateOptions): Promise<EvalResult[]>;
|
|
632
663
|
/**
|
|
@@ -848,6 +879,7 @@ type evals_MetricName = MetricName;
|
|
|
848
879
|
type evals_Model = Model;
|
|
849
880
|
type evals_ModelCallable = ModelCallable;
|
|
850
881
|
type evals_ModelResponse = ModelResponse;
|
|
882
|
+
type evals_RunGEvalOptions = RunGEvalOptions;
|
|
851
883
|
declare const evals_buildGEvalPrompt: typeof buildGEvalPrompt;
|
|
852
884
|
declare const evals_calculateAggregateScores: typeof calculateAggregateScores;
|
|
853
885
|
declare const evals_compareModels: typeof compareModels;
|
|
@@ -863,7 +895,7 @@ declare const evals_getMetricName: typeof getMetricName;
|
|
|
863
895
|
declare const evals_isCustomMetric: typeof isCustomMetric;
|
|
864
896
|
declare const evals_runGEval: typeof runGEval;
|
|
865
897
|
declare namespace evals {
|
|
866
|
-
export { evals_AVAILABLE_METRICS as AVAILABLE_METRICS, type evals_CompareModelsOptions as CompareModelsOptions, type evals_CustomMetric as CustomMetric, evals_DEFAULT_JUDGE_MODEL as DEFAULT_JUDGE_MODEL, type evals_DatasetInput as DatasetInput, type evals_DatasetItem as DatasetItem, type evals_EvalResult as EvalResult, type evals_EvaluateOptions as EvaluateOptions, evals_EvaluationDataset as EvaluationDataset, type evals_GEvalScore as GEvalScore, type evals_Golden as Golden, type InitOptions$1 as InitOptions, type evals_LLMTestCase as LLMTestCase, evals_METRIC_PROMPTS as METRIC_PROMPTS, type evals_Message as Message, type evals_MetricInput as MetricInput, type evals_MetricName as MetricName, type evals_Model as Model, type evals_ModelCallable as ModelCallable, type evals_ModelResponse as ModelResponse, evals_buildGEvalPrompt as buildGEvalPrompt, evals_calculateAggregateScores as calculateAggregateScores, evals_compareModels as compareModels, evals_createCustomModel as createCustomModel, evals_createModelFromCallable as createModelFromCallable, evals_createOpenAIModel as createOpenAIModel, evals_customMetric as customMetric, evals_datasetFromFallom as datasetFromFallom, evals_datasetFromTraces as datasetFromTraces, evals_detectRegression as detectRegression, evals_evaluate as evaluate, evals_getMetricName as getMetricName, init$1 as init, evals_isCustomMetric as isCustomMetric, evals_runGEval as runGEval, uploadResultsPublic as uploadResults };
|
|
898
|
+
export { evals_AVAILABLE_METRICS as AVAILABLE_METRICS, type evals_CompareModelsOptions as CompareModelsOptions, type evals_CustomMetric as CustomMetric, evals_DEFAULT_JUDGE_MODEL as DEFAULT_JUDGE_MODEL, type evals_DatasetInput as DatasetInput, type evals_DatasetItem as DatasetItem, type evals_EvalResult as EvalResult, type evals_EvaluateOptions as EvaluateOptions, evals_EvaluationDataset as EvaluationDataset, type evals_GEvalScore as GEvalScore, type evals_Golden as Golden, type InitOptions$1 as InitOptions, type evals_LLMTestCase as LLMTestCase, evals_METRIC_PROMPTS as METRIC_PROMPTS, type evals_Message as Message, type evals_MetricInput as MetricInput, type evals_MetricName as MetricName, type evals_Model as Model, type evals_ModelCallable as ModelCallable, type evals_ModelResponse as ModelResponse, type evals_RunGEvalOptions as RunGEvalOptions, evals_buildGEvalPrompt as buildGEvalPrompt, evals_calculateAggregateScores as calculateAggregateScores, evals_compareModels as compareModels, evals_createCustomModel as createCustomModel, evals_createModelFromCallable as createModelFromCallable, evals_createOpenAIModel as createOpenAIModel, evals_customMetric as customMetric, evals_datasetFromFallom as datasetFromFallom, evals_datasetFromTraces as datasetFromTraces, evals_detectRegression as detectRegression, evals_evaluate as evaluate, evals_getMetricName as getMetricName, init$1 as init, evals_isCustomMetric as isCustomMetric, evals_runGEval as runGEval, uploadResultsPublic as uploadResults };
|
|
867
899
|
}
|
|
868
900
|
|
|
869
901
|
/**
|
|
@@ -1072,4 +1104,4 @@ declare const _default: {
|
|
|
1072
1104
|
session: typeof session;
|
|
1073
1105
|
};
|
|
1074
1106
|
|
|
1075
|
-
export { type CompareModelsOptions, type DatasetItem, type EvalResult, type EvaluateOptions, FallomExporter, type FallomExporterOptions, FallomSession, type InitOptions, type MetricName, type PromptResult, type SessionContext, type SessionOptions, clearMastraPrompt, _default as default, evals, init, models, prompts, session, setMastraPrompt, setMastraPromptAB, trace };
|
|
1107
|
+
export { type CompareModelsOptions, type DatasetItem, type EvalResult, type EvaluateOptions, FallomExporter, type FallomExporterOptions, FallomSession, type GEvalScore, type InitOptions, type MetricName, type PromptResult, type SessionContext, type SessionOptions, buildGEvalPrompt, calculateAggregateScores, clearMastraPrompt, _default as default, detectRegression, evals, init, models, prompts, runGEval, session, setMastraPrompt, setMastraPromptAB, trace };
|