@fallom/trace 0.2.15 → 0.2.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-3HBKT4HK.mjs +827 -0
- package/dist/core-4L56QWI7.mjs +21 -0
- package/dist/index.d.mts +140 -3
- package/dist/index.d.ts +140 -3
- package/dist/index.js +263 -1
- package/dist/index.mjs +98 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
@@ -590,9 +590,159 @@ async function datasetFromFallom(datasetKey, version, config) {
   );
   return items;
 }
+var EvaluationDataset;
 var init_helpers = __esm({
   "src/evals/helpers.ts"() {
     "use strict";
+    EvaluationDataset = class {
+      constructor() {
+        this._goldens = [];
+        this._testCases = [];
+        this._datasetKey = null;
+        this._datasetName = null;
+        this._version = null;
+      }
+      /** List of golden records (inputs with optional expected outputs). */
+      get goldens() {
+        return this._goldens;
+      }
+      /** List of test cases (inputs with actual outputs from your LLM). */
+      get testCases() {
+        return this._testCases;
+      }
+      /** The Fallom dataset key if pulled from Fallom. */
+      get datasetKey() {
+        return this._datasetKey;
+      }
+      /**
+       * Pull a dataset from Fallom.
+       *
+       * @param alias - The dataset key/alias in Fallom
+       * @param version - Specific version to pull (default: latest)
+       * @returns Self for chaining
+       */
+      async pull(alias, version) {
+        const { _apiKey: _apiKey2, _baseUrl: _baseUrl2, _initialized: _initialized2 } = await Promise.resolve().then(() => (init_core(), core_exports));
+        if (!_initialized2) {
+          throw new Error("Fallom evals not initialized. Call evals.init() first.");
+        }
+        const params = new URLSearchParams({ include_entries: "true" });
+        if (version !== void 0) {
+          params.set("version", String(version));
+        }
+        const url = `${_baseUrl2}/api/datasets/${encodeURIComponent(alias)}?${params}`;
+        const response = await fetch(url, {
+          headers: {
+            Authorization: `Bearer ${_apiKey2}`,
+            "Content-Type": "application/json"
+          }
+        });
+        if (response.status === 404) {
+          throw new Error(`Dataset '${alias}' not found`);
+        } else if (response.status === 403) {
+          throw new Error(`Access denied to dataset '${alias}'`);
+        }
+        if (!response.ok) {
+          throw new Error(`Failed to fetch dataset: ${response.statusText}`);
+        }
+        const data = await response.json();
+        this._datasetKey = alias;
+        this._datasetName = data.dataset?.name || alias;
+        this._version = data.version?.version || null;
+        this._goldens = [];
+        for (const entry of data.entries || []) {
+          this._goldens.push({
+            input: entry.input || "",
+            expectedOutput: entry.output,
+            systemMessage: entry.systemMessage,
+            metadata: entry.metadata
+          });
+        }
+        console.log(
+          `\u2713 Pulled dataset '${this._datasetName}' (version ${this._version}) with ${this._goldens.length} goldens`
+        );
+        return this;
+      }
+      /**
+       * Add a golden record manually.
+       * @param golden - A Golden object
+       * @returns Self for chaining
+       */
+      addGolden(golden) {
+        this._goldens.push(golden);
+        return this;
+      }
+      /**
+       * Add multiple golden records.
+       * @param goldens - Array of Golden objects
+       * @returns Self for chaining
+       */
+      addGoldens(goldens) {
+        this._goldens.push(...goldens);
+        return this;
+      }
+      /**
+       * Add a test case with actual LLM output.
+       * @param testCase - An LLMTestCase object
+       * @returns Self for chaining
+       */
+      addTestCase(testCase) {
+        this._testCases.push(testCase);
+        return this;
+      }
+      /**
+       * Add multiple test cases.
+       * @param testCases - Array of LLMTestCase objects
+       * @returns Self for chaining
+       */
+      addTestCases(testCases) {
+        this._testCases.push(...testCases);
+        return this;
+      }
+      /**
+       * Automatically generate test cases by running all goldens through your LLM app.
+       *
+       * @param llmApp - A callable that takes messages and returns response
+       * @param options - Configuration options
+       * @returns Self for chaining
+       */
+      async generateTestCases(llmApp, options = {}) {
+        const { includeContext = false } = options;
+        console.log(`Generating test cases for ${this._goldens.length} goldens...`);
+        for (let i = 0; i < this._goldens.length; i++) {
+          const golden = this._goldens[i];
+          const messages = [];
+          if (golden.systemMessage) {
+            messages.push({ role: "system", content: golden.systemMessage });
+          }
+          messages.push({ role: "user", content: golden.input });
+          const response = await llmApp(messages);
+          const testCase = {
+            input: golden.input,
+            actualOutput: response.content,
+            expectedOutput: golden.expectedOutput,
+            systemMessage: golden.systemMessage,
+            context: includeContext ? response.context : golden.context,
+            metadata: golden.metadata
+          };
+          this._testCases.push(testCase);
+          console.log(
+            ` [${i + 1}/${this._goldens.length}] Generated output for: ${golden.input.slice(0, 50)}...`
+          );
+        }
+        console.log(`\u2713 Generated ${this._testCases.length} test cases`);
+        return this;
+      }
+      /** Clear all test cases (useful for re-running with different LLM). */
+      clearTestCases() {
+        this._testCases = [];
+        return this;
+      }
+      /** Return the number of goldens. */
+      get length() {
+        return this._goldens.length;
+      }
+    };
   }
 });
 
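For orientation, a minimal usage sketch of the EvaluationDataset API added above. The import path, the evals namespace, and the dataset key are assumptions for illustration; only the method names and the { content } result shape come from the diff.

// Sketch only: import path, namespace, and dataset key are assumed, not taken from the package docs.
import { evals } from "@fallom/trace";

await evals.init();
const dataset = new evals.EvaluationDataset();
await dataset.pull("support-bot-goldens"); // hypothetical dataset key/alias
await dataset.generateTestCases(async (messages) => {
  // Replace this stub with your real LLM call; generateTestCases() reads `content` off the result.
  return { content: `echo: ${messages[messages.length - 1].content}` };
});
console.log(`${dataset.length} goldens -> ${dataset.testCases.length} test cases`);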
@@ -707,9 +857,22 @@ async function evaluate(options) {
     name,
     description,
     verbose = true,
+    testCases,
     _skipUpload = false
   } = options;
-
+  let dataset;
+  if (testCases !== void 0 && testCases.length > 0) {
+    dataset = testCases.map((tc) => ({
+      input: tc.input,
+      output: tc.actualOutput,
+      systemMessage: tc.systemMessage,
+      metadata: tc.metadata
+    }));
+  } else if (datasetInput !== void 0) {
+    dataset = await resolveDataset(datasetInput);
+  } else {
+    throw new Error("Either 'dataset' or 'testCases' must be provided");
+  }
   for (const m of metrics) {
     if (typeof m === "string" && !AVAILABLE_METRICS.includes(m)) {
       throw new Error(
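Given the branch above, evaluate() can now take pre-built test cases directly instead of a dataset reference. A hedged sketch, continuing from the EvaluationDataset sketch earlier (the metric name and the other option values are invented; only the testCases option itself is from the diff):

// Sketch only: "answer_relevancy" stands in for whatever AVAILABLE_METRICS actually contains.
await evals.evaluate({
  name: "support-bot regression", // hypothetical run name
  metrics: ["answer_relevancy"], // hypothetical metric name
  testCases: dataset.testCases // new in 0.2.17: skips resolveDataset()
});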
@@ -775,6 +938,9 @@ async function compareModels(options) {
     description,
     verbose = true
   } = options;
+  if (!datasetInput) {
+    throw new Error("'dataset' is required for compareModels()");
+  }
   const dataset = await resolveDataset(datasetInput);
   const results = {};
   if (includeProduction) {
@@ -2050,6 +2216,54 @@ function clearPromptContext() {
   promptContext = null;
 }
 
+// src/trace/wrappers/shared-utils.ts
+function sanitizeMetadataOnly(key, value) {
+  const contentKeys = [
+    "text",
+    "content",
+    "message",
+    "messages",
+    "object",
+    "prompt",
+    "system",
+    "input",
+    "output",
+    "response",
+    "toolCalls",
+    "toolResults",
+    "steps",
+    "reasoning",
+    "rawResponse",
+    "rawCall",
+    "body",
+    "candidates",
+    "parts"
+  ];
+  if (contentKeys.includes(key)) {
+    if (typeof value === "string") {
+      return `[content omitted: ${value.length} chars]`;
+    }
+    if (Array.isArray(value)) {
+      return `[content omitted: ${value.length} items]`;
+    }
+    if (typeof value === "object" && value !== null) {
+      return "[content omitted]";
+    }
+  }
+  if (typeof value === "string") {
+    if (value.startsWith("data:image/")) {
+      return "[base64 image omitted]";
+    }
+    if (value.length > 1e3) {
+      return `[large string omitted: ${value.length} chars]`;
+    }
+  }
+  if (value instanceof Uint8Array || value && value.type === "Buffer") {
+    return "[binary data omitted]";
+  }
+  return value;
+}
+
 // src/trace/wrappers/openai.ts
 function wrapOpenAI(client, sessionCtx) {
   const originalCreate = client.chat.completions.create.bind(
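The new sanitizeMetadataOnly helper is shaped as a JSON.stringify replacer: content-bearing keys, oversized strings, base64 images, and binary buffers collapse to short placeholders while scalar metadata passes through untouched. A small illustration of the intended effect (the sample response object is invented):

// Illustration only: the sample object is made up; the output follows the replacer logic above.
const sample = { id: "chatcmpl-1", model: "gpt-4o-mini", content: "a very long reply..." };
JSON.stringify(sample, sanitizeMetadataOnly);
// -> '{"id":"chatcmpl-1","model":"gpt-4o-mini","content":"[content omitted: 20 chars]"}'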
@@ -2097,6 +2311,13 @@ function wrapOpenAI(client, sessionCtx) {
     if (response?.usage) {
       attributes["fallom.raw.usage"] = JSON.stringify(response.usage);
     }
+    try {
+      attributes["fallom.raw.metadata"] = JSON.stringify(
+        response,
+        sanitizeMetadataOnly
+      );
+    } catch {
+    }
     const waterfallTimings = {
       requestStart: 0,
       requestEnd: endTime - startTime,
@@ -2223,6 +2444,13 @@ function wrapAnthropic(client, sessionCtx) {
     if (response?.usage) {
       attributes["fallom.raw.usage"] = JSON.stringify(response.usage);
     }
+    try {
+      attributes["fallom.raw.metadata"] = JSON.stringify(
+        response,
+        sanitizeMetadataOnly
+      );
+    } catch {
+    }
     const waterfallTimings = {
       requestStart: 0,
       requestEnd: endTime - startTime,
@@ -2343,6 +2571,13 @@ function wrapGoogleAI(model, sessionCtx) {
     if (result?.usageMetadata) {
       attributes["fallom.raw.usage"] = JSON.stringify(result.usageMetadata);
     }
+    try {
+      attributes["fallom.raw.metadata"] = JSON.stringify(
+        result,
+        sanitizeMetadataOnly
+      );
+    } catch {
+    }
     const waterfallTimings = {
       requestStart: 0,
       requestEnd: endTime - startTime,
@@ -2539,6 +2774,13 @@ function createGenerateTextWrapper(aiModule, sessionCtx, debug = false) {
         result.experimental_providerMetadata
       );
     }
+    try {
+      attributes["fallom.raw.metadata"] = JSON.stringify(
+        result,
+        sanitizeMetadataOnly
+      );
+    } catch {
+    }
     const totalDurationMs = endTime - startTime;
     const sortedToolTimings = Array.from(toolTimings.values()).sort(
       (a, b) => a.startTime - b.startTime
@@ -2867,6 +3109,10 @@ function createStreamTextWrapper(aiModule, sessionCtx, debug = false) {
     if (firstTokenTime) {
      attributes["fallom.time_to_first_token_ms"] = firstTokenTime - startTime;
     }
+    try {
+      attributes["fallom.raw.metadata"] = JSON.stringify(result, sanitizeMetadataOnly);
+    } catch {
+    }
     const totalDurationMs = endTime - startTime;
     const sortedToolTimings = Array.from(toolTimings.values()).sort(
       (a, b) => a.startTime - b.startTime
@@ -3072,6 +3318,10 @@ function createGenerateObjectWrapper(aiModule, sessionCtx, debug = false) {
         result.experimental_providerMetadata
       );
     }
+    try {
+      attributes["fallom.raw.metadata"] = JSON.stringify(result, sanitizeMetadataOnly);
+    } catch {
+    }
     const promptCtx = getPromptContext();
     sendTrace({
       config_key: ctx.configKey,
@@ -3191,6 +3441,10 @@ function createStreamObjectWrapper(aiModule, sessionCtx, debug = false) {
     if (providerMetadata) {
       attributes["fallom.raw.providerMetadata"] = JSON.stringify(providerMetadata);
     }
+    try {
+      attributes["fallom.raw.metadata"] = JSON.stringify(result, sanitizeMetadataOnly);
+    } catch {
+    }
     const promptCtx = getPromptContext();
     sendTrace({
       config_key: ctx.configKey,
@@ -3287,6 +3541,13 @@ function wrapMastraAgent(agent, sessionCtx) {
       attributes["fallom.raw.request"] = JSON.stringify(input);
       attributes["fallom.raw.response"] = JSON.stringify(result);
     }
+    try {
+      attributes["fallom.raw.metadata"] = JSON.stringify(
+        result,
+        sanitizeMetadataOnly
+      );
+    } catch {
+    }
     sendTrace({
       config_key: ctx.configKey,
       session_id: ctx.sessionId,
@@ -3543,6 +3804,7 @@ var evals_exports = {};
 __export(evals_exports, {
   AVAILABLE_METRICS: () => AVAILABLE_METRICS,
   DEFAULT_JUDGE_MODEL: () => DEFAULT_JUDGE_MODEL,
+  EvaluationDataset: () => EvaluationDataset,
   METRIC_PROMPTS: () => METRIC_PROMPTS,
   compareModels: () => compareModels,
   createCustomModel: () => createCustomModel,
package/dist/index.mjs
CHANGED
|
@@ -5,6 +5,7 @@ import {
|
|
|
5
5
|
import {
|
|
6
6
|
AVAILABLE_METRICS,
|
|
7
7
|
DEFAULT_JUDGE_MODEL,
|
|
8
|
+
EvaluationDataset,
|
|
8
9
|
METRIC_PROMPTS,
|
|
9
10
|
compareModels,
|
|
10
11
|
createCustomModel,
|
|
@@ -18,7 +19,7 @@ import {
   init as init2,
   isCustomMetric,
   uploadResultsPublic
-} from "./chunk-
+} from "./chunk-3HBKT4HK.mjs";
 import {
   __export
 } from "./chunk-7P6ASYW6.mjs";
@@ -1055,6 +1056,54 @@ function clearPromptContext() {
   promptContext = null;
 }
 
+// src/trace/wrappers/shared-utils.ts
+function sanitizeMetadataOnly(key, value) {
+  const contentKeys = [
+    "text",
+    "content",
+    "message",
+    "messages",
+    "object",
+    "prompt",
+    "system",
+    "input",
+    "output",
+    "response",
+    "toolCalls",
+    "toolResults",
+    "steps",
+    "reasoning",
+    "rawResponse",
+    "rawCall",
+    "body",
+    "candidates",
+    "parts"
+  ];
+  if (contentKeys.includes(key)) {
+    if (typeof value === "string") {
+      return `[content omitted: ${value.length} chars]`;
+    }
+    if (Array.isArray(value)) {
+      return `[content omitted: ${value.length} items]`;
+    }
+    if (typeof value === "object" && value !== null) {
+      return "[content omitted]";
+    }
+  }
+  if (typeof value === "string") {
+    if (value.startsWith("data:image/")) {
+      return "[base64 image omitted]";
+    }
+    if (value.length > 1e3) {
+      return `[large string omitted: ${value.length} chars]`;
+    }
+  }
+  if (value instanceof Uint8Array || value && value.type === "Buffer") {
+    return "[binary data omitted]";
+  }
+  return value;
+}
+
 // src/trace/wrappers/openai.ts
 function wrapOpenAI(client, sessionCtx) {
   const originalCreate = client.chat.completions.create.bind(
@@ -1102,6 +1151,13 @@ function wrapOpenAI(client, sessionCtx) {
     if (response?.usage) {
       attributes["fallom.raw.usage"] = JSON.stringify(response.usage);
     }
+    try {
+      attributes["fallom.raw.metadata"] = JSON.stringify(
+        response,
+        sanitizeMetadataOnly
+      );
+    } catch {
+    }
     const waterfallTimings = {
       requestStart: 0,
       requestEnd: endTime - startTime,
@@ -1228,6 +1284,13 @@ function wrapAnthropic(client, sessionCtx) {
     if (response?.usage) {
       attributes["fallom.raw.usage"] = JSON.stringify(response.usage);
     }
+    try {
+      attributes["fallom.raw.metadata"] = JSON.stringify(
+        response,
+        sanitizeMetadataOnly
+      );
+    } catch {
+    }
     const waterfallTimings = {
       requestStart: 0,
       requestEnd: endTime - startTime,
@@ -1348,6 +1411,13 @@ function wrapGoogleAI(model, sessionCtx) {
     if (result?.usageMetadata) {
       attributes["fallom.raw.usage"] = JSON.stringify(result.usageMetadata);
     }
+    try {
+      attributes["fallom.raw.metadata"] = JSON.stringify(
+        result,
+        sanitizeMetadataOnly
+      );
+    } catch {
+    }
     const waterfallTimings = {
       requestStart: 0,
       requestEnd: endTime - startTime,
@@ -1544,6 +1614,13 @@ function createGenerateTextWrapper(aiModule, sessionCtx, debug = false) {
         result.experimental_providerMetadata
       );
     }
+    try {
+      attributes["fallom.raw.metadata"] = JSON.stringify(
+        result,
+        sanitizeMetadataOnly
+      );
+    } catch {
+    }
     const totalDurationMs = endTime - startTime;
     const sortedToolTimings = Array.from(toolTimings.values()).sort(
       (a, b) => a.startTime - b.startTime
@@ -1872,6 +1949,10 @@ function createStreamTextWrapper(aiModule, sessionCtx, debug = false) {
     if (firstTokenTime) {
       attributes["fallom.time_to_first_token_ms"] = firstTokenTime - startTime;
     }
+    try {
+      attributes["fallom.raw.metadata"] = JSON.stringify(result, sanitizeMetadataOnly);
+    } catch {
+    }
     const totalDurationMs = endTime - startTime;
     const sortedToolTimings = Array.from(toolTimings.values()).sort(
       (a, b) => a.startTime - b.startTime
@@ -2077,6 +2158,10 @@ function createGenerateObjectWrapper(aiModule, sessionCtx, debug = false) {
         result.experimental_providerMetadata
       );
     }
+    try {
+      attributes["fallom.raw.metadata"] = JSON.stringify(result, sanitizeMetadataOnly);
+    } catch {
+    }
     const promptCtx = getPromptContext();
     sendTrace({
       config_key: ctx.configKey,
@@ -2196,6 +2281,10 @@ function createStreamObjectWrapper(aiModule, sessionCtx, debug = false) {
     if (providerMetadata) {
       attributes["fallom.raw.providerMetadata"] = JSON.stringify(providerMetadata);
     }
+    try {
+      attributes["fallom.raw.metadata"] = JSON.stringify(result, sanitizeMetadataOnly);
+    } catch {
+    }
     const promptCtx = getPromptContext();
     sendTrace({
       config_key: ctx.configKey,
@@ -2292,6 +2381,13 @@ function wrapMastraAgent(agent, sessionCtx) {
       attributes["fallom.raw.request"] = JSON.stringify(input);
       attributes["fallom.raw.response"] = JSON.stringify(result);
     }
+    try {
+      attributes["fallom.raw.metadata"] = JSON.stringify(
+        result,
+        sanitizeMetadataOnly
+      );
+    } catch {
+    }
     sendTrace({
       config_key: ctx.configKey,
       session_id: ctx.sessionId,
@@ -2545,6 +2641,7 @@ var evals_exports = {};
 __export(evals_exports, {
   AVAILABLE_METRICS: () => AVAILABLE_METRICS,
   DEFAULT_JUDGE_MODEL: () => DEFAULT_JUDGE_MODEL,
+  EvaluationDataset: () => EvaluationDataset,
   METRIC_PROMPTS: () => METRIC_PROMPTS,
   compareModels: () => compareModels,
   createCustomModel: () => createCustomModel,
package/package.json
CHANGED