@fallom/trace 0.2.6 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,14 +1,33 @@
1
1
  import {
2
- __export,
3
2
  init,
4
3
  models_exports
5
- } from "./chunk-KFD5AQ7V.mjs";
4
+ } from "./chunk-CCZLSKZ7.mjs";
5
+ import {
6
+ AVAILABLE_METRICS,
7
+ DEFAULT_JUDGE_MODEL,
8
+ METRIC_PROMPTS,
9
+ compareModels,
10
+ createCustomModel,
11
+ createModelFromCallable,
12
+ createOpenAIModel,
13
+ customMetric,
14
+ datasetFromFallom,
15
+ datasetFromTraces,
16
+ evaluate,
17
+ getMetricName,
18
+ init as init2,
19
+ isCustomMetric,
20
+ uploadResultsPublic
21
+ } from "./chunk-2NGJF2JZ.mjs";
22
+ import {
23
+ __export
24
+ } from "./chunk-7P6ASYW6.mjs";
6
25
 
7
26
  // src/trace.ts
8
27
  var trace_exports = {};
9
28
  __export(trace_exports, {
10
29
  FallomSession: () => FallomSession,
11
- init: () => init2,
30
+ init: () => init3,
12
31
  session: () => session,
13
32
  shutdown: () => shutdown
14
33
  });
@@ -714,7 +733,7 @@ async function tryAddInstrumentation(instrumentations, pkg, className) {
714
733
  log(` \u274C ${pkg} not installed`);
715
734
  }
716
735
  }
717
- async function init2(options = {}) {
736
+ async function init3(options = {}) {
718
737
  if (initialized) return;
719
738
  debugMode = options.debug ?? false;
720
739
  log("\u{1F680} Initializing Fallom tracing...");
@@ -796,6 +815,246 @@ function generateHexId(length) {
796
815
  return Array.from(bytes).map((b) => b.toString(16).padStart(2, "0")).join("");
797
816
  }
798
817
 
818
// src/prompts.ts — public surface of the prompts module.
// __export installs getters on prompts_exports so re-exports always
// observe the current (live) bindings of these functions.
var prompts_exports = {};
__export(prompts_exports, {
  clearPromptContext: () => clearPromptContext,
  get: () => get,
  getAB: () => getAB,
  getPromptContext: () => getPromptContext,
  init: () => init4
});
827
import { createHash } from "crypto";

// Module-level state for the prompts client.
var apiKey2 = null; // resolved by init4() from options or env
var baseUrl2 = "https://prompts.fallom.com"; // prompts service endpoint
var initialized2 = false; // guards repeated lazy initialization
var syncInterval = null; // background refresh timer handle
var debugMode2 = false; // verbose logging toggle (set per get/getAB call)
var promptCache = /* @__PURE__ */ new Map(); // key -> { versions: Map<version, content>, current }
var promptABCache = /* @__PURE__ */ new Map(); // key -> { versions: Map<version, { variants }>, current }
var promptContext = null; // last prompt used; consumed once by trace wrappers
var SYNC_TIMEOUT = 2e3; // ms budget for a single sync fetch
837
// Debug-gated logger for the prompts module; no-op unless debugMode2 is set.
function log2(msg) {
  if (!debugMode2) return;
  console.log(`[Fallom Prompts] ${msg}`);
}
842
// Initialize the prompts client: resolve the API key and base URL from
// options or environment, warm the caches, and start a 30s background
// refresh. Safe to call repeatedly; without an API key it stays idle.
function init4(options = {}) {
  apiKey2 = options.apiKey || process.env.FALLOM_API_KEY || null;
  baseUrl2 = options.baseUrl || process.env.FALLOM_PROMPTS_URL || process.env.FALLOM_BASE_URL || "https://prompts.fallom.com";
  initialized2 = true;
  if (!apiKey2) {
    // No key: remain initialized (ensureInit won't retry) but do nothing.
    return;
  }
  // Fire-and-forget warm-up; sync errors are intentionally swallowed so
  // prompt syncing can never break the host application.
  fetchAll().catch(() => {
  });
  if (!syncInterval) {
    syncInterval = setInterval(() => {
      fetchAll().catch(() => {
      });
    }, 3e4);
    // unref() only exists on Node timer objects; optional-call keeps this
    // from throwing in browser/edge runtimes where setInterval returns a
    // plain number.
    syncInterval.unref?.();
  }
}
859
// Lazily initialize on first use. init4 errors are deliberately ignored
// so prompt lookups proceed and fail later with clearer messages.
function ensureInit() {
  if (initialized2) return;
  try {
    init4();
  } catch {
  }
}
867
// Refresh the prompt and A/B-test caches concurrently.
async function fetchAll() {
  await Promise.all([fetchPrompts(), fetchPromptABTests()]);
}
870
// Fetch all prompts for this API key and merge them into promptCache.
// Best-effort: network errors, timeouts, and non-OK responses are ignored.
async function fetchPrompts(timeout = SYNC_TIMEOUT) {
  if (!apiKey2) return;
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeout);
  try {
    const resp = await fetch(`${baseUrl2}/prompts`, {
      headers: { Authorization: `Bearer ${apiKey2}` },
      signal: controller.signal
    });
    if (resp.ok) {
      const data = await resp.json();
      for (const p of data.prompts || []) {
        if (!promptCache.has(p.key)) {
          promptCache.set(p.key, { versions: /* @__PURE__ */ new Map(), current: null });
        }
        const cached = promptCache.get(p.key);
        cached.versions.set(p.version, {
          systemPrompt: p.system_prompt,
          userTemplate: p.user_template
        });
        // Last entry wins as "current".
        // NOTE(review): assumes the server orders versions oldest-to-newest — confirm.
        cached.current = p.version;
      }
    }
  } catch {
    // Swallow: prompt sync must never break the host application.
  } finally {
    // Clear the abort timer even when fetch/json throws, so no stray
    // timer fires against a completed request.
    clearTimeout(timeoutId);
  }
}
897
// Fetch all prompt A/B tests for this API key and merge them into
// promptABCache. Best-effort, mirroring fetchPrompts.
async function fetchPromptABTests(timeout = SYNC_TIMEOUT) {
  if (!apiKey2) return;
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeout);
  try {
    const resp = await fetch(`${baseUrl2}/prompt-ab-tests`, {
      headers: { Authorization: `Bearer ${apiKey2}` },
      signal: controller.signal
    });
    if (resp.ok) {
      const data = await resp.json();
      for (const t of data.prompt_ab_tests || []) {
        if (!promptABCache.has(t.key)) {
          promptABCache.set(t.key, { versions: /* @__PURE__ */ new Map(), current: null });
        }
        const cached = promptABCache.get(t.key);
        cached.versions.set(t.version, { variants: t.variants });
        // NOTE(review): assumes the server orders versions oldest-to-newest — confirm.
        cached.current = t.version;
      }
    }
  } catch {
    // Swallow: sync must never break the host application.
  } finally {
    // Clear the abort timer even on failure paths (was leaked before).
    clearTimeout(timeoutId);
  }
}
921
// Substitute {{ var }} placeholders in a template with values from
// `variables`. Unknown placeholders are left untouched so typos stay
// visible downstream instead of being silently erased.
function replaceVariables(template, variables) {
  if (!variables) return template;
  return template.replace(/\{\{(\s*\w+\s*)\}\}/g, (match, varName) => {
    const key = varName.trim();
    // Own-property check: a bare `key in variables` also matches
    // inherited keys (e.g. "{{toString}}" would substitute the
    // function's source text).
    return Object.prototype.hasOwnProperty.call(variables, key) ? String(variables[key]) : match;
  });
}
928
// Record the prompt used for the next LLM call; the trace wrappers read
// (and clear) this via getPromptContext().
function setPromptContext(ctx) {
  promptContext = ctx;
}
931
// Consume the pending prompt context: return it once, then reset to
// null so a later, unrelated LLM call is not mis-attributed.
function getPromptContext() {
  const pending = promptContext;
  promptContext = null;
  return pending;
}
936
/**
 * Look up a prompt by key, render its system/user templates with
 * `variables`, and stash the key/version in the prompt context so the
 * next traced LLM call is attributed to it.
 *
 * @param {string} promptKey - prompt identifier from the dashboard
 * @param {{variables?: object, version?: number, debug?: boolean}} options
 * @returns {Promise<{key: string, version: *, system: string, user: string}>}
 * @throws when the prompt or the requested version is unknown
 */
async function get(promptKey, options = {}) {
  const { variables, version, debug = false } = options;
  debugMode2 = debug;
  ensureInit();
  log2(`get() called: promptKey=${promptKey}`);
  let promptData = promptCache.get(promptKey);
  if (!promptData) {
    // Cache miss: do one synchronous refresh before giving up.
    log2("Not in cache, fetching...");
    await fetchPrompts(SYNC_TIMEOUT);
    promptData = promptCache.get(promptKey);
  }
  if (!promptData) {
    throw new Error(
      `Prompt '${promptKey}' not found. Check that it exists in your Fallom dashboard.`
    );
  }
  // Explicit version wins; otherwise use the latest synced version.
  const targetVersion = version ?? promptData.current;
  const content = promptData.versions.get(targetVersion);
  if (!content) {
    throw new Error(
      `Prompt '${promptKey}' version ${targetVersion} not found.`
    );
  }
  const system = replaceVariables(content.systemPrompt, variables);
  const user = replaceVariables(content.userTemplate, variables);
  setPromptContext({ promptKey, promptVersion: targetVersion });
  log2(`\u2705 Got prompt: ${promptKey} v${targetVersion}`);
  return { key: promptKey, version: targetVersion, system, user };
}
973
/**
 * Deterministically pick a prompt variant for an A/B test keyed on
 * sessionId, then resolve and render the selected prompt like get().
 * The session is hashed (md5) into a bucket in [0, 1e6); variant
 * weights are percentages, so weight * 1e4 spans that range. The same
 * sessionId always lands in the same bucket (sticky assignment).
 *
 * @param {string} abTestKey - A/B test identifier from the dashboard
 * @param {string} sessionId - stable id used for sticky bucketing
 * @param {{variables?: object, debug?: boolean}} options
 * @throws when the test, its variants, or the selected prompt are unknown
 */
async function getAB(abTestKey, sessionId, options = {}) {
  const { variables, debug = false } = options;
  debugMode2 = debug;
  ensureInit();
  log2(`getAB() called: abTestKey=${abTestKey}, sessionId=${sessionId}`);
  let abData = promptABCache.get(abTestKey);
  if (!abData) {
    // Cache miss: one synchronous refresh before giving up.
    log2("Not in cache, fetching...");
    await fetchPromptABTests(SYNC_TIMEOUT);
    abData = promptABCache.get(abTestKey);
  }
  if (!abData) {
    throw new Error(
      `Prompt A/B test '${abTestKey}' not found. Check that it exists in your Fallom dashboard.`
    );
  }
  const versionData = abData.versions.get(abData.current);
  if (!versionData) {
    throw new Error(`Prompt A/B test '${abTestKey}' has no current version.`);
  }
  const { variants } = versionData;
  log2(`A/B test '${abTestKey}' has ${variants?.length ?? 0} variants`);
  log2(`Version data: ${JSON.stringify(versionData, null, 2)}`);
  if (!variants || variants.length === 0) {
    throw new Error(
      `Prompt A/B test '${abTestKey}' has no variants configured.`
    );
  }
  // Bucket the session and walk the cumulative weight thresholds.
  const digest = createHash("md5").update(sessionId).digest();
  const bucket = digest.readUInt32BE(0) % 1e6;
  // Default to the last variant so rounding gaps in the weights can
  // never leave a session unassigned.
  let selectedIndex = variants.length - 1;
  let threshold = 0;
  for (let i = 0; i < variants.length; i++) {
    threshold += variants[i].weight * 1e4;
    if (bucket < threshold) {
      selectedIndex = i;
      break;
    }
  }
  const selectedVariant = variants[selectedIndex];
  const promptKey = selectedVariant.prompt_key;
  const promptVersion = selectedVariant.prompt_version;
  let promptData = promptCache.get(promptKey);
  if (!promptData) {
    await fetchPrompts(SYNC_TIMEOUT);
    promptData = promptCache.get(promptKey);
  }
  if (!promptData) {
    throw new Error(
      `Prompt '${promptKey}' (from A/B test '${abTestKey}') not found.`
    );
  }
  // Variant may pin a version; otherwise use the prompt's current one.
  const targetVersion = promptVersion ?? promptData.current;
  const content = promptData.versions.get(targetVersion);
  if (!content) {
    throw new Error(
      `Prompt '${promptKey}' version ${targetVersion} not found.`
    );
  }
  const system = replaceVariables(content.systemPrompt, variables);
  const user = replaceVariables(content.userTemplate, variables);
  setPromptContext({
    promptKey,
    promptVersion: targetVersion,
    abTestKey,
    variantIndex: selectedIndex
  });
  log2(
    `\u2705 Got prompt from A/B: ${promptKey} v${targetVersion} (variant ${selectedIndex})`
  );
  return {
    key: promptKey,
    version: targetVersion,
    system,
    user,
    abTestKey,
    variantIndex: selectedIndex
  };
}
1054
// Drop any pending prompt context (e.g. when a traced call never ran).
function clearPromptContext() {
  promptContext = null;
}
1057
+
799
1058
  // src/trace/wrappers/openai.ts
800
1059
  function wrapOpenAI(client, sessionCtx) {
801
1060
  const originalCreate = client.chat.completions.create.bind(
@@ -823,18 +1082,43 @@ function wrapOpenAI(client, sessionCtx) {
823
1082
  if (captureContent2) {
824
1083
  attributes["fallom.raw.request"] = JSON.stringify({
825
1084
  messages: params?.messages,
826
- model: params?.model
1085
+ model: params?.model,
1086
+ tools: params?.tools,
1087
+ tool_choice: params?.tool_choice,
1088
+ functions: params?.functions,
1089
+ function_call: params?.function_call
827
1090
  });
1091
+ const choice = response?.choices?.[0];
828
1092
  attributes["fallom.raw.response"] = JSON.stringify({
829
- text: response?.choices?.[0]?.message?.content,
830
- finishReason: response?.choices?.[0]?.finish_reason,
1093
+ text: choice?.message?.content,
1094
+ finishReason: choice?.finish_reason,
831
1095
  responseId: response?.id,
832
- model: response?.model
1096
+ model: response?.model,
1097
+ // Tool calls - send everything!
1098
+ toolCalls: choice?.message?.tool_calls,
1099
+ functionCall: choice?.message?.function_call
833
1100
  });
834
1101
  }
835
1102
  if (response?.usage) {
836
1103
  attributes["fallom.raw.usage"] = JSON.stringify(response.usage);
837
1104
  }
1105
+ const waterfallTimings = {
1106
+ requestStart: 0,
1107
+ requestEnd: endTime - startTime,
1108
+ responseEnd: endTime - startTime,
1109
+ totalDurationMs: endTime - startTime,
1110
+ // OpenAI tool calls (if present)
1111
+ toolCalls: response?.choices?.[0]?.message?.tool_calls?.map(
1112
+ (tc, idx) => ({
1113
+ id: tc.id,
1114
+ name: tc.function?.name,
1115
+ callTime: 0
1116
+ // All tool calls happen at once in non-streaming
1117
+ })
1118
+ )
1119
+ };
1120
+ attributes["fallom.raw.timings"] = JSON.stringify(waterfallTimings);
1121
+ const promptCtx = getPromptContext();
838
1122
  sendTrace({
839
1123
  config_key: ctx.configKey,
840
1124
  session_id: ctx.sessionId,
@@ -849,7 +1133,12 @@ function wrapOpenAI(client, sessionCtx) {
849
1133
  end_time: new Date(endTime).toISOString(),
850
1134
  duration_ms: endTime - startTime,
851
1135
  status: "OK",
852
- attributes
1136
+ attributes,
1137
+ // Prompt context (if prompts.get() or prompts.getAB() was called)
1138
+ prompt_key: promptCtx?.promptKey,
1139
+ prompt_version: promptCtx?.promptVersion,
1140
+ prompt_ab_test_key: promptCtx?.abTestKey,
1141
+ prompt_variant_index: promptCtx?.variantIndex
853
1142
  }).catch(() => {
854
1143
  });
855
1144
  return response;
@@ -908,18 +1197,48 @@ function wrapAnthropic(client, sessionCtx) {
908
1197
  attributes["fallom.raw.request"] = JSON.stringify({
909
1198
  messages: params?.messages,
910
1199
  system: params?.system,
911
- model: params?.model
1200
+ model: params?.model,
1201
+ tools: params?.tools,
1202
+ tool_choice: params?.tool_choice
912
1203
  });
1204
+ const contentBlocks = response?.content || [];
1205
+ const textBlocks = contentBlocks.filter((b) => b.type === "text");
1206
+ const toolUseBlocks2 = contentBlocks.filter(
1207
+ (b) => b.type === "tool_use"
1208
+ );
913
1209
  attributes["fallom.raw.response"] = JSON.stringify({
914
- text: response?.content?.[0]?.text,
1210
+ text: textBlocks.map((b) => b.text).join(""),
915
1211
  finishReason: response?.stop_reason,
916
1212
  responseId: response?.id,
917
- model: response?.model
1213
+ model: response?.model,
1214
+ // Tool calls - Anthropic uses tool_use content blocks
1215
+ toolCalls: toolUseBlocks2.map((b) => ({
1216
+ id: b.id,
1217
+ name: b.name,
1218
+ arguments: b.input
1219
+ })),
1220
+ // Also send raw content for full fidelity
1221
+ content: contentBlocks
918
1222
  });
919
1223
  }
920
1224
  if (response?.usage) {
921
1225
  attributes["fallom.raw.usage"] = JSON.stringify(response.usage);
922
1226
  }
1227
+ const waterfallTimings = {
1228
+ requestStart: 0,
1229
+ requestEnd: endTime - startTime,
1230
+ responseEnd: endTime - startTime,
1231
+ totalDurationMs: endTime - startTime,
1232
+ // Anthropic tool calls (if present)
1233
+ toolCalls: (response?.content || []).filter((b) => b.type === "tool_use").map((b) => ({
1234
+ id: b.id,
1235
+ name: b.name,
1236
+ callTime: 0
1237
+ // All tool calls happen at once in non-streaming
1238
+ }))
1239
+ };
1240
+ attributes["fallom.raw.timings"] = JSON.stringify(waterfallTimings);
1241
+ const promptCtx = getPromptContext();
923
1242
  sendTrace({
924
1243
  config_key: ctx.configKey,
925
1244
  session_id: ctx.sessionId,
@@ -934,7 +1253,12 @@ function wrapAnthropic(client, sessionCtx) {
934
1253
  end_time: new Date(endTime).toISOString(),
935
1254
  duration_ms: endTime - startTime,
936
1255
  status: "OK",
937
- attributes
1256
+ attributes,
1257
+ // Prompt context (if prompts.get() or prompts.getAB() was called)
1258
+ prompt_key: promptCtx?.promptKey,
1259
+ prompt_version: promptCtx?.promptVersion,
1260
+ prompt_ab_test_key: promptCtx?.abTestKey,
1261
+ prompt_variant_index: promptCtx?.variantIndex
938
1262
  }).catch(() => {
939
1263
  });
940
1264
  return response;
@@ -992,14 +1316,44 @@ function wrapGoogleAI(model, sessionCtx) {
992
1316
  };
993
1317
  if (captureContent2) {
994
1318
  attributes["fallom.raw.request"] = JSON.stringify(request);
1319
+ const candidates = result?.candidates || [];
1320
+ const functionCalls2 = [];
1321
+ for (const candidate of candidates) {
1322
+ const parts = candidate?.content?.parts || [];
1323
+ for (const part of parts) {
1324
+ if (part.functionCall) {
1325
+ functionCalls2.push({
1326
+ name: part.functionCall.name,
1327
+ arguments: part.functionCall.args
1328
+ });
1329
+ }
1330
+ }
1331
+ }
995
1332
  attributes["fallom.raw.response"] = JSON.stringify({
996
1333
  text: result?.text?.(),
997
- candidates: result?.candidates
1334
+ candidates: result?.candidates,
1335
+ finishReason: candidates[0]?.finishReason,
1336
+ // Tool/function calls - Google uses functionCall in parts
1337
+ toolCalls: functionCalls2.length > 0 ? functionCalls2 : void 0
998
1338
  });
999
1339
  }
1000
1340
  if (result?.usageMetadata) {
1001
1341
  attributes["fallom.raw.usage"] = JSON.stringify(result.usageMetadata);
1002
1342
  }
1343
+ const waterfallTimings = {
1344
+ requestStart: 0,
1345
+ requestEnd: endTime - startTime,
1346
+ responseEnd: endTime - startTime,
1347
+ totalDurationMs: endTime - startTime,
1348
+ // Google AI function calls (if present)
1349
+ toolCalls: functionCalls.map((fc) => ({
1350
+ name: fc.name,
1351
+ callTime: 0
1352
+ // All tool calls happen at once in non-streaming
1353
+ }))
1354
+ };
1355
+ attributes["fallom.raw.timings"] = JSON.stringify(waterfallTimings);
1356
+ const promptCtx = getPromptContext();
1003
1357
  sendTrace({
1004
1358
  config_key: ctx.configKey,
1005
1359
  session_id: ctx.sessionId,
@@ -1014,7 +1368,12 @@ function wrapGoogleAI(model, sessionCtx) {
1014
1368
  end_time: new Date(endTime).toISOString(),
1015
1369
  duration_ms: endTime - startTime,
1016
1370
  status: "OK",
1017
- attributes
1371
+ attributes,
1372
+ // Prompt context (if prompts.get() or prompts.getAB() was called)
1373
+ prompt_key: promptCtx?.promptKey,
1374
+ prompt_version: promptCtx?.promptVersion,
1375
+ prompt_ab_test_key: promptCtx?.abTestKey,
1376
+ prompt_variant_index: promptCtx?.variantIndex
1018
1377
  }).catch(() => {
1019
1378
  });
1020
1379
  return response;
@@ -1061,11 +1420,57 @@ function createGenerateTextWrapper(aiModule, sessionCtx, debug = false) {
1061
1420
  const params = args[0] || {};
1062
1421
  const startTime = Date.now();
1063
1422
  const captureContent2 = shouldCaptureContent();
1423
+ const toolTimings = /* @__PURE__ */ new Map();
1424
+ let wrappedParams = params;
1425
+ if (params.tools && typeof params.tools === "object") {
1426
+ const wrappedTools = {};
1427
+ for (const [toolName, tool] of Object.entries(
1428
+ params.tools
1429
+ )) {
1430
+ if (tool && typeof tool.execute === "function") {
1431
+ const originalExecute = tool.execute;
1432
+ wrappedTools[toolName] = {
1433
+ ...tool,
1434
+ execute: async (...executeArgs) => {
1435
+ const toolStartTime = Date.now();
1436
+ const toolCallId = `${toolName}-${toolStartTime}`;
1437
+ try {
1438
+ const result = await originalExecute(...executeArgs);
1439
+ const toolEndTime = Date.now();
1440
+ toolTimings.set(toolCallId, {
1441
+ name: toolName,
1442
+ startTime: toolStartTime - startTime,
1443
+ // Relative to request start
1444
+ endTime: toolEndTime - startTime,
1445
+ duration: toolEndTime - toolStartTime
1446
+ });
1447
+ return result;
1448
+ } catch (error) {
1449
+ const toolEndTime = Date.now();
1450
+ toolTimings.set(toolCallId, {
1451
+ name: toolName,
1452
+ startTime: toolStartTime - startTime,
1453
+ endTime: toolEndTime - startTime,
1454
+ duration: toolEndTime - toolStartTime
1455
+ });
1456
+ throw error;
1457
+ }
1458
+ }
1459
+ };
1460
+ } else {
1461
+ wrappedTools[toolName] = tool;
1462
+ }
1463
+ }
1464
+ wrappedParams = { ...params, tools: wrappedTools };
1465
+ }
1064
1466
  try {
1065
- const result = await aiModule.generateText(...args);
1467
+ const result = await aiModule.generateText(wrappedParams);
1066
1468
  const endTime = Date.now();
1067
1469
  if (debug || isDebugMode()) {
1068
- console.log("\n\u{1F50D} [Fallom Debug] generateText raw result:", JSON.stringify(result, null, 2));
1470
+ console.log(
1471
+ "\n\u{1F50D} [Fallom Debug] generateText raw result:",
1472
+ JSON.stringify(result, null, 2)
1473
+ );
1069
1474
  }
1070
1475
  const modelId = result?.response?.modelId || params?.model?.modelId || String(params?.model || "unknown");
1071
1476
  const attributes = {
@@ -1077,21 +1482,153 @@ function createGenerateTextWrapper(aiModule, sessionCtx, debug = false) {
1077
1482
  prompt: params?.prompt,
1078
1483
  messages: params?.messages,
1079
1484
  system: params?.system,
1080
- model: modelId
1485
+ model: modelId,
1486
+ tools: params?.tools ? Object.keys(params.tools) : void 0,
1487
+ maxSteps: params?.maxSteps
1488
+ });
1489
+ const mapToolCall = (tc) => ({
1490
+ toolCallId: tc?.toolCallId,
1491
+ toolName: tc?.toolName,
1492
+ args: tc?.args,
1493
+ // The actual arguments passed to the tool!
1494
+ type: tc?.type
1495
+ });
1496
+ const mapToolResult = (tr) => ({
1497
+ toolCallId: tr?.toolCallId,
1498
+ toolName: tr?.toolName,
1499
+ result: tr?.result,
1500
+ // The actual result from the tool!
1501
+ type: tr?.type
1081
1502
  });
1082
1503
  attributes["fallom.raw.response"] = JSON.stringify({
1083
1504
  text: result?.text,
1084
1505
  finishReason: result?.finishReason,
1085
1506
  responseId: result?.response?.id,
1086
- modelId: result?.response?.modelId
1507
+ modelId: result?.response?.modelId,
1508
+ // Tool calls with FULL data (id, name, args)
1509
+ toolCalls: result?.toolCalls?.map(mapToolCall),
1510
+ // Tool results with FULL data (id, name, result)
1511
+ toolResults: result?.toolResults?.map(mapToolResult),
1512
+ // Multi-step agent data with FULL tool info including timestamps
1513
+ steps: result?.steps?.map((step) => ({
1514
+ stepType: step?.stepType,
1515
+ text: step?.text,
1516
+ finishReason: step?.finishReason,
1517
+ toolCalls: step?.toolCalls?.map(mapToolCall),
1518
+ toolResults: step?.toolResults?.map(mapToolResult),
1519
+ usage: step?.usage,
1520
+ // Step-level timing from Vercel AI SDK
1521
+ timestamp: step?.response?.timestamp,
1522
+ responseId: step?.response?.id
1523
+ })),
1524
+ // Response messages (includes tool call/result messages)
1525
+ responseMessages: result?.responseMessages
1087
1526
  });
1088
1527
  }
1089
1528
  if (result?.usage) {
1090
1529
  attributes["fallom.raw.usage"] = JSON.stringify(result.usage);
1091
1530
  }
1092
1531
  if (result?.experimental_providerMetadata) {
1093
- attributes["fallom.raw.providerMetadata"] = JSON.stringify(result.experimental_providerMetadata);
1532
+ attributes["fallom.raw.providerMetadata"] = JSON.stringify(
1533
+ result.experimental_providerMetadata
1534
+ );
1094
1535
  }
1536
+ const totalDurationMs = endTime - startTime;
1537
+ const sortedToolTimings = Array.from(toolTimings.values()).sort(
1538
+ (a, b) => a.startTime - b.startTime
1539
+ );
1540
+ const waterfallTimings = {
1541
+ requestStart: 0,
1542
+ responseEnd: totalDurationMs,
1543
+ totalDurationMs,
1544
+ phases: [],
1545
+ // Include actual tool timings for verification
1546
+ toolTimings: sortedToolTimings
1547
+ };
1548
+ if (sortedToolTimings.length > 0) {
1549
+ const firstToolStart = Math.min(
1550
+ ...sortedToolTimings.map((t) => t.startTime)
1551
+ );
1552
+ const lastToolEnd = Math.max(
1553
+ ...sortedToolTimings.map((t) => t.endTime)
1554
+ );
1555
+ if (firstToolStart > 10) {
1556
+ waterfallTimings.phases.push({
1557
+ type: "llm",
1558
+ label: "LLM Call 1 (decides tools)",
1559
+ startMs: 0,
1560
+ endMs: firstToolStart,
1561
+ durationMs: firstToolStart,
1562
+ accurate: true
1563
+ });
1564
+ }
1565
+ sortedToolTimings.forEach((toolTiming) => {
1566
+ waterfallTimings.phases.push({
1567
+ type: "tool",
1568
+ label: `${toolTiming.name}()`,
1569
+ startMs: toolTiming.startTime,
1570
+ endMs: toolTiming.endTime,
1571
+ durationMs: toolTiming.duration,
1572
+ accurate: true
1573
+ // This is REAL measured timing!
1574
+ });
1575
+ });
1576
+ const finalResponseDuration = totalDurationMs - lastToolEnd;
1577
+ if (finalResponseDuration > 10) {
1578
+ waterfallTimings.phases.push({
1579
+ type: "response",
1580
+ label: "LLM Call 2 \u2192 Final Response",
1581
+ startMs: lastToolEnd,
1582
+ endMs: totalDurationMs,
1583
+ durationMs: finalResponseDuration,
1584
+ accurate: true
1585
+ });
1586
+ }
1587
+ } else if (result?.steps && result.steps.length > 0) {
1588
+ const steps = result.steps;
1589
+ const stepDuration = Math.round(totalDurationMs / steps.length);
1590
+ steps.forEach((step, idx) => {
1591
+ const hasTools = step?.toolCalls && step.toolCalls.length > 0;
1592
+ const isFinalStep = step?.finishReason === "stop";
1593
+ const stepStart = idx * stepDuration;
1594
+ const stepEnd = Math.min((idx + 1) * stepDuration, totalDurationMs);
1595
+ if (hasTools) {
1596
+ waterfallTimings.phases.push({
1597
+ type: "llm",
1598
+ label: `Step ${idx + 1}: LLM + Tools`,
1599
+ startMs: stepStart,
1600
+ endMs: stepEnd,
1601
+ durationMs: stepEnd - stepStart,
1602
+ accurate: false,
1603
+ note: "Tool timing not captured - combined step"
1604
+ });
1605
+ } else if (isFinalStep) {
1606
+ waterfallTimings.phases.push({
1607
+ type: "response",
1608
+ label: `Step ${idx + 1}: Final Response`,
1609
+ startMs: stepStart,
1610
+ endMs: stepEnd,
1611
+ durationMs: stepEnd - stepStart,
1612
+ accurate: true
1613
+ });
1614
+ }
1615
+ });
1616
+ }
1617
+ if (result?.steps) {
1618
+ waterfallTimings.steps = result.steps.map((step, idx) => ({
1619
+ stepIndex: idx,
1620
+ stepType: step?.stepType,
1621
+ finishReason: step?.finishReason,
1622
+ timestamp: step?.response?.timestamp,
1623
+ toolCalls: step?.toolCalls?.map((tc) => ({
1624
+ id: tc?.toolCallId,
1625
+ name: tc?.toolName
1626
+ })),
1627
+ usage: step?.usage
1628
+ }));
1629
+ }
1630
+ attributes["fallom.raw.timings"] = JSON.stringify(waterfallTimings);
1631
+ const promptCtx = getPromptContext();
1095
1632
  sendTrace({
1096
1633
  config_key: ctx.configKey,
1097
1634
  session_id: ctx.sessionId,
@@ -1106,7 +1643,12 @@ function createGenerateTextWrapper(aiModule, sessionCtx, debug = false) {
1106
1643
  end_time: new Date(endTime).toISOString(),
1107
1644
  duration_ms: endTime - startTime,
1108
1645
  status: "OK",
1109
- attributes
1646
+ attributes,
1647
+ // Prompt context (if prompts.get() or prompts.getAB() was called)
1648
+ prompt_key: promptCtx?.promptKey,
1649
+ prompt_version: promptCtx?.promptVersion,
1650
+ prompt_ab_test_key: promptCtx?.abTestKey,
1651
+ prompt_variant_index: promptCtx?.variantIndex
1110
1652
  }).catch(() => {
1111
1653
  });
1112
1654
  return result;
@@ -1146,7 +1688,7 @@ function createGenerateTextWrapper(aiModule, sessionCtx, debug = false) {
1146
1688
  }
1147
1689
 
1148
1690
  // src/trace/wrappers/vercel-ai/stream-text.ts
1149
- function log2(...args) {
1691
+ function log3(...args) {
1150
1692
  if (isDebugMode()) console.log("[Fallom]", ...args);
1151
1693
  }
1152
1694
  function createStreamTextWrapper(aiModule, sessionCtx, debug = false) {
@@ -1155,7 +1697,47 @@ function createStreamTextWrapper(aiModule, sessionCtx, debug = false) {
1155
1697
  const params = args[0] || {};
1156
1698
  const startTime = Date.now();
1157
1699
  const captureContent2 = shouldCaptureContent();
1158
- const result = await aiModule.streamText(...args);
1700
+ const toolTimings = /* @__PURE__ */ new Map();
1701
+ let wrappedParams = params;
1702
+ if (params.tools && typeof params.tools === "object") {
1703
+ const wrappedTools = {};
1704
+ for (const [toolName, tool] of Object.entries(params.tools)) {
1705
+ if (tool && typeof tool.execute === "function") {
1706
+ const originalExecute = tool.execute;
1707
+ wrappedTools[toolName] = {
1708
+ ...tool,
1709
+ execute: async (...executeArgs) => {
1710
+ const toolStartTime = Date.now();
1711
+ const toolCallId = `${toolName}-${toolStartTime}`;
1712
+ try {
1713
+ const result2 = await originalExecute(...executeArgs);
1714
+ const toolEndTime = Date.now();
1715
+ toolTimings.set(toolCallId, {
1716
+ name: toolName,
1717
+ startTime: toolStartTime - startTime,
1718
+ endTime: toolEndTime - startTime,
1719
+ duration: toolEndTime - toolStartTime
1720
+ });
1721
+ return result2;
1722
+ } catch (error) {
1723
+ const toolEndTime = Date.now();
1724
+ toolTimings.set(toolCallId, {
1725
+ name: toolName,
1726
+ startTime: toolStartTime - startTime,
1727
+ endTime: toolEndTime - startTime,
1728
+ duration: toolEndTime - toolStartTime
1729
+ });
1730
+ throw error;
1731
+ }
1732
+ }
1733
+ };
1734
+ } else {
1735
+ wrappedTools[toolName] = tool;
1736
+ }
1737
+ }
1738
+ wrappedParams = { ...params, tools: wrappedTools };
1739
+ }
1740
+ const result = await aiModule.streamText(wrappedParams);
1159
1741
  if (!isInitialized()) {
1160
1742
  return result;
1161
1743
  }
@@ -1169,72 +1751,213 @@ function createStreamTextWrapper(aiModule, sessionCtx, debug = false) {
1169
1751
  Promise.all([
1170
1752
  result.usage.catch(() => null),
1171
1753
  result.text?.catch(() => null),
1172
- result.finishReason?.catch(() => null)
1173
- ]).then(async ([rawUsage, responseText, finishReason]) => {
1174
- const endTime = Date.now();
1175
- if (debug || isDebugMode()) {
1176
- console.log("\n\u{1F50D} [Fallom Debug] streamText raw usage:", JSON.stringify(rawUsage, null, 2));
1177
- console.log("\u{1F50D} [Fallom Debug] streamText response text:", responseText?.slice(0, 100));
1178
- console.log("\u{1F50D} [Fallom Debug] streamText finish reason:", finishReason);
1179
- }
1180
- let providerMetadata = result?.experimental_providerMetadata;
1181
- if (providerMetadata && typeof providerMetadata.then === "function") {
1182
- try {
1183
- providerMetadata = await providerMetadata;
1184
- } catch {
1185
- providerMetadata = void 0;
1754
+ result.finishReason?.catch(() => null),
1755
+ result.toolCalls?.catch(() => null),
1756
+ result.toolResults?.catch(() => null),
1757
+ result.steps?.catch(() => null),
1758
+ result.responseMessages?.catch(() => null)
1759
+ ]).then(
1760
+ async ([
1761
+ rawUsage,
1762
+ responseText,
1763
+ finishReason,
1764
+ toolCalls,
1765
+ toolResults,
1766
+ steps,
1767
+ responseMessages
1768
+ ]) => {
1769
+ const endTime = Date.now();
1770
+ if (debug || isDebugMode()) {
1771
+ console.log(
1772
+ "\n\u{1F50D} [Fallom Debug] streamText raw usage:",
1773
+ JSON.stringify(rawUsage, null, 2)
1774
+ );
1775
+ console.log(
1776
+ "\u{1F50D} [Fallom Debug] streamText response text:",
1777
+ responseText?.slice(0, 100)
1778
+ );
1779
+ console.log(
1780
+ "\u{1F50D} [Fallom Debug] streamText finish reason:",
1781
+ finishReason
1782
+ );
1783
+ console.log(
1784
+ "\u{1F50D} [Fallom Debug] streamText toolCalls:",
1785
+ JSON.stringify(toolCalls, null, 2)
1786
+ );
1787
+ console.log(
1788
+ "\u{1F50D} [Fallom Debug] streamText steps count:",
1789
+ steps?.length
1790
+ );
1186
1791
  }
1187
- }
1188
- const attributes = {
1189
- "fallom.sdk_version": "2",
1190
- "fallom.method": "streamText",
1191
- "fallom.is_streaming": true
1192
- };
1193
- if (captureContent2) {
1194
- attributes["fallom.raw.request"] = JSON.stringify({
1195
- prompt: params?.prompt,
1196
- messages: params?.messages,
1197
- system: params?.system,
1198
- model: modelId
1199
- });
1200
- if (responseText || finishReason) {
1792
+ let providerMetadata = result?.experimental_providerMetadata;
1793
+ if (providerMetadata && typeof providerMetadata.then === "function") {
1794
+ try {
1795
+ providerMetadata = await providerMetadata;
1796
+ } catch {
1797
+ providerMetadata = void 0;
1798
+ }
1799
+ }
1800
+ const attributes = {
1801
+ "fallom.sdk_version": "2",
1802
+ "fallom.method": "streamText",
1803
+ "fallom.is_streaming": true
1804
+ };
1805
+ if (captureContent2) {
1806
+ const mapToolCall = (tc) => ({
1807
+ toolCallId: tc?.toolCallId,
1808
+ toolName: tc?.toolName,
1809
+ args: tc?.args,
1810
+ // The actual arguments passed to the tool!
1811
+ type: tc?.type
1812
+ });
1813
+ const mapToolResult = (tr) => ({
1814
+ toolCallId: tr?.toolCallId,
1815
+ toolName: tr?.toolName,
1816
+ result: tr?.result,
1817
+ // The actual result from the tool!
1818
+ type: tr?.type
1819
+ });
1820
+ attributes["fallom.raw.request"] = JSON.stringify({
1821
+ prompt: params?.prompt,
1822
+ messages: params?.messages,
1823
+ system: params?.system,
1824
+ model: modelId,
1825
+ tools: params?.tools ? Object.keys(params.tools) : void 0,
1826
+ maxSteps: params?.maxSteps
1827
+ });
1201
1828
  attributes["fallom.raw.response"] = JSON.stringify({
1202
1829
  text: responseText,
1203
- finishReason
1830
+ finishReason,
1831
+ // Tool calls with FULL data (id, name, args)
1832
+ toolCalls: toolCalls?.map(mapToolCall),
1833
+ // Tool results with FULL data (id, name, result)
1834
+ toolResults: toolResults?.map(mapToolResult),
1835
+ // Multi-step agent data with FULL tool info including timestamps
1836
+ steps: steps?.map((step) => ({
1837
+ stepType: step?.stepType,
1838
+ text: step?.text,
1839
+ finishReason: step?.finishReason,
1840
+ toolCalls: step?.toolCalls?.map(mapToolCall),
1841
+ toolResults: step?.toolResults?.map(mapToolResult),
1842
+ usage: step?.usage,
1843
+ // Step-level timing from Vercel AI SDK
1844
+ timestamp: step?.response?.timestamp,
1845
+ responseId: step?.response?.id
1846
+ })),
1847
+ // Response messages (includes tool call/result messages)
1848
+ responseMessages
1204
1849
  });
1205
1850
  }
1851
+ if (rawUsage) {
1852
+ attributes["fallom.raw.usage"] = JSON.stringify(rawUsage);
1853
+ }
1854
+ if (providerMetadata) {
1855
+ attributes["fallom.raw.providerMetadata"] = JSON.stringify(providerMetadata);
1856
+ }
1857
+ if (firstTokenTime) {
1858
+ attributes["fallom.time_to_first_token_ms"] = firstTokenTime - startTime;
1859
+ }
1860
+ const totalDurationMs = endTime - startTime;
1861
+ const sortedToolTimings = Array.from(toolTimings.values()).sort(
1862
+ (a, b) => a.startTime - b.startTime
1863
+ );
1864
+ const waterfallTimings = {
1865
+ requestStart: 0,
1866
+ firstTokenTime: firstTokenTime ? firstTokenTime - startTime : void 0,
1867
+ responseEnd: totalDurationMs,
1868
+ totalDurationMs,
1869
+ isStreaming: true,
1870
+ phases: [],
1871
+ toolTimings: sortedToolTimings
1872
+ };
1873
+ if (firstTokenTime) {
1874
+ waterfallTimings.phases.push({
1875
+ type: "ttft",
1876
+ label: "Time to First Token",
1877
+ startMs: 0,
1878
+ endMs: firstTokenTime - startTime,
1879
+ durationMs: firstTokenTime - startTime,
1880
+ accurate: true
1881
+ });
1882
+ }
1883
+ if (sortedToolTimings.length > 0) {
1884
+ const firstToolStart = Math.min(...sortedToolTimings.map((t) => t.startTime));
1885
+ const lastToolEnd = Math.max(...sortedToolTimings.map((t) => t.endTime));
1886
+ if (firstToolStart > 10) {
1887
+ waterfallTimings.phases.push({
1888
+ type: "llm",
1889
+ label: "LLM Call 1 (decides tools)",
1890
+ startMs: 0,
1891
+ endMs: firstToolStart,
1892
+ durationMs: firstToolStart,
1893
+ accurate: true
1894
+ });
1895
+ }
1896
+ sortedToolTimings.forEach((toolTiming) => {
1897
+ waterfallTimings.phases.push({
1898
+ type: "tool",
1899
+ label: `${toolTiming.name}()`,
1900
+ startMs: toolTiming.startTime,
1901
+ endMs: toolTiming.endTime,
1902
+ durationMs: toolTiming.duration,
1903
+ accurate: true
1904
+ });
1905
+ });
1906
+ const finalResponseDuration = totalDurationMs - lastToolEnd;
1907
+ if (finalResponseDuration > 10) {
1908
+ waterfallTimings.phases.push({
1909
+ type: "response",
1910
+ label: "LLM Call 2 \u2192 Final Response",
1911
+ startMs: lastToolEnd,
1912
+ endMs: totalDurationMs,
1913
+ durationMs: finalResponseDuration,
1914
+ accurate: true
1915
+ });
1916
+ }
1917
+ }
1918
+ if (steps) {
1919
+ waterfallTimings.steps = steps.map((step, idx) => ({
1920
+ stepIndex: idx,
1921
+ stepType: step?.stepType,
1922
+ finishReason: step?.finishReason,
1923
+ timestamp: step?.response?.timestamp,
1924
+ toolCalls: step?.toolCalls?.map((tc) => ({
1925
+ id: tc?.toolCallId,
1926
+ name: tc?.toolName
1927
+ })),
1928
+ usage: step?.usage
1929
+ }));
1930
+ }
1931
+ attributes["fallom.raw.timings"] = JSON.stringify(waterfallTimings);
1932
+ const promptCtx = getPromptContext();
1933
+ sendTrace({
1934
+ config_key: ctx.configKey,
1935
+ session_id: ctx.sessionId,
1936
+ customer_id: ctx.customerId,
1937
+ trace_id: traceId,
1938
+ span_id: spanId,
1939
+ parent_span_id: parentSpanId,
1940
+ name: "streamText",
1941
+ kind: "llm",
1942
+ model: modelId,
1943
+ start_time: new Date(startTime).toISOString(),
1944
+ end_time: new Date(endTime).toISOString(),
1945
+ duration_ms: endTime - startTime,
1946
+ status: "OK",
1947
+ time_to_first_token_ms: firstTokenTime ? firstTokenTime - startTime : void 0,
1948
+ is_streaming: true,
1949
+ attributes,
1950
+ // Prompt context (if prompts.get() or prompts.getAB() was called)
1951
+ prompt_key: promptCtx?.promptKey,
1952
+ prompt_version: promptCtx?.promptVersion,
1953
+ prompt_ab_test_key: promptCtx?.abTestKey,
1954
+ prompt_variant_index: promptCtx?.variantIndex
1955
+ }).catch(() => {
1956
+ });
1206
1957
  }
1207
- if (rawUsage) {
1208
- attributes["fallom.raw.usage"] = JSON.stringify(rawUsage);
1209
- }
1210
- if (providerMetadata) {
1211
- attributes["fallom.raw.providerMetadata"] = JSON.stringify(providerMetadata);
1212
- }
1213
- if (firstTokenTime) {
1214
- attributes["fallom.time_to_first_token_ms"] = firstTokenTime - startTime;
1215
- }
1216
- sendTrace({
1217
- config_key: ctx.configKey,
1218
- session_id: ctx.sessionId,
1219
- customer_id: ctx.customerId,
1220
- trace_id: traceId,
1221
- span_id: spanId,
1222
- parent_span_id: parentSpanId,
1223
- name: "streamText",
1224
- kind: "llm",
1225
- model: modelId,
1226
- start_time: new Date(startTime).toISOString(),
1227
- end_time: new Date(endTime).toISOString(),
1228
- duration_ms: endTime - startTime,
1229
- status: "OK",
1230
- time_to_first_token_ms: firstTokenTime ? firstTokenTime - startTime : void 0,
1231
- is_streaming: true,
1232
- attributes
1233
- }).catch(() => {
1234
- });
1235
- }).catch((error) => {
1958
+ ).catch((error) => {
1236
1959
  const endTime = Date.now();
1237
- log2("\u274C streamText error:", error?.message);
1960
+ log3("\u274C streamText error:", error?.message);
1238
1961
  sendTrace({
1239
1962
  config_key: ctx.configKey,
1240
1963
  session_id: ctx.sessionId,
@@ -1265,7 +1988,7 @@ function createStreamTextWrapper(aiModule, sessionCtx, debug = false) {
1265
1988
  for await (const chunk of originalTextStream) {
1266
1989
  if (!firstTokenTime) {
1267
1990
  firstTokenTime = Date.now();
1268
- log2("\u23F1\uFE0F Time to first token:", firstTokenTime - startTime, "ms");
1991
+ log3("\u23F1\uFE0F Time to first token:", firstTokenTime - startTime, "ms");
1269
1992
  }
1270
1993
  yield chunk;
1271
1994
  }
@@ -1335,6 +2058,7 @@ function createGenerateObjectWrapper(aiModule, sessionCtx, debug = false) {
1335
2058
  result.experimental_providerMetadata
1336
2059
  );
1337
2060
  }
2061
+ const promptCtx = getPromptContext();
1338
2062
  sendTrace({
1339
2063
  config_key: ctx.configKey,
1340
2064
  session_id: ctx.sessionId,
@@ -1349,7 +2073,12 @@ function createGenerateObjectWrapper(aiModule, sessionCtx, debug = false) {
1349
2073
  end_time: new Date(endTime).toISOString(),
1350
2074
  duration_ms: endTime - startTime,
1351
2075
  status: "OK",
1352
- attributes
2076
+ attributes,
2077
+ // Prompt context (if prompts.get() or prompts.getAB() was called)
2078
+ prompt_key: promptCtx?.promptKey,
2079
+ prompt_version: promptCtx?.promptVersion,
2080
+ prompt_ab_test_key: promptCtx?.abTestKey,
2081
+ prompt_variant_index: promptCtx?.variantIndex
1353
2082
  }).catch(() => {
1354
2083
  });
1355
2084
  return result;
@@ -1444,6 +2173,7 @@ function createStreamObjectWrapper(aiModule, sessionCtx, debug = false) {
1444
2173
  if (providerMetadata) {
1445
2174
  attributes["fallom.raw.providerMetadata"] = JSON.stringify(providerMetadata);
1446
2175
  }
2176
+ const promptCtx = getPromptContext();
1447
2177
  sendTrace({
1448
2178
  config_key: ctx.configKey,
1449
2179
  session_id: ctx.sessionId,
@@ -1459,7 +2189,12 @@ function createStreamObjectWrapper(aiModule, sessionCtx, debug = false) {
1459
2189
  duration_ms: endTime - startTime,
1460
2190
  status: "OK",
1461
2191
  is_streaming: true,
1462
- attributes
2192
+ attributes,
2193
+ // Prompt context (if prompts.get() or prompts.getAB() was called)
2194
+ prompt_key: promptCtx?.promptKey,
2195
+ prompt_version: promptCtx?.promptVersion,
2196
+ prompt_ab_test_key: promptCtx?.abTestKey,
2197
+ prompt_variant_index: promptCtx?.variantIndex
1463
2198
  }).catch(() => {
1464
2199
  });
1465
2200
  }).catch((error) => {
@@ -1602,7 +2337,7 @@ var FallomSession = class {
1602
2337
  configKey = this.ctx.configKey;
1603
2338
  opts = configKeyOrOptions || {};
1604
2339
  }
1605
- const { get: get2 } = await import("./models-SEFDGZU2.mjs");
2340
+ const { get: get2 } = await import("./models-NKYYGMSR.mjs");
1606
2341
  return get2(configKey, this.ctx.sessionId, opts);
1607
2342
  }
1608
2343
  /**
@@ -1764,252 +2499,32 @@ function session(options) {
1764
2499
  return new FallomSession(options);
1765
2500
  }
1766
2501
 
1767
- // src/prompts.ts
1768
- var prompts_exports = {};
1769
- __export(prompts_exports, {
1770
- clearPromptContext: () => clearPromptContext,
1771
- get: () => get,
1772
- getAB: () => getAB,
1773
- getPromptContext: () => getPromptContext,
1774
- init: () => init3
2502
+ // src/evals/index.ts
2503
+ var evals_exports = {};
2504
+ __export(evals_exports, {
2505
+ AVAILABLE_METRICS: () => AVAILABLE_METRICS,
2506
+ DEFAULT_JUDGE_MODEL: () => DEFAULT_JUDGE_MODEL,
2507
+ METRIC_PROMPTS: () => METRIC_PROMPTS,
2508
+ compareModels: () => compareModels,
2509
+ createCustomModel: () => createCustomModel,
2510
+ createModelFromCallable: () => createModelFromCallable,
2511
+ createOpenAIModel: () => createOpenAIModel,
2512
+ customMetric: () => customMetric,
2513
+ datasetFromFallom: () => datasetFromFallom,
2514
+ datasetFromTraces: () => datasetFromTraces,
2515
+ evaluate: () => evaluate,
2516
+ getMetricName: () => getMetricName,
2517
+ init: () => init2,
2518
+ isCustomMetric: () => isCustomMetric,
2519
+ uploadResults: () => uploadResultsPublic
1775
2520
  });
1776
- import { createHash } from "crypto";
1777
- var apiKey2 = null;
1778
- var baseUrl2 = "https://prompts.fallom.com";
1779
- var initialized2 = false;
1780
- var syncInterval = null;
1781
- var debugMode2 = false;
1782
- var promptCache = /* @__PURE__ */ new Map();
1783
- var promptABCache = /* @__PURE__ */ new Map();
1784
- var promptContext = null;
1785
- var SYNC_TIMEOUT = 2e3;
1786
- function log3(msg) {
1787
- if (debugMode2) {
1788
- console.log(`[Fallom Prompts] ${msg}`);
1789
- }
1790
- }
1791
- function init3(options = {}) {
1792
- apiKey2 = options.apiKey || process.env.FALLOM_API_KEY || null;
1793
- baseUrl2 = options.baseUrl || process.env.FALLOM_PROMPTS_URL || process.env.FALLOM_BASE_URL || "https://prompts.fallom.com";
1794
- initialized2 = true;
1795
- if (!apiKey2) {
1796
- return;
1797
- }
1798
- fetchAll().catch(() => {
1799
- });
1800
- if (!syncInterval) {
1801
- syncInterval = setInterval(() => {
1802
- fetchAll().catch(() => {
1803
- });
1804
- }, 3e4);
1805
- syncInterval.unref();
1806
- }
1807
- }
1808
- function ensureInit() {
1809
- if (!initialized2) {
1810
- try {
1811
- init3();
1812
- } catch {
1813
- }
1814
- }
1815
- }
1816
- async function fetchAll() {
1817
- await Promise.all([fetchPrompts(), fetchPromptABTests()]);
1818
- }
1819
- async function fetchPrompts(timeout = SYNC_TIMEOUT) {
1820
- if (!apiKey2) return;
1821
- try {
1822
- const controller = new AbortController();
1823
- const timeoutId = setTimeout(() => controller.abort(), timeout);
1824
- const resp = await fetch(`${baseUrl2}/prompts`, {
1825
- headers: { Authorization: `Bearer ${apiKey2}` },
1826
- signal: controller.signal
1827
- });
1828
- clearTimeout(timeoutId);
1829
- if (resp.ok) {
1830
- const data = await resp.json();
1831
- for (const p of data.prompts || []) {
1832
- if (!promptCache.has(p.key)) {
1833
- promptCache.set(p.key, { versions: /* @__PURE__ */ new Map(), current: null });
1834
- }
1835
- const cached = promptCache.get(p.key);
1836
- cached.versions.set(p.version, {
1837
- systemPrompt: p.system_prompt,
1838
- userTemplate: p.user_template
1839
- });
1840
- cached.current = p.version;
1841
- }
1842
- }
1843
- } catch {
1844
- }
1845
- }
1846
- async function fetchPromptABTests(timeout = SYNC_TIMEOUT) {
1847
- if (!apiKey2) return;
1848
- try {
1849
- const controller = new AbortController();
1850
- const timeoutId = setTimeout(() => controller.abort(), timeout);
1851
- const resp = await fetch(`${baseUrl2}/prompt-ab-tests`, {
1852
- headers: { Authorization: `Bearer ${apiKey2}` },
1853
- signal: controller.signal
1854
- });
1855
- clearTimeout(timeoutId);
1856
- if (resp.ok) {
1857
- const data = await resp.json();
1858
- for (const t of data.prompt_ab_tests || []) {
1859
- if (!promptABCache.has(t.key)) {
1860
- promptABCache.set(t.key, { versions: /* @__PURE__ */ new Map(), current: null });
1861
- }
1862
- const cached = promptABCache.get(t.key);
1863
- cached.versions.set(t.version, { variants: t.variants });
1864
- cached.current = t.version;
1865
- }
1866
- }
1867
- } catch {
1868
- }
1869
- }
1870
- function replaceVariables(template, variables) {
1871
- if (!variables) return template;
1872
- return template.replace(/\{\{(\s*\w+\s*)\}\}/g, (match, varName) => {
1873
- const key = varName.trim();
1874
- return key in variables ? String(variables[key]) : match;
1875
- });
1876
- }
1877
- function setPromptContext(ctx) {
1878
- promptContext = ctx;
1879
- }
1880
- function getPromptContext() {
1881
- const ctx = promptContext;
1882
- promptContext = null;
1883
- return ctx;
1884
- }
1885
- async function get(promptKey, options = {}) {
1886
- const { variables, version, debug = false } = options;
1887
- debugMode2 = debug;
1888
- ensureInit();
1889
- log3(`get() called: promptKey=${promptKey}`);
1890
- let promptData = promptCache.get(promptKey);
1891
- if (!promptData) {
1892
- log3("Not in cache, fetching...");
1893
- await fetchPrompts(SYNC_TIMEOUT);
1894
- promptData = promptCache.get(promptKey);
1895
- }
1896
- if (!promptData) {
1897
- throw new Error(
1898
- `Prompt '${promptKey}' not found. Check that it exists in your Fallom dashboard.`
1899
- );
1900
- }
1901
- const targetVersion = version ?? promptData.current;
1902
- const content = promptData.versions.get(targetVersion);
1903
- if (!content) {
1904
- throw new Error(
1905
- `Prompt '${promptKey}' version ${targetVersion} not found.`
1906
- );
1907
- }
1908
- const system = replaceVariables(content.systemPrompt, variables);
1909
- const user = replaceVariables(content.userTemplate, variables);
1910
- setPromptContext({
1911
- promptKey,
1912
- promptVersion: targetVersion
1913
- });
1914
- log3(`\u2705 Got prompt: ${promptKey} v${targetVersion}`);
1915
- return {
1916
- key: promptKey,
1917
- version: targetVersion,
1918
- system,
1919
- user
1920
- };
1921
- }
1922
- async function getAB(abTestKey, sessionId, options = {}) {
1923
- const { variables, debug = false } = options;
1924
- debugMode2 = debug;
1925
- ensureInit();
1926
- log3(`getAB() called: abTestKey=${abTestKey}, sessionId=${sessionId}`);
1927
- let abData = promptABCache.get(abTestKey);
1928
- if (!abData) {
1929
- log3("Not in cache, fetching...");
1930
- await fetchPromptABTests(SYNC_TIMEOUT);
1931
- abData = promptABCache.get(abTestKey);
1932
- }
1933
- if (!abData) {
1934
- throw new Error(
1935
- `Prompt A/B test '${abTestKey}' not found. Check that it exists in your Fallom dashboard.`
1936
- );
1937
- }
1938
- const currentVersion = abData.current;
1939
- const versionData = abData.versions.get(currentVersion);
1940
- if (!versionData) {
1941
- throw new Error(`Prompt A/B test '${abTestKey}' has no current version.`);
1942
- }
1943
- const { variants } = versionData;
1944
- log3(`A/B test '${abTestKey}' has ${variants?.length ?? 0} variants`);
1945
- log3(`Version data: ${JSON.stringify(versionData, null, 2)}`);
1946
- if (!variants || variants.length === 0) {
1947
- throw new Error(
1948
- `Prompt A/B test '${abTestKey}' has no variants configured.`
1949
- );
1950
- }
1951
- const hashBytes = createHash("md5").update(sessionId).digest();
1952
- const hashVal = hashBytes.readUInt32BE(0) % 1e6;
1953
- let cumulative = 0;
1954
- let selectedVariant = variants[variants.length - 1];
1955
- let selectedIndex = variants.length - 1;
1956
- for (let i = 0; i < variants.length; i++) {
1957
- cumulative += variants[i].weight * 1e4;
1958
- if (hashVal < cumulative) {
1959
- selectedVariant = variants[i];
1960
- selectedIndex = i;
1961
- break;
1962
- }
1963
- }
1964
- const promptKey = selectedVariant.prompt_key;
1965
- const promptVersion = selectedVariant.prompt_version;
1966
- let promptData = promptCache.get(promptKey);
1967
- if (!promptData) {
1968
- await fetchPrompts(SYNC_TIMEOUT);
1969
- promptData = promptCache.get(promptKey);
1970
- }
1971
- if (!promptData) {
1972
- throw new Error(
1973
- `Prompt '${promptKey}' (from A/B test '${abTestKey}') not found.`
1974
- );
1975
- }
1976
- const targetVersion = promptVersion ?? promptData.current;
1977
- const content = promptData.versions.get(targetVersion);
1978
- if (!content) {
1979
- throw new Error(
1980
- `Prompt '${promptKey}' version ${targetVersion} not found.`
1981
- );
1982
- }
1983
- const system = replaceVariables(content.systemPrompt, variables);
1984
- const user = replaceVariables(content.userTemplate, variables);
1985
- setPromptContext({
1986
- promptKey,
1987
- promptVersion: targetVersion,
1988
- abTestKey,
1989
- variantIndex: selectedIndex
1990
- });
1991
- log3(
1992
- `\u2705 Got prompt from A/B: ${promptKey} v${targetVersion} (variant ${selectedIndex})`
1993
- );
1994
- return {
1995
- key: promptKey,
1996
- version: targetVersion,
1997
- system,
1998
- user,
1999
- abTestKey,
2000
- variantIndex: selectedIndex
2001
- };
2002
- }
2003
- function clearPromptContext() {
2004
- promptContext = null;
2005
- }
2006
2521
 
2007
2522
  // src/init.ts
2008
- async function init4(options = {}) {
2523
+ async function init5(options = {}) {
2009
2524
  const tracesUrl = options.tracesUrl || process.env.FALLOM_TRACES_URL || "https://traces.fallom.com";
2010
2525
  const configsUrl = options.configsUrl || process.env.FALLOM_CONFIGS_URL || "https://configs.fallom.com";
2011
2526
  const promptsUrl = options.promptsUrl || process.env.FALLOM_PROMPTS_URL || "https://prompts.fallom.com";
2012
- await init2({
2527
+ await init3({
2013
2528
  apiKey: options.apiKey,
2014
2529
  baseUrl: tracesUrl,
2015
2530
  captureContent: options.captureContent,
@@ -2019,7 +2534,7 @@ async function init4(options = {}) {
2019
2534
  apiKey: options.apiKey,
2020
2535
  baseUrl: configsUrl
2021
2536
  });
2022
- init3({
2537
+ init4({
2023
2538
  apiKey: options.apiKey,
2024
2539
  baseUrl: promptsUrl
2025
2540
  });
@@ -2265,10 +2780,11 @@ var FallomExporter = class {
2265
2780
 
2266
2781
  // src/index.ts
2267
2782
  var index_default = {
2268
- init: init4,
2783
+ init: init5,
2269
2784
  trace: trace_exports,
2270
2785
  models: models_exports,
2271
2786
  prompts: prompts_exports,
2787
+ evals: evals_exports,
2272
2788
  session
2273
2789
  };
2274
2790
  export {
@@ -2276,7 +2792,8 @@ export {
2276
2792
  FallomSession,
2277
2793
  clearMastraPrompt,
2278
2794
  index_default as default,
2279
- init4 as init,
2795
+ evals_exports as evals,
2796
+ init5 as init,
2280
2797
  models_exports as models,
2281
2798
  prompts_exports as prompts,
2282
2799
  session,