@fallom/trace 0.2.25 → 0.2.28

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -346,10 +346,15 @@ var init_types = __esm({
346
346
  });
347
347
 
348
348
  // src/evals/prompts.ts
349
- function buildGEvalPrompt(criteria, steps, systemMessage, inputText, outputText) {
349
+ function buildGEvalPrompt(criteria, steps, systemMessage, inputText, outputText, judgeContext) {
350
350
  const stepsText = steps.map((s, i) => `${i + 1}. ${s}`).join("\n");
351
351
  return `You are an expert evaluator assessing LLM outputs using the G-Eval methodology.
352
+ ${judgeContext ? `
353
+ ## Important Context
354
+ The following context provides background information about the product/domain being evaluated. Use this to inform your evaluation - for example, if the context mentions that certain features or capabilities exist, do not mark responses as hallucinations when they reference those features.
352
355
 
356
+ ${judgeContext}
357
+ ` : ""}
353
358
  ## Evaluation Criteria
354
359
  ${criteria}
355
360
 
@@ -388,7 +393,8 @@ async function runGEval(options) {
388
393
  openrouterKey,
389
394
  fallomApiKey,
390
395
  traceSessionId,
391
- traceCustomerId
396
+ traceCustomerId,
397
+ judgeContext
392
398
  } = options;
393
399
  const apiKey4 = openrouterKey || process.env.OPENROUTER_API_KEY;
394
400
  if (!apiKey4) {
@@ -406,7 +412,8 @@ async function runGEval(options) {
406
412
  config.steps,
407
413
  systemMessage,
408
414
  inputText,
409
- outputText
415
+ outputText,
416
+ judgeContext
410
417
  );
411
418
  const startTime = Date.now();
412
419
  const response = await fetch(
@@ -982,14 +989,15 @@ function init4(options = {}) {
982
989
  }
983
990
  _initialized = true;
984
991
  }
985
- async function runGEval2(metric, inputText, outputText, systemMessage, judgeModel) {
992
+ async function runGEval2(metric, inputText, outputText, systemMessage, judgeModel, judgeContext) {
986
993
  const metricArg = isCustomMetric(metric) ? { name: metric.name, criteria: metric.criteria, steps: metric.steps } : metric;
987
994
  return runGEval({
988
995
  metric: metricArg,
989
996
  inputText,
990
997
  outputText,
991
998
  systemMessage,
992
- judgeModel
999
+ judgeModel,
1000
+ judgeContext
993
1001
  });
994
1002
  }
995
1003
  async function resolveDataset(datasetInput) {
@@ -1040,6 +1048,7 @@ async function evaluate(options) {
1040
1048
  dataset: datasetInput,
1041
1049
  metrics = [...AVAILABLE_METRICS],
1042
1050
  judgeModel = DEFAULT_JUDGE_MODEL,
1051
+ judgeContext,
1043
1052
  name,
1044
1053
  description,
1045
1054
  verbose = true,
@@ -1047,13 +1056,22 @@ async function evaluate(options) {
1047
1056
  _skipUpload = false
1048
1057
  } = options;
1049
1058
  let dataset;
1059
+ let testCaseExtras = /* @__PURE__ */ new Map();
1050
1060
  if (testCases !== void 0 && testCases.length > 0) {
1051
- dataset = testCases.map((tc) => ({
1052
- input: tc.input,
1053
- output: tc.actualOutput,
1054
- systemMessage: tc.systemMessage,
1055
- metadata: tc.metadata
1056
- }));
1061
+ dataset = testCases.map((tc, idx) => {
1062
+ if (tc.expectedOutput || tc.context) {
1063
+ testCaseExtras.set(idx, {
1064
+ expectedOutput: tc.expectedOutput,
1065
+ context: tc.context
1066
+ });
1067
+ }
1068
+ return {
1069
+ input: tc.input,
1070
+ output: tc.actualOutput,
1071
+ systemMessage: tc.systemMessage,
1072
+ metadata: tc.metadata
1073
+ };
1074
+ });
1057
1075
  } else if (datasetInput !== void 0) {
1058
1076
  dataset = await resolveDataset(datasetInput);
1059
1077
  } else {
@@ -1072,10 +1090,14 @@ async function evaluate(options) {
1072
1090
  for (let i = 0; i < dataset.length; i++) {
1073
1091
  const item = dataset[i];
1074
1092
  if (verbose) console.log(`Evaluating item ${i + 1}/${dataset.length}...`);
1093
+ const extras = testCaseExtras.get(i);
1075
1094
  const result = {
1076
1095
  input: item.input,
1077
1096
  output: item.output,
1078
1097
  systemMessage: item.systemMessage,
1098
+ expectedOutput: extras?.expectedOutput,
1099
+ context: extras?.context,
1100
+ metadata: item.metadata,
1079
1101
  model: "production",
1080
1102
  isProduction: true,
1081
1103
  reasoning: {}
@@ -1089,7 +1111,8 @@ async function evaluate(options) {
1089
1111
  item.input,
1090
1112
  item.output,
1091
1113
  item.systemMessage,
1092
- judgeModel
1114
+ judgeModel,
1115
+ judgeContext
1093
1116
  );
1094
1117
  const key = isCustomMetric(metric) ? metricName : metricName.replace(/_([a-z])/g, (_, c) => c.toUpperCase());
1095
1118
  result[key] = score;
@@ -1120,6 +1143,7 @@ async function compareModels(options) {
1120
1143
  models,
1121
1144
  metrics = [...AVAILABLE_METRICS],
1122
1145
  judgeModel = DEFAULT_JUDGE_MODEL,
1146
+ judgeContext,
1123
1147
  includeProduction = true,
1124
1148
  modelKwargs = {},
1125
1149
  name,
@@ -1137,6 +1161,7 @@ async function compareModels(options) {
1137
1161
  dataset,
1138
1162
  metrics,
1139
1163
  judgeModel,
1164
+ judgeContext,
1140
1165
  verbose,
1141
1166
  _skipUpload: true
1142
1167
  });
@@ -1175,6 +1200,7 @@ async function compareModels(options) {
1175
1200
  input: item.input,
1176
1201
  output,
1177
1202
  systemMessage: item.systemMessage,
1203
+ metadata: item.metadata,
1178
1204
  model: model.name,
1179
1205
  isProduction: false,
1180
1206
  reasoning: {},
@@ -1192,7 +1218,8 @@ async function compareModels(options) {
1192
1218
  item.input,
1193
1219
  output,
1194
1220
  item.systemMessage,
1195
- judgeModel
1221
+ judgeModel,
1222
+ judgeContext
1196
1223
  );
1197
1224
  const key = isCustomMetric(metric) ? metricName : metricName.replace(/_([a-z])/g, (_, c) => c.toUpperCase());
1198
1225
  result[key] = score;
@@ -1286,6 +1313,9 @@ async function uploadResults(results, name, description, judgeModel, verbose) {
1286
1313
  results: allResults.map((r) => ({
1287
1314
  input: r.input,
1288
1315
  system_message: r.systemMessage,
1316
+ expected_output: r.expectedOutput,
1317
+ context: r.context,
1318
+ metadata: r.metadata,
1289
1319
  model: r.model,
1290
1320
  output: r.output,
1291
1321
  is_production: r.isProduction,
@@ -1361,6 +1391,7 @@ var index_exports = {};
1361
1391
  __export(index_exports, {
1362
1392
  FallomExporter: () => FallomExporter,
1363
1393
  FallomSession: () => FallomSession,
1394
+ FallomSpan: () => FallomSpan,
1364
1395
  buildGEvalPrompt: () => buildGEvalPrompt,
1365
1396
  calculateAggregateScores: () => calculateAggregateScores,
1366
1397
  clearMastraPrompt: () => clearMastraPrompt,
@@ -1374,7 +1405,8 @@ __export(index_exports, {
1374
1405
  session: () => session,
1375
1406
  setMastraPrompt: () => setMastraPrompt,
1376
1407
  setMastraPromptAB: () => setMastraPromptAB,
1377
- trace: () => trace_exports
1408
+ trace: () => trace_exports,
1409
+ wrapTraced: () => wrapTraced
1378
1410
  });
1379
1411
  module.exports = __toCommonJS(index_exports);
1380
1412
 
@@ -1382,9 +1414,11 @@ module.exports = __toCommonJS(index_exports);
1382
1414
  var trace_exports = {};
1383
1415
  __export(trace_exports, {
1384
1416
  FallomSession: () => FallomSession,
1417
+ FallomSpan: () => FallomSpan,
1385
1418
  init: () => init,
1386
1419
  session: () => session,
1387
- shutdown: () => shutdown
1420
+ shutdown: () => shutdown,
1421
+ wrapTraced: () => wrapTraced
1388
1422
  });
1389
1423
 
1390
1424
  // src/trace/core.ts
@@ -1395,7 +1429,7 @@ var import_exporter_trace_otlp_http = require("@opentelemetry/exporter-trace-otl
1395
1429
  // node_modules/@opentelemetry/resources/build/esm/Resource.js
1396
1430
  var import_api = require("@opentelemetry/api");
1397
1431
 
1398
- // node_modules/@opentelemetry/resources/node_modules/@opentelemetry/semantic-conventions/build/esm/resource/SemanticResourceAttributes.js
1432
+ // node_modules/@opentelemetry/semantic-conventions/build/esm/resource/SemanticResourceAttributes.js
1399
1433
  var SemanticResourceAttributes = {
1400
1434
  /**
1401
1435
  * Name of the cloud provider.
@@ -2170,6 +2204,109 @@ function generateHexId(length) {
2170
2204
  return Array.from(bytes).map((b) => b.toString(16).padStart(2, "0")).join("");
2171
2205
  }
2172
2206
 
2207
+ // src/trace/span.ts
2208
+ var FallomSpan = class {
2209
+ constructor(name, ctx, options = {}) {
2210
+ this.name = name;
2211
+ this.ctx = ctx;
2212
+ this.attrs = {};
2213
+ this.ended = false;
2214
+ this._status = "OK";
2215
+ this.spanId = generateHexId(16);
2216
+ this.traceId = options.traceId || generateHexId(32);
2217
+ this.parentSpanId = options.parentSpanId;
2218
+ this.kind = options.kind || "custom";
2219
+ this.startTime = Date.now();
2220
+ }
2221
+ /**
2222
+ * Set attributes on the span.
2223
+ * Can be called multiple times - attributes are merged.
2224
+ */
2225
+ set(attributes) {
2226
+ if (this.ended) {
2227
+ console.warn("[Fallom] Cannot set attributes on ended span");
2228
+ return this;
2229
+ }
2230
+ Object.assign(this.attrs, attributes);
2231
+ return this;
2232
+ }
2233
+ /**
2234
+ * Mark the span as errored.
2235
+ */
2236
+ setError(error) {
2237
+ this._status = "ERROR";
2238
+ this._errorMessage = error instanceof Error ? error.message : error;
2239
+ return this;
2240
+ }
2241
+ /**
2242
+ * Get span context for creating child spans.
2243
+ */
2244
+ context() {
2245
+ return {
2246
+ traceId: this.traceId,
2247
+ spanId: this.spanId
2248
+ };
2249
+ }
2250
+ /**
2251
+ * End the span and send it.
2252
+ * Must be called for the span to be recorded.
2253
+ */
2254
+ end() {
2255
+ if (this.ended) {
2256
+ console.warn("[Fallom] Span already ended");
2257
+ return;
2258
+ }
2259
+ this.ended = true;
2260
+ if (!isInitialized()) {
2261
+ return;
2262
+ }
2263
+ const endTime = Date.now();
2264
+ sendTrace({
2265
+ config_key: this.ctx.configKey,
2266
+ session_id: this.ctx.sessionId,
2267
+ customer_id: this.ctx.customerId,
2268
+ metadata: this.ctx.metadata,
2269
+ tags: this.ctx.tags,
2270
+ trace_id: this.traceId,
2271
+ span_id: this.spanId,
2272
+ parent_span_id: this.parentSpanId,
2273
+ name: this.name,
2274
+ kind: this.kind,
2275
+ start_time: new Date(this.startTime).toISOString(),
2276
+ end_time: new Date(endTime).toISOString(),
2277
+ duration_ms: endTime - this.startTime,
2278
+ status: this._status,
2279
+ error_message: this._errorMessage,
2280
+ attributes: {
2281
+ "fallom.sdk_version": "2",
2282
+ "fallom.span_type": "manual",
2283
+ ...this.attrs
2284
+ }
2285
+ }).catch(() => {
2286
+ });
2287
+ }
2288
+ };
2289
+ function wrapTraced(session2, name, fn, options = {}) {
2290
+ return (async (...args) => {
2291
+ const span = session2.span(name, options);
2292
+ if (args.length === 1) {
2293
+ span.set({ input: args[0] });
2294
+ } else if (args.length > 1) {
2295
+ span.set({ input: args });
2296
+ }
2297
+ try {
2298
+ const result = await fn(...args);
2299
+ span.set({ output: result });
2300
+ span.end();
2301
+ return result;
2302
+ } catch (error) {
2303
+ span.setError(error instanceof Error ? error : String(error));
2304
+ span.end();
2305
+ throw error;
2306
+ }
2307
+ });
2308
+ }
2309
+
2173
2310
  // src/prompts.ts
2174
2311
  var prompts_exports = {};
2175
2312
  __export(prompts_exports, {
@@ -4039,6 +4176,26 @@ var FallomSession = class {
4039
4176
  getContext() {
4040
4177
  return { ...this.ctx };
4041
4178
  }
4179
+ /**
4180
+ * Create a manual span for custom operations.
4181
+ *
4182
+ * Use for non-LLM operations like RAG retrieval, preprocessing, tool execution, etc.
4183
+ * The span uses the session's context (configKey, sessionId, etc.).
4184
+ *
4185
+ * @example
4186
+ * ```typescript
4187
+ * const span = session.span("rag.retrieve");
4188
+ * span.set({ "rag.query": userQuery, "rag.topK": 5 });
4189
+ *
4190
+ * const docs = await retrieveDocuments(userQuery);
4191
+ * span.set({ "rag.documents.count": docs.length });
4192
+ *
4193
+ * span.end(); // Must call to send the span
4194
+ * ```
4195
+ */
4196
+ span(name, options) {
4197
+ return new FallomSpan(name, this.ctx, options);
4198
+ }
4042
4199
  /**
4043
4200
  * Get model assignment for this session (A/B testing).
4044
4201
  */
@@ -4277,7 +4434,7 @@ async function init5(options = {}) {
4277
4434
  }
4278
4435
 
4279
4436
  // src/mastra.ts
4280
- var import_core13 = require("@opentelemetry/core");
4437
+ var import_core14 = require("@opentelemetry/core");
4281
4438
  var promptContext2 = {};
4282
4439
  function setMastraPrompt(promptKey, version) {
4283
4440
  promptContext2 = {
@@ -4327,7 +4484,7 @@ var FallomExporter = class {
4327
4484
  */
4328
4485
  export(spans, resultCallback) {
4329
4486
  if (spans.length === 0) {
4330
- resultCallback({ code: import_core13.ExportResultCode.SUCCESS });
4487
+ resultCallback({ code: import_core14.ExportResultCode.SUCCESS });
4331
4488
  return;
4332
4489
  }
4333
4490
  this.log(`Exporting ${spans.length} spans...`);
@@ -4344,11 +4501,11 @@ var FallomExporter = class {
4344
4501
  }
4345
4502
  const exportPromise = this.sendSpans(spans).then(() => {
4346
4503
  this.log("Export successful");
4347
- resultCallback({ code: import_core13.ExportResultCode.SUCCESS });
4504
+ resultCallback({ code: import_core14.ExportResultCode.SUCCESS });
4348
4505
  }).catch((error) => {
4349
4506
  console.error("[FallomExporter] Export failed:", error);
4350
4507
  resultCallback({
4351
- code: import_core13.ExportResultCode.FAILED,
4508
+ code: import_core14.ExportResultCode.FAILED,
4352
4509
  error: error instanceof Error ? error : new Error(String(error))
4353
4510
  });
4354
4511
  });
@@ -4528,6 +4685,7 @@ var index_default = {
4528
4685
  0 && (module.exports = {
4529
4686
  FallomExporter,
4530
4687
  FallomSession,
4688
+ FallomSpan,
4531
4689
  buildGEvalPrompt,
4532
4690
  calculateAggregateScores,
4533
4691
  clearMastraPrompt,
@@ -4540,5 +4698,6 @@ var index_default = {
4540
4698
  session,
4541
4699
  setMastraPrompt,
4542
4700
  setMastraPromptAB,
4543
- trace
4701
+ trace,
4702
+ wrapTraced
4544
4703
  });
package/dist/index.mjs CHANGED
@@ -23,7 +23,7 @@ import {
23
23
  isCustomMetric,
24
24
  runGEval,
25
25
  uploadResultsPublic
26
- } from "./chunk-3VWF2OJX.mjs";
26
+ } from "./chunk-MSI4HGK6.mjs";
27
27
  import {
28
28
  __export
29
29
  } from "./chunk-7P6ASYW6.mjs";
@@ -32,9 +32,11 @@ import {
32
32
  var trace_exports = {};
33
33
  __export(trace_exports, {
34
34
  FallomSession: () => FallomSession,
35
+ FallomSpan: () => FallomSpan,
35
36
  init: () => init3,
36
37
  session: () => session,
37
- shutdown: () => shutdown
38
+ shutdown: () => shutdown,
39
+ wrapTraced: () => wrapTraced
38
40
  });
39
41
 
40
42
  // src/trace/core.ts
@@ -45,7 +47,7 @@ import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
45
47
  // node_modules/@opentelemetry/resources/build/esm/Resource.js
46
48
  import { diag } from "@opentelemetry/api";
47
49
 
48
- // node_modules/@opentelemetry/resources/node_modules/@opentelemetry/semantic-conventions/build/esm/resource/SemanticResourceAttributes.js
50
+ // node_modules/@opentelemetry/semantic-conventions/build/esm/resource/SemanticResourceAttributes.js
49
51
  var SemanticResourceAttributes = {
50
52
  /**
51
53
  * Name of the cloud provider.
@@ -820,6 +822,109 @@ function generateHexId(length) {
820
822
  return Array.from(bytes).map((b) => b.toString(16).padStart(2, "0")).join("");
821
823
  }
822
824
 
825
+ // src/trace/span.ts
826
+ var FallomSpan = class {
827
+ constructor(name, ctx, options = {}) {
828
+ this.name = name;
829
+ this.ctx = ctx;
830
+ this.attrs = {};
831
+ this.ended = false;
832
+ this._status = "OK";
833
+ this.spanId = generateHexId(16);
834
+ this.traceId = options.traceId || generateHexId(32);
835
+ this.parentSpanId = options.parentSpanId;
836
+ this.kind = options.kind || "custom";
837
+ this.startTime = Date.now();
838
+ }
839
+ /**
840
+ * Set attributes on the span.
841
+ * Can be called multiple times - attributes are merged.
842
+ */
843
+ set(attributes) {
844
+ if (this.ended) {
845
+ console.warn("[Fallom] Cannot set attributes on ended span");
846
+ return this;
847
+ }
848
+ Object.assign(this.attrs, attributes);
849
+ return this;
850
+ }
851
+ /**
852
+ * Mark the span as errored.
853
+ */
854
+ setError(error) {
855
+ this._status = "ERROR";
856
+ this._errorMessage = error instanceof Error ? error.message : error;
857
+ return this;
858
+ }
859
+ /**
860
+ * Get span context for creating child spans.
861
+ */
862
+ context() {
863
+ return {
864
+ traceId: this.traceId,
865
+ spanId: this.spanId
866
+ };
867
+ }
868
+ /**
869
+ * End the span and send it.
870
+ * Must be called for the span to be recorded.
871
+ */
872
+ end() {
873
+ if (this.ended) {
874
+ console.warn("[Fallom] Span already ended");
875
+ return;
876
+ }
877
+ this.ended = true;
878
+ if (!isInitialized()) {
879
+ return;
880
+ }
881
+ const endTime = Date.now();
882
+ sendTrace({
883
+ config_key: this.ctx.configKey,
884
+ session_id: this.ctx.sessionId,
885
+ customer_id: this.ctx.customerId,
886
+ metadata: this.ctx.metadata,
887
+ tags: this.ctx.tags,
888
+ trace_id: this.traceId,
889
+ span_id: this.spanId,
890
+ parent_span_id: this.parentSpanId,
891
+ name: this.name,
892
+ kind: this.kind,
893
+ start_time: new Date(this.startTime).toISOString(),
894
+ end_time: new Date(endTime).toISOString(),
895
+ duration_ms: endTime - this.startTime,
896
+ status: this._status,
897
+ error_message: this._errorMessage,
898
+ attributes: {
899
+ "fallom.sdk_version": "2",
900
+ "fallom.span_type": "manual",
901
+ ...this.attrs
902
+ }
903
+ }).catch(() => {
904
+ });
905
+ }
906
+ };
907
+ function wrapTraced(session2, name, fn, options = {}) {
908
+ return (async (...args) => {
909
+ const span = session2.span(name, options);
910
+ if (args.length === 1) {
911
+ span.set({ input: args[0] });
912
+ } else if (args.length > 1) {
913
+ span.set({ input: args });
914
+ }
915
+ try {
916
+ const result = await fn(...args);
917
+ span.set({ output: result });
918
+ span.end();
919
+ return result;
920
+ } catch (error) {
921
+ span.setError(error instanceof Error ? error : String(error));
922
+ span.end();
923
+ throw error;
924
+ }
925
+ });
926
+ }
927
+
823
928
  // src/prompts.ts
824
929
  var prompts_exports = {};
825
930
  __export(prompts_exports, {
@@ -2689,6 +2794,26 @@ var FallomSession = class {
2689
2794
  getContext() {
2690
2795
  return { ...this.ctx };
2691
2796
  }
2797
+ /**
2798
+ * Create a manual span for custom operations.
2799
+ *
2800
+ * Use for non-LLM operations like RAG retrieval, preprocessing, tool execution, etc.
2801
+ * The span uses the session's context (configKey, sessionId, etc.).
2802
+ *
2803
+ * @example
2804
+ * ```typescript
2805
+ * const span = session.span("rag.retrieve");
2806
+ * span.set({ "rag.query": userQuery, "rag.topK": 5 });
2807
+ *
2808
+ * const docs = await retrieveDocuments(userQuery);
2809
+ * span.set({ "rag.documents.count": docs.length });
2810
+ *
2811
+ * span.end(); // Must call to send the span
2812
+ * ```
2813
+ */
2814
+ span(name, options) {
2815
+ return new FallomSpan(name, this.ctx, options);
2816
+ }
2692
2817
  /**
2693
2818
  * Get model assignment for this session (A/B testing).
2694
2819
  */
@@ -3168,6 +3293,7 @@ var index_default = {
3168
3293
  export {
3169
3294
  FallomExporter,
3170
3295
  FallomSession,
3296
+ FallomSpan,
3171
3297
  buildGEvalPrompt,
3172
3298
  calculateAggregateScores,
3173
3299
  clearMastraPrompt,
@@ -3181,5 +3307,6 @@ export {
3181
3307
  session,
3182
3308
  setMastraPrompt,
3183
3309
  setMastraPromptAB,
3184
- trace_exports as trace
3310
+ trace_exports as trace,
3311
+ wrapTraced
3185
3312
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fallom/trace",
3
- "version": "0.2.25",
3
+ "version": "0.2.28",
4
4
  "description": "Model A/B testing and tracing for LLM applications. Zero latency, production-ready.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",
@@ -40,7 +40,7 @@
40
40
  "license": "MIT",
41
41
  "repository": {
42
42
  "type": "git",
43
- "url": "https://github.com/fallom/fallom-js"
43
+ "url": "https://github.com/Fallomai/fallom-typescript-sdk"
44
44
  },
45
45
  "dependencies": {
46
46
  "@opentelemetry/api": "^1.7.0",