@fallom/trace 0.2.25 → 0.2.26

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@ import {
7
7
  evaluate,
8
8
  init,
9
9
  uploadResultsPublic
10
- } from "./chunk-3VWF2OJX.mjs";
10
+ } from "./chunk-2NGJF2JZ.mjs";
11
11
  import "./chunk-7P6ASYW6.mjs";
12
12
  export {
13
13
  DEFAULT_JUDGE_MODEL,
@@ -0,0 +1,21 @@
1
+ import {
2
+ DEFAULT_JUDGE_MODEL,
3
+ _apiKey,
4
+ _baseUrl,
5
+ _initialized,
6
+ compareModels,
7
+ evaluate,
8
+ init,
9
+ uploadResultsPublic
10
+ } from "./chunk-3HBKT4HK.mjs";
11
+ import "./chunk-7P6ASYW6.mjs";
12
+ export {
13
+ DEFAULT_JUDGE_MODEL,
14
+ _apiKey,
15
+ _baseUrl,
16
+ _initialized,
17
+ compareModels,
18
+ evaluate,
19
+ init,
20
+ uploadResultsPublic
21
+ };
@@ -0,0 +1,21 @@
1
+ import {
2
+ DEFAULT_JUDGE_MODEL,
3
+ _apiKey,
4
+ _baseUrl,
5
+ _initialized,
6
+ compareModels,
7
+ evaluate,
8
+ init,
9
+ uploadResultsPublic
10
+ } from "./chunk-GZ6TE7G4.mjs";
11
+ import "./chunk-7P6ASYW6.mjs";
12
+ export {
13
+ DEFAULT_JUDGE_MODEL,
14
+ _apiKey,
15
+ _baseUrl,
16
+ _initialized,
17
+ compareModels,
18
+ evaluate,
19
+ init,
20
+ uploadResultsPublic
21
+ };
@@ -0,0 +1,21 @@
1
+ import {
2
+ DEFAULT_JUDGE_MODEL,
3
+ _apiKey,
4
+ _baseUrl,
5
+ _initialized,
6
+ compareModels,
7
+ evaluate,
8
+ init,
9
+ uploadResultsPublic
10
+ } from "./chunk-XBZ3ESNV.mjs";
11
+ import "./chunk-7P6ASYW6.mjs";
12
+ export {
13
+ DEFAULT_JUDGE_MODEL,
14
+ _apiKey,
15
+ _baseUrl,
16
+ _initialized,
17
+ compareModels,
18
+ evaluate,
19
+ init,
20
+ uploadResultsPublic
21
+ };
@@ -0,0 +1,21 @@
1
+ import {
2
+ DEFAULT_JUDGE_MODEL,
3
+ _apiKey,
4
+ _baseUrl,
5
+ _initialized,
6
+ compareModels,
7
+ evaluate,
8
+ init,
9
+ uploadResultsPublic
10
+ } from "./chunk-FTZVXPQN.mjs";
11
+ import "./chunk-7P6ASYW6.mjs";
12
+ export {
13
+ DEFAULT_JUDGE_MODEL,
14
+ _apiKey,
15
+ _baseUrl,
16
+ _initialized,
17
+ compareModels,
18
+ evaluate,
19
+ init,
20
+ uploadResultsPublic
21
+ };
package/dist/index.d.mts CHANGED
@@ -445,6 +445,12 @@ interface EvalResult {
445
445
  input: string;
446
446
  output: string;
447
447
  systemMessage?: string;
448
+ /** Expected/golden output for comparison (if provided) */
449
+ expectedOutput?: string;
450
+ /** Retrieved documents/context for RAG evaluation */
451
+ context?: string[];
452
+ /** Additional metadata */
453
+ metadata?: Record<string, unknown>;
448
454
  model: string;
449
455
  isProduction: boolean;
450
456
  answerRelevancy?: number;
package/dist/index.d.ts CHANGED
@@ -445,6 +445,12 @@ interface EvalResult {
445
445
  input: string;
446
446
  output: string;
447
447
  systemMessage?: string;
448
+ /** Expected/golden output for comparison (if provided) */
449
+ expectedOutput?: string;
450
+ /** Retrieved documents/context for RAG evaluation */
451
+ context?: string[];
452
+ /** Additional metadata */
453
+ metadata?: Record<string, unknown>;
448
454
  model: string;
449
455
  isProduction: boolean;
450
456
  answerRelevancy?: number;
package/dist/index.js CHANGED
@@ -1047,13 +1047,22 @@ async function evaluate(options) {
1047
1047
  _skipUpload = false
1048
1048
  } = options;
1049
1049
  let dataset;
1050
+ let testCaseExtras = /* @__PURE__ */ new Map();
1050
1051
  if (testCases !== void 0 && testCases.length > 0) {
1051
- dataset = testCases.map((tc) => ({
1052
- input: tc.input,
1053
- output: tc.actualOutput,
1054
- systemMessage: tc.systemMessage,
1055
- metadata: tc.metadata
1056
- }));
1052
+ dataset = testCases.map((tc, idx) => {
1053
+ if (tc.expectedOutput || tc.context) {
1054
+ testCaseExtras.set(idx, {
1055
+ expectedOutput: tc.expectedOutput,
1056
+ context: tc.context
1057
+ });
1058
+ }
1059
+ return {
1060
+ input: tc.input,
1061
+ output: tc.actualOutput,
1062
+ systemMessage: tc.systemMessage,
1063
+ metadata: tc.metadata
1064
+ };
1065
+ });
1057
1066
  } else if (datasetInput !== void 0) {
1058
1067
  dataset = await resolveDataset(datasetInput);
1059
1068
  } else {
@@ -1072,10 +1081,14 @@ async function evaluate(options) {
1072
1081
  for (let i = 0; i < dataset.length; i++) {
1073
1082
  const item = dataset[i];
1074
1083
  if (verbose) console.log(`Evaluating item ${i + 1}/${dataset.length}...`);
1084
+ const extras = testCaseExtras.get(i);
1075
1085
  const result = {
1076
1086
  input: item.input,
1077
1087
  output: item.output,
1078
1088
  systemMessage: item.systemMessage,
1089
+ expectedOutput: extras?.expectedOutput,
1090
+ context: extras?.context,
1091
+ metadata: item.metadata,
1079
1092
  model: "production",
1080
1093
  isProduction: true,
1081
1094
  reasoning: {}
@@ -1175,6 +1188,7 @@ async function compareModels(options) {
1175
1188
  input: item.input,
1176
1189
  output,
1177
1190
  systemMessage: item.systemMessage,
1191
+ metadata: item.metadata,
1178
1192
  model: model.name,
1179
1193
  isProduction: false,
1180
1194
  reasoning: {},
@@ -1286,6 +1300,9 @@ async function uploadResults(results, name, description, judgeModel, verbose) {
1286
1300
  results: allResults.map((r) => ({
1287
1301
  input: r.input,
1288
1302
  system_message: r.systemMessage,
1303
+ expected_output: r.expectedOutput,
1304
+ context: r.context,
1305
+ metadata: r.metadata,
1289
1306
  model: r.model,
1290
1307
  output: r.output,
1291
1308
  is_production: r.isProduction,
@@ -1395,7 +1412,7 @@ var import_exporter_trace_otlp_http = require("@opentelemetry/exporter-trace-otl
1395
1412
  // node_modules/@opentelemetry/resources/build/esm/Resource.js
1396
1413
  var import_api = require("@opentelemetry/api");
1397
1414
 
1398
- // node_modules/@opentelemetry/resources/node_modules/@opentelemetry/semantic-conventions/build/esm/resource/SemanticResourceAttributes.js
1415
+ // node_modules/@opentelemetry/semantic-conventions/build/esm/resource/SemanticResourceAttributes.js
1399
1416
  var SemanticResourceAttributes = {
1400
1417
  /**
1401
1418
  * Name of the cloud provider.
package/dist/index.mjs CHANGED
@@ -23,7 +23,7 @@ import {
23
23
  isCustomMetric,
24
24
  runGEval,
25
25
  uploadResultsPublic
26
- } from "./chunk-3VWF2OJX.mjs";
26
+ } from "./chunk-FTZVXPQN.mjs";
27
27
  import {
28
28
  __export
29
29
  } from "./chunk-7P6ASYW6.mjs";
@@ -45,7 +45,7 @@ import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
45
45
  // node_modules/@opentelemetry/resources/build/esm/Resource.js
46
46
  import { diag } from "@opentelemetry/api";
47
47
 
48
- // node_modules/@opentelemetry/resources/node_modules/@opentelemetry/semantic-conventions/build/esm/resource/SemanticResourceAttributes.js
48
+ // node_modules/@opentelemetry/semantic-conventions/build/esm/resource/SemanticResourceAttributes.js
49
49
  var SemanticResourceAttributes = {
50
50
  /**
51
51
  * Name of the cloud provider.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fallom/trace",
3
- "version": "0.2.25",
3
+ "version": "0.2.26",
4
4
  "description": "Model A/B testing and tracing for LLM applications. Zero latency, production-ready.",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",