@fallom/trace 0.2.25 → 0.2.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-2NGJF2JZ.mjs +661 -0
- package/dist/chunk-3HBKT4HK.mjs +827 -0
- package/dist/{chunk-3VWF2OJX.mjs → chunk-FTZVXPQN.mjs} +25 -8
- package/dist/chunk-GZ6TE7G4.mjs +923 -0
- package/dist/chunk-XBZ3ESNV.mjs +824 -0
- package/dist/{core-Q3IHBEHB.mjs → core-46Z4Q54J.mjs} +1 -1
- package/dist/core-4L56QWI7.mjs +21 -0
- package/dist/core-DUG2SP2V.mjs +21 -0
- package/dist/core-JLHYFVYS.mjs +21 -0
- package/dist/core-NTEI2B5Z.mjs +21 -0
- package/dist/index.d.mts +6 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.js +24 -7
- package/dist/index.mjs +2 -2
- package/package.json +1 -1
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import {
|
|
2
|
+
DEFAULT_JUDGE_MODEL,
|
|
3
|
+
_apiKey,
|
|
4
|
+
_baseUrl,
|
|
5
|
+
_initialized,
|
|
6
|
+
compareModels,
|
|
7
|
+
evaluate,
|
|
8
|
+
init,
|
|
9
|
+
uploadResultsPublic
|
|
10
|
+
} from "./chunk-3HBKT4HK.mjs";
|
|
11
|
+
import "./chunk-7P6ASYW6.mjs";
|
|
12
|
+
export {
|
|
13
|
+
DEFAULT_JUDGE_MODEL,
|
|
14
|
+
_apiKey,
|
|
15
|
+
_baseUrl,
|
|
16
|
+
_initialized,
|
|
17
|
+
compareModels,
|
|
18
|
+
evaluate,
|
|
19
|
+
init,
|
|
20
|
+
uploadResultsPublic
|
|
21
|
+
};
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import {
|
|
2
|
+
DEFAULT_JUDGE_MODEL,
|
|
3
|
+
_apiKey,
|
|
4
|
+
_baseUrl,
|
|
5
|
+
_initialized,
|
|
6
|
+
compareModels,
|
|
7
|
+
evaluate,
|
|
8
|
+
init,
|
|
9
|
+
uploadResultsPublic
|
|
10
|
+
} from "./chunk-GZ6TE7G4.mjs";
|
|
11
|
+
import "./chunk-7P6ASYW6.mjs";
|
|
12
|
+
export {
|
|
13
|
+
DEFAULT_JUDGE_MODEL,
|
|
14
|
+
_apiKey,
|
|
15
|
+
_baseUrl,
|
|
16
|
+
_initialized,
|
|
17
|
+
compareModels,
|
|
18
|
+
evaluate,
|
|
19
|
+
init,
|
|
20
|
+
uploadResultsPublic
|
|
21
|
+
};
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import {
|
|
2
|
+
DEFAULT_JUDGE_MODEL,
|
|
3
|
+
_apiKey,
|
|
4
|
+
_baseUrl,
|
|
5
|
+
_initialized,
|
|
6
|
+
compareModels,
|
|
7
|
+
evaluate,
|
|
8
|
+
init,
|
|
9
|
+
uploadResultsPublic
|
|
10
|
+
} from "./chunk-XBZ3ESNV.mjs";
|
|
11
|
+
import "./chunk-7P6ASYW6.mjs";
|
|
12
|
+
export {
|
|
13
|
+
DEFAULT_JUDGE_MODEL,
|
|
14
|
+
_apiKey,
|
|
15
|
+
_baseUrl,
|
|
16
|
+
_initialized,
|
|
17
|
+
compareModels,
|
|
18
|
+
evaluate,
|
|
19
|
+
init,
|
|
20
|
+
uploadResultsPublic
|
|
21
|
+
};
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import {
|
|
2
|
+
DEFAULT_JUDGE_MODEL,
|
|
3
|
+
_apiKey,
|
|
4
|
+
_baseUrl,
|
|
5
|
+
_initialized,
|
|
6
|
+
compareModels,
|
|
7
|
+
evaluate,
|
|
8
|
+
init,
|
|
9
|
+
uploadResultsPublic
|
|
10
|
+
} from "./chunk-FTZVXPQN.mjs";
|
|
11
|
+
import "./chunk-7P6ASYW6.mjs";
|
|
12
|
+
export {
|
|
13
|
+
DEFAULT_JUDGE_MODEL,
|
|
14
|
+
_apiKey,
|
|
15
|
+
_baseUrl,
|
|
16
|
+
_initialized,
|
|
17
|
+
compareModels,
|
|
18
|
+
evaluate,
|
|
19
|
+
init,
|
|
20
|
+
uploadResultsPublic
|
|
21
|
+
};
|
package/dist/index.d.mts
CHANGED
|
@@ -445,6 +445,12 @@ interface EvalResult {
|
|
|
445
445
|
input: string;
|
|
446
446
|
output: string;
|
|
447
447
|
systemMessage?: string;
|
|
448
|
+
/** Expected/golden output for comparison (if provided) */
|
|
449
|
+
expectedOutput?: string;
|
|
450
|
+
/** Retrieved documents/context for RAG evaluation */
|
|
451
|
+
context?: string[];
|
|
452
|
+
/** Additional metadata */
|
|
453
|
+
metadata?: Record<string, unknown>;
|
|
448
454
|
model: string;
|
|
449
455
|
isProduction: boolean;
|
|
450
456
|
answerRelevancy?: number;
|
package/dist/index.d.ts
CHANGED
|
@@ -445,6 +445,12 @@ interface EvalResult {
|
|
|
445
445
|
input: string;
|
|
446
446
|
output: string;
|
|
447
447
|
systemMessage?: string;
|
|
448
|
+
/** Expected/golden output for comparison (if provided) */
|
|
449
|
+
expectedOutput?: string;
|
|
450
|
+
/** Retrieved documents/context for RAG evaluation */
|
|
451
|
+
context?: string[];
|
|
452
|
+
/** Additional metadata */
|
|
453
|
+
metadata?: Record<string, unknown>;
|
|
448
454
|
model: string;
|
|
449
455
|
isProduction: boolean;
|
|
450
456
|
answerRelevancy?: number;
|
package/dist/index.js
CHANGED
|
@@ -1047,13 +1047,22 @@ async function evaluate(options) {
|
|
|
1047
1047
|
_skipUpload = false
|
|
1048
1048
|
} = options;
|
|
1049
1049
|
let dataset;
|
|
1050
|
+
let testCaseExtras = /* @__PURE__ */ new Map();
|
|
1050
1051
|
if (testCases !== void 0 && testCases.length > 0) {
|
|
1051
|
-
dataset = testCases.map((tc) =>
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1052
|
+
dataset = testCases.map((tc, idx) => {
|
|
1053
|
+
if (tc.expectedOutput || tc.context) {
|
|
1054
|
+
testCaseExtras.set(idx, {
|
|
1055
|
+
expectedOutput: tc.expectedOutput,
|
|
1056
|
+
context: tc.context
|
|
1057
|
+
});
|
|
1058
|
+
}
|
|
1059
|
+
return {
|
|
1060
|
+
input: tc.input,
|
|
1061
|
+
output: tc.actualOutput,
|
|
1062
|
+
systemMessage: tc.systemMessage,
|
|
1063
|
+
metadata: tc.metadata
|
|
1064
|
+
};
|
|
1065
|
+
});
|
|
1057
1066
|
} else if (datasetInput !== void 0) {
|
|
1058
1067
|
dataset = await resolveDataset(datasetInput);
|
|
1059
1068
|
} else {
|
|
@@ -1072,10 +1081,14 @@ async function evaluate(options) {
|
|
|
1072
1081
|
for (let i = 0; i < dataset.length; i++) {
|
|
1073
1082
|
const item = dataset[i];
|
|
1074
1083
|
if (verbose) console.log(`Evaluating item ${i + 1}/${dataset.length}...`);
|
|
1084
|
+
const extras = testCaseExtras.get(i);
|
|
1075
1085
|
const result = {
|
|
1076
1086
|
input: item.input,
|
|
1077
1087
|
output: item.output,
|
|
1078
1088
|
systemMessage: item.systemMessage,
|
|
1089
|
+
expectedOutput: extras?.expectedOutput,
|
|
1090
|
+
context: extras?.context,
|
|
1091
|
+
metadata: item.metadata,
|
|
1079
1092
|
model: "production",
|
|
1080
1093
|
isProduction: true,
|
|
1081
1094
|
reasoning: {}
|
|
@@ -1175,6 +1188,7 @@ async function compareModels(options) {
|
|
|
1175
1188
|
input: item.input,
|
|
1176
1189
|
output,
|
|
1177
1190
|
systemMessage: item.systemMessage,
|
|
1191
|
+
metadata: item.metadata,
|
|
1178
1192
|
model: model.name,
|
|
1179
1193
|
isProduction: false,
|
|
1180
1194
|
reasoning: {},
|
|
@@ -1286,6 +1300,9 @@ async function uploadResults(results, name, description, judgeModel, verbose) {
|
|
|
1286
1300
|
results: allResults.map((r) => ({
|
|
1287
1301
|
input: r.input,
|
|
1288
1302
|
system_message: r.systemMessage,
|
|
1303
|
+
expected_output: r.expectedOutput,
|
|
1304
|
+
context: r.context,
|
|
1305
|
+
metadata: r.metadata,
|
|
1289
1306
|
model: r.model,
|
|
1290
1307
|
output: r.output,
|
|
1291
1308
|
is_production: r.isProduction,
|
|
@@ -1395,7 +1412,7 @@ var import_exporter_trace_otlp_http = require("@opentelemetry/exporter-trace-otl
|
|
|
1395
1412
|
// node_modules/@opentelemetry/resources/build/esm/Resource.js
|
|
1396
1413
|
var import_api = require("@opentelemetry/api");
|
|
1397
1414
|
|
|
1398
|
-
// node_modules/@opentelemetry/
|
|
1415
|
+
// node_modules/@opentelemetry/semantic-conventions/build/esm/resource/SemanticResourceAttributes.js
|
|
1399
1416
|
var SemanticResourceAttributes = {
|
|
1400
1417
|
/**
|
|
1401
1418
|
* Name of the cloud provider.
|
package/dist/index.mjs
CHANGED
|
@@ -23,7 +23,7 @@ import {
|
|
|
23
23
|
isCustomMetric,
|
|
24
24
|
runGEval,
|
|
25
25
|
uploadResultsPublic
|
|
26
|
-
} from "./chunk-3VWF2OJX.mjs";
|
|
26
|
+
} from "./chunk-FTZVXPQN.mjs";
|
|
27
27
|
import {
|
|
28
28
|
__export
|
|
29
29
|
} from "./chunk-7P6ASYW6.mjs";
|
|
@@ -45,7 +45,7 @@ import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
|
|
|
45
45
|
// node_modules/@opentelemetry/resources/build/esm/Resource.js
|
|
46
46
|
import { diag } from "@opentelemetry/api";
|
|
47
47
|
|
|
48
|
-
// node_modules/@opentelemetry/
|
|
48
|
+
// node_modules/@opentelemetry/semantic-conventions/build/esm/resource/SemanticResourceAttributes.js
|
|
49
49
|
var SemanticResourceAttributes = {
|
|
50
50
|
/**
|
|
51
51
|
* Name of the cloud provider.
|
package/package.json
CHANGED