@fallom/trace 0.2.25 → 0.2.26
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/chunk-2NGJF2JZ.mjs +661 -0
- package/dist/chunk-3HBKT4HK.mjs +827 -0
- package/dist/{chunk-3VWF2OJX.mjs → chunk-FTZVXPQN.mjs} +25 -8
- package/dist/chunk-GZ6TE7G4.mjs +923 -0
- package/dist/chunk-XBZ3ESNV.mjs +824 -0
- package/dist/{core-Q3IHBEHB.mjs → core-46Z4Q54J.mjs} +1 -1
- package/dist/core-4L56QWI7.mjs +21 -0
- package/dist/core-DUG2SP2V.mjs +21 -0
- package/dist/core-JLHYFVYS.mjs +21 -0
- package/dist/core-NTEI2B5Z.mjs +21 -0
- package/dist/index.d.mts +6 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.js +24 -7
- package/dist/index.mjs +2 -2
- package/package.json +1 -1
|
@@ -423,7 +423,7 @@ function datasetFromTraces(traces) {
|
|
|
423
423
|
return items;
|
|
424
424
|
}
|
|
425
425
|
async function datasetFromFallom(datasetKey, version, config) {
|
|
426
|
-
const { _apiKey: _apiKey2, _baseUrl: _baseUrl2, _initialized: _initialized2 } = await import("./core-
|
|
426
|
+
const { _apiKey: _apiKey2, _baseUrl: _baseUrl2, _initialized: _initialized2 } = await import("./core-NTEI2B5Z.mjs").then(
|
|
427
427
|
(m) => ({
|
|
428
428
|
_apiKey: config?._apiKey ?? m._apiKey,
|
|
429
429
|
_baseUrl: config?._baseUrl ?? m._baseUrl,
|
|
@@ -496,7 +496,7 @@ var EvaluationDataset = class {
|
|
|
496
496
|
* @returns Self for chaining
|
|
497
497
|
*/
|
|
498
498
|
async pull(alias, version) {
|
|
499
|
-
const { _apiKey: _apiKey2, _baseUrl: _baseUrl2, _initialized: _initialized2 } = await import("./core-
|
|
499
|
+
const { _apiKey: _apiKey2, _baseUrl: _baseUrl2, _initialized: _initialized2 } = await import("./core-NTEI2B5Z.mjs");
|
|
500
500
|
if (!_initialized2) {
|
|
501
501
|
throw new Error("Fallom evals not initialized. Call evals.init() first.");
|
|
502
502
|
}
|
|
@@ -698,13 +698,22 @@ async function evaluate(options) {
|
|
|
698
698
|
_skipUpload = false
|
|
699
699
|
} = options;
|
|
700
700
|
let dataset;
|
|
701
|
+
let testCaseExtras = /* @__PURE__ */ new Map();
|
|
701
702
|
if (testCases !== void 0 && testCases.length > 0) {
|
|
702
|
-
dataset = testCases.map((tc) =>
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
703
|
+
dataset = testCases.map((tc, idx) => {
|
|
704
|
+
if (tc.expectedOutput || tc.context) {
|
|
705
|
+
testCaseExtras.set(idx, {
|
|
706
|
+
expectedOutput: tc.expectedOutput,
|
|
707
|
+
context: tc.context
|
|
708
|
+
});
|
|
709
|
+
}
|
|
710
|
+
return {
|
|
711
|
+
input: tc.input,
|
|
712
|
+
output: tc.actualOutput,
|
|
713
|
+
systemMessage: tc.systemMessage,
|
|
714
|
+
metadata: tc.metadata
|
|
715
|
+
};
|
|
716
|
+
});
|
|
708
717
|
} else if (datasetInput !== void 0) {
|
|
709
718
|
dataset = await resolveDataset(datasetInput);
|
|
710
719
|
} else {
|
|
@@ -723,10 +732,14 @@ async function evaluate(options) {
|
|
|
723
732
|
for (let i = 0; i < dataset.length; i++) {
|
|
724
733
|
const item = dataset[i];
|
|
725
734
|
if (verbose) console.log(`Evaluating item ${i + 1}/${dataset.length}...`);
|
|
735
|
+
const extras = testCaseExtras.get(i);
|
|
726
736
|
const result = {
|
|
727
737
|
input: item.input,
|
|
728
738
|
output: item.output,
|
|
729
739
|
systemMessage: item.systemMessage,
|
|
740
|
+
expectedOutput: extras?.expectedOutput,
|
|
741
|
+
context: extras?.context,
|
|
742
|
+
metadata: item.metadata,
|
|
730
743
|
model: "production",
|
|
731
744
|
isProduction: true,
|
|
732
745
|
reasoning: {}
|
|
@@ -826,6 +839,7 @@ async function compareModels(options) {
|
|
|
826
839
|
input: item.input,
|
|
827
840
|
output,
|
|
828
841
|
systemMessage: item.systemMessage,
|
|
842
|
+
metadata: item.metadata,
|
|
829
843
|
model: model.name,
|
|
830
844
|
isProduction: false,
|
|
831
845
|
reasoning: {},
|
|
@@ -937,6 +951,9 @@ async function uploadResults(results, name, description, judgeModel, verbose) {
|
|
|
937
951
|
results: allResults.map((r) => ({
|
|
938
952
|
input: r.input,
|
|
939
953
|
system_message: r.systemMessage,
|
|
954
|
+
expected_output: r.expectedOutput,
|
|
955
|
+
context: r.context,
|
|
956
|
+
metadata: r.metadata,
|
|
940
957
|
model: r.model,
|
|
941
958
|
output: r.output,
|
|
942
959
|
is_production: r.isProduction,
|