orchestrated 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.d.ts +13 -1
- package/index.js +100 -49
- package/index.js.map +8 -8
- package/package.json +1 -1
package/index.d.ts
CHANGED
|
@@ -72,8 +72,14 @@ declare class BatchClient {
|
|
|
72
72
|
/**
|
|
73
73
|
* Initialize the batch client by checking for existing batches
|
|
74
74
|
* Must be called after constructor
|
|
75
|
+
*
|
|
76
|
+
* @param name - Evaluation name
|
|
77
|
+
* @param checksum - Dataset checksum
|
|
78
|
+
* @param dataSourceType - Type of data source ("static" or "dataset")
|
|
79
|
+
* - "static": Filter by both name AND checksum (fixed dataset)
|
|
80
|
+
* - "dataset": Filter by name only (dynamic dataset)
|
|
75
81
|
*/
|
|
76
|
-
initialize(name: string, checksum: string): Promise<void>;
|
|
82
|
+
initialize(name: string, checksum: string, dataSourceType?: "static" | "dataset"): Promise<void>;
|
|
77
83
|
generateRequestId(body: BatchRequest["body"]): string;
|
|
78
84
|
/**
|
|
79
85
|
* Adds a request to the batch
|
|
@@ -933,6 +939,12 @@ declare interface ResolvedDataCtx {
|
|
|
933
939
|
* - "synthetic.my-group" - synthetic data with custom grouping
|
|
934
940
|
*/
|
|
935
941
|
sourceType: string;
|
|
942
|
+
/**
|
|
943
|
+
* Data source type - used for batch naming strategy
|
|
944
|
+
* - "static": use checksum in batch name (data is fixed)
|
|
945
|
+
* - "dataset": use only eval name (data changes over time)
|
|
946
|
+
*/
|
|
947
|
+
dataSourceType?: "static" | "dataset";
|
|
936
948
|
checksum: string;
|
|
937
949
|
caseCount: number;
|
|
938
950
|
}
|
package/index.js
CHANGED
|
@@ -21158,6 +21158,7 @@ function getState() {
|
|
|
21158
21158
|
function resetState() {
|
|
21159
21159
|
globalState = null;
|
|
21160
21160
|
isInitialized = false;
|
|
21161
|
+
globalThis.__ORCHESTRATED_LAZY_LOAD__ = undefined;
|
|
21161
21162
|
}
|
|
21162
21163
|
function isStateInitialized() {
|
|
21163
21164
|
return isInitialized || globalThis.__ORCHESTRATED_SHARED_STATE__ !== undefined;
|
|
@@ -88555,6 +88556,48 @@ var init_scorer = __esm(() => {
|
|
|
88555
88556
|
init_jsdist();
|
|
88556
88557
|
});
|
|
88557
88558
|
|
|
88559
|
+
// src/serialization/types.ts
|
|
88560
|
+
function initRegistry() {
|
|
88561
|
+
if (!globalThis.__ORCHESTRATED_REGISTRY__) {
|
|
88562
|
+
globalThis.__ORCHESTRATED_REGISTRY__ = {
|
|
88563
|
+
scorers: [],
|
|
88564
|
+
handlers: {},
|
|
88565
|
+
evaluations: [],
|
|
88566
|
+
pendingEvaluations: []
|
|
88567
|
+
};
|
|
88568
|
+
}
|
|
88569
|
+
return globalThis.__ORCHESTRATED_REGISTRY__;
|
|
88570
|
+
}
|
|
88571
|
+
function getRegistry2() {
|
|
88572
|
+
return initRegistry();
|
|
88573
|
+
}
|
|
88574
|
+
function registerHandler(name, handler, usageRef) {
|
|
88575
|
+
const registry2 = getRegistry2();
|
|
88576
|
+
if (!registry2.handlers[name]) {
|
|
88577
|
+
registry2.handlers[name] = {
|
|
88578
|
+
handler,
|
|
88579
|
+
source: handler.toString(),
|
|
88580
|
+
usedIn: []
|
|
88581
|
+
};
|
|
88582
|
+
}
|
|
88583
|
+
if (usageRef) {
|
|
88584
|
+
registry2.handlers[name].usedIn.push(usageRef);
|
|
88585
|
+
}
|
|
88586
|
+
return name;
|
|
88587
|
+
}
|
|
88588
|
+
function registerScorer(scorer) {
|
|
88589
|
+
const registry2 = getRegistry2();
|
|
88590
|
+
registry2.scorers.push(scorer);
|
|
88591
|
+
}
|
|
88592
|
+
function registerEvaluation2(evaluation) {
|
|
88593
|
+
const registry2 = getRegistry2();
|
|
88594
|
+
registry2.evaluations.push(evaluation);
|
|
88595
|
+
}
|
|
88596
|
+
function trackEvaluationPromise(promise3) {
|
|
88597
|
+
const registry2 = getRegistry2();
|
|
88598
|
+
registry2.pendingEvaluations.push(promise3);
|
|
88599
|
+
}
|
|
88600
|
+
|
|
88558
88601
|
// src/serialization/resolver.ts
|
|
88559
88602
|
var exports_resolver = {};
|
|
88560
88603
|
__export(exports_resolver, {
|
|
@@ -88567,6 +88610,14 @@ __export(exports_resolver, {
|
|
|
88567
88610
|
});
|
|
88568
88611
|
import { existsSync as existsSync4, mkdirSync as mkdirSync3, writeFileSync as writeFileSync4 } from "node:fs";
|
|
88569
88612
|
import { resolve } from "node:path";
|
|
88613
|
+
function getLocalScorer(slug) {
|
|
88614
|
+
const registry2 = getRegistry2();
|
|
88615
|
+
return registry2.scorers.find((s) => (s.slug || s.name.toLowerCase()) === slug) || null;
|
|
88616
|
+
}
|
|
88617
|
+
function getLocalHandler(name) {
|
|
88618
|
+
const registry2 = getRegistry2();
|
|
88619
|
+
return registry2.handlers[name]?.handler || null;
|
|
88620
|
+
}
|
|
88570
88621
|
function getCacheBaseDir() {
|
|
88571
88622
|
const isLambda = process.env.AWS_LAMBDA_FUNCTION_NAME !== undefined;
|
|
88572
88623
|
if (isLambda) {
|
|
@@ -88647,6 +88698,33 @@ async function resolveScorer(scorerRef) {
|
|
|
88647
88698
|
if (scorerRef.type === "internal") {
|
|
88648
88699
|
throw new Error(`Internal scorer "${scorerRef.name}" should be resolved by the evaluator, not the resolver`);
|
|
88649
88700
|
}
|
|
88701
|
+
const localScorer = getLocalScorer(scorerRef.slug);
|
|
88702
|
+
if (localScorer) {
|
|
88703
|
+
console.log(`Found scorer "${scorerRef.slug}" in local registry`);
|
|
88704
|
+
if (localScorer.type === "prompt") {
|
|
88705
|
+
const schemaObj = JSON.parse(localScorer.schema.definition);
|
|
88706
|
+
const zodSchema = zodFromJSONSchema(schemaObj);
|
|
88707
|
+
return buildPromptScorer({
|
|
88708
|
+
name: localScorer.name,
|
|
88709
|
+
promptTemplate: localScorer.promptTemplate,
|
|
88710
|
+
choiceScores: localScorer.choiceScores,
|
|
88711
|
+
model: localScorer.model,
|
|
88712
|
+
useCoT: localScorer.useCoT,
|
|
88713
|
+
temperature: localScorer.temperature,
|
|
88714
|
+
parameters: zodSchema
|
|
88715
|
+
});
|
|
88716
|
+
}
|
|
88717
|
+
if (localScorer.type === "custom_scorer") {
|
|
88718
|
+
const handlerName2 = localScorer.handler.reference;
|
|
88719
|
+
const localHandler = getLocalHandler(handlerName2);
|
|
88720
|
+
if (localHandler) {
|
|
88721
|
+
console.log(`Found handler "${handlerName2}" in local registry`);
|
|
88722
|
+
return createScorerWrapper(localScorer, localHandler);
|
|
88723
|
+
}
|
|
88724
|
+
console.log(`Handler "${handlerName2}" not found locally, falling back to S3`);
|
|
88725
|
+
}
|
|
88726
|
+
}
|
|
88727
|
+
console.log(`Scorer "${scorerRef.slug}" not found locally, fetching from API`);
|
|
88650
88728
|
const version2 = scorerRef.fingerprint || "LATEST";
|
|
88651
88729
|
const scorerRecord = await getDefinition("scorer", scorerRef.slug, version2);
|
|
88652
88730
|
const scorerDef = scorerRecord.definition;
|
|
@@ -88681,6 +88759,12 @@ Available handlers: ${Object.keys(handlers).join(", ")}`);
|
|
|
88681
88759
|
}
|
|
88682
88760
|
async function resolveTask(taskRef) {
|
|
88683
88761
|
const handlerName = taskRef.reference;
|
|
88762
|
+
const localHandler = getLocalHandler(handlerName);
|
|
88763
|
+
if (localHandler) {
|
|
88764
|
+
console.log(`Found task handler "${handlerName}" in local registry`);
|
|
88765
|
+
return localHandler;
|
|
88766
|
+
}
|
|
88767
|
+
console.log(`Task handler "${handlerName}" not found locally, fetching from API`);
|
|
88684
88768
|
const handlerVersion = taskRef.bundle?.fingerprint || "LATEST";
|
|
88685
88769
|
const handlerRecord = await getDefinition("handler", handlerName, handlerVersion);
|
|
88686
88770
|
const handlerDef = handlerRecord.definition;
|
|
@@ -116717,10 +116801,17 @@ class BatchClient {
|
|
|
116717
116801
|
fs.mkdirSync(this.tempDir, { recursive: true });
|
|
116718
116802
|
}
|
|
116719
116803
|
}
|
|
116720
|
-
async initialize(name, checksum) {
|
|
116804
|
+
async initialize(name, checksum, dataSourceType) {
|
|
116721
116805
|
this.metadata = { name, checksum };
|
|
116722
116806
|
const batches = await this.listBatches(100);
|
|
116723
|
-
const matchingBatches = batches.filter((batch) =>
|
|
116807
|
+
const matchingBatches = batches.filter((batch) => {
|
|
116808
|
+
const nameMatches = batch.metadata?.name === this.metadata?.name;
|
|
116809
|
+
if (dataSourceType === "static") {
|
|
116810
|
+
return nameMatches && batch.metadata?.checksum === this.metadata?.checksum;
|
|
116811
|
+
} else {
|
|
116812
|
+
return nameMatches;
|
|
116813
|
+
}
|
|
116814
|
+
});
|
|
116724
116815
|
if (matchingBatches.length === 0) {
|
|
116725
116816
|
return;
|
|
116726
116817
|
}
|
|
@@ -116943,49 +117034,6 @@ init_data_source();
|
|
|
116943
117034
|
init_resolver();
|
|
116944
117035
|
init_schema_serializer();
|
|
116945
117036
|
init_scorer();
|
|
116946
|
-
|
|
116947
|
-
// src/serialization/types.ts
|
|
116948
|
-
function initRegistry() {
|
|
116949
|
-
if (!globalThis.__ORCHESTRATED_REGISTRY__) {
|
|
116950
|
-
globalThis.__ORCHESTRATED_REGISTRY__ = {
|
|
116951
|
-
scorers: [],
|
|
116952
|
-
handlers: {},
|
|
116953
|
-
evaluations: [],
|
|
116954
|
-
pendingEvaluations: []
|
|
116955
|
-
};
|
|
116956
|
-
}
|
|
116957
|
-
return globalThis.__ORCHESTRATED_REGISTRY__;
|
|
116958
|
-
}
|
|
116959
|
-
function getRegistry2() {
|
|
116960
|
-
return initRegistry();
|
|
116961
|
-
}
|
|
116962
|
-
function registerHandler(name, handler, usageRef) {
|
|
116963
|
-
const registry2 = getRegistry2();
|
|
116964
|
-
if (!registry2.handlers[name]) {
|
|
116965
|
-
registry2.handlers[name] = {
|
|
116966
|
-
handler,
|
|
116967
|
-
source: handler.toString(),
|
|
116968
|
-
usedIn: []
|
|
116969
|
-
};
|
|
116970
|
-
}
|
|
116971
|
-
if (usageRef) {
|
|
116972
|
-
registry2.handlers[name].usedIn.push(usageRef);
|
|
116973
|
-
}
|
|
116974
|
-
return name;
|
|
116975
|
-
}
|
|
116976
|
-
function registerScorer(scorer) {
|
|
116977
|
-
const registry2 = getRegistry2();
|
|
116978
|
-
registry2.scorers.push(scorer);
|
|
116979
|
-
}
|
|
116980
|
-
function registerEvaluation2(evaluation) {
|
|
116981
|
-
const registry2 = getRegistry2();
|
|
116982
|
-
registry2.evaluations.push(evaluation);
|
|
116983
|
-
}
|
|
116984
|
-
function trackEvaluationPromise(promise3) {
|
|
116985
|
-
const registry2 = getRegistry2();
|
|
116986
|
-
registry2.pendingEvaluations.push(promise3);
|
|
116987
|
-
}
|
|
116988
|
-
|
|
116989
117037
|
// src/serialization/serializer.ts
|
|
116990
117038
|
async function serializeDataSource(data) {
|
|
116991
117039
|
if (typeof data === "object" && data !== null && "type" in data && (data.type === "dataset" || data.type === "static")) {
|
|
@@ -117649,6 +117697,7 @@ async function resolveDataSet(dataset, datasetMetadata) {
|
|
|
117649
117697
|
ctx: {
|
|
117650
117698
|
isLiveData,
|
|
117651
117699
|
sourceType,
|
|
117700
|
+
dataSourceType: datasetMetadata?.type,
|
|
117652
117701
|
checksum,
|
|
117653
117702
|
caseCount: dataset.length
|
|
117654
117703
|
}
|
|
@@ -117974,7 +118023,7 @@ async function runEval(name, evaluator, options) {
|
|
|
117974
118023
|
[ATTR_DATASET_SOURCE_TYPE]: data.ctx.sourceType,
|
|
117975
118024
|
[ATTR_EVAL_EXECUTION_METADATA_TEST_CASE_COUNT]: data.ctx.caseCount
|
|
117976
118025
|
});
|
|
117977
|
-
await batchClient.initialize(name, data.ctx.checksum);
|
|
118026
|
+
await batchClient.initialize(name, data.ctx.checksum, data.ctx.dataSourceType);
|
|
117978
118027
|
if (batchClient.hasPendingBatch) {
|
|
117979
118028
|
return batchClient.getPending();
|
|
117980
118029
|
}
|
|
@@ -118083,12 +118132,14 @@ async function resolveEvaluatorDefinitions(name, evaluator) {
|
|
|
118083
118132
|
if (dataSourceDef.type === "dataset") {
|
|
118084
118133
|
datasetMetadata = {
|
|
118085
118134
|
name: dataSourceDef.config.name,
|
|
118086
|
-
group_name: dataSourceDef.config.group_name
|
|
118135
|
+
group_name: dataSourceDef.config.group_name,
|
|
118136
|
+
type: "dataset"
|
|
118087
118137
|
};
|
|
118088
118138
|
} else if (dataSourceDef.type === "static") {
|
|
118089
118139
|
datasetMetadata = {
|
|
118090
118140
|
name: "static",
|
|
118091
|
-
group_name: dataSourceDef.group_name
|
|
118141
|
+
group_name: dataSourceDef.group_name,
|
|
118142
|
+
type: "static"
|
|
118092
118143
|
};
|
|
118093
118144
|
}
|
|
118094
118145
|
const dataSourceFn = await resolveDataSource(dataSourceDef);
|
|
@@ -118379,4 +118430,4 @@ export {
|
|
|
118379
118430
|
Behavioral
|
|
118380
118431
|
};
|
|
118381
118432
|
|
|
118382
|
-
//# debugId=
|
|
118433
|
+
//# debugId=7CBAC3512DE449E364756E2164756E21
|