orchestrated 0.1.9 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.d.ts +33 -3
- package/index.js +146 -25
- package/index.js.map +9 -9
- package/package.json +1 -1
package/index.d.ts
CHANGED
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
import type { Batch } from 'openai/resources';
|
|
11
11
|
import type { ChatCompletionCreateParamsBase } from 'openai/resources/chat/completions.mjs';
|
|
12
12
|
import { ClientOptions } from 'openai';
|
|
13
|
+
import { default as default_2 } from 'openai';
|
|
13
14
|
import type { Metadata } from 'openai/resources';
|
|
14
15
|
import OpenAI from 'openai';
|
|
15
16
|
import type { z } from 'zod';
|
|
@@ -175,7 +176,9 @@ declare interface BatchResult {
|
|
|
175
176
|
};
|
|
176
177
|
}
|
|
177
178
|
|
|
178
|
-
export declare const Behavioral: (args: unknown) => Promise<Score
|
|
179
|
+
export declare const Behavioral: ((args: unknown) => Promise<Score>) & {
|
|
180
|
+
definition?: SerializableScorerDefinition;
|
|
181
|
+
};
|
|
179
182
|
|
|
180
183
|
/**
|
|
181
184
|
* Bundle reference with fingerprint
|
|
@@ -204,7 +207,9 @@ export declare const colors: {
|
|
|
204
207
|
yellow: string;
|
|
205
208
|
};
|
|
206
209
|
|
|
207
|
-
export declare const ContentSafety: (args: unknown) => Promise<Score
|
|
210
|
+
export declare const ContentSafety: ((args: unknown) => Promise<Score>) & {
|
|
211
|
+
definition?: SerializableScorerDefinition;
|
|
212
|
+
};
|
|
208
213
|
|
|
209
214
|
/**
|
|
210
215
|
* Context builder with progressive type refinement
|
|
@@ -284,7 +289,9 @@ declare interface DataSourceMetadata {
|
|
|
284
289
|
ctx: ResolvedDataCtx;
|
|
285
290
|
}
|
|
286
291
|
|
|
287
|
-
export declare const Effectiveness: (args: unknown) => Promise<Score
|
|
292
|
+
export declare const Effectiveness: ((args: unknown) => Promise<Score>) & {
|
|
293
|
+
definition?: SerializableScorerDefinition;
|
|
294
|
+
};
|
|
288
295
|
|
|
289
296
|
/**
|
|
290
297
|
* Eval overload for SerializableEvaluation (from API responses or definitions.json)
|
|
@@ -421,16 +428,36 @@ export declare interface EvalOptions<EvalReport = boolean> {
|
|
|
421
428
|
* Callback for pending batch UI (optional, CLI injects ink renderer)
|
|
422
429
|
*/
|
|
423
430
|
onPendingBatch?: PendingBatchCallback;
|
|
431
|
+
/**
|
|
432
|
+
* Execution mode for evaluations:
|
|
433
|
+
* - "batch": Use batching for expensive LLM scorers (default, cost-effective)
|
|
434
|
+
* - "sync": Execute scorers synchronously without batching (faster, real-time)
|
|
435
|
+
*/
|
|
436
|
+
execute?: "batch" | "sync";
|
|
437
|
+
/**
|
|
438
|
+
* Callback for streaming individual results as they complete (sync mode only)
|
|
439
|
+
* Called after each test case is evaluated
|
|
440
|
+
* Useful for real-time progress updates and streaming to clients
|
|
441
|
+
*/
|
|
442
|
+
onResult?: (result: EvalResult) => void | Promise<void>;
|
|
424
443
|
/**
|
|
425
444
|
* Optional batch client for managing batch requests.
|
|
445
|
+
* Only used when execute is "batch".
|
|
426
446
|
* If not provided, a default BatchClient will be created.
|
|
427
447
|
*/
|
|
428
448
|
batchClient?: BatchClient;
|
|
429
449
|
/**
|
|
430
450
|
* Optional eval client for LLM scoring operations.
|
|
451
|
+
* Only used when execute is "batch".
|
|
431
452
|
* If not provided, a default EvalClient will be created.
|
|
432
453
|
*/
|
|
433
454
|
evalClient?: EvalClient;
|
|
455
|
+
/**
|
|
456
|
+
* Optional OpenAI client for sync LLM operations.
|
|
457
|
+
* Only used when execute is "sync".
|
|
458
|
+
* If not provided, a default OpenAI client will be created.
|
|
459
|
+
*/
|
|
460
|
+
openaiClient?: default_2;
|
|
434
461
|
__schedule?: string;
|
|
435
462
|
}
|
|
436
463
|
|
|
@@ -446,6 +473,9 @@ export declare interface EvalResult<Input = any, Output = any, Expected = any> {
|
|
|
446
473
|
tags?: string[];
|
|
447
474
|
id?: string;
|
|
448
475
|
hasPendingBatch?: boolean;
|
|
476
|
+
ctx: {
|
|
477
|
+
[k: string]: any;
|
|
478
|
+
};
|
|
449
479
|
}
|
|
450
480
|
|
|
451
481
|
/**
|
package/index.js
CHANGED
|
@@ -21089,7 +21089,7 @@ async function initState(partial2 = {}, skipAuth = false) {
|
|
|
21089
21089
|
throw new Error("State is immutable and already initialized. Use resetState() for testing.");
|
|
21090
21090
|
}
|
|
21091
21091
|
if (globalThis.__ORCHESTRATED_SHARED_STATE__) {
|
|
21092
|
-
const sharedState = globalThis.__ORCHESTRATED_SHARED_STATE__;
|
|
21092
|
+
const { lazyLoad, ...sharedState } = globalThis.__ORCHESTRATED_SHARED_STATE__;
|
|
21093
21093
|
globalState = Object.freeze({ ...sharedState });
|
|
21094
21094
|
isInitialized = true;
|
|
21095
21095
|
return;
|
|
@@ -21148,7 +21148,8 @@ async function initState(partial2 = {}, skipAuth = false) {
|
|
|
21148
21148
|
}
|
|
21149
21149
|
function getState() {
|
|
21150
21150
|
if (!isInitialized && globalThis.__ORCHESTRATED_SHARED_STATE__) {
|
|
21151
|
-
|
|
21151
|
+
const { lazyLoad, ...shareableState } = globalThis.__ORCHESTRATED_SHARED_STATE__;
|
|
21152
|
+
return shareableState;
|
|
21152
21153
|
}
|
|
21153
21154
|
if (!isInitialized) {
|
|
21154
21155
|
throw new Error("State not initialized. Call await initState() before using getState().");
|
|
@@ -21242,10 +21243,10 @@ var init_data_source = __esm(() => {
|
|
|
21242
21243
|
id: exports_external.string(),
|
|
21243
21244
|
ctx: exports_external.object({
|
|
21244
21245
|
systemPrompt: exports_external.string().optional()
|
|
21245
|
-
}),
|
|
21246
|
+
}).loose(),
|
|
21246
21247
|
input: exports_external.string(),
|
|
21247
21248
|
output: exports_external.string()
|
|
21248
|
-
}))
|
|
21249
|
+
}).loose())
|
|
21249
21250
|
}).passthrough();
|
|
21250
21251
|
});
|
|
21251
21252
|
|
|
@@ -88550,10 +88551,34 @@ function buildPromptScorer(config2) {
|
|
|
88550
88551
|
enumerable: false,
|
|
88551
88552
|
configurable: true
|
|
88552
88553
|
});
|
|
88554
|
+
const serializedSchema = config2.parameters ? serializeZodSchema(config2.parameters) : {
|
|
88555
|
+
type: "zod",
|
|
88556
|
+
definition: JSON.stringify({ type: "object" })
|
|
88557
|
+
};
|
|
88558
|
+
const definition = {
|
|
88559
|
+
type: "prompt",
|
|
88560
|
+
name: config2.name,
|
|
88561
|
+
slug: config2.slug || config2.name.toLowerCase().replace(/[_\s]+/g, "-"),
|
|
88562
|
+
description: config2.description || `Prompt-based scorer: ${config2.name}`,
|
|
88563
|
+
schema: serializedSchema,
|
|
88564
|
+
promptTemplate: config2.promptTemplate,
|
|
88565
|
+
choiceScores: config2.choiceScores,
|
|
88566
|
+
model: config2.model,
|
|
88567
|
+
useCoT: config2.useCoT,
|
|
88568
|
+
temperature: config2.temperature,
|
|
88569
|
+
metadata: config2.metadata
|
|
88570
|
+
};
|
|
88571
|
+
Object.defineProperty(scorerFunction, "definition", {
|
|
88572
|
+
value: definition,
|
|
88573
|
+
writable: false,
|
|
88574
|
+
enumerable: false,
|
|
88575
|
+
configurable: true
|
|
88576
|
+
});
|
|
88553
88577
|
return scorerFunction;
|
|
88554
88578
|
}
|
|
88555
88579
|
var init_scorer = __esm(() => {
|
|
88556
88580
|
init_jsdist();
|
|
88581
|
+
init_schema_serializer();
|
|
88557
88582
|
});
|
|
88558
88583
|
|
|
88559
88584
|
// src/serialization/types.ts
|
|
@@ -117088,6 +117113,22 @@ async function serializeScorer(scorer, evalName, index) {
|
|
|
117088
117113
|
}
|
|
117089
117114
|
if (typeof scorer === "function") {
|
|
117090
117115
|
const scorerName = scorer.name;
|
|
117116
|
+
console.log({
|
|
117117
|
+
scorerName,
|
|
117118
|
+
scorer
|
|
117119
|
+
});
|
|
117120
|
+
const definition = scorer.definition;
|
|
117121
|
+
if (definition && typeof definition === "object" && definition.type) {
|
|
117122
|
+
const alreadyRegistered = registry2.scorers.find((s) => s.name === definition.name);
|
|
117123
|
+
if (!alreadyRegistered) {
|
|
117124
|
+
registry2.scorers.push(definition);
|
|
117125
|
+
}
|
|
117126
|
+
return {
|
|
117127
|
+
type: "custom_scorer",
|
|
117128
|
+
slug: definition.slug || definition.name.toLowerCase(),
|
|
117129
|
+
fingerprint: definition.fingerprint
|
|
117130
|
+
};
|
|
117131
|
+
}
|
|
117091
117132
|
const registered = registry2.scorers.find((s) => s.name === scorerName);
|
|
117092
117133
|
if (registered) {
|
|
117093
117134
|
if (registered.type === "custom_scorer") {
|
|
@@ -117108,8 +117149,8 @@ async function serializeScorer(scorer, evalName, index) {
|
|
|
117108
117149
|
fingerprint: registered.fingerprint
|
|
117109
117150
|
};
|
|
117110
117151
|
}
|
|
117111
|
-
const inlineName = `${evalName}_Scorer_${index}`;
|
|
117112
|
-
const slug = inlineName.toLowerCase().replace(/_
|
|
117152
|
+
const inlineName = scorerName || `${evalName}_Scorer_${index}`;
|
|
117153
|
+
const slug = inlineName.toLowerCase().replace(/[_\s]+/g, "-");
|
|
117113
117154
|
const handlerName = `${inlineName}Handler`;
|
|
117114
117155
|
registerHandler(handlerName, scorer, {
|
|
117115
117156
|
location: "eval",
|
|
@@ -117626,8 +117667,37 @@ var traced = {
|
|
|
117626
117667
|
};
|
|
117627
117668
|
|
|
117628
117669
|
// src/evaluator/core.ts
|
|
117670
|
+
function deterministicStringify(obj) {
|
|
117671
|
+
if (obj === null)
|
|
117672
|
+
return "null";
|
|
117673
|
+
if (obj === undefined)
|
|
117674
|
+
return "undefined";
|
|
117675
|
+
if (typeof obj === "string")
|
|
117676
|
+
return JSON.stringify(obj);
|
|
117677
|
+
if (typeof obj === "number" || typeof obj === "boolean")
|
|
117678
|
+
return String(obj);
|
|
117679
|
+
if (obj instanceof Date)
|
|
117680
|
+
return obj.toISOString();
|
|
117681
|
+
if (Array.isArray(obj)) {
|
|
117682
|
+
const items = obj.map(deterministicStringify);
|
|
117683
|
+
return `[${items.join(",")}]`;
|
|
117684
|
+
}
|
|
117685
|
+
if (typeof obj === "object") {
|
|
117686
|
+
const keys = Object.keys(obj).sort();
|
|
117687
|
+
const pairs2 = keys.map((key) => `${JSON.stringify(key)}:${deterministicStringify(obj[key])}`);
|
|
117688
|
+
return `{${pairs2.join(",")}}`;
|
|
117689
|
+
}
|
|
117690
|
+
return String(obj);
|
|
117691
|
+
}
|
|
117629
117692
|
function generateTestCasesChecksum(testCases) {
|
|
117630
|
-
const
|
|
117693
|
+
const stableData = testCases.map((testCase) => ({
|
|
117694
|
+
id: testCase.id,
|
|
117695
|
+
input: testCase.input,
|
|
117696
|
+
output: testCase.output,
|
|
117697
|
+
expected: testCase.expected,
|
|
117698
|
+
ctx: testCase.ctx
|
|
117699
|
+
}));
|
|
117700
|
+
const content = deterministicStringify(stableData);
|
|
117631
117701
|
return createHash2("sha256").update(content).digest("hex");
|
|
117632
117702
|
}
|
|
117633
117703
|
function generateCaseId(dataCase) {
|
|
@@ -117745,10 +117815,20 @@ function getEvaluationOptions(name, options, state) {
|
|
|
117745
117815
|
const returnResults = merged.returnResults ?? true;
|
|
117746
117816
|
const scorerFailAsZero = merged.scorerFailAsZero ?? false;
|
|
117747
117817
|
const progress = !jsonl && createProgressTracker ? createProgressTracker(name) : new NullProgressTracker;
|
|
117748
|
-
const
|
|
117749
|
-
|
|
117750
|
-
|
|
117751
|
-
|
|
117818
|
+
const execute = merged.execute ?? "batch";
|
|
117819
|
+
let batchClient;
|
|
117820
|
+
let evalClient;
|
|
117821
|
+
let openaiClient;
|
|
117822
|
+
if (execute === "batch") {
|
|
117823
|
+
batchClient = merged.batchClient || new BatchClient;
|
|
117824
|
+
evalClient = merged.evalClient || new EvalClient({
|
|
117825
|
+
batchClient
|
|
117826
|
+
});
|
|
117827
|
+
} else {
|
|
117828
|
+
const OpenAI4 = __require("openai").default;
|
|
117829
|
+
openaiClient = merged.openaiClient || new OpenAI4;
|
|
117830
|
+
}
|
|
117831
|
+
const onResult = merged.onResult;
|
|
117752
117832
|
return {
|
|
117753
117833
|
state,
|
|
117754
117834
|
reporter,
|
|
@@ -117759,12 +117839,15 @@ function getEvaluationOptions(name, options, state) {
|
|
|
117759
117839
|
progress,
|
|
117760
117840
|
createProgressTracker: createProgressTracker || (() => new NullProgressTracker),
|
|
117761
117841
|
onPendingBatch,
|
|
117842
|
+
execute,
|
|
117762
117843
|
batchClient,
|
|
117763
|
-
evalClient
|
|
117844
|
+
evalClient,
|
|
117845
|
+
openaiClient,
|
|
117846
|
+
onResult
|
|
117764
117847
|
};
|
|
117765
117848
|
}
|
|
117766
117849
|
async function evaluateDataCase(dataCase, evaluator, _ctx, options) {
|
|
117767
|
-
const
|
|
117850
|
+
const client2 = options.execute === "sync" ? options.openaiClient : options.evalClient;
|
|
117768
117851
|
const caseId = generateCaseId(dataCase);
|
|
117769
117852
|
const dataCaseCtx = dataCase.ctx;
|
|
117770
117853
|
const ctx = _ctx.mutate({
|
|
@@ -117787,7 +117870,7 @@ async function evaluateDataCase(dataCase, evaluator, _ctx, options) {
|
|
|
117787
117870
|
expected: dataCase.expected,
|
|
117788
117871
|
tags: dataCase.tags,
|
|
117789
117872
|
id: caseId,
|
|
117790
|
-
client:
|
|
117873
|
+
client: client2
|
|
117791
117874
|
};
|
|
117792
117875
|
const fieldsToExclude = new Set(["state", "ctx", "tags", "id", "client"]);
|
|
117793
117876
|
const argsForStorage = Object.fromEntries(Object.entries(scorerArgs).filter(([key]) => !fieldsToExclude.has(key)));
|
|
@@ -117801,7 +117884,8 @@ async function evaluateDataCase(dataCase, evaluator, _ctx, options) {
|
|
|
117801
117884
|
tags: dataCase.tags,
|
|
117802
117885
|
id: caseId,
|
|
117803
117886
|
hasPendingBatch: result.hasPendingBatch,
|
|
117804
|
-
error: result.error
|
|
117887
|
+
error: result.error,
|
|
117888
|
+
ctx: argsForStorage
|
|
117805
117889
|
};
|
|
117806
117890
|
if (verbose) {
|
|
117807
117891
|
console.dir(result.scores, { depth: null });
|
|
@@ -117817,7 +117901,25 @@ function getScorerName(scorer2, index) {
|
|
|
117817
117901
|
return scorer2;
|
|
117818
117902
|
}
|
|
117819
117903
|
if (typeof scorer2 === "function") {
|
|
117820
|
-
return scorer2.name
|
|
117904
|
+
return scorer2.name || `Scorer ${index ?? 0}`;
|
|
117905
|
+
}
|
|
117906
|
+
if (scorer2 && typeof scorer2 === "object") {
|
|
117907
|
+
if (scorer2.type === "internal" && scorer2.name) {
|
|
117908
|
+
return scorer2.name;
|
|
117909
|
+
}
|
|
117910
|
+
if (scorer2.type === "custom_scorer" && scorer2.slug) {
|
|
117911
|
+
try {
|
|
117912
|
+
const registry2 = getRegistry2();
|
|
117913
|
+
const scorerDef = registry2.scorers.find((s) => s.slug === scorer2.slug);
|
|
117914
|
+
if (scorerDef && scorerDef.name) {
|
|
117915
|
+
return scorerDef.name;
|
|
117916
|
+
}
|
|
117917
|
+
} catch (e) {}
|
|
117918
|
+
return scorer2.slug.split("-").map((word) => word.charAt(0).toUpperCase() + word.slice(1)).join(" ");
|
|
117919
|
+
}
|
|
117920
|
+
if (scorer2.name) {
|
|
117921
|
+
return scorer2.name;
|
|
117922
|
+
}
|
|
117821
117923
|
}
|
|
117822
117924
|
return `Scorer ${index ?? 0}`;
|
|
117823
117925
|
}
|
|
@@ -118011,7 +118113,15 @@ async function executeScorer(scorer2, scorerArgs) {
|
|
|
118011
118113
|
return scorerFn(scorerArgs);
|
|
118012
118114
|
}
|
|
118013
118115
|
async function runEval(name, evaluator, options) {
|
|
118014
|
-
const {
|
|
118116
|
+
const {
|
|
118117
|
+
state,
|
|
118118
|
+
verbose,
|
|
118119
|
+
jsonl,
|
|
118120
|
+
returnResults,
|
|
118121
|
+
progress,
|
|
118122
|
+
execute,
|
|
118123
|
+
batchClient
|
|
118124
|
+
} = options;
|
|
118015
118125
|
const ctx = init({
|
|
118016
118126
|
...evaluator.ctx ?? {},
|
|
118017
118127
|
state,
|
|
@@ -118023,9 +118133,11 @@ async function runEval(name, evaluator, options) {
|
|
|
118023
118133
|
[ATTR_DATASET_SOURCE_TYPE]: data.ctx.sourceType,
|
|
118024
118134
|
[ATTR_EVAL_EXECUTION_METADATA_TEST_CASE_COUNT]: data.ctx.caseCount
|
|
118025
118135
|
});
|
|
118026
|
-
|
|
118027
|
-
|
|
118028
|
-
|
|
118136
|
+
if (execute === "batch" && batchClient) {
|
|
118137
|
+
await batchClient.initialize(name, data.ctx.checksum, data.ctx.dataSourceType);
|
|
118138
|
+
if (batchClient.hasPendingBatch) {
|
|
118139
|
+
return batchClient.getPending();
|
|
118140
|
+
}
|
|
118029
118141
|
}
|
|
118030
118142
|
const results = [];
|
|
118031
118143
|
const errors6 = new Map;
|
|
@@ -118036,6 +118148,9 @@ async function runEval(name, evaluator, options) {
|
|
|
118036
118148
|
}), options);
|
|
118037
118149
|
results.push(caseResult.result);
|
|
118038
118150
|
aggregateScorerErrors(errors6, caseResult.errors);
|
|
118151
|
+
if (execute === "sync" && options.onResult) {
|
|
118152
|
+
await options.onResult(caseResult.result);
|
|
118153
|
+
}
|
|
118039
118154
|
if (!jsonl) {
|
|
118040
118155
|
progress.increment();
|
|
118041
118156
|
}
|
|
@@ -118044,9 +118159,12 @@ async function runEval(name, evaluator, options) {
|
|
|
118044
118159
|
if (!jsonl) {
|
|
118045
118160
|
progress.stop();
|
|
118046
118161
|
}
|
|
118047
|
-
|
|
118048
|
-
if (
|
|
118049
|
-
|
|
118162
|
+
let batch;
|
|
118163
|
+
if (execute === "batch" && batchClient) {
|
|
118164
|
+
batch = await batchClient.submit();
|
|
118165
|
+
if (verbose) {
|
|
118166
|
+
console.log(batch);
|
|
118167
|
+
}
|
|
118050
118168
|
}
|
|
118051
118169
|
const summary = createEvaluationSummary(name, results, errors6);
|
|
118052
118170
|
const result = {
|
|
@@ -118262,12 +118380,15 @@ class ScorerRegistry {
|
|
|
118262
118380
|
}
|
|
118263
118381
|
return buildPromptScorer({
|
|
118264
118382
|
name: config2.name,
|
|
118383
|
+
slug: config2.slug,
|
|
118384
|
+
description: config2.description,
|
|
118265
118385
|
promptTemplate: config2.promptTemplate,
|
|
118266
118386
|
choiceScores: config2.choiceScores,
|
|
118267
118387
|
model: config2.model,
|
|
118268
118388
|
useCoT: config2.useCoT,
|
|
118269
118389
|
temperature: config2.temperature,
|
|
118270
|
-
parameters: config2.parameters
|
|
118390
|
+
parameters: config2.parameters,
|
|
118391
|
+
metadata: config2.metadata
|
|
118271
118392
|
});
|
|
118272
118393
|
}
|
|
118273
118394
|
createCustomScorer(config2, inLazyMode) {
|
|
@@ -118430,4 +118551,4 @@ export {
|
|
|
118430
118551
|
Behavioral
|
|
118431
118552
|
};
|
|
118432
118553
|
|
|
118433
|
-
//# debugId=
|
|
118554
|
+
//# debugId=5F5616B0EFE8647164756E2164756E21
|