orchestrated 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.d.ts +92 -2
- package/index.js +64 -11
- package/index.js.map +8 -8
- package/package.json +1 -1
package/index.d.ts
CHANGED
|
@@ -247,6 +247,12 @@ declare type DataSourceDefinition = {
|
|
|
247
247
|
type: "dataset";
|
|
248
248
|
config: {
|
|
249
249
|
name: string;
|
|
250
|
+
/**
|
|
251
|
+
* Optional group name for organizing datasets in the same namespace
|
|
252
|
+
* If not provided, the name will be used for grouping
|
|
253
|
+
* This is used to build the source_type namespace (e.g., "live.interactions")
|
|
254
|
+
*/
|
|
255
|
+
group_name?: string;
|
|
250
256
|
source: {
|
|
251
257
|
baseUrl: string;
|
|
252
258
|
path: string;
|
|
@@ -257,6 +263,11 @@ declare type DataSourceDefinition = {
|
|
|
257
263
|
} | {
|
|
258
264
|
type: "static";
|
|
259
265
|
data: unknown[];
|
|
266
|
+
/**
|
|
267
|
+
* Optional group name for static datasets
|
|
268
|
+
* If not provided, defaults to "static"
|
|
269
|
+
*/
|
|
270
|
+
group_name?: string;
|
|
260
271
|
};
|
|
261
272
|
|
|
262
273
|
/**
|
|
@@ -447,7 +458,10 @@ declare interface EvalScorerContext {
|
|
|
447
458
|
scorerName: string;
|
|
448
459
|
caseId: string;
|
|
449
460
|
datasetId: string;
|
|
450
|
-
|
|
461
|
+
/**
|
|
462
|
+
* Namespaced source type (e.g., "live.interactions", "synthetic.my-group")
|
|
463
|
+
*/
|
|
464
|
+
datasetSourceType: string;
|
|
451
465
|
dataCase: EvalCase<any, any, any, any>;
|
|
452
466
|
}
|
|
453
467
|
|
|
@@ -755,6 +769,13 @@ export declare interface InteractionsDatasetOptions extends HTTPDataSourceOption
|
|
|
755
769
|
* @example "production", "staging", "development"
|
|
756
770
|
*/
|
|
757
771
|
environment: string;
|
|
772
|
+
/**
|
|
773
|
+
* Optional group name for organizing datasets in the same namespace
|
|
774
|
+
* If not provided, defaults to "interactions"
|
|
775
|
+
* This is used to build the source_type namespace (e.g., "live.interactions")
|
|
776
|
+
* @example "sessions", "user-queries"
|
|
777
|
+
*/
|
|
778
|
+
group_name?: string;
|
|
758
779
|
/**
|
|
759
780
|
* Optional month filter (date string)
|
|
760
781
|
* @example "01/2026"
|
|
@@ -905,7 +926,13 @@ export declare function resetState(): void;
|
|
|
905
926
|
|
|
906
927
|
declare interface ResolvedDataCtx {
|
|
907
928
|
isLiveData: boolean;
|
|
908
|
-
|
|
929
|
+
/**
|
|
930
|
+
* Source type with namespace pattern:
|
|
931
|
+
* - "live.interactions" - live data from interactions dataset
|
|
932
|
+
* - "live.sessions" - live data from sessions dataset
|
|
933
|
+
* - "synthetic.my-group" - synthetic data with custom grouping
|
|
934
|
+
*/
|
|
935
|
+
sourceType: string;
|
|
909
936
|
checksum: string;
|
|
910
937
|
caseCount: number;
|
|
911
938
|
}
|
|
@@ -1133,6 +1160,69 @@ export declare interface SerializableScorerDefinition extends SerializableScorer
|
|
|
1133
1160
|
temperature?: number;
|
|
1134
1161
|
}
|
|
1135
1162
|
|
|
1163
|
+
/**
|
|
1164
|
+
* Creates a dataset definition for the sessions dataset
|
|
1165
|
+
* Fetches sessions with their last interaction's input/output messages
|
|
1166
|
+
* and session-level metadata (usage_by_model, segments, etc.)
|
|
1167
|
+
*
|
|
1168
|
+
* @example
|
|
1169
|
+
* ```ts
|
|
1170
|
+
* import { Eval } from "./evaluator";
|
|
1171
|
+
* import { sessions } from "./data-source/apigateway";
|
|
1172
|
+
*
|
|
1173
|
+
* await Eval("Session Quality Eval", {
|
|
1174
|
+
* data: sessions({
|
|
1175
|
+
* tenantId: "shortcuts",
|
|
1176
|
+
* serviceName: "shortie",
|
|
1177
|
+
* environment: "production",
|
|
1178
|
+
* }),
|
|
1179
|
+
* scores: ["Effectiveness", "GuardrailAdherence"]
|
|
1180
|
+
* });
|
|
1181
|
+
* ```
|
|
1182
|
+
*/
|
|
1183
|
+
export declare function sessions(options?: Partial<Omit<SessionsDatasetOptions, "baseUrl">>): DataSourceDefinition;
|
|
1184
|
+
|
|
1185
|
+
/**
|
|
1186
|
+
* Options for fetching the sessions dataset
|
|
1187
|
+
*/
|
|
1188
|
+
export declare interface SessionsDatasetOptions extends HTTPDataSourceOptions {
|
|
1189
|
+
/**
|
|
1190
|
+
* Tenant ID to filter sessions
|
|
1191
|
+
*/
|
|
1192
|
+
tenantId: string;
|
|
1193
|
+
/**
|
|
1194
|
+
* Service name to filter sessions
|
|
1195
|
+
*/
|
|
1196
|
+
serviceName: string;
|
|
1197
|
+
/**
|
|
1198
|
+
* Environment to filter sessions
|
|
1199
|
+
* @example "production", "staging", "development"
|
|
1200
|
+
*/
|
|
1201
|
+
environment: string;
|
|
1202
|
+
/**
|
|
1203
|
+
* Optional group name for organizing datasets in the same namespace
|
|
1204
|
+
* If not provided, defaults to "sessions"
|
|
1205
|
+
* This is used to build the source_type namespace (e.g., "live.sessions")
|
|
1206
|
+
* @example "user-sessions", "support-sessions"
|
|
1207
|
+
*/
|
|
1208
|
+
group_name?: string;
|
|
1209
|
+
/**
|
|
1210
|
+
* Optional month filter (date string)
|
|
1211
|
+
* @example "01/2026"
|
|
1212
|
+
*/
|
|
1213
|
+
month?: string;
|
|
1214
|
+
/**
|
|
1215
|
+
* Optional period filter (date string)
|
|
1216
|
+
* @example "01/01/2026"
|
|
1217
|
+
*/
|
|
1218
|
+
startDate?: string;
|
|
1219
|
+
/**
|
|
1220
|
+
* Optional period filter (date string)
|
|
1221
|
+
* @example "01/01/2026"
|
|
1222
|
+
*/
|
|
1223
|
+
endDate?: string;
|
|
1224
|
+
}
|
|
1225
|
+
|
|
1136
1226
|
/**
|
|
1137
1227
|
* An interface that represents a span. A span represents a single operation
|
|
1138
1228
|
* within a trace. Examples of span might include remote procedure calls or a
|
package/index.js
CHANGED
|
@@ -21185,6 +21185,7 @@ async function getDataset(datasetName, source, options) {
|
|
|
21185
21185
|
path2 = `me/${path2}`;
|
|
21186
21186
|
}
|
|
21187
21187
|
const params = {
|
|
21188
|
+
...options,
|
|
21188
21189
|
name: datasetName,
|
|
21189
21190
|
tenantId: options.tenantId || state.tenantId,
|
|
21190
21191
|
serviceName: options.serviceName || state.serviceName,
|
|
@@ -117326,10 +117327,6 @@ async function initTelemetry(state) {
|
|
|
117326
117327
|
} else {
|
|
117327
117328
|
throw new Error("Authentication is required. Either provide accessToken, apiKey option, or set ORCHESTRATED_ACCESS_TOKEN/ORCHESTRATED_API_KEY environment variable. Run 'orcha login' to authenticate.");
|
|
117328
117329
|
}
|
|
117329
|
-
console.log({
|
|
117330
|
-
headers,
|
|
117331
|
-
url: `${state.otelEndpoint}${isAccessToken ? "/me" : ""}/v1/traces`
|
|
117332
|
-
});
|
|
117333
117330
|
try {
|
|
117334
117331
|
const exporter = new import_exporter_trace_otlp_http.OTLPTraceExporter({
|
|
117335
117332
|
url: `${state.otelEndpoint}${isAccessToken ? "/me" : ""}/v1/traces`,
|
|
@@ -117510,7 +117507,7 @@ var traced = {
|
|
|
117510
117507
|
[ATTR_DATASET_CASE_ID]: ctx.caseId,
|
|
117511
117508
|
[ATTR_DATASET_SOURCE_TYPE]: ctx.dataset.sourceType
|
|
117512
117509
|
});
|
|
117513
|
-
if (ctx.dataset.sourceType
|
|
117510
|
+
if (ctx.dataset.sourceType.startsWith("live") && ctx.traceId && ctx.spanId) {
|
|
117514
117511
|
evalSpan.addLink({
|
|
117515
117512
|
context: {
|
|
117516
117513
|
traceId: ctx.traceId,
|
|
@@ -117550,7 +117547,7 @@ var traced = {
|
|
|
117550
117547
|
[ATTR_DATASET_CASE_METADATA]: JSON.stringify(ctx.dataCase.ctx)
|
|
117551
117548
|
});
|
|
117552
117549
|
const dataCaseCtx = ctx.dataCase.ctx;
|
|
117553
|
-
if (ctx.datasetSourceType
|
|
117550
|
+
if (ctx.datasetSourceType.startsWith("live") && dataCaseCtx && dataCaseCtx.traceId && dataCaseCtx.spanId) {
|
|
117554
117551
|
scorerSpan.addLink({
|
|
117555
117552
|
context: {
|
|
117556
117553
|
traceId: dataCaseCtx.traceId,
|
|
@@ -117629,10 +117626,23 @@ function calculatePercentile(sortedValues, percentile) {
|
|
|
117629
117626
|
const upperValue = sortedValues[upperIndex];
|
|
117630
117627
|
return lowerValue + (upperValue - lowerValue) * fraction;
|
|
117631
117628
|
}
|
|
117632
|
-
|
|
117629
|
+
function buildSourceType(isLiveData, datasetName, groupName) {
|
|
117630
|
+
const prefix = isLiveData ? "live" : "synthetic";
|
|
117631
|
+
let namespace;
|
|
117632
|
+
if (groupName) {
|
|
117633
|
+
namespace = groupName;
|
|
117634
|
+
} else if (datasetName) {
|
|
117635
|
+
const parts = datasetName.split(".");
|
|
117636
|
+
namespace = parts[parts.length - 1];
|
|
117637
|
+
} else {
|
|
117638
|
+
namespace = isLiveData ? "data" : "data";
|
|
117639
|
+
}
|
|
117640
|
+
return `${prefix}.${namespace}`;
|
|
117641
|
+
}
|
|
117642
|
+
async function resolveDataSet(dataset, datasetMetadata) {
|
|
117633
117643
|
return traced.dataSource(async () => {
|
|
117634
117644
|
const isLiveData = dataset.some((c) => c.ctx?.traceId && c.ctx?.spanId);
|
|
117635
|
-
const sourceType = isLiveData
|
|
117645
|
+
const sourceType = buildSourceType(isLiveData, datasetMetadata?.name, datasetMetadata?.group_name);
|
|
117636
117646
|
const checksum = generateTestCasesChecksum(dataset);
|
|
117637
117647
|
return {
|
|
117638
117648
|
dataset,
|
|
@@ -117959,7 +117969,7 @@ async function runEval(name, evaluator, options) {
|
|
|
117959
117969
|
evaluatorName: name
|
|
117960
117970
|
});
|
|
117961
117971
|
return traced.execution(ctx, async (rootSpan) => {
|
|
117962
|
-
const data = await resolveDataSet(evaluator.data);
|
|
117972
|
+
const data = await resolveDataSet(evaluator.data, evaluator.datasetMetadata);
|
|
117963
117973
|
rootSpan.setAttributes({
|
|
117964
117974
|
[ATTR_DATASET_SOURCE_TYPE]: data.ctx.sourceType,
|
|
117965
117975
|
[ATTR_EVAL_EXECUTION_METADATA_TEST_CASE_COUNT]: data.ctx.caseCount
|
|
@@ -118040,8 +118050,10 @@ function Eval(name, evaluator, options) {
|
|
|
118040
118050
|
}
|
|
118041
118051
|
if (pendingInfo.batches.length > 0) {
|
|
118042
118052
|
await resolvedOptions.onPendingBatch(pendingInfo.evalName, pendingInfo.batches);
|
|
118053
|
+
await shutdownTelemetry();
|
|
118043
118054
|
return evalResult;
|
|
118044
118055
|
}
|
|
118056
|
+
await shutdownTelemetry();
|
|
118045
118057
|
return evalResult;
|
|
118046
118058
|
}
|
|
118047
118059
|
if (evalResult) {
|
|
@@ -118059,13 +118071,27 @@ function Eval(name, evaluator, options) {
|
|
|
118059
118071
|
registerEvaluation(promise3);
|
|
118060
118072
|
promise3.catch((error48) => {
|
|
118061
118073
|
console.error(`Error in Eval "${name}":`, error48);
|
|
118074
|
+
return shutdownTelemetry();
|
|
118062
118075
|
});
|
|
118063
118076
|
return promise3;
|
|
118064
118077
|
}
|
|
118065
118078
|
async function resolveEvaluatorDefinitions(name, evaluator) {
|
|
118066
118079
|
let resolvedData;
|
|
118080
|
+
let datasetMetadata;
|
|
118067
118081
|
if (typeof evaluator.data === "object" && evaluator.data !== null && !Array.isArray(evaluator.data) && "type" in evaluator.data && (evaluator.data.type === "dataset" || evaluator.data.type === "static")) {
|
|
118068
|
-
const
|
|
118082
|
+
const dataSourceDef = evaluator.data;
|
|
118083
|
+
if (dataSourceDef.type === "dataset") {
|
|
118084
|
+
datasetMetadata = {
|
|
118085
|
+
name: dataSourceDef.config.name,
|
|
118086
|
+
group_name: dataSourceDef.config.group_name
|
|
118087
|
+
};
|
|
118088
|
+
} else if (dataSourceDef.type === "static") {
|
|
118089
|
+
datasetMetadata = {
|
|
118090
|
+
name: "static",
|
|
118091
|
+
group_name: dataSourceDef.group_name
|
|
118092
|
+
};
|
|
118093
|
+
}
|
|
118094
|
+
const dataSourceFn = await resolveDataSource(dataSourceDef);
|
|
118069
118095
|
resolvedData = await dataSourceFn();
|
|
118070
118096
|
} else {
|
|
118071
118097
|
resolvedData = evaluator.data;
|
|
@@ -118095,6 +118121,7 @@ async function resolveEvaluatorDefinitions(name, evaluator) {
|
|
|
118095
118121
|
name,
|
|
118096
118122
|
ctx: evaluator.ctx,
|
|
118097
118123
|
data: resolvedData,
|
|
118124
|
+
datasetMetadata,
|
|
118098
118125
|
task: resolvedTask,
|
|
118099
118126
|
scores: resolvedScores
|
|
118100
118127
|
};
|
|
@@ -118106,6 +118133,31 @@ function interactions(options = {}) {
|
|
|
118106
118133
|
type: "dataset",
|
|
118107
118134
|
config: {
|
|
118108
118135
|
name: "orchestrated.dataset.interactions",
|
|
118136
|
+
group_name: options.group_name,
|
|
118137
|
+
source: {
|
|
118138
|
+
baseUrl: getApiUrl(),
|
|
118139
|
+
path: "/dataset",
|
|
118140
|
+
method: "POST"
|
|
118141
|
+
},
|
|
118142
|
+
options: {
|
|
118143
|
+
tenantId: options.tenantId || state.tenantId,
|
|
118144
|
+
serviceName: options.serviceName || state.serviceName,
|
|
118145
|
+
environment: options.environment || state.environment,
|
|
118146
|
+
month: options.month,
|
|
118147
|
+
startDate: options.startDate,
|
|
118148
|
+
endDate: options.endDate,
|
|
118149
|
+
timeout: options.timeout
|
|
118150
|
+
}
|
|
118151
|
+
}
|
|
118152
|
+
};
|
|
118153
|
+
}
|
|
118154
|
+
function sessions(options = {}) {
|
|
118155
|
+
const state = getState();
|
|
118156
|
+
return {
|
|
118157
|
+
type: "dataset",
|
|
118158
|
+
config: {
|
|
118159
|
+
name: "orchestrated.dataset.sessions",
|
|
118160
|
+
group_name: options.group_name,
|
|
118109
118161
|
source: {
|
|
118110
118162
|
baseUrl: getApiUrl(),
|
|
118111
118163
|
path: "/dataset",
|
|
@@ -118305,6 +118357,7 @@ var legacyReporter = {
|
|
|
118305
118357
|
export {
|
|
118306
118358
|
waitForEvaluations,
|
|
118307
118359
|
traced,
|
|
118360
|
+
sessions,
|
|
118308
118361
|
resetState,
|
|
118309
118362
|
registerEvaluation,
|
|
118310
118363
|
projects,
|
|
@@ -118326,4 +118379,4 @@ export {
|
|
|
118326
118379
|
Behavioral
|
|
118327
118380
|
};
|
|
118328
118381
|
|
|
118329
|
-
//# debugId=
|
|
118382
|
+
//# debugId=1A3AD42D6FAE354F64756E2164756E21
|