orchestrated 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.d.ts +92 -2
- package/index.js +60 -23
- package/index.js.map +7 -7
- package/package.json +1 -1
package/index.d.ts
CHANGED
|
@@ -247,6 +247,12 @@ declare type DataSourceDefinition = {
|
|
|
247
247
|
type: "dataset";
|
|
248
248
|
config: {
|
|
249
249
|
name: string;
|
|
250
|
+
/**
|
|
251
|
+
* Optional group name for organizing datasets in the same namespace
|
|
252
|
+
* If not provided, the last dot-separated segment of the name is used for grouping
|
|
253
|
+
* This is used to build the source_type namespace (e.g., "live.interactions")
|
|
254
|
+
*/
|
|
255
|
+
group_name?: string;
|
|
250
256
|
source: {
|
|
251
257
|
baseUrl: string;
|
|
252
258
|
path: string;
|
|
@@ -257,6 +263,11 @@ declare type DataSourceDefinition = {
|
|
|
257
263
|
} | {
|
|
258
264
|
type: "static";
|
|
259
265
|
data: unknown[];
|
|
266
|
+
/**
|
|
267
|
+
* Optional group name for static datasets
|
|
268
|
+
* If not provided, defaults to "static"
|
|
269
|
+
*/
|
|
270
|
+
group_name?: string;
|
|
260
271
|
};
|
|
261
272
|
|
|
262
273
|
/**
|
|
@@ -447,7 +458,10 @@ declare interface EvalScorerContext {
|
|
|
447
458
|
scorerName: string;
|
|
448
459
|
caseId: string;
|
|
449
460
|
datasetId: string;
|
|
450
|
-
|
|
461
|
+
/**
|
|
462
|
+
* Namespaced source type (e.g., "live.interactions", "synthetic.my-group")
|
|
463
|
+
*/
|
|
464
|
+
datasetSourceType: string;
|
|
451
465
|
dataCase: EvalCase<any, any, any, any>;
|
|
452
466
|
}
|
|
453
467
|
|
|
@@ -755,6 +769,13 @@ export declare interface InteractionsDatasetOptions extends HTTPDataSourceOption
|
|
|
755
769
|
* @example "production", "staging", "development"
|
|
756
770
|
*/
|
|
757
771
|
environment: string;
|
|
772
|
+
/**
|
|
773
|
+
* Optional group name for organizing datasets in the same namespace
|
|
774
|
+
* If not provided, defaults to "interactions"
|
|
775
|
+
* This is used to build the source_type namespace (e.g., "live.interactions")
|
|
776
|
+
* @example "sessions", "user-queries"
|
|
777
|
+
*/
|
|
778
|
+
group_name?: string;
|
|
758
779
|
/**
|
|
759
780
|
* Optional month filter (date string)
|
|
760
781
|
* @example "01/2026"
|
|
@@ -905,7 +926,13 @@ export declare function resetState(): void;
|
|
|
905
926
|
|
|
906
927
|
declare interface ResolvedDataCtx {
|
|
907
928
|
isLiveData: boolean;
|
|
908
|
-
|
|
929
|
+
/**
|
|
930
|
+
* Source type with namespace pattern:
|
|
931
|
+
* - "live.interactions" - live data from interactions dataset
|
|
932
|
+
* - "live.sessions" - live data from sessions dataset
|
|
933
|
+
* - "synthetic.my-group" - synthetic data with custom grouping
|
|
934
|
+
*/
|
|
935
|
+
sourceType: string;
|
|
909
936
|
checksum: string;
|
|
910
937
|
caseCount: number;
|
|
911
938
|
}
|
|
@@ -1133,6 +1160,69 @@ export declare interface SerializableScorerDefinition extends SerializableScorer
|
|
|
1133
1160
|
temperature?: number;
|
|
1134
1161
|
}
|
|
1135
1162
|
|
|
1163
|
+
/**
|
|
1164
|
+
* Creates a dataset definition for the sessions dataset
|
|
1165
|
+
* Fetches sessions with their last interaction's input/output messages
|
|
1166
|
+
* and session-level metadata (usage_by_model, segments, etc.)
|
|
1167
|
+
*
|
|
1168
|
+
* @example
|
|
1169
|
+
* ```ts
|
|
1170
|
+
* import { Eval } from "./evaluator";
|
|
1171
|
+
* import { sessions } from "./data-source/apigateway";
|
|
1172
|
+
*
|
|
1173
|
+
* await Eval("Session Quality Eval", {
|
|
1174
|
+
* data: sessions({
|
|
1175
|
+
* tenantId: "shortcuts",
|
|
1176
|
+
* serviceName: "shortie",
|
|
1177
|
+
* environment: "production",
|
|
1178
|
+
* }),
|
|
1179
|
+
* scores: ["Effectiveness", "GuardrailAdherence"]
|
|
1180
|
+
* });
|
|
1181
|
+
* ```
|
|
1182
|
+
*/
|
|
1183
|
+
export declare function sessions(options?: Partial<Omit<SessionsDatasetOptions, "baseUrl">>): DataSourceDefinition;
|
|
1184
|
+
|
|
1185
|
+
/**
|
|
1186
|
+
* Options for fetching the sessions dataset
|
|
1187
|
+
*/
|
|
1188
|
+
export declare interface SessionsDatasetOptions extends HTTPDataSourceOptions {
|
|
1189
|
+
/**
|
|
1190
|
+
* Tenant ID to filter sessions
|
|
1191
|
+
*/
|
|
1192
|
+
tenantId: string;
|
|
1193
|
+
/**
|
|
1194
|
+
* Service name to filter sessions
|
|
1195
|
+
*/
|
|
1196
|
+
serviceName: string;
|
|
1197
|
+
/**
|
|
1198
|
+
* Environment to filter sessions
|
|
1199
|
+
* @example "production", "staging", "development"
|
|
1200
|
+
*/
|
|
1201
|
+
environment: string;
|
|
1202
|
+
/**
|
|
1203
|
+
* Optional group name for organizing datasets in the same namespace
|
|
1204
|
+
* If not provided, defaults to "sessions"
|
|
1205
|
+
* This is used to build the source_type namespace (e.g., "live.sessions")
|
|
1206
|
+
* @example "user-sessions", "support-sessions"
|
|
1207
|
+
*/
|
|
1208
|
+
group_name?: string;
|
|
1209
|
+
/**
|
|
1210
|
+
* Optional month filter (date string)
|
|
1211
|
+
* @example "01/2026"
|
|
1212
|
+
*/
|
|
1213
|
+
month?: string;
|
|
1214
|
+
/**
|
|
1215
|
+
* Optional start of the period filter (date string)
|
|
1216
|
+
* @example "01/01/2026"
|
|
1217
|
+
*/
|
|
1218
|
+
startDate?: string;
|
|
1219
|
+
/**
|
|
1220
|
+
* Optional end of the period filter (date string)
|
|
1221
|
+
* @example "01/01/2026"
|
|
1222
|
+
*/
|
|
1223
|
+
endDate?: string;
|
|
1224
|
+
}
|
|
1225
|
+
|
|
1136
1226
|
/**
|
|
1137
1227
|
* An interface that represents a span. A span represents a single operation
|
|
1138
1228
|
* within a trace. Examples of span might include remote procedure calls or a
|
package/index.js
CHANGED
|
@@ -117326,10 +117326,6 @@ async function initTelemetry(state) {
|
|
|
117326
117326
|
} else {
|
|
117327
117327
|
throw new Error("Authentication is required. Either provide accessToken, apiKey option, or set ORCHESTRATED_ACCESS_TOKEN/ORCHESTRATED_API_KEY environment variable. Run 'orcha login' to authenticate.");
|
|
117328
117328
|
}
|
|
117329
|
-
console.log({
|
|
117330
|
-
headers,
|
|
117331
|
-
url: `${state.otelEndpoint}${isAccessToken ? "/me" : ""}/v1/traces`
|
|
117332
|
-
});
|
|
117333
117329
|
try {
|
|
117334
117330
|
const exporter = new import_exporter_trace_otlp_http.OTLPTraceExporter({
|
|
117335
117331
|
url: `${state.otelEndpoint}${isAccessToken ? "/me" : ""}/v1/traces`,
|
|
@@ -117365,17 +117361,6 @@ async function flushTelemetry(timeoutMillis = 30000) {
|
|
|
117365
117361
|
} catch (error48) {}
|
|
117366
117362
|
}
|
|
117367
117363
|
}
|
|
117368
|
-
async function shutdownTelemetry(timeoutMillis = 30000) {
|
|
117369
|
-
if (provider) {
|
|
117370
|
-
try {
|
|
117371
|
-
await provider.shutdown(), console.debug("Telemetry shutdown complete");
|
|
117372
|
-
} catch (error48) {
|
|
117373
|
-
console.error("Error during telemetry shutdown:", error48 instanceof Error ? error48.message : String(error48));
|
|
117374
|
-
}
|
|
117375
|
-
provider = null;
|
|
117376
|
-
isInitialized2 = false;
|
|
117377
|
-
}
|
|
117378
|
-
}
|
|
117379
117364
|
// src/telemetry/tracer.ts
|
|
117380
117365
|
var import_api = __toESM(require_src(), 1);
|
|
117381
117366
|
var TRACER_NAME = "@orchestrated/sdk";
|
|
@@ -117510,7 +117495,7 @@ var traced = {
|
|
|
117510
117495
|
[ATTR_DATASET_CASE_ID]: ctx.caseId,
|
|
117511
117496
|
[ATTR_DATASET_SOURCE_TYPE]: ctx.dataset.sourceType
|
|
117512
117497
|
});
|
|
117513
|
-
if (ctx.dataset.sourceType
|
|
117498
|
+
if (ctx.dataset.sourceType.startsWith("live") && ctx.traceId && ctx.spanId) {
|
|
117514
117499
|
evalSpan.addLink({
|
|
117515
117500
|
context: {
|
|
117516
117501
|
traceId: ctx.traceId,
|
|
@@ -117550,7 +117535,7 @@ var traced = {
|
|
|
117550
117535
|
[ATTR_DATASET_CASE_METADATA]: JSON.stringify(ctx.dataCase.ctx)
|
|
117551
117536
|
});
|
|
117552
117537
|
const dataCaseCtx = ctx.dataCase.ctx;
|
|
117553
|
-
if (ctx.datasetSourceType
|
|
117538
|
+
if (ctx.datasetSourceType.startsWith("live") && dataCaseCtx && dataCaseCtx.traceId && dataCaseCtx.spanId) {
|
|
117554
117539
|
scorerSpan.addLink({
|
|
117555
117540
|
context: {
|
|
117556
117541
|
traceId: dataCaseCtx.traceId,
|
|
@@ -117629,10 +117614,23 @@ function calculatePercentile(sortedValues, percentile) {
|
|
|
117629
117614
|
const upperValue = sortedValues[upperIndex];
|
|
117630
117615
|
return lowerValue + (upperValue - lowerValue) * fraction;
|
|
117631
117616
|
}
|
|
117632
|
-
|
|
117617
|
+
function buildSourceType(isLiveData, datasetName, groupName) {
|
|
117618
|
+
const prefix = isLiveData ? "live" : "synthetic";
|
|
117619
|
+
let namespace;
|
|
117620
|
+
if (groupName) {
|
|
117621
|
+
namespace = groupName;
|
|
117622
|
+
} else if (datasetName) {
|
|
117623
|
+
const parts = datasetName.split(".");
|
|
117624
|
+
namespace = parts[parts.length - 1];
|
|
117625
|
+
} else {
|
|
117626
|
+
namespace = isLiveData ? "data" : "data";
|
|
117627
|
+
}
|
|
117628
|
+
return `${prefix}.${namespace}`;
|
|
117629
|
+
}
|
|
117630
|
+
async function resolveDataSet(dataset, datasetMetadata) {
|
|
117633
117631
|
return traced.dataSource(async () => {
|
|
117634
117632
|
const isLiveData = dataset.some((c) => c.ctx?.traceId && c.ctx?.spanId);
|
|
117635
|
-
const sourceType = isLiveData
|
|
117633
|
+
const sourceType = buildSourceType(isLiveData, datasetMetadata?.name, datasetMetadata?.group_name);
|
|
117636
117634
|
const checksum = generateTestCasesChecksum(dataset);
|
|
117637
117635
|
return {
|
|
117638
117636
|
dataset,
|
|
@@ -117959,7 +117957,7 @@ async function runEval(name, evaluator, options) {
|
|
|
117959
117957
|
evaluatorName: name
|
|
117960
117958
|
});
|
|
117961
117959
|
return traced.execution(ctx, async (rootSpan) => {
|
|
117962
|
-
const data = await resolveDataSet(evaluator.data);
|
|
117960
|
+
const data = await resolveDataSet(evaluator.data, evaluator.datasetMetadata);
|
|
117963
117961
|
rootSpan.setAttributes({
|
|
117964
117962
|
[ATTR_DATASET_SOURCE_TYPE]: data.ctx.sourceType,
|
|
117965
117963
|
[ATTR_EVAL_EXECUTION_METADATA_TEST_CASE_COUNT]: data.ctx.caseCount
|
|
@@ -118053,7 +118051,6 @@ function Eval(name, evaluator, options) {
|
|
|
118053
118051
|
}
|
|
118054
118052
|
await resolvedOptions.reporter.reportEval(name, evalResult, { verbose: resolvedOptions.verbose, jsonl: resolvedOptions.jsonl });
|
|
118055
118053
|
}
|
|
118056
|
-
await shutdownTelemetry();
|
|
118057
118054
|
return evalResult;
|
|
118058
118055
|
})();
|
|
118059
118056
|
registerEvaluation(promise3);
|
|
@@ -118064,8 +118061,21 @@ function Eval(name, evaluator, options) {
|
|
|
118064
118061
|
}
|
|
118065
118062
|
async function resolveEvaluatorDefinitions(name, evaluator) {
|
|
118066
118063
|
let resolvedData;
|
|
118064
|
+
let datasetMetadata;
|
|
118067
118065
|
if (typeof evaluator.data === "object" && evaluator.data !== null && !Array.isArray(evaluator.data) && "type" in evaluator.data && (evaluator.data.type === "dataset" || evaluator.data.type === "static")) {
|
|
118068
|
-
const
|
|
118066
|
+
const dataSourceDef = evaluator.data;
|
|
118067
|
+
if (dataSourceDef.type === "dataset") {
|
|
118068
|
+
datasetMetadata = {
|
|
118069
|
+
name: dataSourceDef.config.name,
|
|
118070
|
+
group_name: dataSourceDef.config.group_name
|
|
118071
|
+
};
|
|
118072
|
+
} else if (dataSourceDef.type === "static") {
|
|
118073
|
+
datasetMetadata = {
|
|
118074
|
+
name: "static",
|
|
118075
|
+
group_name: dataSourceDef.group_name
|
|
118076
|
+
};
|
|
118077
|
+
}
|
|
118078
|
+
const dataSourceFn = await resolveDataSource(dataSourceDef);
|
|
118069
118079
|
resolvedData = await dataSourceFn();
|
|
118070
118080
|
} else {
|
|
118071
118081
|
resolvedData = evaluator.data;
|
|
@@ -118095,6 +118105,7 @@ async function resolveEvaluatorDefinitions(name, evaluator) {
|
|
|
118095
118105
|
name,
|
|
118096
118106
|
ctx: evaluator.ctx,
|
|
118097
118107
|
data: resolvedData,
|
|
118108
|
+
datasetMetadata,
|
|
118098
118109
|
task: resolvedTask,
|
|
118099
118110
|
scores: resolvedScores
|
|
118100
118111
|
};
|
|
@@ -118106,6 +118117,31 @@ function interactions(options = {}) {
|
|
|
118106
118117
|
type: "dataset",
|
|
118107
118118
|
config: {
|
|
118108
118119
|
name: "orchestrated.dataset.interactions",
|
|
118120
|
+
group_name: options.group_name,
|
|
118121
|
+
source: {
|
|
118122
|
+
baseUrl: getApiUrl(),
|
|
118123
|
+
path: "/dataset",
|
|
118124
|
+
method: "POST"
|
|
118125
|
+
},
|
|
118126
|
+
options: {
|
|
118127
|
+
tenantId: options.tenantId || state.tenantId,
|
|
118128
|
+
serviceName: options.serviceName || state.serviceName,
|
|
118129
|
+
environment: options.environment || state.environment,
|
|
118130
|
+
month: options.month,
|
|
118131
|
+
startDate: options.startDate,
|
|
118132
|
+
endDate: options.endDate,
|
|
118133
|
+
timeout: options.timeout
|
|
118134
|
+
}
|
|
118135
|
+
}
|
|
118136
|
+
};
|
|
118137
|
+
}
|
|
118138
|
+
function sessions(options = {}) {
|
|
118139
|
+
const state = getState();
|
|
118140
|
+
return {
|
|
118141
|
+
type: "dataset",
|
|
118142
|
+
config: {
|
|
118143
|
+
name: "orchestrated.dataset.sessions",
|
|
118144
|
+
group_name: options.group_name,
|
|
118109
118145
|
source: {
|
|
118110
118146
|
baseUrl: getApiUrl(),
|
|
118111
118147
|
path: "/dataset",
|
|
@@ -118305,6 +118341,7 @@ var legacyReporter = {
|
|
|
118305
118341
|
export {
|
|
118306
118342
|
waitForEvaluations,
|
|
118307
118343
|
traced,
|
|
118344
|
+
sessions,
|
|
118308
118345
|
resetState,
|
|
118309
118346
|
registerEvaluation,
|
|
118310
118347
|
projects,
|
|
@@ -118326,4 +118363,4 @@ export {
|
|
|
118326
118363
|
Behavioral
|
|
118327
118364
|
};
|
|
118328
118365
|
|
|
118329
|
-
//# debugId=
|
|
118366
|
+
//# debugId=00BF794DA7CC11A364756E2164756E21
|