@arizeai/phoenix-client 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/esm/client.d.ts +13 -1
- package/dist/esm/client.d.ts.map +1 -1
- package/dist/esm/client.js +4 -1
- package/dist/esm/client.js.map +1 -1
- package/dist/esm/experiments/instrumention.d.ts +18 -0
- package/dist/esm/experiments/instrumention.d.ts.map +1 -0
- package/dist/esm/experiments/instrumention.js +34 -0
- package/dist/esm/experiments/instrumention.js.map +1 -0
- package/dist/esm/experiments/runExperiment.d.ts +19 -16
- package/dist/esm/experiments/runExperiment.d.ts.map +1 -1
- package/dist/esm/experiments/runExperiment.js +209 -98
- package/dist/esm/experiments/runExperiment.js.map +1 -1
- package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
- package/dist/esm/types/experiments.d.ts +0 -1
- package/dist/esm/types/experiments.d.ts.map +1 -1
- package/dist/esm/utils/ensureString.d.ts +8 -0
- package/dist/esm/utils/ensureString.d.ts.map +1 -0
- package/dist/esm/utils/ensureString.js +14 -0
- package/dist/esm/utils/ensureString.js.map +1 -0
- package/dist/esm/utils/objectAsAttributes.d.ts +3 -0
- package/dist/esm/utils/objectAsAttributes.d.ts.map +1 -0
- package/dist/esm/utils/objectAsAttributes.js +4 -0
- package/dist/esm/utils/objectAsAttributes.js.map +1 -0
- package/dist/src/client.d.ts +13 -1
- package/dist/src/client.d.ts.map +1 -1
- package/dist/src/client.js +1 -1
- package/dist/src/client.js.map +1 -1
- package/dist/src/experiments/instrumention.d.ts +18 -0
- package/dist/src/experiments/instrumention.d.ts.map +1 -0
- package/dist/src/experiments/instrumention.js +38 -0
- package/dist/src/experiments/instrumention.js.map +1 -0
- package/dist/src/experiments/runExperiment.d.ts +19 -16
- package/dist/src/experiments/runExperiment.d.ts.map +1 -1
- package/dist/src/experiments/runExperiment.js +211 -102
- package/dist/src/experiments/runExperiment.js.map +1 -1
- package/dist/src/types/experiments.d.ts +0 -1
- package/dist/src/types/experiments.d.ts.map +1 -1
- package/dist/src/utils/ensureString.d.ts +8 -0
- package/dist/src/utils/ensureString.d.ts.map +1 -0
- package/dist/src/utils/ensureString.js +18 -0
- package/dist/src/utils/ensureString.js.map +1 -0
- package/dist/src/utils/objectAsAttributes.d.ts +3 -0
- package/dist/src/utils/objectAsAttributes.d.ts.map +1 -0
- package/dist/src/utils/objectAsAttributes.js +7 -0
- package/dist/src/utils/objectAsAttributes.js.map +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +9 -1
- package/src/client.ts +4 -1
- package/src/experiments/instrumention.ts +52 -0
- package/src/experiments/runExperiment.ts +246 -108
- package/src/types/experiments.ts +0 -1
- package/src/utils/ensureString.ts +14 -0
- package/src/utils/objectAsAttributes.ts +9 -0
package/dist/esm/client.d.ts
CHANGED
|
@@ -52,7 +52,19 @@ export declare const getMergedOptions: ({ options, getEnvironmentOptions, }?: {
|
|
|
52
52
|
export declare const createClient: (config?: {
|
|
53
53
|
options?: Partial<ClientOptions>;
|
|
54
54
|
getEnvironmentOptions?: () => Partial<ClientOptions>;
|
|
55
|
-
}) =>
|
|
55
|
+
}) => {
|
|
56
|
+
config: ClientOptions;
|
|
57
|
+
GET: import("openapi-fetch").ClientMethod<oapiPathsV1, "get", `${string}/${string}`>;
|
|
58
|
+
PUT: import("openapi-fetch").ClientMethod<oapiPathsV1, "put", `${string}/${string}`>;
|
|
59
|
+
POST: import("openapi-fetch").ClientMethod<oapiPathsV1, "post", `${string}/${string}`>;
|
|
60
|
+
DELETE: import("openapi-fetch").ClientMethod<oapiPathsV1, "delete", `${string}/${string}`>;
|
|
61
|
+
OPTIONS: import("openapi-fetch").ClientMethod<oapiPathsV1, "options", `${string}/${string}`>;
|
|
62
|
+
HEAD: import("openapi-fetch").ClientMethod<oapiPathsV1, "head", `${string}/${string}`>;
|
|
63
|
+
PATCH: import("openapi-fetch").ClientMethod<oapiPathsV1, "patch", `${string}/${string}`>;
|
|
64
|
+
TRACE: import("openapi-fetch").ClientMethod<oapiPathsV1, "trace", `${string}/${string}`>;
|
|
65
|
+
use(...middleware: import("openapi-fetch").Middleware[]): void;
|
|
66
|
+
eject(...middleware: import("openapi-fetch").Middleware[]): void;
|
|
67
|
+
};
|
|
56
68
|
/**
|
|
57
69
|
* Resolved type of the Phoenix client
|
|
58
70
|
*/
|
package/dist/esm/client.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../src/client.ts"],"names":[],"mappings":"AAAA,OAA4B,EAAE,KAAK,aAAa,EAAE,MAAM,eAAe,CAAC;AACxE,OAAO,KAAK,EACV,KAAK,IAAI,WAAW,EACpB,UAAU,IAAI,gBAAgB,EAC9B,UAAU,IAAI,gBAAgB,EAC/B,MAAM,6BAA6B,CAAC;AAMrC,KAAK,OAAO,GAAG,WAAW,CAAC;AAC3B,KAAK,YAAY,GAAG,gBAAgB,CAAC;AACrC,KAAK,YAAY,GAAG,gBAAgB,CAAC;AAErC;;GAEG;AACH,MAAM,MAAM,KAAK,GAAG;IAClB,EAAE,EAAE;QACF,KAAK,EAAE,OAAO,CAAC;QACf,UAAU,EAAE,YAAY,CAAC;QACzB,UAAU,EAAE,YAAY,CAAC;KAC1B,CAAC;CACH,CAAC;AAEF;;;;;;;;GAQG;AACH,eAAO,MAAM,gBAAgB,GAAI,sCAG9B;IACD,OAAO,CAAC,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC;IACjC,qBAAqB,CAAC,EAAE,MAAM,OAAO,CAAC,aAAa,CAAC,CAAC;CACjD,KAAG,aAQR,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,eAAO,MAAM,YAAY,GACvB,SAAQ;IACN,OAAO,CAAC,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC;IACjC,qBAAqB,CAAC,EAAE,MAAM,OAAO,CAAC,aAAa,CAAC,CAAC;CACjD,
|
|
1
|
+
{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../src/client.ts"],"names":[],"mappings":"AAAA,OAA4B,EAAE,KAAK,aAAa,EAAE,MAAM,eAAe,CAAC;AACxE,OAAO,KAAK,EACV,KAAK,IAAI,WAAW,EACpB,UAAU,IAAI,gBAAgB,EAC9B,UAAU,IAAI,gBAAgB,EAC/B,MAAM,6BAA6B,CAAC;AAMrC,KAAK,OAAO,GAAG,WAAW,CAAC;AAC3B,KAAK,YAAY,GAAG,gBAAgB,CAAC;AACrC,KAAK,YAAY,GAAG,gBAAgB,CAAC;AAErC;;GAEG;AACH,MAAM,MAAM,KAAK,GAAG;IAClB,EAAE,EAAE;QACF,KAAK,EAAE,OAAO,CAAC;QACf,UAAU,EAAE,YAAY,CAAC;QACzB,UAAU,EAAE,YAAY,CAAC;KAC1B,CAAC;CACH,CAAC;AAEF;;;;;;;;GAQG;AACH,eAAO,MAAM,gBAAgB,GAAI,sCAG9B;IACD,OAAO,CAAC,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC;IACjC,qBAAqB,CAAC,EAAE,MAAM,OAAO,CAAC,aAAa,CAAC,CAAC;CACjD,KAAG,aAQR,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,eAAO,MAAM,YAAY,GACvB,SAAQ;IACN,OAAO,CAAC,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC;IACjC,qBAAqB,CAAC,EAAE,MAAM,OAAO,CAAC,aAAa,CAAC,CAAC;CACjD;;;;;;;;;;;;CAOP,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,aAAa,GAAG,UAAU,CAAC,OAAO,YAAY,CAAC,CAAC"}
|
package/dist/esm/client.js
CHANGED
|
@@ -43,6 +43,9 @@ export const getMergedOptions = ({ options = {}, getEnvironmentOptions = default
|
|
|
43
43
|
*/
|
|
44
44
|
export const createClient = (config = {}) => {
|
|
45
45
|
const mergedOptions = getMergedOptions(config);
|
|
46
|
-
return
|
|
46
|
+
return {
|
|
47
|
+
...createOpenApiClient(mergedOptions),
|
|
48
|
+
config: mergedOptions,
|
|
49
|
+
};
|
|
47
50
|
};
|
|
48
51
|
//# sourceMappingURL=client.js.map
|
package/dist/esm/client.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"client.js","sourceRoot":"","sources":["../../src/client.ts"],"names":[],"mappings":"AAAA,OAAO,mBAA2C,MAAM,eAAe,CAAC;AAMxE,OAAO,EACL,4BAA4B,EAC5B,wBAAwB,GACzB,MAAM,UAAU,CAAC;AAiBlB;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,EAC/B,OAAO,GAAG,EAAE,EACZ,qBAAqB,GAAG,4BAA4B,MAIlD,EAAE,EAAiB,EAAE;IACvB,MAAM,cAAc,GAAG,wBAAwB,EAAE,CAAC;IAClD,MAAM,kBAAkB,GAAG,qBAAqB,EAAE,CAAC;IACnD,OAAO;QACL,GAAG,cAAc;QACjB,GAAG,kBAAkB;QACrB,GAAG,OAAO;KACX,CAAC;AACJ,CAAC,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,CAAC,MAAM,YAAY,GAAG,CAC1B,SAGI,EAAE,EACN,EAAE;IACF,MAAM,aAAa,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC;IAC/C,OAAO,mBAAmB,CAAU,aAAa,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"client.js","sourceRoot":"","sources":["../../src/client.ts"],"names":[],"mappings":"AAAA,OAAO,mBAA2C,MAAM,eAAe,CAAC;AAMxE,OAAO,EACL,4BAA4B,EAC5B,wBAAwB,GACzB,MAAM,UAAU,CAAC;AAiBlB;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,EAC/B,OAAO,GAAG,EAAE,EACZ,qBAAqB,GAAG,4BAA4B,MAIlD,EAAE,EAAiB,EAAE;IACvB,MAAM,cAAc,GAAG,wBAAwB,EAAE,CAAC;IAClD,MAAM,kBAAkB,GAAG,qBAAqB,EAAE,CAAC;IACnD,OAAO;QACL,GAAG,cAAc;QACjB,GAAG,kBAAkB;QACrB,GAAG,OAAO;KACX,CAAC;AACJ,CAAC,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,CAAC,MAAM,YAAY,GAAG,CAC1B,SAGI,EAAE,EACN,EAAE;IACF,MAAM,aAAa,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC;IAC/C,OAAO;QACL,GAAG,mBAAmB,CAAU,aAAa,CAAC;QAC9C,MAAM,EAAE,aAAa;KACtB,CAAC;AACJ,CAAC,CAAC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
|
|
2
|
+
import { HeadersOptions } from "openapi-fetch";
|
|
3
|
+
/**
|
|
4
|
+
* Creates a provider that exports traces to Phoenix.
|
|
5
|
+
*/
|
|
6
|
+
export declare function createProvider({ projectName, baseUrl, headers, }: {
|
|
7
|
+
projectName: string;
|
|
8
|
+
headers: HeadersOptions;
|
|
9
|
+
/**
|
|
10
|
+
* The base URL of the Phoenix. Doesn't include the /v1/traces path.
|
|
11
|
+
*/
|
|
12
|
+
baseUrl: string;
|
|
13
|
+
}): NodeTracerProvider;
|
|
14
|
+
/**
|
|
15
|
+
* For dry runs we create a provider that doesn't export traces.
|
|
16
|
+
*/
|
|
17
|
+
export declare function createNoOpProvider(): NodeTracerProvider;
|
|
18
|
+
//# sourceMappingURL=instrumention.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"instrumention.d.ts","sourceRoot":"","sources":["../../../src/experiments/instrumention.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AAEnE,OAAO,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AAE/C;;GAEG;AACH,wBAAgB,cAAc,CAAC,EAC7B,WAAW,EACX,OAAO,EACP,OAAO,GACR,EAAE;IACD,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,cAAc,CAAC;IACxB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;CACjB,sBAoBA;AAED;;GAEG;AACH,wBAAgB,kBAAkB,uBAIjC"}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { diag, DiagConsoleLogger, DiagLogLevel } from "@opentelemetry/api";
|
|
2
|
+
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-proto";
|
|
3
|
+
import { resourceFromAttributes } from "@opentelemetry/resources";
|
|
4
|
+
import { SimpleSpanProcessor } from "@opentelemetry/sdk-trace-base";
|
|
5
|
+
import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
|
|
6
|
+
import { SEMRESATTRS_PROJECT_NAME } from "@arizeai/openinference-semantic-conventions";
|
|
7
|
+
/**
|
|
8
|
+
* Creates a provider that exports traces to Phoenix.
|
|
9
|
+
*/
|
|
10
|
+
export function createProvider({ projectName, baseUrl, headers, }) {
|
|
11
|
+
diag.setLogger(new DiagConsoleLogger(), DiagLogLevel.ERROR);
|
|
12
|
+
const provider = new NodeTracerProvider({
|
|
13
|
+
resource: resourceFromAttributes({
|
|
14
|
+
[SEMRESATTRS_PROJECT_NAME]: projectName,
|
|
15
|
+
}),
|
|
16
|
+
spanProcessors: [
|
|
17
|
+
new SimpleSpanProcessor(new OTLPTraceExporter({
|
|
18
|
+
url: `${baseUrl}/v1/traces`,
|
|
19
|
+
headers: Array.isArray(headers)
|
|
20
|
+
? Object.fromEntries(headers)
|
|
21
|
+
: headers,
|
|
22
|
+
})),
|
|
23
|
+
],
|
|
24
|
+
});
|
|
25
|
+
return provider;
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* For dry runs we create a provider that doesn't export traces.
|
|
29
|
+
*/
|
|
30
|
+
export function createNoOpProvider() {
|
|
31
|
+
const provider = new NodeTracerProvider({});
|
|
32
|
+
return provider;
|
|
33
|
+
}
|
|
34
|
+
//# sourceMappingURL=instrumention.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"instrumention.js","sourceRoot":"","sources":["../../../src/experiments/instrumention.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,iBAAiB,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAC3E,OAAO,EAAE,iBAAiB,EAAE,MAAM,0CAA0C,CAAC;AAC7E,OAAO,EAAE,sBAAsB,EAAE,MAAM,0BAA0B,CAAC;AAClE,OAAO,EAAE,mBAAmB,EAAE,MAAM,+BAA+B,CAAC;AACpE,OAAO,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AACnE,OAAO,EAAE,wBAAwB,EAAE,MAAM,6CAA6C,CAAC;AAGvF;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,EAC7B,WAAW,EACX,OAAO,EACP,OAAO,GAQR;IACC,IAAI,CAAC,SAAS,CAAC,IAAI,iBAAiB,EAAE,EAAE,YAAY,CAAC,KAAK,CAAC,CAAC;IAE5D,MAAM,QAAQ,GAAG,IAAI,kBAAkB,CAAC;QACtC,QAAQ,EAAE,sBAAsB,CAAC;YAC/B,CAAC,wBAAwB,CAAC,EAAE,WAAW;SACxC,CAAC;QACF,cAAc,EAAE;YACd,IAAI,mBAAmB,CACrB,IAAI,iBAAiB,CAAC;gBACpB,GAAG,EAAE,GAAG,OAAO,YAAY;gBAC3B,OAAO,EAAE,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC;oBAC7B,CAAC,CAAC,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC;oBAC7B,CAAC,CAAC,OAAO;aACZ,CAAC,CACH;SACF;KACF,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB;IAChC,MAAM,QAAQ,GAAG,IAAI,kBAAkB,CAAC,EAAE,CAAC,CAAC;IAE5C,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -35,10 +35,6 @@ export type RunExperimentParams = ClientFn & {
|
|
|
35
35
|
* The evaluators to use
|
|
36
36
|
*/
|
|
37
37
|
evaluators?: Evaluator[];
|
|
38
|
-
/**
|
|
39
|
-
* The project under which the experiment task traces are recorded
|
|
40
|
-
*/
|
|
41
|
-
projectName?: string;
|
|
42
38
|
/**
|
|
43
39
|
* The logger to use
|
|
44
40
|
*/
|
|
@@ -58,7 +54,23 @@ export type RunExperimentParams = ClientFn & {
|
|
|
58
54
|
dryRun?: number | boolean;
|
|
59
55
|
};
|
|
60
56
|
/**
|
|
61
|
-
*
|
|
57
|
+
* Runs an experiment using a given set of dataset of examples.
|
|
58
|
+
*
|
|
59
|
+
* An experiment is a user-defined task that runs on each example in a dataset. The results from
|
|
60
|
+
* each experiment can be evaluated using any number of evaluators to measure the behavior of the
|
|
61
|
+
* task. The experiment and evaluation results are stored in the Phoenix database for comparison
|
|
62
|
+
* and analysis.
|
|
63
|
+
*
|
|
64
|
+
* A `task` is either a sync or async function that returns a JSON serializable
|
|
65
|
+
* output. If the `task` is a function of one argument then that argument will be bound to the
|
|
66
|
+
* `input` field of the dataset example. Alternatively, the `task` can be a function of any
|
|
67
|
+
* combination of specific argument names that will be bound to special values:
|
|
68
|
+
*
|
|
69
|
+
* - `input`: The input field of the dataset example
|
|
70
|
+
* - `expected`: The expected or reference output of the dataset example
|
|
71
|
+
* - `reference`: An alias for `expected`
|
|
72
|
+
* - `metadata`: Metadata associated with the dataset example
|
|
73
|
+
* - `example`: The dataset `Example` object with all associated fields
|
|
62
74
|
*
|
|
63
75
|
* @example
|
|
64
76
|
* ```ts
|
|
@@ -68,14 +80,12 @@ export type RunExperimentParams = ClientFn & {
|
|
|
68
80
|
* dataset: "my-dataset",
|
|
69
81
|
* task: async (example) => example.input,
|
|
70
82
|
* evaluators: [
|
|
71
|
-
* asEvaluator("my-evaluator", "CODE", async (params) => params.output),
|
|
83
|
+
* asEvaluator({ name: "my-evaluator", kind: "CODE", evaluate: async (params) => params.output }),
|
|
72
84
|
* ],
|
|
73
85
|
* });
|
|
74
86
|
* ```
|
|
75
|
-
*
|
|
76
|
-
* @experimental This feature is not complete, and will change in the future.
|
|
77
87
|
*/
|
|
78
|
-
export declare function runExperiment({ experimentName
|
|
88
|
+
export declare function runExperiment({ experimentName, experimentDescription, experimentMetadata, client: _client, dataset: _dataset, task, evaluators, logger, record, concurrency, dryRun, }: RunExperimentParams): Promise<RanExperiment>;
|
|
79
89
|
/**
|
|
80
90
|
* Evaluate an experiment.
|
|
81
91
|
*
|
|
@@ -116,11 +126,4 @@ export declare function asEvaluator({ name, kind, evaluate, }: {
|
|
|
116
126
|
kind: AnnotatorKind;
|
|
117
127
|
evaluate: Evaluator["evaluate"];
|
|
118
128
|
}): Evaluator;
|
|
119
|
-
/**
|
|
120
|
-
* Generate a unique id.
|
|
121
|
-
*
|
|
122
|
-
* @deprecated Use id generated by phoenix instead.
|
|
123
|
-
* @returns A unique id.
|
|
124
|
-
*/
|
|
125
|
-
export declare function id(): string;
|
|
126
129
|
//# sourceMappingURL=runExperiment.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"runExperiment.d.ts","sourceRoot":"","sources":["../../../src/experiments/runExperiment.ts"],"names":[],"mappings":"AAEA,OAAO,EAAgB,KAAK,aAAa,EAAE,MAAM,WAAW,CAAC;AAC7D,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,KAAK,EACV,SAAS,
|
|
1
|
+
{"version":3,"file":"runExperiment.d.ts","sourceRoot":"","sources":["../../../src/experiments/runExperiment.ts"],"names":[],"mappings":"AAEA,OAAO,EAAgB,KAAK,aAAa,EAAE,MAAM,WAAW,CAAC;AAC7D,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,KAAK,EACV,SAAS,EAIT,cAAc,EACd,aAAa,EACd,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,KAAK,MAAM,EAAE,MAAM,iBAAiB,CAAC;AAI9C,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAYrD;;;;GAIG;AACH,MAAM,MAAM,mBAAmB,GAAG,QAAQ,GAAG;IAC3C;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;OAEG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B;;OAEG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC7C;;OAEG;IACH,OAAO,EAAE,OAAO,GAAG,MAAM,GAAG,OAAO,EAAE,CAAC;IACtC;;OAEG;IACH,IAAI,EAAE,cAAc,CAAC;IACrB;;OAEG;IACH,UAAU,CAAC,EAAE,SAAS,EAAE,CAAC;IACzB;;OAEG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB;;OAEG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;OAGG;IACH,MAAM,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC;CAC3B,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,wBAAsB,aAAa,CAAC,EAClC,cAAc,EACd,qBAAqB,EACrB,kBAAkB,EAClB,MAAM,EAAE,OAAO,EACf,OAAO,EAAE,QAAQ,EACjB,IAAI,EACJ,UAAU,EACV,MAAgB,EAChB,MAAa,EACb,WAAe,EACf,MAAc,GACf,EAAE,mBAAmB,GAAG,OAAO,CAAC,aAAa,CAAC,CAoH9C;AAwHD;;;;GAIG;AACH,wBAAsB,kBAAkB,CAAC,EACvC,UAAU,EACV,UAAU,EACV,MAAM,EAAE,OAAO,EACf,MAAM,EACN,WAAe,EACf,MAAc,GACf,EAAE;IACD;;;QAGI;IACJ,UAAU,EAAE,aAAa,CAAC;IAC1B,4BAA4B;IAC5B,UAAU,EAAE,SAAS,EAAE,CAAC;IACxB,wBAAwB;IACxB,MAAM,CAAC,EAAE,aAAa,CAAC;IACvB,wBAAwB;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,kDAAkD;IAClD,WAAW,EAAE,MAAM,CAAC;IACpB;;;;SAIK;IACL,MAAM,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;CAC3B,GAAG,OAAO,CAAC,aAAa,CAAC,CAgKzB;AA8DD;;;;;;;;GAQG;AACH,wBAAgB,WAAW,CAAC,EAC1B,IAAI,EACJ,IAAI,EACJ,QAAQ,GACT,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,aAAa,CAAC;IACpB,QAAQ,EAAE,SAAS,CAAC,UAAU,CAAC,CAAC;CACjC,GAAG,SAAS,CAMZ"}
|
|
@@ -4,8 +4,29 @@ import { createClient } from "../client.js";
|
|
|
4
4
|
import { getDatasetBySelector } from "../utils/getDatasetBySelector.js";
|
|
5
5
|
import { pluralize } from "../utils/pluralize.js";
|
|
6
6
|
import { promisifyResult } from "../utils/promisifyResult.js";
|
|
7
|
+
import { createProvider, createNoOpProvider } from "./instrumention.js";
|
|
8
|
+
import { SpanStatusCode } from "@opentelemetry/api";
|
|
9
|
+
import { MimeType, OpenInferenceSpanKind, SemanticConventions, } from "@arizeai/openinference-semantic-conventions";
|
|
10
|
+
import { ensureString } from "../utils/ensureString.js";
|
|
11
|
+
import { objectAsAttributes } from "../utils/objectAsAttributes.js";
|
|
7
12
|
/**
|
|
8
|
-
*
|
|
13
|
+
* Runs an experiment using a given set of dataset of examples.
|
|
14
|
+
*
|
|
15
|
+
* An experiment is a user-defined task that runs on each example in a dataset. The results from
|
|
16
|
+
* each experiment can be evaluated using any number of evaluators to measure the behavior of the
|
|
17
|
+
* task. The experiment and evaluation results are stored in the Phoenix database for comparison
|
|
18
|
+
* and analysis.
|
|
19
|
+
*
|
|
20
|
+
* A `task` is either a sync or async function that returns a JSON serializable
|
|
21
|
+
* output. If the `task` is a function of one argument then that argument will be bound to the
|
|
22
|
+
* `input` field of the dataset example. Alternatively, the `task` can be a function of any
|
|
23
|
+
* combination of specific argument names that will be bound to special values:
|
|
24
|
+
*
|
|
25
|
+
* - `input`: The input field of the dataset example
|
|
26
|
+
* - `expected`: The expected or reference output of the dataset example
|
|
27
|
+
* - `reference`: An alias for `expected`
|
|
28
|
+
* - `metadata`: Metadata associated with the dataset example
|
|
29
|
+
* - `example`: The dataset `Example` object with all associated fields
|
|
9
30
|
*
|
|
10
31
|
* @example
|
|
11
32
|
* ```ts
|
|
@@ -15,14 +36,13 @@ import { promisifyResult } from "../utils/promisifyResult.js";
|
|
|
15
36
|
* dataset: "my-dataset",
|
|
16
37
|
* task: async (example) => example.input,
|
|
17
38
|
* evaluators: [
|
|
18
|
-
* asEvaluator("my-evaluator", "CODE", async (params) => params.output),
|
|
39
|
+
* asEvaluator({ name: "my-evaluator", kind: "CODE", evaluate: async (params) => params.output }),
|
|
19
40
|
* ],
|
|
20
41
|
* });
|
|
21
42
|
* ```
|
|
22
|
-
*
|
|
23
|
-
* @experimental This feature is not complete, and will change in the future.
|
|
24
43
|
*/
|
|
25
|
-
export async function runExperiment({ experimentName
|
|
44
|
+
export async function runExperiment({ experimentName, experimentDescription, experimentMetadata, client: _client, dataset: _dataset, task, evaluators, logger = console, record = true, concurrency = 5, dryRun = false, }) {
|
|
45
|
+
let provider;
|
|
26
46
|
const isDryRun = typeof dryRun === "number" || dryRun === true;
|
|
27
47
|
const client = _client ?? createClient();
|
|
28
48
|
const dataset = await getDatasetBySelector({ dataset: _dataset, client });
|
|
@@ -31,18 +51,18 @@ export async function runExperiment({ experimentName: _experimentName, experimen
|
|
|
31
51
|
const nExamples = typeof dryRun === "number"
|
|
32
52
|
? Math.max(dryRun, dataset.examples.length)
|
|
33
53
|
: dataset.examples.length;
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
};
|
|
54
|
+
let projectName = `${dataset.name}-exp-${new Date().toISOString()}`;
|
|
55
|
+
// initialize the tracer into scope
|
|
56
|
+
let taskTracer;
|
|
38
57
|
let experiment;
|
|
39
58
|
if (isDryRun) {
|
|
40
59
|
experiment = {
|
|
41
|
-
id:
|
|
60
|
+
id: localId(),
|
|
42
61
|
datasetId: dataset.id,
|
|
43
62
|
datasetVersionId: dataset.versionId,
|
|
44
63
|
projectName,
|
|
45
64
|
};
|
|
65
|
+
taskTracer = createNoOpProvider().getTracer("no-op");
|
|
46
66
|
}
|
|
47
67
|
else {
|
|
48
68
|
const experimentResponse = await client
|
|
@@ -61,12 +81,22 @@ export async function runExperiment({ experimentName: _experimentName, experimen
|
|
|
61
81
|
})
|
|
62
82
|
.then((res) => res.data?.data);
|
|
63
83
|
invariant(experimentResponse, `Failed to create experiment`);
|
|
84
|
+
projectName = experimentResponse.project_name ?? projectName;
|
|
64
85
|
experiment = {
|
|
65
86
|
id: experimentResponse.id,
|
|
66
87
|
datasetId: dataset.id,
|
|
67
88
|
datasetVersionId: dataset.versionId,
|
|
68
89
|
projectName,
|
|
69
90
|
};
|
|
91
|
+
// Initialize the tracer, now that we have a project name
|
|
92
|
+
const baseUrl = client.config.baseUrl;
|
|
93
|
+
invariant(baseUrl, "Phoenix base URL not found. Please set PHOENIX_HOST or set baseUrl on the client.");
|
|
94
|
+
provider = createProvider({
|
|
95
|
+
projectName,
|
|
96
|
+
baseUrl,
|
|
97
|
+
headers: client.config.headers ?? {},
|
|
98
|
+
});
|
|
99
|
+
taskTracer = provider.getTracer(projectName);
|
|
70
100
|
}
|
|
71
101
|
if (!record) {
|
|
72
102
|
logger.info(`🔧 Running experiment in readonly mode. Results will not be recorded.`);
|
|
@@ -85,13 +115,17 @@ export async function runExperiment({ experimentName: _experimentName, experimen
|
|
|
85
115
|
concurrency,
|
|
86
116
|
isDryRun,
|
|
87
117
|
nExamples,
|
|
118
|
+
tracer: taskTracer,
|
|
88
119
|
});
|
|
89
120
|
logger.info(`✅ Task runs completed`);
|
|
90
121
|
const ranExperiment = {
|
|
91
122
|
...experiment,
|
|
92
|
-
params: experimentParams,
|
|
93
123
|
runs,
|
|
94
124
|
};
|
|
125
|
+
// Shut down the provider so that the experiments run
|
|
126
|
+
if (provider) {
|
|
127
|
+
await provider.shutdown?.();
|
|
128
|
+
}
|
|
95
129
|
const { evaluationRuns } = await evaluateExperiment({
|
|
96
130
|
experiment: ranExperiment,
|
|
97
131
|
evaluators: evaluators ?? [],
|
|
@@ -107,60 +141,78 @@ export async function runExperiment({ experimentName: _experimentName, experimen
|
|
|
107
141
|
/**
|
|
108
142
|
* Run a task against n examples in a dataset.
|
|
109
143
|
*/
|
|
110
|
-
function runTask({ client, experimentId, task, dataset, onComplete, logger, concurrency = 5, isDryRun, nExamples, }) {
|
|
144
|
+
function runTask({ client, experimentId, task, dataset, onComplete, logger, concurrency = 5, isDryRun, nExamples, tracer, }) {
|
|
111
145
|
logger.info(`🔧 Running task "${task.name}" on dataset "${dataset.id}"`);
|
|
112
146
|
const run = async (example) => {
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
147
|
+
return tracer.startActiveSpan(`Task: ${task.name}`, async (span) => {
|
|
148
|
+
logger.info(`🔧 Running task "${task.name}" on example "${example.id} of dataset "${dataset.id}"`);
|
|
149
|
+
const traceId = span.spanContext().traceId;
|
|
150
|
+
const thisRun = {
|
|
151
|
+
id: localId(), // initialized with local id, will be replaced with server-assigned id when dry run is false
|
|
152
|
+
traceId,
|
|
153
|
+
experimentId,
|
|
154
|
+
datasetExampleId: example.id,
|
|
155
|
+
startTime: new Date(),
|
|
156
|
+
endTime: new Date(), // will get replaced with actual end time
|
|
157
|
+
output: null,
|
|
158
|
+
error: null,
|
|
159
|
+
};
|
|
160
|
+
try {
|
|
161
|
+
const taskOutput = await promisifyResult(task(example));
|
|
162
|
+
thisRun.output =
|
|
163
|
+
typeof taskOutput === "string"
|
|
164
|
+
? taskOutput
|
|
165
|
+
: JSON.stringify(taskOutput);
|
|
166
|
+
}
|
|
167
|
+
catch (error) {
|
|
168
|
+
thisRun.error =
|
|
169
|
+
error instanceof Error ? error.message : "Unknown error";
|
|
170
|
+
span.setStatus({ code: SpanStatusCode.ERROR });
|
|
171
|
+
}
|
|
172
|
+
thisRun.endTime = new Date();
|
|
173
|
+
if (!isDryRun) {
|
|
174
|
+
// Log the run to the server
|
|
175
|
+
const res = await client.POST("/v1/experiments/{experiment_id}/runs", {
|
|
176
|
+
params: {
|
|
177
|
+
path: {
|
|
178
|
+
experiment_id: experimentId,
|
|
179
|
+
},
|
|
143
180
|
},
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
}
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
181
|
+
body: {
|
|
182
|
+
dataset_example_id: example.id,
|
|
183
|
+
output: thisRun.output,
|
|
184
|
+
repetition_number: 0,
|
|
185
|
+
start_time: thisRun.startTime.toISOString(),
|
|
186
|
+
end_time: thisRun.endTime.toISOString(),
|
|
187
|
+
trace_id: thisRun.traceId,
|
|
188
|
+
error: thisRun.error,
|
|
189
|
+
},
|
|
190
|
+
});
|
|
191
|
+
// replace the local run id with the server-assigned id
|
|
192
|
+
thisRun.id = res.data?.data.id ?? thisRun.id;
|
|
193
|
+
const inputMimeType = typeof example.input === "string" ? MimeType.TEXT : MimeType.JSON;
|
|
194
|
+
const outputMimeType = typeof thisRun.output === "string" ? MimeType.TEXT : MimeType.JSON;
|
|
195
|
+
span.setStatus({ code: SpanStatusCode.OK });
|
|
196
|
+
span.setAttributes({
|
|
197
|
+
[SemanticConventions.OPENINFERENCE_SPAN_KIND]: OpenInferenceSpanKind.CHAIN,
|
|
198
|
+
[SemanticConventions.INPUT_MIME_TYPE]: inputMimeType,
|
|
199
|
+
[SemanticConventions.INPUT_VALUE]: ensureString(example.input),
|
|
200
|
+
[SemanticConventions.OUTPUT_MIME_TYPE]: outputMimeType,
|
|
201
|
+
[SemanticConventions.OUTPUT_VALUE]: ensureString(thisRun.output),
|
|
202
|
+
});
|
|
203
|
+
}
|
|
204
|
+
span?.end();
|
|
205
|
+
onComplete(thisRun);
|
|
206
|
+
return thisRun;
|
|
207
|
+
});
|
|
160
208
|
};
|
|
161
209
|
const q = queue(run, concurrency);
|
|
162
210
|
const examplesToUse = dataset.examples.slice(0, nExamples);
|
|
163
|
-
examplesToUse.forEach((example) => q.push(example)
|
|
211
|
+
examplesToUse.forEach((example) => q.push(example, (err) => {
|
|
212
|
+
if (err) {
|
|
213
|
+
logger.error(`Error running task "${task.name}" on example "${example.id}": ${err}`);
|
|
214
|
+
}
|
|
215
|
+
}));
|
|
164
216
|
return q.drain();
|
|
165
217
|
}
|
|
166
218
|
/**
|
|
@@ -170,10 +222,26 @@ function runTask({ client, experimentId, task, dataset, onComplete, logger, conc
|
|
|
170
222
|
*/
|
|
171
223
|
export async function evaluateExperiment({ experiment, evaluators, client: _client, logger, concurrency = 5, dryRun = false, }) {
|
|
172
224
|
const isDryRun = typeof dryRun === "number" || dryRun === true;
|
|
225
|
+
const client = _client ?? createClient();
|
|
226
|
+
const baseUrl = client.config.baseUrl;
|
|
227
|
+
invariant(baseUrl, "Phoenix base URL not found. Please set PHOENIX_HOST or set baseUrl on the client.");
|
|
228
|
+
let provider;
|
|
229
|
+
if (!isDryRun) {
|
|
230
|
+
provider = createProvider({
|
|
231
|
+
projectName: "evaluators",
|
|
232
|
+
baseUrl,
|
|
233
|
+
headers: client.config.headers ?? {},
|
|
234
|
+
});
|
|
235
|
+
}
|
|
236
|
+
else {
|
|
237
|
+
provider = createNoOpProvider();
|
|
238
|
+
}
|
|
239
|
+
const tracer = isDryRun
|
|
240
|
+
? provider.getTracer("no-op")
|
|
241
|
+
: provider.getTracer("evaluators");
|
|
173
242
|
const nRuns = typeof dryRun === "number"
|
|
174
243
|
? Math.max(dryRun, Object.keys(experiment.runs).length)
|
|
175
244
|
: Object.keys(experiment.runs).length;
|
|
176
|
-
const client = _client ?? createClient();
|
|
177
245
|
const dataset = await getDatasetBySelector({
|
|
178
246
|
dataset: experiment.datasetId,
|
|
179
247
|
client,
|
|
@@ -204,35 +272,78 @@ export async function evaluateExperiment({ experiment, evaluators, client: _clie
|
|
|
204
272
|
run,
|
|
205
273
|
})));
|
|
206
274
|
const evaluatorsQueue = queue(async (evaluatorAndRun) => {
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
logger.info(`📝 Logging evaluation ${evalResult.id}`);
|
|
215
|
-
// Log the evaluation to the server
|
|
216
|
-
// We log this without awaiting (e.g. best effort)
|
|
217
|
-
client.POST("/v1/experiment_evaluations", {
|
|
218
|
-
body: {
|
|
219
|
-
experiment_run_id: evaluatorAndRun.run.id,
|
|
220
|
-
name: evaluatorAndRun.evaluator.name,
|
|
221
|
-
annotator_kind: evaluatorAndRun.evaluator.kind,
|
|
222
|
-
start_time: evalResult.startTime.toISOString(),
|
|
223
|
-
end_time: evalResult.endTime.toISOString(),
|
|
224
|
-
result: {
|
|
225
|
-
...evalResult.result,
|
|
226
|
-
},
|
|
227
|
-
error: evalResult.error,
|
|
228
|
-
trace_id: evalResult.traceId,
|
|
229
|
-
},
|
|
275
|
+
return tracer.startActiveSpan(`Evaluation: ${evaluatorAndRun.evaluator.name}`, async (span) => {
|
|
276
|
+
const evalResult = await runEvaluator({
|
|
277
|
+
evaluator: evaluatorAndRun.evaluator,
|
|
278
|
+
run: evaluatorAndRun.run,
|
|
279
|
+
exampleCache: examplesById,
|
|
280
|
+
onComplete: onEvaluationComplete,
|
|
281
|
+
logger,
|
|
230
282
|
});
|
|
231
|
-
|
|
283
|
+
span.setAttributes({
|
|
284
|
+
[SemanticConventions.OPENINFERENCE_SPAN_KIND]: OpenInferenceSpanKind.EVALUATOR,
|
|
285
|
+
[SemanticConventions.INPUT_MIME_TYPE]: MimeType.JSON,
|
|
286
|
+
[SemanticConventions.INPUT_VALUE]: JSON.stringify({
|
|
287
|
+
input: examplesById[evaluatorAndRun.run.datasetExampleId]?.input,
|
|
288
|
+
output: evaluatorAndRun.run.output,
|
|
289
|
+
expected: examplesById[evaluatorAndRun.run.datasetExampleId]?.output,
|
|
290
|
+
metadata: examplesById[evaluatorAndRun.run.datasetExampleId]?.metadata,
|
|
291
|
+
}),
|
|
292
|
+
[SemanticConventions.OUTPUT_MIME_TYPE]: MimeType.JSON,
|
|
293
|
+
[SemanticConventions.OUTPUT_VALUE]: ensureString(evalResult.result),
|
|
294
|
+
});
|
|
295
|
+
if (evalResult.error) {
|
|
296
|
+
span.setStatus({
|
|
297
|
+
code: SpanStatusCode.ERROR,
|
|
298
|
+
message: evalResult.error,
|
|
299
|
+
});
|
|
300
|
+
}
|
|
301
|
+
else {
|
|
302
|
+
span.setStatus({ code: SpanStatusCode.OK });
|
|
303
|
+
}
|
|
304
|
+
if (evalResult.result) {
|
|
305
|
+
span.setAttributes(objectAsAttributes(evalResult.result));
|
|
306
|
+
}
|
|
307
|
+
evalResult.traceId = span.spanContext().traceId;
|
|
308
|
+
if (!isDryRun) {
|
|
309
|
+
// Log the evaluation to the server
|
|
310
|
+
// We log this without awaiting (e.g. best effort)
|
|
311
|
+
client.POST("/v1/experiment_evaluations", {
|
|
312
|
+
body: {
|
|
313
|
+
experiment_run_id: evaluatorAndRun.run.id,
|
|
314
|
+
name: evaluatorAndRun.evaluator.name,
|
|
315
|
+
annotator_kind: evaluatorAndRun.evaluator.kind,
|
|
316
|
+
start_time: evalResult.startTime.toISOString(),
|
|
317
|
+
end_time: evalResult.endTime.toISOString(),
|
|
318
|
+
result: {
|
|
319
|
+
...evalResult.result,
|
|
320
|
+
},
|
|
321
|
+
error: evalResult.error,
|
|
322
|
+
trace_id: evalResult.traceId,
|
|
323
|
+
},
|
|
324
|
+
});
|
|
325
|
+
}
|
|
326
|
+
span.end();
|
|
327
|
+
return evalResult;
|
|
328
|
+
});
|
|
232
329
|
}, concurrency);
|
|
233
|
-
evaluatorsAndRuns.
|
|
330
|
+
if (!evaluatorsAndRuns.length) {
|
|
331
|
+
logger.info(`⛔ No evaluators to run`);
|
|
332
|
+
return {
|
|
333
|
+
...experiment,
|
|
334
|
+
evaluationRuns: [],
|
|
335
|
+
};
|
|
336
|
+
}
|
|
337
|
+
evaluatorsAndRuns.forEach((evaluatorAndRun) => evaluatorsQueue.push(evaluatorAndRun, (err) => {
|
|
338
|
+
if (err) {
|
|
339
|
+
logger.error(`❌ Error running evaluator "${evaluatorAndRun.evaluator.name}" on run "${evaluatorAndRun.run.id}": ${err}`);
|
|
340
|
+
}
|
|
341
|
+
}));
|
|
234
342
|
await evaluatorsQueue.drain();
|
|
235
343
|
logger.info(`✅ Evaluation runs completed`);
|
|
344
|
+
if (provider) {
|
|
345
|
+
await provider.shutdown?.();
|
|
346
|
+
}
|
|
236
347
|
return {
|
|
237
348
|
...experiment,
|
|
238
349
|
evaluationRuns: Object.values(evaluationRuns),
|
|
@@ -243,20 +354,21 @@ export async function evaluateExperiment({ experiment, evaluators, client: _clie
|
|
|
243
354
|
*
|
|
244
355
|
* @experimental This feature is not complete, and will change in the future.
|
|
245
356
|
*/
|
|
246
|
-
async function runEvaluator({ evaluator, run, exampleCache, onComplete, }) {
|
|
357
|
+
async function runEvaluator({ evaluator, run, exampleCache, onComplete, logger, }) {
|
|
247
358
|
const example = exampleCache[run.datasetExampleId];
|
|
248
359
|
invariant(example, `Example "${run.datasetExampleId}" not found`);
|
|
249
360
|
const evaluate = async () => {
|
|
361
|
+
logger.info(`🧠 Evaluating run "${run.id}" with evaluator "${evaluator.name}"`);
|
|
250
362
|
const thisEval = {
|
|
251
|
-
id:
|
|
252
|
-
traceId: null,
|
|
363
|
+
id: localId(),
|
|
364
|
+
traceId: null,
|
|
253
365
|
experimentRunId: run.id,
|
|
254
366
|
startTime: new Date(),
|
|
255
367
|
endTime: new Date(), // will get replaced with actual end time
|
|
256
368
|
name: evaluator.name,
|
|
257
369
|
result: null,
|
|
258
370
|
error: null,
|
|
259
|
-
annotatorKind:
|
|
371
|
+
annotatorKind: evaluator.kind,
|
|
260
372
|
};
|
|
261
373
|
try {
|
|
262
374
|
const result = await evaluator.evaluate({
|
|
@@ -266,9 +378,11 @@ async function runEvaluator({ evaluator, run, exampleCache, onComplete, }) {
|
|
|
266
378
|
metadata: example.metadata,
|
|
267
379
|
});
|
|
268
380
|
thisEval.result = result;
|
|
381
|
+
logger.info(`✅ Evaluator "${evaluator.name}" on run "${run.id}" completed`);
|
|
269
382
|
}
|
|
270
383
|
catch (error) {
|
|
271
384
|
thisEval.error = error instanceof Error ? error.message : "Unknown error";
|
|
385
|
+
logger.error(`❌ Evaluator "${evaluator.name}" on run "${run.id}" failed: ${thisEval.error}`);
|
|
272
386
|
}
|
|
273
387
|
thisEval.endTime = new Date();
|
|
274
388
|
onComplete(thisEval);
|
|
@@ -292,17 +406,14 @@ export function asEvaluator({ name, kind, evaluate, }) {
|
|
|
292
406
|
evaluate,
|
|
293
407
|
};
|
|
294
408
|
}
|
|
295
|
-
let
|
|
409
|
+
let _localIdIndex = 1000;
|
|
296
410
|
/**
|
|
297
|
-
* Generate a
|
|
411
|
+
* Generate a local id.
|
|
298
412
|
*
|
|
299
|
-
* @
|
|
300
|
-
* @returns A unique id.
|
|
413
|
+
* @returns A semi-unique id.
|
|
301
414
|
*/
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
return _id.toString();
|
|
306
|
-
})();
|
|
415
|
+
function localId() {
|
|
416
|
+
_localIdIndex++;
|
|
417
|
+
return `local_${_localIdIndex}`;
|
|
307
418
|
}
|
|
308
419
|
//# sourceMappingURL=runExperiment.js.map
|