npm - @arizeai/phoenix-client - Versions diffs - 1.2.0 → 1.3.0 - Mend

@arizeai/phoenix-client 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

package/dist/esm/client.d.ts +13 -1
package/dist/esm/client.d.ts.map +1 -1
package/dist/esm/client.js +4 -1
package/dist/esm/client.js.map +1 -1
package/dist/esm/experiments/instrumention.d.ts +18 -0
package/dist/esm/experiments/instrumention.d.ts.map +1 -0
package/dist/esm/experiments/instrumention.js +34 -0
package/dist/esm/experiments/instrumention.js.map +1 -0
package/dist/esm/experiments/runExperiment.d.ts +19 -16
package/dist/esm/experiments/runExperiment.d.ts.map +1 -1
package/dist/esm/experiments/runExperiment.js +209 -98
package/dist/esm/experiments/runExperiment.js.map +1 -1
package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
package/dist/esm/types/experiments.d.ts +0 -1
package/dist/esm/types/experiments.d.ts.map +1 -1
package/dist/esm/utils/ensureString.d.ts +8 -0
package/dist/esm/utils/ensureString.d.ts.map +1 -0
package/dist/esm/utils/ensureString.js +14 -0
package/dist/esm/utils/ensureString.js.map +1 -0
package/dist/esm/utils/objectAsAttributes.d.ts +3 -0
package/dist/esm/utils/objectAsAttributes.d.ts.map +1 -0
package/dist/esm/utils/objectAsAttributes.js +4 -0
package/dist/esm/utils/objectAsAttributes.js.map +1 -0
package/dist/src/client.d.ts +13 -1
package/dist/src/client.d.ts.map +1 -1
package/dist/src/client.js +1 -1
package/dist/src/client.js.map +1 -1
package/dist/src/experiments/instrumention.d.ts +18 -0
package/dist/src/experiments/instrumention.d.ts.map +1 -0
package/dist/src/experiments/instrumention.js +38 -0
package/dist/src/experiments/instrumention.js.map +1 -0
package/dist/src/experiments/runExperiment.d.ts +19 -16
package/dist/src/experiments/runExperiment.d.ts.map +1 -1
package/dist/src/experiments/runExperiment.js +211 -102
package/dist/src/experiments/runExperiment.js.map +1 -1
package/dist/src/types/experiments.d.ts +0 -1
package/dist/src/types/experiments.d.ts.map +1 -1
package/dist/src/utils/ensureString.d.ts +8 -0
package/dist/src/utils/ensureString.d.ts.map +1 -0
package/dist/src/utils/ensureString.js +18 -0
package/dist/src/utils/ensureString.js.map +1 -0
package/dist/src/utils/objectAsAttributes.d.ts +3 -0
package/dist/src/utils/objectAsAttributes.d.ts.map +1 -0
package/dist/src/utils/objectAsAttributes.js +7 -0
package/dist/src/utils/objectAsAttributes.js.map +1 -0
package/dist/tsconfig.tsbuildinfo +1 -1
package/package.json +9 -1
package/src/client.ts +4 -1
package/src/experiments/instrumention.ts +52 -0
package/src/experiments/runExperiment.ts +246 -108
package/src/types/experiments.ts +0 -1
package/src/utils/ensureString.ts +14 -0
package/src/utils/objectAsAttributes.ts +9 -0

package/dist/esm/client.d.ts CHANGED Viewed

@@ -52,7 +52,19 @@ export declare const getMergedOptions: ({ options, getEnvironmentOptions, }?: {
 export declare const createClient: (config?: {
     options?: Partial<ClientOptions>;
     getEnvironmentOptions?: () => Partial<ClientOptions>;
-}) => import("openapi-fetch").Client<oapiPathsV1, `${string}/${string}`>;
+}) => {
+    config: ClientOptions;
+    GET: import("openapi-fetch").ClientMethod<oapiPathsV1, "get", `${string}/${string}`>;
+    PUT: import("openapi-fetch").ClientMethod<oapiPathsV1, "put", `${string}/${string}`>;
+    POST: import("openapi-fetch").ClientMethod<oapiPathsV1, "post", `${string}/${string}`>;
+    DELETE: import("openapi-fetch").ClientMethod<oapiPathsV1, "delete", `${string}/${string}`>;
+    OPTIONS: import("openapi-fetch").ClientMethod<oapiPathsV1, "options", `${string}/${string}`>;
+    HEAD: import("openapi-fetch").ClientMethod<oapiPathsV1, "head", `${string}/${string}`>;
+    PATCH: import("openapi-fetch").ClientMethod<oapiPathsV1, "patch", `${string}/${string}`>;
+    TRACE: import("openapi-fetch").ClientMethod<oapiPathsV1, "trace", `${string}/${string}`>;
+    use(...middleware: import("openapi-fetch").Middleware[]): void;
+    eject(...middleware: import("openapi-fetch").Middleware[]): void;
+};
 /**
  * Resolved type of the Phoenix client
  */

package/dist/esm/client.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../src/client.ts"],"names":[],"mappings":"AAAA,OAA4B,EAAE,KAAK,aAAa,EAAE,MAAM,eAAe,CAAC;AACxE,OAAO,KAAK,EACV,KAAK,IAAI,WAAW,EACpB,UAAU,IAAI,gBAAgB,EAC9B,UAAU,IAAI,gBAAgB,EAC/B,MAAM,6BAA6B,CAAC;AAMrC,KAAK,OAAO,GAAG,WAAW,CAAC;AAC3B,KAAK,YAAY,GAAG,gBAAgB,CAAC;AACrC,KAAK,YAAY,GAAG,gBAAgB,CAAC;AAErC;;GAEG;AACH,MAAM,MAAM,KAAK,GAAG;IAClB,EAAE,EAAE;QACF,KAAK,EAAE,OAAO,CAAC;QACf,UAAU,EAAE,YAAY,CAAC;QACzB,UAAU,EAAE,YAAY,CAAC;KAC1B,CAAC;CACH,CAAC;AAEF;;;;;;;;GAQG;AACH,eAAO,MAAM,gBAAgB,GAAI,sCAG9B;IACD,OAAO,CAAC,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC;IACjC,qBAAqB,CAAC,EAAE,MAAM,OAAO,CAAC,aAAa,CAAC,CAAC;CACjD,KAAG,aAQR,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,eAAO,MAAM,YAAY,GACvB,SAAQ;IACN,OAAO,CAAC,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC;IACjC,qBAAqB,CAAC,EAAE,MAAM,OAAO,CAAC,aAAa,CAAC,CAAC;CACjD,~~uEAIP,~~CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,aAAa,GAAG,UAAU,CAAC,OAAO,YAAY,CAAC,CAAC"}
1	+ {"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../src/client.ts"],"names":[],"mappings":"AAAA,OAA4B,EAAE,KAAK,aAAa,EAAE,MAAM,eAAe,CAAC;AACxE,OAAO,KAAK,EACV,KAAK,IAAI,WAAW,EACpB,UAAU,IAAI,gBAAgB,EAC9B,UAAU,IAAI,gBAAgB,EAC/B,MAAM,6BAA6B,CAAC;AAMrC,KAAK,OAAO,GAAG,WAAW,CAAC;AAC3B,KAAK,YAAY,GAAG,gBAAgB,CAAC;AACrC,KAAK,YAAY,GAAG,gBAAgB,CAAC;AAErC;;GAEG;AACH,MAAM,MAAM,KAAK,GAAG;IAClB,EAAE,EAAE;QACF,KAAK,EAAE,OAAO,CAAC;QACf,UAAU,EAAE,YAAY,CAAC;QACzB,UAAU,EAAE,YAAY,CAAC;KAC1B,CAAC;CACH,CAAC;AAEF;;;;;;;;GAQG;AACH,eAAO,MAAM,gBAAgB,GAAI,sCAG9B;IACD,OAAO,CAAC,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC;IACjC,qBAAqB,CAAC,EAAE,MAAM,OAAO,CAAC,aAAa,CAAC,CAAC;CACjD,KAAG,aAQR,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,eAAO,MAAM,YAAY,GACvB,SAAQ;IACN,OAAO,CAAC,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC;IACjC,qBAAqB,CAAC,EAAE,MAAM,OAAO,CAAC,aAAa,CAAC,CAAC;CACjD;;;;;;;;;;;;CAOP,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,aAAa,GAAG,UAAU,CAAC,OAAO,YAAY,CAAC,CAAC"}

package/dist/esm/client.js CHANGED Viewed

@@ -43,6 +43,9 @@ export const getMergedOptions = ({ options = {}, getEnvironmentOptions = default
  */
 export const createClient = (config = {}) => {
     const mergedOptions = getMergedOptions(config);
-    return createOpenApiClient(mergedOptions);
+    return {
+        ...createOpenApiClient(mergedOptions),
+        config: mergedOptions,
+    };
 };
 //# sourceMappingURL=client.js.map

package/dist/esm/client.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"client.js","sourceRoot":"","sources":["../../src/client.ts"],"names":[],"mappings":"AAAA,OAAO,mBAA2C,MAAM,eAAe,CAAC;AAMxE,OAAO,EACL,4BAA4B,EAC5B,wBAAwB,GACzB,MAAM,UAAU,CAAC;AAiBlB;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,EAC/B,OAAO,GAAG,EAAE,EACZ,qBAAqB,GAAG,4BAA4B,MAIlD,EAAE,EAAiB,EAAE;IACvB,MAAM,cAAc,GAAG,wBAAwB,EAAE,CAAC;IAClD,MAAM,kBAAkB,GAAG,qBAAqB,EAAE,CAAC;IACnD,OAAO;QACL,GAAG,cAAc;QACjB,GAAG,kBAAkB;QACrB,GAAG,OAAO;KACX,CAAC;AACJ,CAAC,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,CAAC,MAAM,YAAY,GAAG,CAC1B,SAGI,EAAE,EACN,EAAE;IACF,MAAM,aAAa,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC;IAC/C,OAAO,mBAAmB,CAAU,aAAa,CAAC,CAAC;~~AACrD~~,CAAC,CAAC"}
1	+ {"version":3,"file":"client.js","sourceRoot":"","sources":["../../src/client.ts"],"names":[],"mappings":"AAAA,OAAO,mBAA2C,MAAM,eAAe,CAAC;AAMxE,OAAO,EACL,4BAA4B,EAC5B,wBAAwB,GACzB,MAAM,UAAU,CAAC;AAiBlB;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,EAC/B,OAAO,GAAG,EAAE,EACZ,qBAAqB,GAAG,4BAA4B,MAIlD,EAAE,EAAiB,EAAE;IACvB,MAAM,cAAc,GAAG,wBAAwB,EAAE,CAAC;IAClD,MAAM,kBAAkB,GAAG,qBAAqB,EAAE,CAAC;IACnD,OAAO;QACL,GAAG,cAAc;QACjB,GAAG,kBAAkB;QACrB,GAAG,OAAO;KACX,CAAC;AACJ,CAAC,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,CAAC,MAAM,YAAY,GAAG,CAC1B,SAGI,EAAE,EACN,EAAE;IACF,MAAM,aAAa,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC;IAC/C,OAAO;QACL,GAAG,mBAAmB,CAAU,aAAa,CAAC;QAC9C,MAAM,EAAE,aAAa;KACtB,CAAC;AACJ,CAAC,CAAC"}

package/dist/esm/experiments/instrumention.d.ts ADDED Viewed

@@ -0,0 +1,18 @@
+import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
+import { HeadersOptions } from "openapi-fetch";
+/**
+ * Creates a provider that exports traces to Phoenix.
+ */
+export declare function createProvider({ projectName, baseUrl, headers, }: {
+    projectName: string;
+    headers: HeadersOptions;
+    /**
+     * The base URL of the Phoenix server. Does not include the /v1/traces path.
+     */
+    baseUrl: string;
+}): NodeTracerProvider;
+/**
+ * For dry runs we create a provider that doesn't export traces.
+ */
+export declare function createNoOpProvider(): NodeTracerProvider;
+//# sourceMappingURL=instrumention.d.ts.map

package/dist/esm/experiments/instrumention.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"instrumention.d.ts","sourceRoot":"","sources":["../../../src/experiments/instrumention.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AAEnE,OAAO,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AAE/C;;GAEG;AACH,wBAAgB,cAAc,CAAC,EAC7B,WAAW,EACX,OAAO,EACP,OAAO,GACR,EAAE;IACD,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,cAAc,CAAC;IACxB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;CACjB,sBAoBA;AAED;;GAEG;AACH,wBAAgB,kBAAkB,uBAIjC"}

package/dist/esm/experiments/instrumention.js ADDED Viewed

@@ -0,0 +1,34 @@
+import { diag, DiagConsoleLogger, DiagLogLevel } from "@opentelemetry/api";
+import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-proto";
+import { resourceFromAttributes } from "@opentelemetry/resources";
+import { SimpleSpanProcessor } from "@opentelemetry/sdk-trace-base";
+import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
+import { SEMRESATTRS_PROJECT_NAME } from "@arizeai/openinference-semantic-conventions";
+/**
+ * Creates a provider that exports traces to Phoenix.
+ */
+export function createProvider({ projectName, baseUrl, headers, }) {
+    diag.setLogger(new DiagConsoleLogger(), DiagLogLevel.ERROR);
+    const provider = new NodeTracerProvider({
+        resource: resourceFromAttributes({
+            [SEMRESATTRS_PROJECT_NAME]: projectName,
+        }),
+        spanProcessors: [
+            new SimpleSpanProcessor(new OTLPTraceExporter({
+                url: `${baseUrl}/v1/traces`,
+                headers: Array.isArray(headers)
+                    ? Object.fromEntries(headers)
+                    : headers,
+            })),
+        ],
+    });
+    return provider;
+}
+/**
+ * For dry runs we create a provider that doesn't export traces.
+ */
+export function createNoOpProvider() {
+    const provider = new NodeTracerProvider({});
+    return provider;
+}
+//# sourceMappingURL=instrumention.js.map

package/dist/esm/experiments/instrumention.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"instrumention.js","sourceRoot":"","sources":["../../../src/experiments/instrumention.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,iBAAiB,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAC3E,OAAO,EAAE,iBAAiB,EAAE,MAAM,0CAA0C,CAAC;AAC7E,OAAO,EAAE,sBAAsB,EAAE,MAAM,0BAA0B,CAAC;AAClE,OAAO,EAAE,mBAAmB,EAAE,MAAM,+BAA+B,CAAC;AACpE,OAAO,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AACnE,OAAO,EAAE,wBAAwB,EAAE,MAAM,6CAA6C,CAAC;AAGvF;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,EAC7B,WAAW,EACX,OAAO,EACP,OAAO,GAQR;IACC,IAAI,CAAC,SAAS,CAAC,IAAI,iBAAiB,EAAE,EAAE,YAAY,CAAC,KAAK,CAAC,CAAC;IAE5D,MAAM,QAAQ,GAAG,IAAI,kBAAkB,CAAC;QACtC,QAAQ,EAAE,sBAAsB,CAAC;YAC/B,CAAC,wBAAwB,CAAC,EAAE,WAAW;SACxC,CAAC;QACF,cAAc,EAAE;YACd,IAAI,mBAAmB,CACrB,IAAI,iBAAiB,CAAC;gBACpB,GAAG,EAAE,GAAG,OAAO,YAAY;gBAC3B,OAAO,EAAE,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC;oBAC7B,CAAC,CAAC,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC;oBAC7B,CAAC,CAAC,OAAO;aACZ,CAAC,CACH;SACF;KACF,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB;IAChC,MAAM,QAAQ,GAAG,IAAI,kBAAkB,CAAC,EAAE,CAAC,CAAC;IAE5C,OAAO,QAAQ,CAAC;AAClB,CAAC"}

package/dist/esm/experiments/runExperiment.d.ts CHANGED Viewed

@@ -35,10 +35,6 @@ export type RunExperimentParams = ClientFn & {
      * The evaluators to use
      */
     evaluators?: Evaluator[];
-    /**
-     * The project under which the experiment task traces are recorded
-     */
-    projectName?: string;
     /**
      * The logger to use
      */
@@ -58,7 +54,23 @@ export type RunExperimentParams = ClientFn & {
     dryRun?: number | boolean;
 };
 /**
- * Run an experiment.
+ * Runs an experiment using a given dataset of examples.
+ *
+ *   An experiment is a user-defined task that runs on each example in a dataset. The results from
+ *   each experiment can be evaluated using any number of evaluators to measure the behavior of the
+ *   task. The experiment and evaluation results are stored in the Phoenix database for comparison
+ *   and analysis.
+ *
+ *   A `task` is either a sync or async function that returns a JSON serializable
+ *   output. If the `task` is a function of one argument then that argument will be bound to the
+ *   `input` field of the dataset example. Alternatively, the `task` can be a function of any
+ *   combination of specific argument names that will be bound to special values:
+ *
+ *   - `input`: The input field of the dataset example
+ *   - `expected`: The expected or reference output of the dataset example
+ *   - `reference`: An alias for `expected`
+ *   - `metadata`: Metadata associated with the dataset example
+ *   - `example`: The dataset `Example` object with all associated fields
  *
  * @example
  * ```ts
@@ -68,14 +80,12 @@ export type RunExperimentParams = ClientFn & {
  *   dataset: "my-dataset",
  *   task: async (example) => example.input,
  *   evaluators: [
- *     asEvaluator("my-evaluator", "CODE", async (params) => params.output),
+ *     asEvaluator({ name: "my-evaluator", kind: "CODE", evaluate: async (params) => params.output }),
  *   ],
  * });
  * ```
- *
- * @experimental This feature is not complete, and will change in the future.
  */
-export declare function runExperiment({ experimentName: _experimentName, experimentDescription, experimentMetadata, client: _client, dataset: _dataset, task, evaluators, projectName, logger, record, concurrency, dryRun, }: RunExperimentParams): Promise<RanExperiment>;
+export declare function runExperiment({ experimentName, experimentDescription, experimentMetadata, client: _client, dataset: _dataset, task, evaluators, logger, record, concurrency, dryRun, }: RunExperimentParams): Promise<RanExperiment>;
 /**
  * Evaluate an experiment.
  *
@@ -116,11 +126,4 @@ export declare function asEvaluator({ name, kind, evaluate, }: {
     kind: AnnotatorKind;
     evaluate: Evaluator["evaluate"];
 }): Evaluator;
-/**
- * Generate a unique id.
- *
- * @deprecated Use id generated by phoenix instead.
- * @returns A unique id.
- */
-export declare function id(): string;
 //# sourceMappingURL=runExperiment.d.ts.map

package/dist/esm/experiments/runExperiment.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"runExperiment.d.ts","sourceRoot":"","sources":["../../../src/experiments/runExperiment.ts"],"names":[],"mappings":"AAEA,OAAO,EAAgB,KAAK,aAAa,EAAE,MAAM,WAAW,CAAC;AAC7D,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,KAAK,EACV,SAAS,~~EAKT~~,cAAc,EACd,aAAa,EACd,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,KAAK,MAAM,EAAE,MAAM,iBAAiB,CAAC;AAI9C,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;~~AAErD~~;;;;GAIG;AACH,MAAM,MAAM,mBAAmB,GAAG,QAAQ,GAAG;IAC3C;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;OAEG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B;;OAEG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC7C;;OAEG;IACH,OAAO,EAAE,OAAO,GAAG,MAAM,GAAG,OAAO,EAAE,CAAC;IACtC;;OAEG;IACH,IAAI,EAAE,cAAc,CAAC;IACrB;;OAEG;IACH,UAAU,CAAC,EAAE,SAAS,EAAE,CAAC;IACzB;;OAEG;IACH,~~WAAW,CAAC,EAAE,~~MAAM,CAAC~~;IACrB;;OAEG;IACH~~,~~MAAM,CAAC,~~EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB;;OAEG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;OAGG;IACH,MAAM,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC;CAC3B,CAAC;AAEF~~;;;;;;;;;;;;;;;;;GAiBG~~;AACH,wBAAsB,aAAa,CAAC,EAClC,cAAc,~~EAAE~~,~~eAAe,EAC/B,~~qBAAqB,EACrB,kBAAkB,EAClB,MAAM,EAAE,OAAO,EACf,OAAO,EAAE,QAAQ,EACjB,IAAI,EACJ,UAAU,EACV,~~WAAuB,EACvB,~~MAAgB,EAChB,MAAa,EACb,WAAe,EACf,MAAc,GACf,EAAE,mBAAmB,GAAG,OAAO,CAAC,aAAa,CAAC,~~CAmG9C~~;~~AA4FD~~;;;;GAIG;AACH,wBAAsB,kBAAkB,CAAC,EACvC,UAAU,EACV,UAAU,EACV,MAAM,EAAE,OAAO,EACf,MAAM,EACN,WAAe,EACf,MAAc,GACf,EAAE;IACD;;;QAGI;IACJ,UAAU,EAAE,aAAa,CAAC;IAC1B,4BAA4B;IAC5B,UAAU,EAAE,SAAS,EAAE,CAAC;IACxB,wBAAwB;IACxB,MAAM,CAAC,EAAE,aAAa,CAAC;IACvB,wBAAwB;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,kDAAkD;IAClD,WAAW,EAAE,MAAM,CAAC;IACpB;;;;SAIK;IACL,MAAM,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;CAC3B,GAAG,OAAO,CAAC,aAAa,CAAC,~~CA6FzB~~;~~AAmDD~~;;;;;;;;GAQG;AACH,wBAAgB,WAAW,CAAC,EAC1B,IAAI,EACJ,IAAI,EACJ,QAAQ,GACT,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,aAAa,CAAC;IACpB,QAAQ,EAAE,SAAS,CAAC,UAAU,CAAC,CAAC;CACjC,GAAG,SAAS,CAMZ~~;AAID;;;;;GAKG;AACH,wBAAgB,EAAE,IAAI,MAAM,CAK3B~~"}
1	+ {"version":3,"file":"runExperiment.d.ts","sourceRoot":"","sources":["../../../src/experiments/runExperiment.ts"],"names":[],"mappings":"AAEA,OAAO,EAAgB,KAAK,aAAa,EAAE,MAAM,WAAW,CAAC;AAC7D,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,KAAK,EACV,SAAS,EAIT,cAAc,EACd,aAAa,EACd,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,KAAK,MAAM,EAAE,MAAM,iBAAiB,CAAC;AAI9C,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAYrD;;;;GAIG;AACH,MAAM,MAAM,mBAAmB,GAAG,QAAQ,GAAG;IAC3C;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;OAEG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B;;OAEG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC7C;;OAEG;IACH,OAAO,EAAE,OAAO,GAAG,MAAM,GAAG,OAAO,EAAE,CAAC;IACtC;;OAEG;IACH,IAAI,EAAE,cAAc,CAAC;IACrB;;OAEG;IACH,UAAU,CAAC,EAAE,SAAS,EAAE,CAAC;IACzB;;OAEG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB;;OAEG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;OAGG;IACH,MAAM,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC;CAC3B,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,wBAAsB,aAAa,CAAC,EAClC,cAAc,EACd,qBAAqB,EACrB,kBAAkB,EAClB,MAAM,EAAE,OAAO,EACf,OAAO,EAAE,QAAQ,EACjB,IAAI,EACJ,UAAU,EACV,MAAgB,EAChB,MAAa,EACb,WAAe,EACf,MAAc,GACf,EAAE,mBAAmB,GAAG,OAAO,CAAC,aAAa,CAAC,CAoH9C;AAwHD;;;;GAIG;AACH,wBAAsB,kBAAkB,CAAC,EACvC,UAAU,EACV,UAAU,EACV,MAAM,EAAE,OAAO,EACf,MAAM,EACN,WAAe,EACf,MAAc,GACf,EAAE;IACD;;;QAGI;IACJ,UAAU,EAAE,aAAa,CAAC;IAC1B,4BAA4B;IAC5B,UAAU,EAAE,SAAS,EAAE,CAAC;IACxB,wBAAwB;IACxB,MAAM,CAAC,EAAE,aAAa,CAAC;IACvB,wBAAwB;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,kDAAkD;IAClD,WAAW,EAAE,MAAM,CAAC;IACpB;;;;SAIK;IACL,MAAM,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;CAC3B,GAAG,OAAO,CAAC,aAAa,CAAC,CAgKzB;AA8DD;;;;;;;;GAQG;AACH,wBAAgB,WAAW,CAAC,EAC1B,IAAI,EACJ,IAAI,EACJ,QAAQ,GACT,EAAE;IACD,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,aAAa,CAAC;IACpB,QAAQ,EAAE,SAAS,CAAC,UAAU,CAAC,CAAC;CACjC,GAAG,SAAS,CAMZ"}

package/dist/esm/experiments/runExperiment.js CHANGED Viewed

@@ -4,8 +4,29 @@ import { createClient } from "../client.js";
 import { getDatasetBySelector } from "../utils/getDatasetBySelector.js";
 import { pluralize } from "../utils/pluralize.js";
 import { promisifyResult } from "../utils/promisifyResult.js";
+import { createProvider, createNoOpProvider } from "./instrumention.js";
+import { SpanStatusCode } from "@opentelemetry/api";
+import { MimeType, OpenInferenceSpanKind, SemanticConventions, } from "@arizeai/openinference-semantic-conventions";
+import { ensureString } from "../utils/ensureString.js";
+import { objectAsAttributes } from "../utils/objectAsAttributes.js";
 /**
- * Run an experiment.
+ * Runs an experiment using a given dataset of examples.
+ *
+ *   An experiment is a user-defined task that runs on each example in a dataset. The results from
+ *   each experiment can be evaluated using any number of evaluators to measure the behavior of the
+ *   task. The experiment and evaluation results are stored in the Phoenix database for comparison
+ *   and analysis.
+ *
+ *   A `task` is either a sync or async function that returns a JSON serializable
+ *   output. If the `task` is a function of one argument then that argument will be bound to the
+ *   `input` field of the dataset example. Alternatively, the `task` can be a function of any
+ *   combination of specific argument names that will be bound to special values:
+ *
+ *   - `input`: The input field of the dataset example
+ *   - `expected`: The expected or reference output of the dataset example
+ *   - `reference`: An alias for `expected`
+ *   - `metadata`: Metadata associated with the dataset example
+ *   - `example`: The dataset `Example` object with all associated fields
  *
  * @example
  * ```ts
@@ -15,14 +36,13 @@ import { promisifyResult } from "../utils/promisifyResult.js";
  *   dataset: "my-dataset",
  *   task: async (example) => example.input,
  *   evaluators: [
- *     asEvaluator("my-evaluator", "CODE", async (params) => params.output),
+ *     asEvaluator({ name: "my-evaluator", kind: "CODE", evaluate: async (params) => params.output }),
  *   ],
  * });
  * ```
- *
- * @experimental This feature is not complete, and will change in the future.
  */
-export async function runExperiment({ experimentName: _experimentName, experimentDescription, experimentMetadata, client: _client, dataset: _dataset, task, evaluators, projectName = "default", logger = console, record = true, concurrency = 5, dryRun = false, }) {
+export async function runExperiment({ experimentName, experimentDescription, experimentMetadata, client: _client, dataset: _dataset, task, evaluators, logger = console, record = true, concurrency = 5, dryRun = false, }) {
+    let provider;
     const isDryRun = typeof dryRun === "number" || dryRun === true;
     const client = _client ?? createClient();
     const dataset = await getDatasetBySelector({ dataset: _dataset, client });
@@ -31,18 +51,18 @@ export async function runExperiment({ experimentName: _experimentName, experimen
     const nExamples = typeof dryRun === "number"
         ? Math.max(dryRun, dataset.examples.length)
         : dataset.examples.length;
-    const experimentName = _experimentName ?? `${dataset.name}-${new Date().toISOString()}`;
-    const experimentParams = {
-        nExamples,
-    };
+    let projectName = `${dataset.name}-exp-${new Date().toISOString()}`;
+    // initialize the tracer into scope
+    let taskTracer;
     let experiment;
     if (isDryRun) {
         experiment = {
-            id: id(),
+            id: localId(),
             datasetId: dataset.id,
             datasetVersionId: dataset.versionId,
             projectName,
         };
+        taskTracer = createNoOpProvider().getTracer("no-op");
     }
     else {
         const experimentResponse = await client
@@ -61,12 +81,22 @@ export async function runExperiment({ experimentName: _experimentName, experimen
         })
             .then((res) => res.data?.data);
         invariant(experimentResponse, `Failed to create experiment`);
+        projectName = experimentResponse.project_name ?? projectName;
         experiment = {
             id: experimentResponse.id,
             datasetId: dataset.id,
             datasetVersionId: dataset.versionId,
             projectName,
         };
+        // Initialize the tracer, now that we have a project name
+        const baseUrl = client.config.baseUrl;
+        invariant(baseUrl, "Phoenix base URL not found. Please set PHOENIX_HOST or set baseUrl on the client.");
+        provider = createProvider({
+            projectName,
+            baseUrl,
+            headers: client.config.headers ?? {},
+        });
+        taskTracer = provider.getTracer(projectName);
     }
     if (!record) {
         logger.info(`🔧 Running experiment in readonly mode. Results will not be recorded.`);
@@ -85,13 +115,17 @@ export async function runExperiment({ experimentName: _experimentName, experimen
         concurrency,
         isDryRun,
         nExamples,
+        tracer: taskTracer,
     });
     logger.info(`✅ Task runs completed`);
     const ranExperiment = {
         ...experiment,
-        params: experimentParams,
         runs,
     };
+    // Shut down the task provider now that the task runs are complete, before evaluation starts
+    if (provider) {
+        await provider.shutdown?.();
+    }
     const { evaluationRuns } = await evaluateExperiment({
         experiment: ranExperiment,
         evaluators: evaluators ?? [],
@@ -107,60 +141,78 @@ export async function runExperiment({ experimentName: _experimentName, experimen
 /**
  * Run a task against n examples in a dataset.
  */
-function runTask({ client, experimentId, task, dataset, onComplete, logger, concurrency = 5, isDryRun, nExamples, }) {
+function runTask({ client, experimentId, task, dataset, onComplete, logger, concurrency = 5, isDryRun, nExamples, tracer, }) {
     logger.info(`🔧 Running task "${task.name}" on dataset "${dataset.id}"`);
     const run = async (example) => {
-        logger.info(`🔧 Running task "${task.name}" on example "${example.id} of dataset "${dataset.id}"`);
-        const thisRun = {
-            id: id(),
-            traceId: null, // TODO: fill this in once we trace experiments
-            experimentId,
-            datasetExampleId: example.id,
-            startTime: new Date(),
-            endTime: new Date(), // will get replaced with actual end time
-            output: null,
-            error: null,
-        };
-        try {
-            const taskOutput = await promisifyResult(task(example));
-            // TODO: why doesn't run output type match task output type?
-            thisRun.output =
-                typeof taskOutput === "string"
-                    ? taskOutput
-                    : JSON.stringify(taskOutput);
-        }
-        catch (error) {
-            thisRun.error = error instanceof Error ? error.message : "Unknown error";
-        }
-        thisRun.endTime = new Date();
-        if (!isDryRun) {
-            // Log the run to the server
-            // We log this without awaiting (e.g. best effort)
-            const res = await client.POST("/v1/experiments/{experiment_id}/runs", {
-                params: {
-                    path: {
-                        experiment_id: experimentId,
+        return tracer.startActiveSpan(`Task: ${task.name}`, async (span) => {
+            logger.info(`🔧 Running task "${task.name}" on example "${example.id} of dataset "${dataset.id}"`);
+            const traceId = span.spanContext().traceId;
+            const thisRun = {
+                id: localId(), // initialized with local id, will be replaced with server-assigned id when dry run is false
+                traceId,
+                experimentId,
+                datasetExampleId: example.id,
+                startTime: new Date(),
+                endTime: new Date(), // will get replaced with actual end time
+                output: null,
+                error: null,
+            };
+            try {
+                const taskOutput = await promisifyResult(task(example));
+                thisRun.output =
+                    typeof taskOutput === "string"
+                        ? taskOutput
+                        : JSON.stringify(taskOutput);
+            }
+            catch (error) {
+                thisRun.error =
+                    error instanceof Error ? error.message : "Unknown error";
+                span.setStatus({ code: SpanStatusCode.ERROR });
+            }
+            thisRun.endTime = new Date();
+            if (!isDryRun) {
+                // Log the run to the server
+                const res = await client.POST("/v1/experiments/{experiment_id}/runs", {
+                    params: {
+                        path: {
+                            experiment_id: experimentId,
+                        },
                     },
-                },
-                body: {
-                    dataset_example_id: example.id,
-                    output: thisRun.output,
-                    repetition_number: 0,
-                    start_time: thisRun.startTime.toISOString(),
-                    end_time: thisRun.endTime.toISOString(),
-                    trace_id: thisRun.traceId,
-                    error: thisRun.error,
-                },
-            });
-            // replace the local run id with the server-assigned id
-            thisRun.id = res.data?.data.id ?? thisRun.id;
-        }
-        onComplete(thisRun);
-        return thisRun;
+                    body: {
+                        dataset_example_id: example.id,
+                        output: thisRun.output,
+                        repetition_number: 0,
+                        start_time: thisRun.startTime.toISOString(),
+                        end_time: thisRun.endTime.toISOString(),
+                        trace_id: thisRun.traceId,
+                        error: thisRun.error,
+                    },
+                });
+                // replace the local run id with the server-assigned id
+                thisRun.id = res.data?.data.id ?? thisRun.id;
+                const inputMimeType = typeof example.input === "string" ? MimeType.TEXT : MimeType.JSON;
+                const outputMimeType = typeof thisRun.output === "string" ? MimeType.TEXT : MimeType.JSON;
+                span.setStatus({ code: SpanStatusCode.OK });
+                span.setAttributes({
+                    [SemanticConventions.OPENINFERENCE_SPAN_KIND]: OpenInferenceSpanKind.CHAIN,
+                    [SemanticConventions.INPUT_MIME_TYPE]: inputMimeType,
+                    [SemanticConventions.INPUT_VALUE]: ensureString(example.input),
+                    [SemanticConventions.OUTPUT_MIME_TYPE]: outputMimeType,
+                    [SemanticConventions.OUTPUT_VALUE]: ensureString(thisRun.output),
+                });
+            }
+            span?.end();
+            onComplete(thisRun);
+            return thisRun;
+        });
     };
     const q = queue(run, concurrency);
     const examplesToUse = dataset.examples.slice(0, nExamples);
-    examplesToUse.forEach((example) => q.push(example));
+    examplesToUse.forEach((example) => q.push(example, (err) => {
+        if (err) {
+            logger.error(`Error running task "${task.name}" on example "${example.id}": ${err}`);
+        }
+    }));
     return q.drain();
 }
 /**
@@ -170,10 +222,26 @@ function runTask({ client, experimentId, task, dataset, onComplete, logger, conc
  */
 export async function evaluateExperiment({ experiment, evaluators, client: _client, logger, concurrency = 5, dryRun = false, }) {
     const isDryRun = typeof dryRun === "number" || dryRun === true;
+    const client = _client ?? createClient();
+    const baseUrl = client.config.baseUrl;
+    invariant(baseUrl, "Phoenix base URL not found. Please set PHOENIX_HOST or set baseUrl on the client.");
+    let provider;
+    if (!isDryRun) {
+        provider = createProvider({
+            projectName: "evaluators",
+            baseUrl,
+            headers: client.config.headers ?? {},
+        });
+    }
+    else {
+        provider = createNoOpProvider();
+    }
+    const tracer = isDryRun
+        ? provider.getTracer("no-op")
+        : provider.getTracer("evaluators");
     const nRuns = typeof dryRun === "number"
         ? Math.max(dryRun, Object.keys(experiment.runs).length)
         : Object.keys(experiment.runs).length;
-    const client = _client ?? createClient();
     const dataset = await getDatasetBySelector({
         dataset: experiment.datasetId,
         client,
@@ -204,35 +272,78 @@ export async function evaluateExperiment({ experiment, evaluators, client: _clie
         run,
     })));
     const evaluatorsQueue = queue(async (evaluatorAndRun) => {
-        const evalResult = await runEvaluator({
-            evaluator: evaluatorAndRun.evaluator,
-            run: evaluatorAndRun.run,
-            exampleCache: examplesById,
-            onComplete: onEvaluationComplete,
-        });
-        if (!isDryRun) {
-            logger.info(`📝 Logging evaluation ${evalResult.id}`);
-            // Log the evaluation to the server
-            // We log this without awaiting (e.g. best effort)
-            client.POST("/v1/experiment_evaluations", {
-                body: {
-                    experiment_run_id: evaluatorAndRun.run.id,
-                    name: evaluatorAndRun.evaluator.name,
-                    annotator_kind: evaluatorAndRun.evaluator.kind,
-                    start_time: evalResult.startTime.toISOString(),
-                    end_time: evalResult.endTime.toISOString(),
-                    result: {
-                        ...evalResult.result,
-                    },
-                    error: evalResult.error,
-                    trace_id: evalResult.traceId,
-                },
+        return tracer.startActiveSpan(`Evaluation: ${evaluatorAndRun.evaluator.name}`, async (span) => {
+            const evalResult = await runEvaluator({
+                evaluator: evaluatorAndRun.evaluator,
+                run: evaluatorAndRun.run,
+                exampleCache: examplesById,
+                onComplete: onEvaluationComplete,
+                logger,
             });
-        }
+            span.setAttributes({
+                [SemanticConventions.OPENINFERENCE_SPAN_KIND]: OpenInferenceSpanKind.EVALUATOR,
+                [SemanticConventions.INPUT_MIME_TYPE]: MimeType.JSON,
+                [SemanticConventions.INPUT_VALUE]: JSON.stringify({
+                    input: examplesById[evaluatorAndRun.run.datasetExampleId]?.input,
+                    output: evaluatorAndRun.run.output,
+                    expected: examplesById[evaluatorAndRun.run.datasetExampleId]?.output,
+                    metadata: examplesById[evaluatorAndRun.run.datasetExampleId]?.metadata,
+                }),
+                [SemanticConventions.OUTPUT_MIME_TYPE]: MimeType.JSON,
+                [SemanticConventions.OUTPUT_VALUE]: ensureString(evalResult.result),
+            });
+            if (evalResult.error) {
+                span.setStatus({
+                    code: SpanStatusCode.ERROR,
+                    message: evalResult.error,
+                });
+            }
+            else {
+                span.setStatus({ code: SpanStatusCode.OK });
+            }
+            if (evalResult.result) {
+                span.setAttributes(objectAsAttributes(evalResult.result));
+            }
+            evalResult.traceId = span.spanContext().traceId;
+            if (!isDryRun) {
+                // Log the evaluation to the server
+                // We log this without awaiting (e.g. best effort)
+                client.POST("/v1/experiment_evaluations", {
+                    body: {
+                        experiment_run_id: evaluatorAndRun.run.id,
+                        name: evaluatorAndRun.evaluator.name,
+                        annotator_kind: evaluatorAndRun.evaluator.kind,
+                        start_time: evalResult.startTime.toISOString(),
+                        end_time: evalResult.endTime.toISOString(),
+                        result: {
+                            ...evalResult.result,
+                        },
+                        error: evalResult.error,
+                        trace_id: evalResult.traceId,
+                    },
+                });
+            }
+            span.end();
+            return evalResult;
+        });
     }, concurrency);
-    evaluatorsAndRuns.forEach((evaluatorAndRun) => evaluatorsQueue.push(evaluatorAndRun));
+    if (!evaluatorsAndRuns.length) {
+        logger.info(`⛔ No evaluators to run`);
+        return {
+            ...experiment,
+            evaluationRuns: [],
+        };
+    }
+    evaluatorsAndRuns.forEach((evaluatorAndRun) => evaluatorsQueue.push(evaluatorAndRun, (err) => {
+        if (err) {
+            logger.error(`❌ Error running evaluator "${evaluatorAndRun.evaluator.name}" on run "${evaluatorAndRun.run.id}": ${err}`);
+        }
+    }));
     await evaluatorsQueue.drain();
     logger.info(`✅ Evaluation runs completed`);
+    if (provider) {
+        await provider.shutdown?.();
+    }
     return {
         ...experiment,
         evaluationRuns: Object.values(evaluationRuns),
@@ -243,20 +354,21 @@ export async function evaluateExperiment({ experiment, evaluators, client: _clie
  *
  * @experimental This feature is not complete, and will change in the future.
  */
-async function runEvaluator({ evaluator, run, exampleCache, onComplete, }) {
+async function runEvaluator({ evaluator, run, exampleCache, onComplete, logger, }) {
     const example = exampleCache[run.datasetExampleId];
     invariant(example, `Example "${run.datasetExampleId}" not found`);
     const evaluate = async () => {
+        logger.info(`🧠 Evaluating run "${run.id}" with evaluator "${evaluator.name}"`);
         const thisEval = {
-            id: id(),
-            traceId: null, // TODO: fill this in once we trace experiments
+            id: localId(),
+            traceId: null,
             experimentRunId: run.id,
             startTime: new Date(),
             endTime: new Date(), // will get replaced with actual end time
             name: evaluator.name,
             result: null,
             error: null,
-            annotatorKind: "LLM", // TODO: make configurable via evaluator def
+            annotatorKind: evaluator.kind,
         };
         try {
             const result = await evaluator.evaluate({
@@ -266,9 +378,11 @@ async function runEvaluator({ evaluator, run, exampleCache, onComplete, }) {
                 metadata: example.metadata,
             });
             thisEval.result = result;
+            logger.info(`✅ Evaluator "${evaluator.name}" on run "${run.id}" completed`);
         }
         catch (error) {
             thisEval.error = error instanceof Error ? error.message : "Unknown error";
+            logger.error(`❌ Evaluator "${evaluator.name}" on run "${run.id}" failed: ${thisEval.error}`);
         }
         thisEval.endTime = new Date();
         onComplete(thisEval);
@@ -292,17 +406,14 @@ export function asEvaluator({ name, kind, evaluate, }) {
         evaluate,
     };
 }
-let _id = 1000;
+let _localIdIndex = 1000;
 /**
- * Generate a unique id.
+ * Generate a local id.
  *
- * @deprecated Use id generated by phoenix instead.
- * @returns A unique id.
+ * @returns A semi-unique id.
  */
-export function id() {
-    return (() => {
-        _id++;
-        return _id.toString();
-    })();
+function localId() {
+    _localIdIndex++;
+    return `local_${_localIdIndex}`;
 }
 //# sourceMappingURL=runExperiment.js.map