@arizeai/phoenix-client 1.3.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +118 -0
- package/dist/esm/datasets/appendDatasetExamples.d.ts +21 -0
- package/dist/esm/datasets/appendDatasetExamples.d.ts.map +1 -0
- package/dist/esm/datasets/appendDatasetExamples.js +32 -0
- package/dist/esm/datasets/appendDatasetExamples.js.map +1 -0
- package/dist/esm/datasets/createDataset.d.ts +25 -0
- package/dist/esm/datasets/createDataset.d.ts.map +1 -0
- package/dist/esm/datasets/createDataset.js +34 -0
- package/dist/esm/datasets/createDataset.js.map +1 -0
- package/dist/esm/datasets/getDataset.d.ts +10 -0
- package/dist/esm/datasets/getDataset.d.ts.map +1 -0
- package/dist/esm/datasets/getDataset.js +18 -0
- package/dist/esm/datasets/getDataset.js.map +1 -0
- package/dist/esm/datasets/getDatasetExamples.d.ts +10 -0
- package/dist/esm/datasets/getDatasetExamples.d.ts.map +1 -0
- package/dist/esm/datasets/getDatasetExamples.js +25 -0
- package/dist/esm/datasets/getDatasetExamples.js.map +1 -0
- package/dist/esm/datasets/getDatasetInfo.d.ts +11 -0
- package/dist/esm/datasets/getDatasetInfo.d.ts.map +1 -0
- package/dist/esm/datasets/getDatasetInfo.js +25 -0
- package/dist/esm/datasets/getDatasetInfo.js.map +1 -0
- package/dist/esm/datasets/index.d.ts +7 -0
- package/dist/esm/datasets/index.d.ts.map +1 -0
- package/dist/esm/datasets/index.js +7 -0
- package/dist/esm/datasets/index.js.map +1 -0
- package/dist/esm/datasets/listDatasets.d.ts +23 -0
- package/dist/esm/datasets/listDatasets.d.ts.map +1 -0
- package/dist/esm/datasets/listDatasets.js +26 -0
- package/dist/esm/datasets/listDatasets.js.map +1 -0
- package/dist/esm/experiments/getExperiment.d.ts +14 -0
- package/dist/esm/experiments/getExperiment.d.ts.map +1 -0
- package/dist/esm/experiments/getExperiment.js +25 -0
- package/dist/esm/experiments/getExperiment.js.map +1 -0
- package/dist/esm/experiments/getExperimentInfo.d.ts +13 -0
- package/dist/esm/experiments/getExperimentInfo.d.ts.map +1 -0
- package/dist/esm/experiments/getExperimentInfo.js +24 -0
- package/dist/esm/experiments/getExperimentInfo.js.map +1 -0
- package/dist/esm/experiments/getExperimentRuns.d.ts +15 -0
- package/dist/esm/experiments/getExperimentRuns.d.ts.map +1 -0
- package/dist/esm/experiments/getExperimentRuns.js +33 -0
- package/dist/esm/experiments/getExperimentRuns.js.map +1 -0
- package/dist/esm/experiments/index.d.ts +3 -0
- package/dist/esm/experiments/index.d.ts.map +1 -1
- package/dist/esm/experiments/index.js +3 -0
- package/dist/esm/experiments/index.js.map +1 -1
- package/dist/esm/experiments/runExperiment.d.ts +6 -6
- package/dist/esm/experiments/runExperiment.d.ts.map +1 -1
- package/dist/esm/experiments/runExperiment.js +14 -12
- package/dist/esm/experiments/runExperiment.js.map +1 -1
- package/dist/esm/schemas/llm/anthropic/converters.d.ts +28 -28
- package/dist/esm/schemas/llm/anthropic/messagePartSchemas.d.ts +8 -8
- package/dist/esm/schemas/llm/anthropic/messageSchemas.d.ts +24 -24
- package/dist/esm/schemas/llm/anthropic/toolCallSchemas.d.ts +8 -8
- package/dist/esm/schemas/llm/constants.d.ts +6 -6
- package/dist/esm/schemas/llm/converters.d.ts +24 -24
- package/dist/esm/schemas/llm/openai/converters.d.ts +6 -6
- package/dist/esm/schemas/llm/schemas.d.ts +22 -22
- package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
- package/dist/esm/types/datasets.d.ts +33 -8
- package/dist/esm/types/datasets.d.ts.map +1 -1
- package/dist/esm/types/experiments.d.ts +17 -3
- package/dist/esm/types/experiments.d.ts.map +1 -1
- package/dist/src/datasets/appendDatasetExamples.d.ts +21 -0
- package/dist/src/datasets/appendDatasetExamples.d.ts.map +1 -0
- package/dist/src/datasets/appendDatasetExamples.js +50 -0
- package/dist/src/datasets/appendDatasetExamples.js.map +1 -0
- package/dist/src/datasets/createDataset.d.ts +25 -0
- package/dist/src/datasets/createDataset.d.ts.map +1 -0
- package/dist/src/datasets/createDataset.js +52 -0
- package/dist/src/datasets/createDataset.js.map +1 -0
- package/dist/src/datasets/getDataset.d.ts +10 -0
- package/dist/src/datasets/getDataset.d.ts.map +1 -0
- package/dist/src/datasets/getDataset.js +29 -0
- package/dist/src/datasets/getDataset.js.map +1 -0
- package/dist/src/datasets/getDatasetExamples.d.ts +10 -0
- package/dist/src/datasets/getDatasetExamples.d.ts.map +1 -0
- package/dist/src/datasets/getDatasetExamples.js +40 -0
- package/dist/src/datasets/getDatasetExamples.js.map +1 -0
- package/dist/src/datasets/getDatasetInfo.d.ts +11 -0
- package/dist/src/datasets/getDatasetInfo.d.ts.map +1 -0
- package/dist/src/datasets/getDatasetInfo.js +43 -0
- package/dist/src/datasets/getDatasetInfo.js.map +1 -0
- package/dist/src/datasets/index.d.ts +7 -0
- package/dist/src/datasets/index.d.ts.map +1 -0
- package/dist/src/datasets/index.js +23 -0
- package/dist/src/datasets/index.js.map +1 -0
- package/dist/src/datasets/listDatasets.d.ts +23 -0
- package/dist/src/datasets/listDatasets.d.ts.map +1 -0
- package/dist/src/datasets/listDatasets.js +40 -0
- package/dist/src/datasets/listDatasets.js.map +1 -0
- package/dist/src/experiments/getExperiment.d.ts +14 -0
- package/dist/src/experiments/getExperiment.d.ts.map +1 -0
- package/dist/src/experiments/getExperiment.js +36 -0
- package/dist/src/experiments/getExperiment.js.map +1 -0
- package/dist/src/experiments/getExperimentInfo.d.ts +13 -0
- package/dist/src/experiments/getExperimentInfo.d.ts.map +1 -0
- package/dist/src/experiments/getExperimentInfo.js +41 -0
- package/dist/src/experiments/getExperimentInfo.js.map +1 -0
- package/dist/src/experiments/getExperimentRuns.d.ts +15 -0
- package/dist/src/experiments/getExperimentRuns.d.ts.map +1 -0
- package/dist/src/experiments/getExperimentRuns.js +50 -0
- package/dist/src/experiments/getExperimentRuns.js.map +1 -0
- package/dist/src/experiments/index.d.ts +3 -0
- package/dist/src/experiments/index.d.ts.map +1 -1
- package/dist/src/experiments/index.js +3 -0
- package/dist/src/experiments/index.js.map +1 -1
- package/dist/src/experiments/runExperiment.d.ts +6 -6
- package/dist/src/experiments/runExperiment.d.ts.map +1 -1
- package/dist/src/experiments/runExperiment.js +14 -12
- package/dist/src/experiments/runExperiment.js.map +1 -1
- package/dist/src/schemas/llm/anthropic/converters.d.ts +28 -28
- package/dist/src/schemas/llm/anthropic/messagePartSchemas.d.ts +8 -8
- package/dist/src/schemas/llm/anthropic/messageSchemas.d.ts +24 -24
- package/dist/src/schemas/llm/anthropic/toolCallSchemas.d.ts +8 -8
- package/dist/src/schemas/llm/constants.d.ts +6 -6
- package/dist/src/schemas/llm/converters.d.ts +24 -24
- package/dist/src/schemas/llm/openai/converters.d.ts +6 -6
- package/dist/src/schemas/llm/schemas.d.ts +22 -22
- package/dist/src/types/datasets.d.ts +33 -8
- package/dist/src/types/datasets.d.ts.map +1 -1
- package/dist/src/types/experiments.d.ts +17 -3
- package/dist/src/types/experiments.d.ts.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +5 -1
- package/src/datasets/appendDatasetExamples.ts +55 -0
- package/src/datasets/createDataset.ts +60 -0
- package/src/datasets/getDataset.ts +27 -0
- package/src/datasets/getDatasetExamples.ts +34 -0
- package/src/datasets/getDatasetInfo.ts +34 -0
- package/src/datasets/index.ts +6 -0
- package/src/datasets/listDatasets.ts +37 -0
- package/src/experiments/getExperiment.ts +40 -0
- package/src/experiments/getExperimentInfo.ts +39 -0
- package/src/experiments/getExperimentRuns.ts +45 -0
- package/src/experiments/index.ts +3 -0
- package/src/experiments/runExperiment.ts +31 -25
- package/src/types/datasets.ts +35 -9
- package/src/types/experiments.ts +19 -3
- package/dist/esm/utils/getDatasetBySelector.d.ts +0 -25
- package/dist/esm/utils/getDatasetBySelector.d.ts.map +0 -1
- package/dist/esm/utils/getDatasetBySelector.js +0 -37
- package/dist/esm/utils/getDatasetBySelector.js.map +0 -1
- package/dist/src/utils/getDatasetBySelector.d.ts +0 -25
- package/dist/src/utils/getDatasetBySelector.d.ts.map +0 -1
- package/dist/src/utils/getDatasetBySelector.js +0 -47
- package/dist/src/utils/getDatasetBySelector.js.map +0 -1
- package/src/utils/getDatasetBySelector.ts +0 -55
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@arizeai/phoenix-client",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "2.0.0",
|
|
4
4
|
"description": "A client for the Phoenix API",
|
|
5
5
|
"main": "dist/src/index.js",
|
|
6
6
|
"module": "dist/esm/index.js",
|
|
@@ -22,6 +22,10 @@
|
|
|
22
22
|
"import": "./dist/esm/experiments/index.js",
|
|
23
23
|
"require": "./dist/src/experiments/index.js"
|
|
24
24
|
},
|
|
25
|
+
"./datasets": {
|
|
26
|
+
"import": "./dist/esm/datasets/index.js",
|
|
27
|
+
"require": "./dist/src/datasets/index.js"
|
|
28
|
+
},
|
|
25
29
|
"./utils/*": {
|
|
26
30
|
"import": "./dist/esm/utils/*.js",
|
|
27
31
|
"require": "./dist/src/utils/*.js"
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import { createClient } from "../client";
|
|
2
|
+
import { ClientFn } from "../types/core";
|
|
3
|
+
import { Example, DatasetSelector } from "../types/datasets";
|
|
4
|
+
import invariant from "tiny-invariant";
|
|
5
|
+
|
|
6
|
+
export type AppendDatasetExamplesParams = ClientFn & {
|
|
7
|
+
/**
|
|
8
|
+
* The dataset to append examples to
|
|
9
|
+
*/
|
|
10
|
+
dataset: DatasetSelector;
|
|
11
|
+
/**
|
|
12
|
+
* The examples to append to the dataset
|
|
13
|
+
*/
|
|
14
|
+
examples: Example[];
|
|
15
|
+
};
|
|
16
|
+
|
|
17
|
+
export type AppendDatasetExamplesResponse = {
|
|
18
|
+
datasetId: string;
|
|
19
|
+
// TODO: respond with the versionId
|
|
20
|
+
// versionId: string;
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* Append examples to an existing dataset
|
|
25
|
+
* @experimental this interface may change in the future
|
|
26
|
+
*/
|
|
27
|
+
export async function appendDatasetExamples({
|
|
28
|
+
client: _client,
|
|
29
|
+
dataset,
|
|
30
|
+
examples,
|
|
31
|
+
}: AppendDatasetExamplesParams): Promise<AppendDatasetExamplesResponse> {
|
|
32
|
+
const client = _client || createClient();
|
|
33
|
+
const inputs = examples.map((example) => example.input);
|
|
34
|
+
const outputs = examples.map((example) => example.output ?? {}); // Treat null as an empty object
|
|
35
|
+
const metadata = examples.map((example) => example.metadata);
|
|
36
|
+
const appendResponse = await client.POST("/v1/datasets/upload", {
|
|
37
|
+
params: {
|
|
38
|
+
query: {
|
|
39
|
+
sync: true,
|
|
40
|
+
},
|
|
41
|
+
},
|
|
42
|
+
body: {
|
|
43
|
+
name: dataset.datasetId,
|
|
44
|
+
action: "append",
|
|
45
|
+
inputs,
|
|
46
|
+
outputs,
|
|
47
|
+
metadata,
|
|
48
|
+
},
|
|
49
|
+
});
|
|
50
|
+
invariant(appendResponse.data?.data, "Failed to append dataset examples");
|
|
51
|
+
const datasetId = appendResponse.data.data.dataset_id;
|
|
52
|
+
return {
|
|
53
|
+
datasetId,
|
|
54
|
+
};
|
|
55
|
+
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { createClient } from "../client";
|
|
2
|
+
import { ClientFn } from "../types/core";
|
|
3
|
+
import { Example } from "../types/datasets";
|
|
4
|
+
import invariant from "tiny-invariant";
|
|
5
|
+
|
|
6
|
+
export type CreateDatasetParams = ClientFn & {
|
|
7
|
+
/**
|
|
8
|
+
* The name of the dataset
|
|
9
|
+
*/
|
|
10
|
+
name: string;
|
|
11
|
+
/**
|
|
12
|
+
* The description of the dataset
|
|
13
|
+
*/
|
|
14
|
+
description: string;
|
|
15
|
+
/**
|
|
16
|
+
* The examples to create in the dataset
|
|
17
|
+
*/
|
|
18
|
+
examples: Example[];
|
|
19
|
+
};
|
|
20
|
+
|
|
21
|
+
export type CreateDatasetResponse = {
|
|
22
|
+
datasetId: string;
|
|
23
|
+
};
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Create a new dataset
|
|
27
|
+
* @experimental this interface may change in the future
|
|
28
|
+
*/
|
|
29
|
+
export async function createDataset({
|
|
30
|
+
client: _client,
|
|
31
|
+
name,
|
|
32
|
+
description,
|
|
33
|
+
examples,
|
|
34
|
+
}: CreateDatasetParams): Promise<CreateDatasetResponse> {
|
|
35
|
+
const client = _client || createClient();
|
|
36
|
+
const inputs = examples.map((example) => example.input);
|
|
37
|
+
const outputs = examples.map((example) => example.output ?? {}); // Treat null as an empty object
|
|
38
|
+
const metadata = examples.map((example) => example.metadata);
|
|
39
|
+
const createDatasetResponse = await client.POST("/v1/datasets/upload", {
|
|
40
|
+
params: {
|
|
41
|
+
query: {
|
|
42
|
+
// TODO: parameterize this
|
|
43
|
+
sync: true,
|
|
44
|
+
},
|
|
45
|
+
},
|
|
46
|
+
body: {
|
|
47
|
+
name,
|
|
48
|
+
description,
|
|
49
|
+
action: "create",
|
|
50
|
+
inputs,
|
|
51
|
+
outputs,
|
|
52
|
+
metadata,
|
|
53
|
+
},
|
|
54
|
+
});
|
|
55
|
+
invariant(createDatasetResponse.data?.data, "Failed to create dataset");
|
|
56
|
+
const datasetId = createDatasetResponse.data.data.dataset_id;
|
|
57
|
+
return {
|
|
58
|
+
datasetId,
|
|
59
|
+
};
|
|
60
|
+
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { createClient } from "../client";
|
|
2
|
+
import { ClientFn } from "../types/core";
|
|
3
|
+
import { DatasetSelector, Dataset } from "../types/datasets";
|
|
4
|
+
import { getDatasetExamples } from "./getDatasetExamples";
|
|
5
|
+
import { getDatasetInfo } from "./getDatasetInfo";
|
|
6
|
+
|
|
7
|
+
export type GetDatasetParams = ClientFn & {
|
|
8
|
+
dataset: DatasetSelector;
|
|
9
|
+
};
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Get dataset info and the examples from the latest version of the dataset
|
|
13
|
+
*/
|
|
14
|
+
export async function getDataset({
|
|
15
|
+
client: _client,
|
|
16
|
+
dataset,
|
|
17
|
+
}: GetDatasetParams): Promise<Dataset> {
|
|
18
|
+
const client = _client || createClient();
|
|
19
|
+
const [datasetInfo, datasetExamples] = await Promise.all([
|
|
20
|
+
getDatasetInfo({ client, dataset }),
|
|
21
|
+
getDatasetExamples({ client, dataset }),
|
|
22
|
+
]);
|
|
23
|
+
return {
|
|
24
|
+
...datasetInfo,
|
|
25
|
+
...datasetExamples,
|
|
26
|
+
};
|
|
27
|
+
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import invariant from "tiny-invariant";
|
|
2
|
+
import { createClient } from "../client";
|
|
3
|
+
import { ClientFn } from "../types/core";
|
|
4
|
+
import { DatasetSelector, DatasetExamples } from "../types/datasets";
|
|
5
|
+
|
|
6
|
+
export type GetDatasetExamplesParams = ClientFn & {
|
|
7
|
+
dataset: DatasetSelector;
|
|
8
|
+
};
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Get the latest examples from a dataset
|
|
12
|
+
*/
|
|
13
|
+
export async function getDatasetExamples({
|
|
14
|
+
client: _client,
|
|
15
|
+
dataset,
|
|
16
|
+
}: GetDatasetExamplesParams): Promise<DatasetExamples> {
|
|
17
|
+
const client = _client || createClient();
|
|
18
|
+
const response = await client.GET("/v1/datasets/{id}/examples", {
|
|
19
|
+
params: {
|
|
20
|
+
path: {
|
|
21
|
+
id: dataset.datasetId,
|
|
22
|
+
},
|
|
23
|
+
},
|
|
24
|
+
});
|
|
25
|
+
invariant(response.data?.data, "Failed to get dataset examples");
|
|
26
|
+
const examplesData = response.data.data;
|
|
27
|
+
return {
|
|
28
|
+
versionId: examplesData.version_id,
|
|
29
|
+
examples: examplesData.examples.map((example) => ({
|
|
30
|
+
...example,
|
|
31
|
+
updatedAt: new Date(example.updated_at),
|
|
32
|
+
})),
|
|
33
|
+
};
|
|
34
|
+
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import invariant from "tiny-invariant";
|
|
2
|
+
import { createClient } from "../client";
|
|
3
|
+
import { ClientFn } from "../types/core";
|
|
4
|
+
import { DatasetSelector, DatasetInfo } from "../types/datasets";
|
|
5
|
+
|
|
6
|
+
export type GetDatasetInfoParams = ClientFn & {
|
|
7
|
+
dataset: DatasetSelector;
|
|
8
|
+
};
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Get an overview of the information in a dataset
|
|
12
|
+
* Note: this does not include the examples contained in the dataset
|
|
13
|
+
*/
|
|
14
|
+
export async function getDatasetInfo({
|
|
15
|
+
client: _client,
|
|
16
|
+
dataset,
|
|
17
|
+
}: GetDatasetInfoParams): Promise<DatasetInfo> {
|
|
18
|
+
const client = _client || createClient();
|
|
19
|
+
const datasetResponse = await client.GET("/v1/datasets/{id}", {
|
|
20
|
+
params: {
|
|
21
|
+
path: {
|
|
22
|
+
id: dataset.datasetId,
|
|
23
|
+
},
|
|
24
|
+
},
|
|
25
|
+
});
|
|
26
|
+
invariant(datasetResponse.data?.data, "Failed to get dataset info");
|
|
27
|
+
const datasetInfo = datasetResponse.data.data;
|
|
28
|
+
return {
|
|
29
|
+
id: datasetInfo.id,
|
|
30
|
+
name: datasetInfo.name,
|
|
31
|
+
description: datasetInfo.description || undefined,
|
|
32
|
+
metadata: datasetInfo.metadata,
|
|
33
|
+
};
|
|
34
|
+
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { createClient } from "../client";
|
|
2
|
+
import { DatasetInfo } from "../types/datasets";
|
|
3
|
+
import { ClientFn } from "../types/core";
|
|
4
|
+
import invariant from "tiny-invariant";
|
|
5
|
+
|
|
6
|
+
export type ListDatasetsParams = ClientFn;
|
|
7
|
+
|
|
8
|
+
type FullDatasetInfo = DatasetInfo & {
|
|
9
|
+
startDate: Date;
|
|
10
|
+
endDate: Date;
|
|
11
|
+
};
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* List the information about all datasets available to the client.
|
|
15
|
+
*
|
|
16
|
+
* @example
|
|
17
|
+
* ```ts
|
|
18
|
+
* import { listDatasets } from "@arizeai/phoenix-client/datasets";
|
|
19
|
+
*
|
|
20
|
+
* const datasets = await listDatasets({});
|
|
21
|
+
* console.log(datasets);
|
|
22
|
+
* ```
|
|
23
|
+
*
|
|
24
|
+
* @throws {Error} If the datasets cannot be listed or the response is invalid.
|
|
25
|
+
*/
|
|
26
|
+
export async function listDatasets({
|
|
27
|
+
client: _client,
|
|
28
|
+
}: ListDatasetsParams): Promise<FullDatasetInfo[]> {
|
|
29
|
+
const client = _client || createClient();
|
|
30
|
+
const response = await client.GET("/v1/datasets");
|
|
31
|
+
invariant(response.data?.data, "Failed to list datasets");
|
|
32
|
+
return response.data.data.map((dataset) => ({
|
|
33
|
+
...dataset,
|
|
34
|
+
startDate: new Date(dataset.created_at),
|
|
35
|
+
endDate: new Date(dataset.updated_at),
|
|
36
|
+
}));
|
|
37
|
+
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { createClient } from "../client";
|
|
2
|
+
import { ClientFn } from "../types/core";
|
|
3
|
+
import { ExperimentRunsMap, RanExperiment } from "../types/experiments";
|
|
4
|
+
import { getExperimentInfo } from "./getExperimentInfo";
|
|
5
|
+
import { getExperimentRuns } from "./getExperimentRuns";
|
|
6
|
+
|
|
7
|
+
export type GetExperimentResultParams = ClientFn & {
|
|
8
|
+
/**
|
|
9
|
+
* The experiment ID.
|
|
10
|
+
*/
|
|
11
|
+
experimentId: string;
|
|
12
|
+
};
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* A function that gets the result of an experiment.
|
|
16
|
+
* Fetches the experiment data as well as the runs.
|
|
17
|
+
*/
|
|
18
|
+
export async function getExperiment({
|
|
19
|
+
client: _client,
|
|
20
|
+
experimentId,
|
|
21
|
+
}: GetExperimentResultParams): Promise<RanExperiment> {
|
|
22
|
+
const client = _client || createClient();
|
|
23
|
+
const [experiment, experimentRuns] = await Promise.all([
|
|
24
|
+
getExperimentInfo({ client, experimentId }),
|
|
25
|
+
getExperimentRuns({ client, experimentId }),
|
|
26
|
+
]);
|
|
27
|
+
const experimentRunsMap: ExperimentRunsMap = {
|
|
28
|
+
runs: experimentRuns.runs.reduce(
|
|
29
|
+
(acc, run) => {
|
|
30
|
+
acc[run.id] = run;
|
|
31
|
+
return acc;
|
|
32
|
+
},
|
|
33
|
+
{} as ExperimentRunsMap["runs"]
|
|
34
|
+
),
|
|
35
|
+
};
|
|
36
|
+
return {
|
|
37
|
+
...experiment,
|
|
38
|
+
...experimentRunsMap,
|
|
39
|
+
};
|
|
40
|
+
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import invariant from "tiny-invariant";
|
|
2
|
+
import { createClient } from "../client";
|
|
3
|
+
import { ClientFn } from "../types/core";
|
|
4
|
+
import { type ExperimentInfo } from "../types/experiments";
|
|
5
|
+
|
|
6
|
+
export type GetExperimentParams = ClientFn & {
|
|
7
|
+
/**
|
|
8
|
+
* The experiment ID
|
|
9
|
+
*/
|
|
10
|
+
experimentId: string;
|
|
11
|
+
};
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* Returns an object containing the high-level info about an experiment
|
|
15
|
+
*/
|
|
16
|
+
export async function getExperimentInfo({
|
|
17
|
+
client: _client,
|
|
18
|
+
experimentId: experiment_id,
|
|
19
|
+
}: GetExperimentParams): Promise<ExperimentInfo> {
|
|
20
|
+
const client = _client || createClient();
|
|
21
|
+
const { data: { data: experimentData } = {} } = await client.GET(
|
|
22
|
+
"/v1/experiments/{experiment_id}",
|
|
23
|
+
{
|
|
24
|
+
params: {
|
|
25
|
+
path: {
|
|
26
|
+
experiment_id,
|
|
27
|
+
},
|
|
28
|
+
},
|
|
29
|
+
}
|
|
30
|
+
);
|
|
31
|
+
invariant(experimentData, "Failed to get experiment");
|
|
32
|
+
return {
|
|
33
|
+
id: experimentData.id,
|
|
34
|
+
datasetId: experimentData.dataset_id,
|
|
35
|
+
datasetVersionId: experimentData.dataset_version_id,
|
|
36
|
+
projectName: experimentData.project_name || "", // This will never happen
|
|
37
|
+
metadata: experimentData.metadata,
|
|
38
|
+
};
|
|
39
|
+
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import { createClient } from "../client";
|
|
2
|
+
import invariant from "tiny-invariant";
|
|
3
|
+
import { ClientFn } from "../types/core";
|
|
4
|
+
import { ExperimentRun } from "../types/experiments";
|
|
5
|
+
|
|
6
|
+
export type GetExperimentRunsParams = ClientFn & {
|
|
7
|
+
/**
|
|
8
|
+
* The experiment ID.
|
|
9
|
+
*/
|
|
10
|
+
experimentId: string;
|
|
11
|
+
};
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* A function that gets the runs (e.g. the results) of an experiment
|
|
15
|
+
*/
|
|
16
|
+
export async function getExperimentRuns({
|
|
17
|
+
client: _client,
|
|
18
|
+
experimentId,
|
|
19
|
+
}: GetExperimentRunsParams): Promise<{ runs: ExperimentRun[] }> {
|
|
20
|
+
const client = _client || createClient();
|
|
21
|
+
const getRunsPromise = client.GET("/v1/experiments/{experiment_id}/runs", {
|
|
22
|
+
params: {
|
|
23
|
+
path: {
|
|
24
|
+
experiment_id: experimentId,
|
|
25
|
+
},
|
|
26
|
+
},
|
|
27
|
+
});
|
|
28
|
+
const [experimentRunResponse] = await Promise.all([getRunsPromise]);
|
|
29
|
+
const { data: { data: experimentRunsData } = {} } = experimentRunResponse;
|
|
30
|
+
invariant(experimentRunsData, "Failed to retrieve experiment runs");
|
|
31
|
+
return {
|
|
32
|
+
runs: experimentRunsData.map((run) => {
|
|
33
|
+
return {
|
|
34
|
+
id: run.id,
|
|
35
|
+
traceId: run.trace_id || null,
|
|
36
|
+
experimentId: run.experiment_id,
|
|
37
|
+
datasetExampleId: run.dataset_example_id,
|
|
38
|
+
startTime: new Date(run.start_time),
|
|
39
|
+
endTime: new Date(run.end_time),
|
|
40
|
+
output: run.output as ExperimentRun["output"],
|
|
41
|
+
error: run.error || null,
|
|
42
|
+
};
|
|
43
|
+
}),
|
|
44
|
+
};
|
|
45
|
+
}
|
package/src/experiments/index.ts
CHANGED
|
@@ -2,17 +2,23 @@ import { queue } from "async";
|
|
|
2
2
|
import invariant from "tiny-invariant";
|
|
3
3
|
import { createClient, type PhoenixClient } from "../client";
|
|
4
4
|
import { ClientFn } from "../types/core";
|
|
5
|
-
import {
|
|
5
|
+
import {
|
|
6
|
+
Dataset,
|
|
7
|
+
DatasetSelector,
|
|
8
|
+
Example,
|
|
9
|
+
ExampleWithId,
|
|
10
|
+
} from "../types/datasets";
|
|
6
11
|
import type {
|
|
7
12
|
Evaluator,
|
|
8
|
-
|
|
13
|
+
ExperimentInfo,
|
|
9
14
|
ExperimentEvaluationRun,
|
|
10
15
|
ExperimentRun,
|
|
16
|
+
ExperimentRunID,
|
|
11
17
|
ExperimentTask,
|
|
12
18
|
RanExperiment,
|
|
13
19
|
} from "../types/experiments";
|
|
14
20
|
import { type Logger } from "../types/logger";
|
|
15
|
-
import {
|
|
21
|
+
import { getDataset } from "../datasets/getDataset";
|
|
16
22
|
import { pluralize } from "../utils/pluralize";
|
|
17
23
|
import { promisifyResult } from "../utils/promisifyResult";
|
|
18
24
|
import { AnnotatorKind } from "../types/annotations";
|
|
@@ -44,12 +50,13 @@ export type RunExperimentParams = ClientFn & {
|
|
|
44
50
|
experimentDescription?: string;
|
|
45
51
|
/**
|
|
46
52
|
* Experiment metadata
|
|
53
|
+
* E.g. modelName
|
|
47
54
|
*/
|
|
48
55
|
experimentMetadata?: Record<string, unknown>;
|
|
49
56
|
/**
|
|
50
57
|
* The dataset to run the experiment on
|
|
51
58
|
*/
|
|
52
|
-
dataset:
|
|
59
|
+
dataset: DatasetSelector;
|
|
53
60
|
/**
|
|
54
61
|
* The task to run
|
|
55
62
|
*/
|
|
@@ -112,9 +119,9 @@ export type RunExperimentParams = ClientFn & {
|
|
|
112
119
|
export async function runExperiment({
|
|
113
120
|
experimentName,
|
|
114
121
|
experimentDescription,
|
|
115
|
-
experimentMetadata,
|
|
122
|
+
experimentMetadata = {},
|
|
116
123
|
client: _client,
|
|
117
|
-
dataset:
|
|
124
|
+
dataset: DatasetSelector,
|
|
118
125
|
task,
|
|
119
126
|
evaluators,
|
|
120
127
|
logger = console,
|
|
@@ -125,24 +132,25 @@ export async function runExperiment({
|
|
|
125
132
|
let provider: NodeTracerProvider | undefined;
|
|
126
133
|
const isDryRun = typeof dryRun === "number" || dryRun === true;
|
|
127
134
|
const client = _client ?? createClient();
|
|
128
|
-
const dataset = await
|
|
135
|
+
const dataset = await getDataset({ dataset: DatasetSelector, client });
|
|
129
136
|
invariant(dataset, `Dataset not found`);
|
|
130
137
|
invariant(dataset.examples.length > 0, `Dataset has no examples`);
|
|
131
138
|
const nExamples =
|
|
132
139
|
typeof dryRun === "number"
|
|
133
|
-
? Math.
|
|
140
|
+
? Math.min(dryRun, dataset.examples.length)
|
|
134
141
|
: dataset.examples.length;
|
|
135
142
|
|
|
136
143
|
let projectName = `${dataset.name}-exp-${new Date().toISOString()}`;
|
|
137
144
|
// initialize the tracer into scope
|
|
138
145
|
let taskTracer: Tracer;
|
|
139
|
-
let experiment:
|
|
146
|
+
let experiment: ExperimentInfo;
|
|
140
147
|
if (isDryRun) {
|
|
141
148
|
experiment = {
|
|
142
149
|
id: localId(),
|
|
143
150
|
datasetId: dataset.id,
|
|
144
151
|
datasetVersionId: dataset.versionId,
|
|
145
152
|
projectName,
|
|
153
|
+
metadata: experimentMetadata,
|
|
146
154
|
};
|
|
147
155
|
taskTracer = createNoOpProvider().getTracer("no-op");
|
|
148
156
|
} else {
|
|
@@ -165,9 +173,10 @@ export async function runExperiment({
|
|
|
165
173
|
projectName = experimentResponse.project_name ?? projectName;
|
|
166
174
|
experiment = {
|
|
167
175
|
id: experimentResponse.id,
|
|
168
|
-
datasetId:
|
|
169
|
-
datasetVersionId:
|
|
176
|
+
datasetId: experimentResponse.dataset_id,
|
|
177
|
+
datasetVersionId: experimentResponse.dataset_version_id,
|
|
170
178
|
projectName,
|
|
179
|
+
metadata: experimentResponse.metadata,
|
|
171
180
|
};
|
|
172
181
|
// Initialize the tracer, now that we have a project name
|
|
173
182
|
const baseUrl = client.config.baseUrl;
|
|
@@ -189,16 +198,14 @@ export async function runExperiment({
|
|
|
189
198
|
}
|
|
190
199
|
|
|
191
200
|
logger.info(
|
|
192
|
-
`🧪 Starting experiment "${experimentName}" on dataset "${dataset.id}" with task "${task.name}" and ${evaluators?.length ?? 0} ${pluralize(
|
|
201
|
+
`🧪 Starting experiment "${experimentName || `<unnamed>`}" on dataset "${dataset.id}" with task "${task.name}" and ${evaluators?.length ?? 0} ${pluralize(
|
|
193
202
|
"evaluator",
|
|
194
203
|
evaluators?.length ?? 0
|
|
195
204
|
)} and ${concurrency} concurrent runs`
|
|
196
205
|
);
|
|
197
206
|
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
const runs: Record<ExperimentRunId, ExperimentRun> = {};
|
|
201
|
-
await runTask({
|
|
207
|
+
const runs: Record<ExperimentRunID, ExperimentRun> = {};
|
|
208
|
+
await runTaskWithExamples({
|
|
202
209
|
client,
|
|
203
210
|
experimentId: experiment.id,
|
|
204
211
|
task,
|
|
@@ -242,7 +249,7 @@ export async function runExperiment({
|
|
|
242
249
|
/**
|
|
243
250
|
* Run a task against n examples in a dataset.
|
|
244
251
|
*/
|
|
245
|
-
function
|
|
252
|
+
function runTaskWithExamples({
|
|
246
253
|
client,
|
|
247
254
|
experimentId,
|
|
248
255
|
task,
|
|
@@ -274,9 +281,9 @@ function runTask({
|
|
|
274
281
|
nExamples: number;
|
|
275
282
|
/** TraceProvider instance that will be used to create spans from task calls */
|
|
276
283
|
tracer: Tracer;
|
|
277
|
-
}) {
|
|
284
|
+
}): Promise<void> {
|
|
278
285
|
logger.info(`🔧 Running task "${task.name}" on dataset "${dataset.id}"`);
|
|
279
|
-
const run = async (example:
|
|
286
|
+
const run = async (example: ExampleWithId) => {
|
|
280
287
|
return tracer.startActiveSpan(`Task: ${task.name}`, async (span) => {
|
|
281
288
|
logger.info(
|
|
282
289
|
`🔧 Running task "${task.name}" on example "${example.id} of dataset "${dataset.id}"`
|
|
@@ -366,13 +373,12 @@ export async function evaluateExperiment({
|
|
|
366
373
|
experiment,
|
|
367
374
|
evaluators,
|
|
368
375
|
client: _client,
|
|
369
|
-
logger,
|
|
376
|
+
logger = console,
|
|
370
377
|
concurrency = 5,
|
|
371
378
|
dryRun = false,
|
|
372
379
|
}: {
|
|
373
380
|
/**
|
|
374
381
|
* The experiment to evaluate
|
|
375
|
-
* @todo also accept Experiment, and attempt to fetch the runs from the server
|
|
376
382
|
**/
|
|
377
383
|
experiment: RanExperiment;
|
|
378
384
|
/** The evaluators to use */
|
|
@@ -380,9 +386,9 @@ export async function evaluateExperiment({
|
|
|
380
386
|
/** The client to use */
|
|
381
387
|
client?: PhoenixClient;
|
|
382
388
|
/** The logger to use */
|
|
383
|
-
logger
|
|
389
|
+
logger?: Logger;
|
|
384
390
|
/** The number of evaluators to run in parallel */
|
|
385
|
-
concurrency
|
|
391
|
+
concurrency?: number;
|
|
386
392
|
/**
|
|
387
393
|
* Whether to run the evaluation as a dry run
|
|
388
394
|
* If a number is provided, the evaluation will be run for the first n runs
|
|
@@ -414,8 +420,8 @@ export async function evaluateExperiment({
|
|
|
414
420
|
typeof dryRun === "number"
|
|
415
421
|
? Math.max(dryRun, Object.keys(experiment.runs).length)
|
|
416
422
|
: Object.keys(experiment.runs).length;
|
|
417
|
-
const dataset = await
|
|
418
|
-
dataset: experiment.datasetId,
|
|
423
|
+
const dataset = await getDataset({
|
|
424
|
+
dataset: { datasetId: experiment.datasetId },
|
|
419
425
|
client,
|
|
420
426
|
});
|
|
421
427
|
invariant(dataset, `Dataset "${experiment.datasetId}" not found`);
|
package/src/types/datasets.ts
CHANGED
|
@@ -1,22 +1,48 @@
|
|
|
1
1
|
import { Node } from "./core";
|
|
2
2
|
|
|
3
|
+
/**
|
|
4
|
+
* A dataset can be identified by its datasetId
|
|
5
|
+
* TODO: add support for datasetName and datasetVersionId via discriminated union
|
|
6
|
+
*/
|
|
7
|
+
export type DatasetSelector = { datasetId: string };
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* Overview information about a dataset
|
|
11
|
+
*/
|
|
12
|
+
export interface DatasetInfo extends Node {
|
|
13
|
+
name: string;
|
|
14
|
+
description?: string | null;
|
|
15
|
+
metadata?: Record<string, unknown>;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* A dataset's examples
|
|
20
|
+
*/
|
|
21
|
+
export interface DatasetExamples {
|
|
22
|
+
examples: ExampleWithId[];
|
|
23
|
+
/**
|
|
24
|
+
* The version ID of the dataset examples
|
|
25
|
+
*/
|
|
26
|
+
versionId: string;
|
|
27
|
+
}
|
|
28
|
+
|
|
3
29
|
/**
|
|
4
30
|
* An example is a record to feed into an AI task
|
|
5
31
|
*/
|
|
6
|
-
export interface Example
|
|
7
|
-
id: string;
|
|
8
|
-
updatedAt: Date;
|
|
32
|
+
export interface Example {
|
|
9
33
|
input: Record<string, unknown>;
|
|
10
34
|
output: Record<string, unknown> | null;
|
|
11
35
|
metadata: Record<string, unknown>;
|
|
12
36
|
}
|
|
13
37
|
|
|
14
38
|
/**
|
|
15
|
-
*
|
|
39
|
+
* An example that has been synced to the server
|
|
16
40
|
*/
|
|
17
|
-
export interface
|
|
18
|
-
|
|
19
|
-
name: string;
|
|
20
|
-
versionId: string;
|
|
21
|
-
examples: Example[];
|
|
41
|
+
export interface ExampleWithId extends Example, Node {
|
|
42
|
+
updatedAt: Date;
|
|
22
43
|
}
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* A dataset is a collection of examples for an AI task
|
|
47
|
+
*/
|
|
48
|
+
export interface Dataset extends DatasetInfo, DatasetExamples, Node {}
|
package/src/types/experiments.ts
CHANGED
|
@@ -5,17 +5,33 @@ import { Example } from "./datasets";
|
|
|
5
5
|
/**
|
|
6
6
|
* An experiment is a set of task runs on a dataset version
|
|
7
7
|
*/
|
|
8
|
-
export interface
|
|
8
|
+
export interface ExperimentInfo extends Node {
|
|
9
9
|
datasetId: string;
|
|
10
10
|
datasetVersionId: string;
|
|
11
11
|
/**
|
|
12
12
|
* The project under which the experiment task traces are recorded
|
|
13
13
|
*/
|
|
14
14
|
projectName: string;
|
|
15
|
+
/**
|
|
16
|
+
* Metadata about the experiment as an object of key values
|
|
17
|
+
* e.g. model name
|
|
18
|
+
*/
|
|
19
|
+
metadata: Record<string, unknown>;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
export type ExperimentRunID = string;
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* A map of an experiment runId to the run
|
|
26
|
+
*/
|
|
27
|
+
export interface ExperimentRunsMap {
|
|
28
|
+
runs: Record<ExperimentRunID, ExperimentRun>;
|
|
15
29
|
}
|
|
16
30
|
|
|
17
|
-
|
|
18
|
-
|
|
31
|
+
/**
|
|
32
|
+
* An experiment that has been run and been recorded on the server
|
|
33
|
+
*/
|
|
34
|
+
export interface RanExperiment extends ExperimentInfo, ExperimentRunsMap {
|
|
19
35
|
evaluationRuns?: ExperimentEvaluationRun[];
|
|
20
36
|
}
|
|
21
37
|
|