arize-phoenix 4.10.2rc1__py3-none-any.whl → 4.11.0__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

@@ -1,157 +1,178 @@
1
- from datetime import datetime
2
- from typing import Any, Dict, List, Optional
3
-
4
- from fastapi import APIRouter, HTTPException, Path, Query
5
- from pydantic import BaseModel
6
1
  from sqlalchemy import and_, func, select
7
2
  from starlette.requests import Request
3
+ from starlette.responses import JSONResponse, Response
8
4
  from starlette.status import HTTP_404_NOT_FOUND
9
5
  from strawberry.relay import GlobalID
10
6
 
11
- from phoenix.db.models import (
12
- Dataset as ORMDataset,
13
- )
14
- from phoenix.db.models import (
15
- DatasetExample as ORMDatasetExample,
16
- )
17
- from phoenix.db.models import (
18
- DatasetExampleRevision as ORMDatasetExampleRevision,
19
- )
20
- from phoenix.db.models import (
21
- DatasetVersion as ORMDatasetVersion,
22
- )
23
-
24
- from .utils import ResponseBody, add_errors_to_responses
25
-
26
- router = APIRouter(tags=["datasets"])
27
-
28
-
29
- class DatasetExample(BaseModel):
30
- id: str
31
- input: Dict[str, Any]
32
- output: Dict[str, Any]
33
- metadata: Dict[str, Any]
34
- updated_at: datetime
35
-
36
-
37
- class ListDatasetExamplesData(BaseModel):
38
- dataset_id: str
39
- version_id: str
40
- examples: List[DatasetExample]
41
-
42
-
43
- class ListDatasetExamplesResponseBody(ResponseBody[ListDatasetExamplesData]):
44
- pass
45
-
46
-
47
- @router.get(
48
- "/datasets/{id}/examples",
49
- operation_id="getDatasetExamples",
50
- summary="Get examples from a dataset",
51
- responses=add_errors_to_responses([HTTP_404_NOT_FOUND]),
52
- )
53
- async def get_dataset_examples(
54
- request: Request,
55
- id: str = Path(description="The ID of the dataset"),
56
- version_id: Optional[str] = Query(
57
- default=None,
58
- description=(
59
- "The ID of the dataset version " "(if omitted, returns data from the latest version)"
60
- ),
61
- ),
62
- ) -> ListDatasetExamplesResponseBody:
63
- dataset_gid = GlobalID.from_id(id)
64
- version_gid = GlobalID.from_id(version_id) if version_id else None
65
-
66
- if (dataset_type := dataset_gid.type_name) != "Dataset":
67
- raise HTTPException(
68
- detail=f"ID {dataset_gid} refers to a {dataset_type}", status_code=HTTP_404_NOT_FOUND
7
+ from phoenix.db.models import Dataset, DatasetExample, DatasetExampleRevision, DatasetVersion
8
+
9
+
10
+ async def list_dataset_examples(request: Request) -> Response:
11
+ """
12
+ summary: Get dataset examples by dataset ID
13
+ operationId: getDatasetExamples
14
+ tags:
15
+ - datasets
16
+ parameters:
17
+ - in: path
18
+ name: id
19
+ required: true
20
+ schema:
21
+ type: string
22
+ description: Dataset ID
23
+ - in: query
24
+ name: version_id
25
+ schema:
26
+ type: string
27
+ description: Dataset version ID. If omitted, returns the latest version.
28
+ responses:
29
+ 200:
30
+ description: Success
31
+ content:
32
+ application/json:
33
+ schema:
34
+ type: object
35
+ properties:
36
+ data:
37
+ type: object
38
+ properties:
39
+ dataset_id:
40
+ type: string
41
+ description: ID of the dataset
42
+ version_id:
43
+ type: string
44
+ description: ID of the version
45
+ examples:
46
+ type: array
47
+ items:
48
+ type: object
49
+ properties:
50
+ id:
51
+ type: string
52
+ description: ID of the dataset example
53
+ input:
54
+ type: object
55
+ description: Input data of the example
56
+ output:
57
+ type: object
58
+ description: Output data of the example
59
+ metadata:
60
+ type: object
61
+ description: Metadata of the example
62
+ updated_at:
63
+ type: string
64
+ format: date-time
65
+ description: ISO formatted timestamp of when the example was updated
66
+ required:
67
+ - id
68
+ - input
69
+ - output
70
+ - metadata
71
+ - updated_at
72
+ required:
73
+ - dataset_id
74
+ - version_id
75
+ - examples
76
+ 403:
77
+ description: Forbidden
78
+ 404:
79
+ description: Dataset does not exist.
80
+ """
81
+ dataset_id = GlobalID.from_id(request.path_params["id"])
82
+ raw_version_id = request.query_params.get("version_id")
83
+ version_id = GlobalID.from_id(raw_version_id) if raw_version_id else None
84
+
85
+ if (dataset_type := dataset_id.type_name) != "Dataset":
86
+ return Response(
87
+ content=f"ID {dataset_id} refers to a {dataset_type}", status_code=HTTP_404_NOT_FOUND
69
88
  )
70
89
 
71
- if version_gid and (version_type := version_gid.type_name) != "DatasetVersion":
72
- raise HTTPException(
73
- detail=f"ID {version_gid} refers to a {version_type}", status_code=HTTP_404_NOT_FOUND
90
+ if version_id and (version_type := version_id.type_name) != "DatasetVersion":
91
+ return Response(
92
+ content=f"ID {version_id} refers to a {version_type}", status_code=HTTP_404_NOT_FOUND
74
93
  )
75
94
 
76
95
  async with request.app.state.db() as session:
77
96
  if (
78
97
  resolved_dataset_id := await session.scalar(
79
- select(ORMDataset.id).where(ORMDataset.id == int(dataset_gid.node_id))
98
+ select(Dataset.id).where(Dataset.id == int(dataset_id.node_id))
80
99
  )
81
100
  ) is None:
82
- raise HTTPException(
83
- detail=f"No dataset with id {dataset_gid} can be found.",
101
+ return Response(
102
+ content=f"No dataset with id {dataset_id} can be found.",
84
103
  status_code=HTTP_404_NOT_FOUND,
85
104
  )
86
105
 
87
106
  # Subquery to find the most recent revision (the maximum revision id) for each dataset_example_id
88
107
  # timestamp tiebreaks are resolved by the largest id
89
108
  partial_subquery = select(
90
- func.max(ORMDatasetExampleRevision.id).label("max_id"),
91
- ).group_by(ORMDatasetExampleRevision.dataset_example_id)
109
+ func.max(DatasetExampleRevision.id).label("max_id"),
110
+ ).group_by(DatasetExampleRevision.dataset_example_id)
92
111
 
93
- if version_gid:
112
+ if version_id:
94
113
  if (
95
114
  resolved_version_id := await session.scalar(
96
- select(ORMDatasetVersion.id).where(
115
+ select(DatasetVersion.id).where(
97
116
  and_(
98
- ORMDatasetVersion.dataset_id == resolved_dataset_id,
99
- ORMDatasetVersion.id == int(version_gid.node_id),
117
+ DatasetVersion.dataset_id == resolved_dataset_id,
118
+ DatasetVersion.id == int(version_id.node_id),
100
119
  )
101
120
  )
102
121
  )
103
122
  ) is None:
104
- raise HTTPException(
105
- detail=f"No dataset version with id {version_id} can be found.",
123
+ return Response(
124
+ content=f"No dataset version with id {version_id} can be found.",
106
125
  status_code=HTTP_404_NOT_FOUND,
107
126
  )
108
127
  # if a version_id is provided, filter the subquery to only include revisions from that version or earlier
109
128
  partial_subquery = partial_subquery.filter(
110
- ORMDatasetExampleRevision.dataset_version_id <= resolved_version_id
129
+ DatasetExampleRevision.dataset_version_id <= resolved_version_id
111
130
  )
112
131
  else:
113
132
  if (
114
133
  resolved_version_id := await session.scalar(
115
- select(func.max(ORMDatasetVersion.id)).where(
116
- ORMDatasetVersion.dataset_id == resolved_dataset_id
134
+ select(func.max(DatasetVersion.id)).where(
135
+ DatasetVersion.dataset_id == resolved_dataset_id
117
136
  )
118
137
  )
119
138
  ) is None:
120
- raise HTTPException(
121
- detail="Dataset has no versions.",
139
+ return Response(
140
+ content="Dataset has no versions.",
122
141
  status_code=HTTP_404_NOT_FOUND,
123
142
  )
124
143
 
125
144
  subquery = partial_subquery.subquery()
126
145
  # Query for the most recent example revisions that are not deleted
127
146
  query = (
128
- select(ORMDatasetExample, ORMDatasetExampleRevision)
147
+ select(DatasetExample, DatasetExampleRevision)
129
148
  .join(
130
- ORMDatasetExampleRevision,
131
- ORMDatasetExample.id == ORMDatasetExampleRevision.dataset_example_id,
149
+ DatasetExampleRevision,
150
+ DatasetExample.id == DatasetExampleRevision.dataset_example_id,
132
151
  )
133
152
  .join(
134
153
  subquery,
135
- (subquery.c.max_id == ORMDatasetExampleRevision.id),
154
+ (subquery.c.max_id == DatasetExampleRevision.id),
136
155
  )
137
- .filter(ORMDatasetExample.dataset_id == resolved_dataset_id)
138
- .filter(ORMDatasetExampleRevision.revision_kind != "DELETE")
139
- .order_by(ORMDatasetExample.id.asc())
156
+ .filter(DatasetExample.dataset_id == resolved_dataset_id)
157
+ .filter(DatasetExampleRevision.revision_kind != "DELETE")
158
+ .order_by(DatasetExample.id.asc())
140
159
  )
141
160
  examples = [
142
- DatasetExample(
143
- id=str(GlobalID("DatasetExample", str(example.id))),
144
- input=revision.input,
145
- output=revision.output,
146
- metadata=revision.metadata_,
147
- updated_at=revision.created_at,
148
- )
161
+ {
162
+ "id": str(GlobalID("DatasetExample", str(example.id))),
163
+ "input": revision.input,
164
+ "output": revision.output,
165
+ "metadata": revision.metadata_,
166
+ "updated_at": revision.created_at.isoformat(),
167
+ }
149
168
  async for example, revision in await session.stream(query)
150
169
  ]
151
- return ListDatasetExamplesResponseBody(
152
- data=ListDatasetExamplesData(
153
- dataset_id=str(GlobalID("Dataset", str(resolved_dataset_id))),
154
- version_id=str(GlobalID("DatasetVersion", str(resolved_version_id))),
155
- examples=examples,
156
- )
170
+ return JSONResponse(
171
+ {
172
+ "data": {
173
+ "dataset_id": str(GlobalID("Dataset", str(resolved_dataset_id))),
174
+ "version_id": str(GlobalID("DatasetVersion", str(resolved_version_id))),
175
+ "examples": examples,
176
+ }
177
+ }
157
178
  )