arize-phoenix 4.4.3__py3-none-any.whl → 4.4.4rc0__py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- {arize_phoenix-4.4.3.dist-info → arize_phoenix-4.4.4rc0.dist-info}/METADATA +4 -4
- {arize_phoenix-4.4.3.dist-info → arize_phoenix-4.4.4rc0.dist-info}/RECORD +108 -55
- phoenix/__init__.py +0 -27
- phoenix/config.py +21 -7
- phoenix/core/model.py +25 -25
- phoenix/core/model_schema.py +64 -62
- phoenix/core/model_schema_adapter.py +27 -25
- phoenix/datasets/__init__.py +0 -0
- phoenix/datasets/evaluators.py +275 -0
- phoenix/datasets/experiments.py +469 -0
- phoenix/datasets/tracing.py +66 -0
- phoenix/datasets/types.py +212 -0
- phoenix/db/bulk_inserter.py +54 -14
- phoenix/db/insertion/dataset.py +234 -0
- phoenix/db/insertion/evaluation.py +6 -6
- phoenix/db/insertion/helpers.py +13 -2
- phoenix/db/migrations/types.py +29 -0
- phoenix/db/migrations/versions/10460e46d750_datasets.py +291 -0
- phoenix/db/migrations/versions/cf03bd6bae1d_init.py +2 -28
- phoenix/db/models.py +230 -3
- phoenix/inferences/fixtures.py +23 -23
- phoenix/inferences/inferences.py +7 -7
- phoenix/inferences/validation.py +1 -1
- phoenix/server/api/context.py +16 -0
- phoenix/server/api/dataloaders/__init__.py +16 -0
- phoenix/server/api/dataloaders/dataset_example_revisions.py +100 -0
- phoenix/server/api/dataloaders/dataset_example_spans.py +43 -0
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +85 -0
- phoenix/server/api/dataloaders/experiment_error_rates.py +43 -0
- phoenix/server/api/dataloaders/experiment_sequence_number.py +49 -0
- phoenix/server/api/dataloaders/project_by_name.py +31 -0
- phoenix/server/api/dataloaders/span_descendants.py +2 -3
- phoenix/server/api/dataloaders/span_projects.py +33 -0
- phoenix/server/api/dataloaders/trace_row_ids.py +39 -0
- phoenix/server/api/helpers/dataset_helpers.py +178 -0
- phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
- phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
- phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
- phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
- phoenix/server/api/input_types/DatasetSort.py +17 -0
- phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
- phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
- phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
- phoenix/server/api/input_types/DeleteExperimentsInput.py +9 -0
- phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
- phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
- phoenix/server/api/mutations/__init__.py +13 -0
- phoenix/server/api/mutations/auth.py +11 -0
- phoenix/server/api/mutations/dataset_mutations.py +520 -0
- phoenix/server/api/mutations/experiment_mutations.py +65 -0
- phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +17 -14
- phoenix/server/api/mutations/project_mutations.py +42 -0
- phoenix/server/api/queries.py +503 -0
- phoenix/server/api/routers/v1/__init__.py +77 -2
- phoenix/server/api/routers/v1/dataset_examples.py +178 -0
- phoenix/server/api/routers/v1/datasets.py +861 -0
- phoenix/server/api/routers/v1/evaluations.py +4 -2
- phoenix/server/api/routers/v1/experiment_evaluations.py +65 -0
- phoenix/server/api/routers/v1/experiment_runs.py +108 -0
- phoenix/server/api/routers/v1/experiments.py +174 -0
- phoenix/server/api/routers/v1/spans.py +3 -1
- phoenix/server/api/routers/v1/traces.py +1 -4
- phoenix/server/api/schema.py +2 -303
- phoenix/server/api/types/AnnotatorKind.py +10 -0
- phoenix/server/api/types/Cluster.py +19 -19
- phoenix/server/api/types/CreateDatasetPayload.py +8 -0
- phoenix/server/api/types/Dataset.py +282 -63
- phoenix/server/api/types/DatasetExample.py +85 -0
- phoenix/server/api/types/DatasetExampleRevision.py +34 -0
- phoenix/server/api/types/DatasetVersion.py +14 -0
- phoenix/server/api/types/Dimension.py +30 -29
- phoenix/server/api/types/EmbeddingDimension.py +40 -34
- phoenix/server/api/types/Event.py +16 -16
- phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
- phoenix/server/api/types/Experiment.py +135 -0
- phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
- phoenix/server/api/types/ExperimentComparison.py +19 -0
- phoenix/server/api/types/ExperimentRun.py +91 -0
- phoenix/server/api/types/ExperimentRunAnnotation.py +57 -0
- phoenix/server/api/types/Inferences.py +80 -0
- phoenix/server/api/types/InferencesRole.py +23 -0
- phoenix/server/api/types/Model.py +43 -42
- phoenix/server/api/types/Project.py +26 -12
- phoenix/server/api/types/Span.py +78 -2
- phoenix/server/api/types/TimeSeries.py +6 -6
- phoenix/server/api/types/Trace.py +15 -4
- phoenix/server/api/types/UMAPPoints.py +1 -1
- phoenix/server/api/types/node.py +5 -111
- phoenix/server/api/types/pagination.py +10 -52
- phoenix/server/app.py +99 -49
- phoenix/server/main.py +49 -27
- phoenix/server/openapi/docs.py +3 -0
- phoenix/server/static/index.js +2246 -1368
- phoenix/server/templates/index.html +1 -0
- phoenix/services.py +15 -15
- phoenix/session/client.py +316 -21
- phoenix/session/session.py +47 -37
- phoenix/trace/exporter.py +14 -9
- phoenix/trace/fixtures.py +133 -7
- phoenix/trace/span_evaluations.py +3 -3
- phoenix/trace/trace_dataset.py +6 -6
- phoenix/utilities/json.py +61 -0
- phoenix/utilities/re.py +50 -0
- phoenix/version.py +1 -1
- phoenix/server/api/types/DatasetRole.py +0 -23
- {arize_phoenix-4.4.3.dist-info → arize_phoenix-4.4.4rc0.dist-info}/WHEEL +0 -0
- {arize_phoenix-4.4.3.dist-info → arize_phoenix-4.4.4rc0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-4.4.3.dist-info → arize_phoenix-4.4.4rc0.dist-info}/licenses/LICENSE +0 -0
- /phoenix/server/api/{helpers.py → helpers/__init__.py} +0 -0
phoenix/trace/exporter.py
CHANGED

@@ -7,13 +7,17 @@ from types import MethodType
 from typing import Any, Optional
 from urllib.parse import urljoin
 
-import
+import httpx
 from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
-from requests import Session
 from typing_extensions import TypeAlias, assert_never
 
 import phoenix.trace.v1 as pb
-from phoenix.config import
+from phoenix.config import (
+    get_env_client_headers,
+    get_env_collector_endpoint,
+    get_env_host,
+    get_env_port,
+)
 
 logger = logging.getLogger(__name__)
 logger.addHandler(logging.NullHandler())

@@ -75,9 +79,10 @@ class HttpExporter:
         )
         self._base_url = base_url if base_url.endswith("/") else base_url + "/"
         _warn_if_phoenix_is_not_running(self._base_url)
-
-
-        self.
+        headers = get_env_client_headers()
+        self._client = httpx.Client(headers=headers)
+        weakref.finalize(self, self._client.close)
+        self._client.headers.update(
             {
                 "content-type": "application/x-protobuf",
                 "content-encoding": "gzip",

@@ -110,9 +115,9 @@ class HttpExporter:
 
     def _send(self, message: Message) -> None:
         serialized = message.SerializeToString()
-
+        content = gzip.compress(serialized)
         try:
-            self.
+            self._client.post(self._url(message), content=content).raise_for_status()
         except Exception as e:
             logger.exception(e)
 

@@ -125,7 +130,7 @@ class HttpExporter:
 
 def _warn_if_phoenix_is_not_running(base_url: str) -> None:
     try:
-
+        httpx.get(urljoin(base_url, "arize_phoenix_version")).raise_for_status()
     except Exception:
         logger.warning(
             f"Arize Phoenix is not running on {base_url}. Launch Phoenix "
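The exporter now sends gzipped protobuf over httpx instead of requests. A minimal sketch of the same pattern (illustrative only, not the exporter's actual code; the "v1/traces" path is a hypothetical placeholder, since the real exporter derives its URL per message type and its headers from phoenix.config):

    import gzip
    import weakref

    import httpx


    class MiniExporter:
        """Illustrative only: mirrors the httpx pattern used by HttpExporter."""

        def __init__(self, base_url: str) -> None:
            self._base_url = base_url
            self._client = httpx.Client(
                headers={"content-type": "application/x-protobuf", "content-encoding": "gzip"}
            )
            # Close the client when this exporter is garbage collected.
            weakref.finalize(self, self._client.close)

        def send(self, serialized: bytes) -> None:
            # Gzip the serialized protobuf payload and fail loudly on HTTP errors.
            content = gzip.compress(serialized)
            self._client.post(self._base_url + "v1/traces", content=content).raise_for_status()
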
phoenix/trace/fixtures.py
CHANGED

@@ -1,18 +1,29 @@
+import logging
+import shutil
 from binascii import hexlify
 from dataclasses import dataclass, field, replace
 from datetime import datetime, timezone
+from io import StringIO
 from random import getrandbits
-from
+from tempfile import NamedTemporaryFile
+from time import sleep, time
+from typing import Dict, Iterable, Iterator, List, NamedTuple, Optional, Sequence, Tuple, cast
 from urllib import request
+from urllib.parse import urljoin
 
+import httpx
 import pandas as pd
 from google.protobuf.wrappers_pb2 import DoubleValue, StringValue
+from httpx import ConnectError, HTTPStatusError
 
 import phoenix.trace.v1 as pb
+from phoenix import Client
 from phoenix.trace.schemas import Span
 from phoenix.trace.trace_dataset import TraceDataset
 from phoenix.trace.utils import json_lines_to_df
 
+logger = logging.getLogger(__name__)
+
 
 class EvaluationResultSchema(NamedTuple):
     label: Optional[str] = "label"

@@ -32,12 +43,44 @@ class DocumentEvaluationFixture(EvaluationFixture):
     document_position: str = "document_position"
 
 
+@dataclass(frozen=True)
+class DatasetFixture:
+    file_name: str
+    name: str
+    input_keys: Sequence[str]
+    output_keys: Sequence[str]
+    metadata_keys: Sequence[str] = ()
+    description: Optional[str] = field(default=None)
+    _df: Optional[pd.DataFrame] = field(default=None, init=False, repr=False)
+    _csv: Optional[str] = field(default=None, init=False, repr=False)
+
+    def load(self) -> "DatasetFixture":
+        if self._df is None:
+            df = pd.read_csv(_url(self.file_name))
+            object.__setattr__(self, "_df", df)
+        return self
+
+    @property
+    def dataframe(self) -> pd.DataFrame:
+        self.load()
+        return cast(pd.DataFrame, self._df).copy(deep=False)
+
+    @property
+    def csv(self) -> StringIO:
+        if self._csv is None:
+            with StringIO() as buffer:
+                self.dataframe.to_csv(buffer, index=False)
+                object.__setattr__(self, "_csv", buffer.getvalue())
+        return StringIO(self._csv)
+
+
 @dataclass(frozen=True)
 class TracesFixture:
     name: str
     description: str
     file_name: str
     evaluation_fixtures: Iterable[EvaluationFixture] = ()
+    dataset_fixtures: Iterable[DatasetFixture] = ()

@@ -58,6 +101,36 @@ llama_index_rag_fixture = TracesFixture(
             file_name="llama_index_rag_v8.retrieved_documents_eval.parquet",
         ),
     ),
+    dataset_fixtures=(
+        DatasetFixture(
+            file_name="hybridial_samples.csv.gz",
+            input_keys=("messages", "ctxs"),
+            output_keys=("answers",),
+            name="ChatRAG-Bench: Hybrid Dialogue (samples)",
+            description="https://huggingface.co/datasets/nvidia/ChatRAG-Bench/viewer/hybridial",
+        ),
+        DatasetFixture(
+            file_name="sqa_samples.csv.gz",
+            input_keys=("messages", "ctxs"),
+            output_keys=("answers",),
+            name="ChatRAG-Bench: SQA (samples)",
+            description="https://huggingface.co/datasets/nvidia/ChatRAG-Bench/viewer/sqa",
+        ),
+        DatasetFixture(
+            file_name="doqa_cooking_samples.csv.gz",
+            input_keys=("messages", "ctxs"),
+            output_keys=("answers",),
+            name="ChatRAG-Bench: DoQA Cooking (samples)",
+            description="https://huggingface.co/datasets/nvidia/ChatRAG-Bench/viewer/doqa_cooking",
+        ),
+        DatasetFixture(
+            file_name="synthetic_convqa_samples.csv.gz",
+            input_keys=("messages", "document"),
+            output_keys=("answers",),
+            name="ChatQA-Train: Synthetic ConvQA (samples)",
+            description="https://huggingface.co/datasets/nvidia/ChatQA-Training-Data/viewer/synthetic_convqa",
+        ),
+    ),
 )
 
 llama_index_calculator_agent_fixture = TracesFixture(

@@ -138,16 +211,69 @@ def download_traces_fixture(
         return cast(List[str], f.readlines())
 
 
-def load_example_traces(
+def load_example_traces(fixture_name: str) -> TraceDataset:
     """
     Loads a trace dataframe by name.
     """
-    fixture = get_trace_fixture_by_name(
+    fixture = get_trace_fixture_by_name(fixture_name)
     return TraceDataset(json_lines_to_df(download_traces_fixture(fixture)))
 
 
-def
-fixture
+def get_dataset_fixtures(fixture_name: str) -> Iterable[DatasetFixture]:
+    return (fixture.load() for fixture in get_trace_fixture_by_name(fixture_name).dataset_fixtures)
+
+
+def send_dataset_fixtures(
+    endpoint: str,
+    fixtures: Iterable[DatasetFixture],
+) -> None:
+    expiration = time() + 5
+    while time() < expiration:
+        try:
+            url = urljoin(endpoint, "/healthz")
+            httpx.get(url=url).raise_for_status()
+        except ConnectError:
+            sleep(0.1)
+            continue
+        except Exception as e:
+            print(str(e))
+            raise
+        break
+    client = Client(endpoint=endpoint)
+    for i, fixture in enumerate(fixtures):
+        try:
+            if i % 2:
+                client.upload_dataset(
+                    fixture.dataframe,
+                    name=fixture.name,
+                    input_keys=fixture.input_keys,
+                    output_keys=fixture.output_keys,
+                    metadata_keys=fixture.metadata_keys,
+                    description=fixture.description,
+                )
+            else:
+                with NamedTemporaryFile() as tf:
+                    with open(tf.name, "w") as f:
+                        shutil.copyfileobj(fixture.csv, f)
+                        f.flush()
+                    client.upload_dataset(
+                        tf.name,
+                        name=fixture.name,
+                        input_keys=fixture.input_keys,
+                        output_keys=fixture.output_keys,
+                        metadata_keys=fixture.metadata_keys,
+                        description=fixture.description,
+                    )
+        except HTTPStatusError as e:
+            print(e.response.content.decode())
+            pass
+        else:
+            name, df = fixture.name, fixture.dataframe
+            print(f"Dataset sent: {name=}, {len(df)=}")
+
+
+def get_evals_from_fixture(fixture_name: str) -> Iterator[pb.Evaluation]:
+    fixture = get_trace_fixture_by_name(fixture_name)
     for eval_fixture in fixture.evaluation_fixtures:
         yield from _read_eval_fixture(eval_fixture)

@@ -195,8 +321,8 @@ def _read_eval_fixture(eval_fixture: EvaluationFixture) -> Iterator[pb.Evaluation]:
 def _url(
     file_name: str,
     host: Optional[str] = "https://storage.googleapis.com/",
-    bucket: Optional[str] = "arize-assets",
-    prefix: Optional[str] = "
+    bucket: Optional[str] = "arize-phoenix-assets",
+    prefix: Optional[str] = "traces/",
 ) -> str:
     return f"{host}{bucket}/{prefix}{file_name}"

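A hedged usage sketch of the new dataset-fixture helpers added above. The fixture name and endpoint are assumptions, and a Phoenix server must already be running; half of the fixtures are uploaded as dataframes and half as CSV files, mirroring send_dataset_fixtures:

    from phoenix.trace.fixtures import get_dataset_fixtures, send_dataset_fixtures

    # Load the DatasetFixture objects attached to a trace fixture, then upload them.
    fixtures = get_dataset_fixtures("llama_index_rag")       # fixture name is an assumption
    send_dataset_fixtures("http://127.0.0.1:6006", fixtures)  # default local endpoint assumed
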
phoenix/trace/span_evaluations.py
CHANGED

@@ -11,7 +11,7 @@ import pandas as pd
 from pandas.api.types import is_integer_dtype, is_numeric_dtype, is_string_dtype
 from pyarrow import RecordBatchStreamReader, Schema, Table, parquet
 
-from phoenix.config import
+from phoenix.config import TRACE_DATASETS_DIR
 from phoenix.exceptions import PhoenixEvaluationNameIsMissing
 from phoenix.trace.errors import InvalidParquetMetadataError
 

@@ -201,7 +201,7 @@ class Evaluations(NeedsNamedIndex, NeedsResultColumns, ABC):
             UUID: The ID of the evaluations, which can be used as a key to load
                 the evaluations from disk using `load`.
         """
-        directory = Path(directory) if directory else
+        directory = Path(directory) if directory else TRACE_DATASETS_DIR
         path = directory / EVAL_PARQUET_FILE_NAME.format(id=self.id)
         table = self.to_pyarrow_table()
         parquet.write_table(table, path)

@@ -229,7 +229,7 @@ class Evaluations(NeedsNamedIndex, NeedsResultColumns, ABC):
         """
        if not isinstance(id, UUID):
            id = UUID(id)
-        path = Path(directory or
+        path = Path(directory or TRACE_DATASETS_DIR) / EVAL_PARQUET_FILE_NAME.format(id=id)
        schema = parquet.read_schema(path)
        eval_id, eval_name, evaluations_cls = _parse_schema_metadata(schema)
        if id != eval_id:

phoenix/trace/trace_dataset.py
CHANGED

@@ -14,7 +14,7 @@ from openinference.semconv.trace import (
 from pandas import DataFrame, read_parquet
 from pyarrow import Schema, Table, parquet
 
-from phoenix.config import
+from phoenix.config import GENERATED_INFERENCES_NAME_PREFIX, INFERENCES_DIR, TRACE_DATASETS_DIR
 from phoenix.datetime_utils import normalize_timestamps
 from phoenix.trace.attributes import unflatten
 from phoenix.trace.errors import InvalidParquetMetadataError

@@ -138,7 +138,7 @@ class TraceDataset:
         self._id = uuid4()
         self.dataframe = normalize_dataframe(dataframe)
         # TODO: This is not used in any meaningful way. Should remove
-        self.name = name or f"{
+        self.name = name or f"{GENERATED_INFERENCES_NAME_PREFIX}{str(self._id)}"
         self.evaluations = list(evaluations)
 
     @classmethod

@@ -201,13 +201,13 @@ class TraceDataset:
     @classmethod
     def from_name(cls, name: str) -> "TraceDataset":
         """Retrieves a dataset by name from the file system"""
-        directory =
+        directory = INFERENCES_DIR / name
         df = read_parquet(directory / cls._data_file_name)
         return cls(df, name)
 
     def to_disc(self) -> None:
         """writes the data to disc"""
-        directory =
+        directory = INFERENCES_DIR / self.name
         directory.mkdir(parents=True, exist_ok=True)
         get_serializable_spans_dataframe(self.dataframe).to_parquet(
             directory / self._data_file_name,

@@ -230,7 +230,7 @@ class TraceDataset:
             UUID: The id of the trace dataset, which can be used as key to load
                 the dataset from disk using `load`.
         """
-        directory = Path(directory or
+        directory = Path(directory or TRACE_DATASETS_DIR)
         for evals in self.evaluations:
             evals.save(directory)
         path = directory / TRACE_DATASET_PARQUET_FILE_NAME.format(id=self._id)

@@ -280,7 +280,7 @@ class TraceDataset:
         """
         if not isinstance(id, UUID):
             id = UUID(id)
-        path = Path(directory or
+        path = Path(directory or TRACE_DATASETS_DIR) / TRACE_DATASET_PARQUET_FILE_NAME.format(id=id)
         schema = parquet.read_schema(path)
         dataset_id, dataset_name, eval_ids = _parse_schema_metadata(schema)
         if id != dataset_id:

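With the directory defaults above, a TraceDataset can round-trip through TRACE_DATASETS_DIR without an explicit path. A small sketch, assuming save() returns the dataset's UUID and that both save() and load() accept an optional directory, as the hunks suggest; the fixture name is illustrative:

    from phoenix.trace.fixtures import load_example_traces
    from phoenix.trace.trace_dataset import TraceDataset

    ds = load_example_traces("llama_index_rag")  # fixture name is an assumption
    dataset_id = ds.save()                       # falls back to TRACE_DATASETS_DIR
    restored = TraceDataset.load(dataset_id)     # reads from the same default directory
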
phoenix/utilities/json.py
ADDED

@@ -0,0 +1,61 @@
+import dataclasses
+import datetime
+from enum import Enum
+from pathlib import Path
+from typing import Any, Mapping, Sequence, SupportsFloat, Union, get_args, get_origin
+
+import numpy as np
+
+
+def jsonify(obj: Any) -> Any:
+    """
+    Coerce object to be json serializable.
+    """
+    if isinstance(obj, Enum):
+        return jsonify(obj.value)
+    if isinstance(obj, (str, int, float, bool)) or obj is None:
+        return obj
+    if isinstance(obj, np.ndarray):
+        return [jsonify(v) for v in obj]
+    if isinstance(obj, SupportsFloat):
+        return float(obj)
+    if dataclasses.is_dataclass(obj):
+        return {
+            k: jsonify(v)
+            for field in dataclasses.fields(obj)
+            if not (
+                (v := getattr(obj, (k := field.name))) is None
+                and get_origin(field) is Union
+                and type(None) in get_args(field)
+            )
+        }
+    if isinstance(obj, (Sequence, set, frozenset)):
+        return [jsonify(v) for v in obj]
+    if isinstance(obj, Mapping):
+        return {jsonify(k): jsonify(v) for k, v in obj.items()}
+    if isinstance(obj, (datetime.date, datetime.datetime, datetime.time)):
+        return obj.isoformat()
+    if isinstance(obj, datetime.timedelta):
+        return obj.total_seconds()
+    if isinstance(obj, Path):
+        return str(obj)
+    if isinstance(obj, BaseException):
+        return str(obj)
+    if hasattr(obj, "model_dump") and callable(obj.model_dump):
+        # pydantic v2
+        try:
+            assert isinstance(d := obj.model_dump(), dict)
+        except BaseException:
+            pass
+        else:
+            return jsonify(d)
+    if hasattr(obj, "dict") and callable(obj.dict):
+        # pydantic v1
+        try:
+            assert isinstance(d := obj.dict(), dict)
+        except BaseException:
+            pass
+        else:
+            return jsonify(d)
+    cls = obj.__class__
+    return f"<{cls.__module__}.{cls.__name__} object>"

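Based on the new module above, jsonify recursively coerces common Python values (enums, dataclasses, datetimes, numpy arrays, pydantic models) into JSON-serializable data. For example:

    import dataclasses
    import datetime

    import numpy as np

    from phoenix.utilities.json import jsonify

    @dataclasses.dataclass
    class Example:
        score: float
        created_at: datetime.datetime
        embedding: np.ndarray

    record = Example(0.9, datetime.datetime(2024, 6, 1), np.array([1.0, 2.0]))
    print(jsonify(record))
    # {'score': 0.9, 'created_at': '2024-06-01T00:00:00', 'embedding': [1.0, 2.0]}
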
phoenix/utilities/re.py
ADDED

@@ -0,0 +1,50 @@
+from logging import getLogger
+from re import compile, split
+from typing import Dict, List
+from urllib.parse import unquote
+
+_logger = getLogger(__name__)
+
+# Optional whitespace
+_OWS = r"[ \t]*"
+# A key contains printable US-ASCII characters except: SP and "(),/:;<=>?@[\]{}
+_KEY_FORMAT = r"[\x21\x23-\x27\x2a\x2b\x2d\x2e\x30-\x39\x41-\x5a\x5e-\x7a\x7c\x7e]+"
+# A value contains a URL-encoded UTF-8 string. The encoded form can contain any
+# printable US-ASCII characters (0x20-0x7f) other than SP, DEL, and ",;/
+_VALUE_FORMAT = r"[\x21\x23-\x2b\x2d-\x3a\x3c-\x5b\x5d-\x7e]*"
+# A key-value is key=value, with optional whitespace surrounding key and value
+_KEY_VALUE_FORMAT = rf"{_OWS}{_KEY_FORMAT}{_OWS}={_OWS}{_VALUE_FORMAT}{_OWS}"
+
+_HEADER_PATTERN = compile(_KEY_VALUE_FORMAT)
+_DELIMITER_PATTERN = compile(r"[ \t]*,[ \t]*")
+
+
+def parse_env_headers(s: str) -> Dict[str, str]:
+    """
+    Parse ``s``, which is a ``str`` instance containing HTTP headers encoded
+    for use in ENV variables per the W3C Baggage HTTP header format at
+    https://www.w3.org/TR/baggage/#baggage-http-header-format, except that
+    additional semi-colon delimited metadata is not supported.
+
+    src: https://github.com/open-telemetry/opentelemetry-python/blob/2d5cd58f33bd8a16f45f30be620a96699bc14297/opentelemetry-api/src/opentelemetry/util/re.py#L52
+    """
+    headers: Dict[str, str] = {}
+    headers_list: List[str] = split(_DELIMITER_PATTERN, s)
+    for header in headers_list:
+        if not header:  # empty string
+            continue
+        match = _HEADER_PATTERN.fullmatch(header.strip())
+        if not match:
+            _logger.warning(
+                "Header format invalid! Header values in environment variables must be "
+                "URL encoded: %s",
+                header,
+            )
+            continue
+        # value may contain any number of `=`
+        name, value = match.string.split("=", 1)
+        name = unquote(name).strip().lower()
+        value = unquote(value).strip()
+        headers[name] = value
+
+    return headers

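Based on the parser above, client headers supplied through an environment variable (for example the value consumed by get_env_client_headers; the variable name PHOENIX_CLIENT_HEADERS below is an assumption) are comma-separated, URL-encoded key=value pairs:

    from phoenix.utilities.re import parse_env_headers

    # e.g. a value such as PHOENIX_CLIENT_HEADERS might hold
    headers = parse_env_headers("api-key=12345,user-id=alice%40example.com")
    print(headers)  # {'api-key': '12345', 'user-id': 'alice@example.com'}
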
phoenix/version.py
CHANGED

@@ -1 +1 @@
-__version__ = "4.4.3"
+__version__ = "4.4.4rc0"

phoenix/server/api/types/DatasetRole.py
DELETED

@@ -1,23 +0,0 @@
-from enum import Enum
-from typing import Dict, Union
-
-import strawberry
-
-from phoenix.core.model_schema import PRIMARY, REFERENCE
-
-
-@strawberry.enum
-class DatasetRole(Enum):
-    primary = PRIMARY
-    reference = REFERENCE
-
-
-class AncillaryDatasetRole(Enum):
-    corpus = "DatasetRole.CORPUS"
-
-
-STR_TO_DATASET_ROLE: Dict[str, Union[DatasetRole, AncillaryDatasetRole]] = {
-    str(DatasetRole.primary.value): DatasetRole.primary,
-    str(DatasetRole.reference.value): DatasetRole.reference,
-    str(AncillaryDatasetRole.corpus.value): AncillaryDatasetRole.corpus,
-}

File without changes
File without changes
File without changes
File without changes