arize-phoenix 4.5.0__py3-none-any.whl → 4.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of arize-phoenix might be problematic.
- {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/METADATA +16 -8
- {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/RECORD +122 -58
- {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/WHEEL +1 -1
- phoenix/__init__.py +0 -27
- phoenix/config.py +42 -7
- phoenix/core/model.py +25 -25
- phoenix/core/model_schema.py +64 -62
- phoenix/core/model_schema_adapter.py +27 -25
- phoenix/datetime_utils.py +4 -0
- phoenix/db/bulk_inserter.py +54 -14
- phoenix/db/insertion/dataset.py +237 -0
- phoenix/db/insertion/evaluation.py +10 -10
- phoenix/db/insertion/helpers.py +17 -14
- phoenix/db/insertion/span.py +3 -3
- phoenix/db/migrations/types.py +29 -0
- phoenix/db/migrations/versions/10460e46d750_datasets.py +291 -0
- phoenix/db/migrations/versions/cf03bd6bae1d_init.py +2 -28
- phoenix/db/models.py +236 -4
- phoenix/experiments/__init__.py +6 -0
- phoenix/experiments/evaluators/__init__.py +29 -0
- phoenix/experiments/evaluators/base.py +153 -0
- phoenix/experiments/evaluators/code_evaluators.py +99 -0
- phoenix/experiments/evaluators/llm_evaluators.py +244 -0
- phoenix/experiments/evaluators/utils.py +186 -0
- phoenix/experiments/functions.py +757 -0
- phoenix/experiments/tracing.py +85 -0
- phoenix/experiments/types.py +753 -0
- phoenix/experiments/utils.py +24 -0
- phoenix/inferences/fixtures.py +23 -23
- phoenix/inferences/inferences.py +7 -7
- phoenix/inferences/validation.py +1 -1
- phoenix/server/api/context.py +20 -0
- phoenix/server/api/dataloaders/__init__.py +20 -0
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
- phoenix/server/api/dataloaders/dataset_example_revisions.py +100 -0
- phoenix/server/api/dataloaders/dataset_example_spans.py +43 -0
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +85 -0
- phoenix/server/api/dataloaders/experiment_error_rates.py +43 -0
- phoenix/server/api/dataloaders/experiment_run_counts.py +42 -0
- phoenix/server/api/dataloaders/experiment_sequence_number.py +49 -0
- phoenix/server/api/dataloaders/project_by_name.py +31 -0
- phoenix/server/api/dataloaders/span_descendants.py +2 -3
- phoenix/server/api/dataloaders/span_projects.py +33 -0
- phoenix/server/api/dataloaders/trace_row_ids.py +39 -0
- phoenix/server/api/helpers/dataset_helpers.py +179 -0
- phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
- phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
- phoenix/server/api/input_types/ClearProjectInput.py +15 -0
- phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
- phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
- phoenix/server/api/input_types/DatasetSort.py +17 -0
- phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
- phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
- phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
- phoenix/server/api/input_types/DeleteExperimentsInput.py +9 -0
- phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
- phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
- phoenix/server/api/mutations/__init__.py +13 -0
- phoenix/server/api/mutations/auth.py +11 -0
- phoenix/server/api/mutations/dataset_mutations.py +520 -0
- phoenix/server/api/mutations/experiment_mutations.py +65 -0
- phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +17 -14
- phoenix/server/api/mutations/project_mutations.py +47 -0
- phoenix/server/api/openapi/__init__.py +0 -0
- phoenix/server/api/openapi/main.py +6 -0
- phoenix/server/api/openapi/schema.py +16 -0
- phoenix/server/api/queries.py +503 -0
- phoenix/server/api/routers/v1/__init__.py +77 -2
- phoenix/server/api/routers/v1/dataset_examples.py +178 -0
- phoenix/server/api/routers/v1/datasets.py +965 -0
- phoenix/server/api/routers/v1/evaluations.py +8 -13
- phoenix/server/api/routers/v1/experiment_evaluations.py +143 -0
- phoenix/server/api/routers/v1/experiment_runs.py +220 -0
- phoenix/server/api/routers/v1/experiments.py +302 -0
- phoenix/server/api/routers/v1/spans.py +9 -5
- phoenix/server/api/routers/v1/traces.py +1 -4
- phoenix/server/api/schema.py +2 -303
- phoenix/server/api/types/AnnotatorKind.py +10 -0
- phoenix/server/api/types/Cluster.py +19 -19
- phoenix/server/api/types/CreateDatasetPayload.py +8 -0
- phoenix/server/api/types/Dataset.py +282 -63
- phoenix/server/api/types/DatasetExample.py +85 -0
- phoenix/server/api/types/DatasetExampleRevision.py +34 -0
- phoenix/server/api/types/DatasetVersion.py +14 -0
- phoenix/server/api/types/Dimension.py +30 -29
- phoenix/server/api/types/EmbeddingDimension.py +40 -34
- phoenix/server/api/types/Event.py +16 -16
- phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
- phoenix/server/api/types/Experiment.py +147 -0
- phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
- phoenix/server/api/types/ExperimentComparison.py +19 -0
- phoenix/server/api/types/ExperimentRun.py +91 -0
- phoenix/server/api/types/ExperimentRunAnnotation.py +57 -0
- phoenix/server/api/types/Inferences.py +80 -0
- phoenix/server/api/types/InferencesRole.py +23 -0
- phoenix/server/api/types/Model.py +43 -42
- phoenix/server/api/types/Project.py +26 -12
- phoenix/server/api/types/Span.py +79 -2
- phoenix/server/api/types/TimeSeries.py +6 -6
- phoenix/server/api/types/Trace.py +15 -4
- phoenix/server/api/types/UMAPPoints.py +1 -1
- phoenix/server/api/types/node.py +5 -111
- phoenix/server/api/types/pagination.py +10 -52
- phoenix/server/app.py +103 -49
- phoenix/server/main.py +49 -27
- phoenix/server/openapi/docs.py +3 -0
- phoenix/server/static/index.js +2300 -1294
- phoenix/server/templates/index.html +1 -0
- phoenix/services.py +15 -15
- phoenix/session/client.py +581 -22
- phoenix/session/session.py +47 -37
- phoenix/trace/exporter.py +14 -9
- phoenix/trace/fixtures.py +133 -7
- phoenix/trace/schemas.py +1 -2
- phoenix/trace/span_evaluations.py +3 -3
- phoenix/trace/trace_dataset.py +6 -6
- phoenix/utilities/json.py +61 -0
- phoenix/utilities/re.py +50 -0
- phoenix/version.py +1 -1
- phoenix/server/api/types/DatasetRole.py +0 -23
- {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-4.5.0.dist-info → arize_phoenix-4.6.2.dist-info}/licenses/LICENSE +0 -0
- /phoenix/server/api/{helpers.py → helpers/__init__.py} +0 -0
phoenix/session/session.py  CHANGED

@@ -37,10 +37,16 @@ from phoenix.config import (
     get_exported_files,
     get_working_dir,
 )
-from phoenix.core.model_schema_adapter import …
+from phoenix.core.model_schema_adapter import create_model_from_inferences
 from phoenix.inferences.inferences import EMPTY_INFERENCES, Inferences
 from phoenix.pointcloud.umap_parameters import get_umap_parameters
-from phoenix.server.app import …
+from phoenix.server.app import (
+    SessionFactory,
+    _db,
+    create_app,
+    create_engine_and_run_migrations,
+    instrument_engine_if_enabled,
+)
 from phoenix.server.thread_server import ThreadServer
 from phoenix.services import AppService
 from phoenix.session.client import Client

@@ -108,9 +114,9 @@ class Session(TraceDataExtractor, ABC):
     def __init__(
         self,
         database_url: str,
-        …
-        …
-        …
+        primary_inferences: Inferences,
+        reference_inferences: Optional[Inferences] = None,
+        corpus_inferences: Optional[Inferences] = None,
         trace_dataset: Optional[TraceDataset] = None,
         default_umap_parameters: Optional[Mapping[str, Any]] = None,
         host: Optional[str] = None,

@@ -118,9 +124,9 @@ class Session(TraceDataExtractor, ABC):
         notebook_env: Optional[NotebookEnvironment] = None,
     ):
         self._database_url = database_url
-        self.…
-        self.…
-        self.…
+        self.primary_inferences = primary_inferences
+        self.reference_inferences = reference_inferences
+        self.corpus_inferences = corpus_inferences
         self.trace_dataset = trace_dataset
         self.umap_parameters = get_umap_parameters(default_umap_parameters)
         self.host = host or get_env_host()

@@ -264,9 +270,9 @@ class ProcessSession(Session):
     def __init__(
         self,
         database_url: str,
-        …
-        …
-        …
+        primary_inferences: Inferences,
+        reference_inferences: Optional[Inferences] = None,
+        corpus_inferences: Optional[Inferences] = None,
         trace_dataset: Optional[TraceDataset] = None,
         default_umap_parameters: Optional[Mapping[str, Any]] = None,
         host: Optional[str] = None,

@@ -276,20 +282,20 @@ class ProcessSession(Session):
     ) -> None:
         super().__init__(
             database_url=database_url,
-            …
-            …
-            …
+            primary_inferences=primary_inferences,
+            reference_inferences=reference_inferences,
+            corpus_inferences=corpus_inferences,
             trace_dataset=trace_dataset,
             default_umap_parameters=default_umap_parameters,
             host=host,
             port=port,
             notebook_env=notebook_env,
         )
-        …
-        if isinstance(…
-            …
-        if isinstance(…
-            …
+        primary_inferences.to_disc()
+        if isinstance(reference_inferences, Inferences):
+            reference_inferences.to_disc()
+        if isinstance(corpus_inferences, Inferences):
+            corpus_inferences.to_disc()
         if isinstance(trace_dataset, TraceDataset):
             trace_dataset.to_disc()
         umap_params_str = (

@@ -304,13 +310,13 @@ class ProcessSession(Session):
             host=self.host,
             port=self.port,
             root_path=self.root_path,
-            …
+            primary_inferences_name=self.primary_inferences.name,
             umap_params=umap_params_str,
-            …
-                self.…
+            reference_inferences_name=(
+                self.reference_inferences.name if self.reference_inferences is not None else None
             ),
-            …
-                self.…
+            corpus_inferences_name=(
+                self.corpus_inferences.name if self.corpus_inferences is not None else None
             ),
             trace_dataset_name=(
                 self.trace_dataset.name if self.trace_dataset is not None else None

@@ -330,9 +336,9 @@ class ThreadSession(Session):
     def __init__(
         self,
        database_url: str,
-        …
-        …
-        …
+        primary_inferences: Inferences,
+        reference_inferences: Optional[Inferences] = None,
+        corpus_inferences: Optional[Inferences] = None,
         trace_dataset: Optional[TraceDataset] = None,
         default_umap_parameters: Optional[Mapping[str, Any]] = None,
         host: Optional[str] = None,

@@ -342,29 +348,32 @@ class ThreadSession(Session):
     ):
         super().__init__(
             database_url=database_url,
-            …
-            …
-            …
+            primary_inferences=primary_inferences,
+            reference_inferences=reference_inferences,
+            corpus_inferences=corpus_inferences,
             trace_dataset=trace_dataset,
             default_umap_parameters=default_umap_parameters,
             host=host,
             port=port,
             notebook_env=notebook_env,
         )
-        self.model = …
-        …
-        …
+        self.model = create_model_from_inferences(
+            primary_inferences,
+            reference_inferences,
         )
         self.corpus = (
-            …
-                …
+            create_model_from_inferences(
+                corpus_inferences,
             )
-            if …
+            if corpus_inferences is not None
             else None
         )
         # Initialize an app service that keeps the server running
+        engine = create_engine_and_run_migrations(database_url)
+        instrumentation_cleanups = instrument_engine_if_enabled(engine)
+        factory = SessionFactory(session_factory=_db(engine), dialect=engine.dialect.name)
         self.app = create_app(
-            …
+            db=factory,
             export_path=self.export_path,
             model=self.model,
            corpus=self.corpus,

@@ -375,6 +384,7 @@ class ThreadSession(Session):
                 if (trace_dataset and (initial_evaluations := trace_dataset.evaluations))
                 else None
             ),
+            clean_up_callbacks=instrumentation_cleanups,
         )
         self.server = ThreadServer(
             app=self.app,
phoenix/trace/exporter.py  CHANGED

@@ -7,13 +7,17 @@ from types import MethodType
 from typing import Any, Optional
 from urllib.parse import urljoin
 
-import …
+import httpx
 from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
-from requests import Session
 from typing_extensions import TypeAlias, assert_never
 
 import phoenix.trace.v1 as pb
-from phoenix.config import …
+from phoenix.config import (
+    get_env_client_headers,
+    get_env_collector_endpoint,
+    get_env_host,
+    get_env_port,
+)
 
 logger = logging.getLogger(__name__)
 logger.addHandler(logging.NullHandler())

@@ -75,9 +79,10 @@ class HttpExporter:
         )
         self._base_url = base_url if base_url.endswith("/") else base_url + "/"
         _warn_if_phoenix_is_not_running(self._base_url)
-        …
-        …
-        self.…
+        headers = get_env_client_headers()
+        self._client = httpx.Client(headers=headers)
+        weakref.finalize(self, self._client.close)
+        self._client.headers.update(
             {
                 "content-type": "application/x-protobuf",
                 "content-encoding": "gzip",

@@ -110,9 +115,9 @@ class HttpExporter:
 
     def _send(self, message: Message) -> None:
         serialized = message.SerializeToString()
-        …
+        content = gzip.compress(serialized)
         try:
-            self.…
+            self._client.post(self._url(message), content=content).raise_for_status()
         except Exception as e:
             logger.exception(e)
 

@@ -125,7 +130,7 @@ class HttpExporter:
 
 def _warn_if_phoenix_is_not_running(base_url: str) -> None:
     try:
-        …
+        httpx.get(urljoin(base_url, "arize_phoenix_version")).raise_for_status()
     except Exception:
         logger.warning(
             f"Arize Phoenix is not running on {base_url}. Launch Phoenix "
phoenix/trace/fixtures.py  CHANGED

@@ -1,18 +1,29 @@
+import logging
+import shutil
 from binascii import hexlify
 from dataclasses import dataclass, field, replace
 from datetime import datetime, timezone
+from io import StringIO
 from random import getrandbits
-from …
+from tempfile import NamedTemporaryFile
+from time import sleep, time
+from typing import Dict, Iterable, Iterator, List, NamedTuple, Optional, Sequence, Tuple, cast
 from urllib import request
+from urllib.parse import urljoin
 
+import httpx
 import pandas as pd
 from google.protobuf.wrappers_pb2 import DoubleValue, StringValue
+from httpx import ConnectError, HTTPStatusError
 
 import phoenix.trace.v1 as pb
+from phoenix import Client
 from phoenix.trace.schemas import Span
 from phoenix.trace.trace_dataset import TraceDataset
 from phoenix.trace.utils import json_lines_to_df
 
+logger = logging.getLogger(__name__)
+
 
 class EvaluationResultSchema(NamedTuple):
     label: Optional[str] = "label"

@@ -32,12 +43,44 @@ class DocumentEvaluationFixture(EvaluationFixture):
     document_position: str = "document_position"
 
 
+@dataclass(frozen=True)
+class DatasetFixture:
+    file_name: str
+    name: str
+    input_keys: Sequence[str]
+    output_keys: Sequence[str]
+    metadata_keys: Sequence[str] = ()
+    description: Optional[str] = field(default=None)
+    _df: Optional[pd.DataFrame] = field(default=None, init=False, repr=False)
+    _csv: Optional[str] = field(default=None, init=False, repr=False)
+
+    def load(self) -> "DatasetFixture":
+        if self._df is None:
+            df = pd.read_csv(_url(self.file_name))
+            object.__setattr__(self, "_df", df)
+        return self
+
+    @property
+    def dataframe(self) -> pd.DataFrame:
+        self.load()
+        return cast(pd.DataFrame, self._df).copy(deep=False)
+
+    @property
+    def csv(self) -> StringIO:
+        if self._csv is None:
+            with StringIO() as buffer:
+                self.dataframe.to_csv(buffer, index=False)
+                object.__setattr__(self, "_csv", buffer.getvalue())
+        return StringIO(self._csv)
+
+
 @dataclass(frozen=True)
 class TracesFixture:
     name: str
     description: str
     file_name: str
     evaluation_fixtures: Iterable[EvaluationFixture] = ()
+    dataset_fixtures: Iterable[DatasetFixture] = ()
 
 
 llama_index_rag_fixture = TracesFixture(

@@ -58,6 +101,36 @@ llama_index_rag_fixture = TracesFixture(
             file_name="llama_index_rag_v8.retrieved_documents_eval.parquet",
         ),
     ),
+    dataset_fixtures=(
+        DatasetFixture(
+            file_name="hybridial_samples.csv.gz",
+            input_keys=("messages", "ctxs"),
+            output_keys=("answers",),
+            name="ChatRAG-Bench: Hybrid Dialogue (samples)",
+            description="https://huggingface.co/datasets/nvidia/ChatRAG-Bench/viewer/hybridial",
+        ),
+        DatasetFixture(
+            file_name="sqa_samples.csv.gz",
+            input_keys=("messages", "ctxs"),
+            output_keys=("answers",),
+            name="ChatRAG-Bench: SQA (samples)",
+            description="https://huggingface.co/datasets/nvidia/ChatRAG-Bench/viewer/sqa",
+        ),
+        DatasetFixture(
+            file_name="doqa_cooking_samples.csv.gz",
+            input_keys=("messages", "ctxs"),
+            output_keys=("answers",),
+            name="ChatRAG-Bench: DoQA Cooking (samples)",
+            description="https://huggingface.co/datasets/nvidia/ChatRAG-Bench/viewer/doqa_cooking",
+        ),
+        DatasetFixture(
+            file_name="synthetic_convqa_samples.csv.gz",
+            input_keys=("messages", "document"),
+            output_keys=("answers",),
+            name="ChatQA-Train: Synthetic ConvQA (samples)",
+            description="https://huggingface.co/datasets/nvidia/ChatQA-Training-Data/viewer/synthetic_convqa",
+        ),
+    ),
 )
 
 llama_index_calculator_agent_fixture = TracesFixture(

@@ -138,16 +211,69 @@ def download_traces_fixture(
     return cast(List[str], f.readlines())
 
 
-def load_example_traces(…
+def load_example_traces(fixture_name: str) -> TraceDataset:
     """
     Loads a trace dataframe by name.
     """
-    fixture = get_trace_fixture_by_name(…
+    fixture = get_trace_fixture_by_name(fixture_name)
     return TraceDataset(json_lines_to_df(download_traces_fixture(fixture)))
 
 
-def …
-    fixture…
+def get_dataset_fixtures(fixture_name: str) -> Iterable[DatasetFixture]:
+    return (fixture.load() for fixture in get_trace_fixture_by_name(fixture_name).dataset_fixtures)
+
+
+def send_dataset_fixtures(
+    endpoint: str,
+    fixtures: Iterable[DatasetFixture],
+) -> None:
+    expiration = time() + 5
+    while time() < expiration:
+        try:
+            url = urljoin(endpoint, "/healthz")
+            httpx.get(url=url).raise_for_status()
+        except ConnectError:
+            sleep(0.1)
+            continue
+        except Exception as e:
+            print(str(e))
+            raise
+        break
+    client = Client(endpoint=endpoint)
+    for i, fixture in enumerate(fixtures):
+        try:
+            if i % 2:
+                client.upload_dataset(
+                    dataset_name=fixture.name,
+                    dataframe=fixture.dataframe,
+                    input_keys=fixture.input_keys,
+                    output_keys=fixture.output_keys,
+                    metadata_keys=fixture.metadata_keys,
+                    dataset_description=fixture.description,
+                )
+            else:
+                with NamedTemporaryFile() as tf:
+                    with open(tf.name, "w") as f:
+                        shutil.copyfileobj(fixture.csv, f)
+                        f.flush()
+                    client.upload_dataset(
+                        dataset_name=fixture.name,
+                        csv_file_path=tf.name,
+                        input_keys=fixture.input_keys,
+                        output_keys=fixture.output_keys,
+                        metadata_keys=fixture.metadata_keys,
+                        dataset_description=fixture.description,
+                    )
+        except HTTPStatusError as e:
+            print(e.response.content.decode())
+            pass
+        else:
+            name, df = fixture.name, fixture.dataframe
+            print(f"Dataset sent: {name=}, {len(df)=}")
+
+
+def get_evals_from_fixture(fixture_name: str) -> Iterator[pb.Evaluation]:
+    fixture = get_trace_fixture_by_name(fixture_name)
     for eval_fixture in fixture.evaluation_fixtures:
         yield from _read_eval_fixture(eval_fixture)
 

@@ -195,8 +321,8 @@ def _read_eval_fixture(eval_fixture: EvaluationFixture) -> Iterator[pb.Evaluation]:
 def _url(
     file_name: str,
     host: Optional[str] = "https://storage.googleapis.com/",
-    bucket: Optional[str] = "arize-assets",
-    prefix: Optional[str] = "…
+    bucket: Optional[str] = "arize-phoenix-assets",
+    prefix: Optional[str] = "traces/",
 ) -> str:
     return f"{host}{bucket}/{prefix}{file_name}"
 
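send_dataset_fixtures above alternates between the client's dataframe and CSV upload paths. A hedged sketch of the dataframe route, using only keyword arguments that appear in the diff (the endpoint and data are placeholders and assume a locally running Phoenix server):

```python
import pandas as pd

from phoenix import Client

# Placeholder data; input_keys/output_keys name the dataframe columns to use.
df = pd.DataFrame(
    {
        "question": ["What does Phoenix trace?"],
        "answer": ["Calls made by an LLM application."],
    }
)

client = Client(endpoint="http://127.0.0.1:6006")
client.upload_dataset(
    dataset_name="demo-questions",
    dataframe=df,
    input_keys=("question",),
    output_keys=("answer",),
)
```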
phoenix/trace/schemas.py  CHANGED

@@ -29,8 +29,6 @@ class SpanKind(Enum):
     """
     SpanKind is loosely inspired by OpenTelemetry's SpanKind
     It captures the type of work that a Span encapsulates.
-
-    NB: this is actively under construction
     """
 
     TOOL = "TOOL"

@@ -40,6 +38,7 @@ class SpanKind(Enum):
     EMBEDDING = "EMBEDDING"
     AGENT = "AGENT"
     RERANKER = "RERANKER"
+    EVALUATOR = "EVALUATOR"
     UNKNOWN = "UNKNOWN"
 
     def __str__(self) -> str:
phoenix/trace/span_evaluations.py  CHANGED

@@ -11,7 +11,7 @@ import pandas as pd
 from pandas.api.types import is_integer_dtype, is_numeric_dtype, is_string_dtype
 from pyarrow import RecordBatchStreamReader, Schema, Table, parquet
 
-from phoenix.config import …
+from phoenix.config import TRACE_DATASETS_DIR
 from phoenix.exceptions import PhoenixEvaluationNameIsMissing
 from phoenix.trace.errors import InvalidParquetMetadataError
 

@@ -201,7 +201,7 @@ class Evaluations(NeedsNamedIndex, NeedsResultColumns, ABC):
             UUID: The ID of the evaluations, which can be used as a key to load
                 the evaluations from disk using `load`.
         """
-        directory = Path(directory) if directory else …
+        directory = Path(directory) if directory else TRACE_DATASETS_DIR
         path = directory / EVAL_PARQUET_FILE_NAME.format(id=self.id)
         table = self.to_pyarrow_table()
         parquet.write_table(table, path)

@@ -229,7 +229,7 @@ class Evaluations(NeedsNamedIndex, NeedsResultColumns, ABC):
         """
         if not isinstance(id, UUID):
             id = UUID(id)
-        path = Path(directory or …
+        path = Path(directory or TRACE_DATASETS_DIR) / EVAL_PARQUET_FILE_NAME.format(id=id)
         schema = parquet.read_schema(path)
         eval_id, eval_name, evaluations_cls = _parse_schema_metadata(schema)
         if id != eval_id:
phoenix/trace/trace_dataset.py  CHANGED

@@ -14,7 +14,7 @@ from openinference.semconv.trace import (
 from pandas import DataFrame, read_parquet
 from pyarrow import Schema, Table, parquet
 
-from phoenix.config import …
+from phoenix.config import GENERATED_INFERENCES_NAME_PREFIX, INFERENCES_DIR, TRACE_DATASETS_DIR
 from phoenix.datetime_utils import normalize_timestamps
 from phoenix.trace.attributes import unflatten
 from phoenix.trace.errors import InvalidParquetMetadataError

@@ -138,7 +138,7 @@ class TraceDataset:
         self._id = uuid4()
         self.dataframe = normalize_dataframe(dataframe)
         # TODO: This is not used in any meaningful way. Should remove
-        self.name = name or f"{…
+        self.name = name or f"{GENERATED_INFERENCES_NAME_PREFIX}{str(self._id)}"
         self.evaluations = list(evaluations)
 
     @classmethod

@@ -201,13 +201,13 @@ class TraceDataset:
     @classmethod
     def from_name(cls, name: str) -> "TraceDataset":
         """Retrieves a dataset by name from the file system"""
-        directory = …
+        directory = INFERENCES_DIR / name
         df = read_parquet(directory / cls._data_file_name)
         return cls(df, name)
 
     def to_disc(self) -> None:
         """writes the data to disc"""
-        directory = …
+        directory = INFERENCES_DIR / self.name
         directory.mkdir(parents=True, exist_ok=True)
         get_serializable_spans_dataframe(self.dataframe).to_parquet(
             directory / self._data_file_name,

@@ -230,7 +230,7 @@ class TraceDataset:
             UUID: The id of the trace dataset, which can be used as key to load
                 the dataset from disk using `load`.
         """
-        directory = Path(directory or …
+        directory = Path(directory or TRACE_DATASETS_DIR)
         for evals in self.evaluations:
             evals.save(directory)
         path = directory / TRACE_DATASET_PARQUET_FILE_NAME.format(id=self._id)

@@ -280,7 +280,7 @@ class TraceDataset:
         """
         if not isinstance(id, UUID):
             id = UUID(id)
-        path = Path(directory or …
+        path = Path(directory or TRACE_DATASETS_DIR) / TRACE_DATASET_PARQUET_FILE_NAME.format(id=id)
         schema = parquet.read_schema(path)
         dataset_id, dataset_name, eval_ids = _parse_schema_metadata(schema)
         if id != dataset_id:
phoenix/utilities/json.py  ADDED

@@ -0,0 +1,61 @@
+import dataclasses
+import datetime
+from enum import Enum
+from pathlib import Path
+from typing import Any, Mapping, Sequence, Union, get_args, get_origin
+
+import numpy as np
+
+
+def jsonify(obj: Any) -> Any:
+    """
+    Coerce object to be json serializable.
+    """
+    if isinstance(obj, Enum):
+        return jsonify(obj.value)
+    if isinstance(obj, (str, int, float, bool)) or obj is None:
+        return obj
+    if isinstance(obj, (list, set, frozenset, Sequence)):
+        return [jsonify(v) for v in obj]
+    if isinstance(obj, (dict, Mapping)):
+        return {jsonify(k): jsonify(v) for k, v in obj.items()}
+    if dataclasses.is_dataclass(obj):
+        return {
+            k: jsonify(v)
+            for field in dataclasses.fields(obj)
+            if not (
+                (v := getattr(obj, (k := field.name))) is None
+                and get_origin(field) is Union
+                and type(None) in get_args(field)
+            )
+        }
+    if isinstance(obj, (datetime.date, datetime.datetime, datetime.time)):
+        return obj.isoformat()
+    if isinstance(obj, datetime.timedelta):
+        return obj.total_seconds()
+    if isinstance(obj, Path):
+        return str(obj)
+    if isinstance(obj, BaseException):
+        return str(obj)
+    if isinstance(obj, np.ndarray):
+        return [jsonify(v) for v in obj]
+    if hasattr(obj, "__float__"):
+        return float(obj)
+    if hasattr(obj, "model_dump") and callable(obj.model_dump):
+        # pydantic v2
+        try:
+            assert isinstance(d := obj.model_dump(), dict)
+        except BaseException:
+            pass
+        else:
+            return jsonify(d)
+    if hasattr(obj, "dict") and callable(obj.dict):
+        # pydantic v1
+        try:
+            assert isinstance(d := obj.dict(), dict)
+        except BaseException:
+            pass
+        else:
+            return jsonify(d)
+    cls = obj.__class__
+    return f"<{cls.__module__}.{cls.__name__} object>"
phoenix/utilities/re.py  ADDED

@@ -0,0 +1,50 @@
+from logging import getLogger
+from re import compile, split
+from typing import Dict, List
+from urllib.parse import unquote
+
+_logger = getLogger(__name__)
+
+# Optional whitespace
+_OWS = r"[ \t]*"
+# A key contains printable US-ASCII characters except: SP and "(),/:;<=>?@[\]{}
+_KEY_FORMAT = r"[\x21\x23-\x27\x2a\x2b\x2d\x2e\x30-\x39\x41-\x5a\x5e-\x7a\x7c\x7e]+"
+# A value contains a URL-encoded UTF-8 string. The encoded form can contain any
+# printable US-ASCII characters (0x20-0x7f) other than SP, DEL, and ",;/
+_VALUE_FORMAT = r"[\x21\x23-\x2b\x2d-\x3a\x3c-\x5b\x5d-\x7e]*"
+# A key-value is key=value, with optional whitespace surrounding key and value
+_KEY_VALUE_FORMAT = rf"{_OWS}{_KEY_FORMAT}{_OWS}={_OWS}{_VALUE_FORMAT}{_OWS}"
+
+_HEADER_PATTERN = compile(_KEY_VALUE_FORMAT)
+_DELIMITER_PATTERN = compile(r"[ \t]*,[ \t]*")
+
+
+def parse_env_headers(s: str) -> Dict[str, str]:
+    """
+    Parse ``s``, which is a ``str`` instance containing HTTP headers encoded
+    for use in ENV variables per the W3C Baggage HTTP header format at
+    https://www.w3.org/TR/baggage/#baggage-http-header-format, except that
+    additional semi-colon delimited metadata is not supported.
+
+    src: https://github.com/open-telemetry/opentelemetry-python/blob/2d5cd58f33bd8a16f45f30be620a96699bc14297/opentelemetry-api/src/opentelemetry/util/re.py#L52
+    """
+    headers: Dict[str, str] = {}
+    headers_list: List[str] = split(_DELIMITER_PATTERN, s)
+    for header in headers_list:
+        if not header:  # empty string
+            continue
+        match = _HEADER_PATTERN.fullmatch(header.strip())
+        if not match:
+            _logger.warning(
+                "Header format invalid! Header values in environment variables must be "
+                "URL encoded: %s",
+                header,
+            )
+            continue
+        # value may contain any number of `=`
+        name, value = match.string.split("=", 1)
+        name = unquote(name).strip().lower()
+        value = unquote(value).strip()
+        headers[name] = value
+
+    return headers
phoenix/version.py  CHANGED

@@ -1 +1 @@
-__version__ = "4.5.0"
+__version__ = "4.6.2"
phoenix/server/api/types/DatasetRole.py  DELETED

@@ -1,23 +0,0 @@
-from enum import Enum
-from typing import Dict, Union
-
-import strawberry
-
-from phoenix.core.model_schema import PRIMARY, REFERENCE
-
-
-@strawberry.enum
-class DatasetRole(Enum):
-    primary = PRIMARY
-    reference = REFERENCE
-
-
-class AncillaryDatasetRole(Enum):
-    corpus = "DatasetRole.CORPUS"
-
-
-STR_TO_DATASET_ROLE: Dict[str, Union[DatasetRole, AncillaryDatasetRole]] = {
-    str(DatasetRole.primary.value): DatasetRole.primary,
-    str(DatasetRole.reference.value): DatasetRole.reference,
-    str(AncillaryDatasetRole.corpus.value): AncillaryDatasetRole.corpus,
-}