arize-phoenix 4.4.4rc5__py3-none-any.whl → 4.5.0__py3-none-any.whl

This diff shows the contents of package versions that have been publicly released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.

Potentially problematic release.

Files changed (118)
  1. {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.5.0.dist-info}/METADATA +5 -5
  2. {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.5.0.dist-info}/RECORD +56 -117
  3. {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.5.0.dist-info}/WHEEL +1 -1
  4. phoenix/__init__.py +27 -0
  5. phoenix/config.py +7 -21
  6. phoenix/core/model.py +25 -25
  7. phoenix/core/model_schema.py +62 -64
  8. phoenix/core/model_schema_adapter.py +25 -27
  9. phoenix/db/bulk_inserter.py +14 -54
  10. phoenix/db/insertion/evaluation.py +6 -6
  11. phoenix/db/insertion/helpers.py +2 -13
  12. phoenix/db/migrations/versions/cf03bd6bae1d_init.py +28 -2
  13. phoenix/db/models.py +4 -236
  14. phoenix/inferences/fixtures.py +23 -23
  15. phoenix/inferences/inferences.py +7 -7
  16. phoenix/inferences/validation.py +1 -1
  17. phoenix/server/api/context.py +0 -18
  18. phoenix/server/api/dataloaders/__init__.py +0 -18
  19. phoenix/server/api/dataloaders/span_descendants.py +3 -2
  20. phoenix/server/api/routers/v1/__init__.py +2 -77
  21. phoenix/server/api/routers/v1/evaluations.py +2 -4
  22. phoenix/server/api/routers/v1/spans.py +1 -3
  23. phoenix/server/api/routers/v1/traces.py +4 -1
  24. phoenix/server/api/schema.py +303 -2
  25. phoenix/server/api/types/Cluster.py +19 -19
  26. phoenix/server/api/types/Dataset.py +63 -282
  27. phoenix/server/api/types/DatasetRole.py +23 -0
  28. phoenix/server/api/types/Dimension.py +29 -30
  29. phoenix/server/api/types/EmbeddingDimension.py +34 -40
  30. phoenix/server/api/types/Event.py +16 -16
  31. phoenix/server/api/{mutations/export_events_mutations.py → types/ExportEventsMutation.py} +14 -17
  32. phoenix/server/api/types/Model.py +42 -43
  33. phoenix/server/api/types/Project.py +12 -26
  34. phoenix/server/api/types/Span.py +2 -79
  35. phoenix/server/api/types/TimeSeries.py +6 -6
  36. phoenix/server/api/types/Trace.py +4 -15
  37. phoenix/server/api/types/UMAPPoints.py +1 -1
  38. phoenix/server/api/types/node.py +111 -5
  39. phoenix/server/api/types/pagination.py +52 -10
  40. phoenix/server/app.py +49 -101
  41. phoenix/server/main.py +27 -49
  42. phoenix/server/openapi/docs.py +0 -3
  43. phoenix/server/static/index.js +2595 -3523
  44. phoenix/server/templates/index.html +0 -1
  45. phoenix/services.py +15 -15
  46. phoenix/session/client.py +21 -438
  47. phoenix/session/session.py +37 -47
  48. phoenix/trace/exporter.py +9 -14
  49. phoenix/trace/fixtures.py +7 -133
  50. phoenix/trace/schemas.py +2 -1
  51. phoenix/trace/span_evaluations.py +3 -3
  52. phoenix/trace/trace_dataset.py +6 -6
  53. phoenix/version.py +1 -1
  54. phoenix/datasets/__init__.py +0 -0
  55. phoenix/datasets/evaluators/__init__.py +0 -18
  56. phoenix/datasets/evaluators/code_evaluators.py +0 -99
  57. phoenix/datasets/evaluators/llm_evaluators.py +0 -244
  58. phoenix/datasets/evaluators/utils.py +0 -292
  59. phoenix/datasets/experiments.py +0 -550
  60. phoenix/datasets/tracing.py +0 -85
  61. phoenix/datasets/types.py +0 -178
  62. phoenix/db/insertion/dataset.py +0 -237
  63. phoenix/db/migrations/types.py +0 -29
  64. phoenix/db/migrations/versions/10460e46d750_datasets.py +0 -291
  65. phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -100
  66. phoenix/server/api/dataloaders/dataset_example_spans.py +0 -43
  67. phoenix/server/api/dataloaders/experiment_annotation_summaries.py +0 -85
  68. phoenix/server/api/dataloaders/experiment_error_rates.py +0 -43
  69. phoenix/server/api/dataloaders/experiment_run_counts.py +0 -42
  70. phoenix/server/api/dataloaders/experiment_sequence_number.py +0 -49
  71. phoenix/server/api/dataloaders/project_by_name.py +0 -31
  72. phoenix/server/api/dataloaders/span_projects.py +0 -33
  73. phoenix/server/api/dataloaders/trace_row_ids.py +0 -39
  74. phoenix/server/api/helpers/dataset_helpers.py +0 -179
  75. phoenix/server/api/input_types/AddExamplesToDatasetInput.py +0 -16
  76. phoenix/server/api/input_types/AddSpansToDatasetInput.py +0 -14
  77. phoenix/server/api/input_types/ClearProjectInput.py +0 -15
  78. phoenix/server/api/input_types/CreateDatasetInput.py +0 -12
  79. phoenix/server/api/input_types/DatasetExampleInput.py +0 -14
  80. phoenix/server/api/input_types/DatasetSort.py +0 -17
  81. phoenix/server/api/input_types/DatasetVersionSort.py +0 -16
  82. phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +0 -13
  83. phoenix/server/api/input_types/DeleteDatasetInput.py +0 -7
  84. phoenix/server/api/input_types/DeleteExperimentsInput.py +0 -9
  85. phoenix/server/api/input_types/PatchDatasetExamplesInput.py +0 -35
  86. phoenix/server/api/input_types/PatchDatasetInput.py +0 -14
  87. phoenix/server/api/mutations/__init__.py +0 -13
  88. phoenix/server/api/mutations/auth.py +0 -11
  89. phoenix/server/api/mutations/dataset_mutations.py +0 -520
  90. phoenix/server/api/mutations/experiment_mutations.py +0 -65
  91. phoenix/server/api/mutations/project_mutations.py +0 -47
  92. phoenix/server/api/openapi/__init__.py +0 -0
  93. phoenix/server/api/openapi/main.py +0 -6
  94. phoenix/server/api/openapi/schema.py +0 -16
  95. phoenix/server/api/queries.py +0 -503
  96. phoenix/server/api/routers/v1/dataset_examples.py +0 -178
  97. phoenix/server/api/routers/v1/datasets.py +0 -965
  98. phoenix/server/api/routers/v1/experiment_evaluations.py +0 -66
  99. phoenix/server/api/routers/v1/experiment_runs.py +0 -108
  100. phoenix/server/api/routers/v1/experiments.py +0 -174
  101. phoenix/server/api/types/AnnotatorKind.py +0 -10
  102. phoenix/server/api/types/CreateDatasetPayload.py +0 -8
  103. phoenix/server/api/types/DatasetExample.py +0 -85
  104. phoenix/server/api/types/DatasetExampleRevision.py +0 -34
  105. phoenix/server/api/types/DatasetVersion.py +0 -14
  106. phoenix/server/api/types/ExampleRevisionInterface.py +0 -14
  107. phoenix/server/api/types/Experiment.py +0 -140
  108. phoenix/server/api/types/ExperimentAnnotationSummary.py +0 -13
  109. phoenix/server/api/types/ExperimentComparison.py +0 -19
  110. phoenix/server/api/types/ExperimentRun.py +0 -91
  111. phoenix/server/api/types/ExperimentRunAnnotation.py +0 -57
  112. phoenix/server/api/types/Inferences.py +0 -80
  113. phoenix/server/api/types/InferencesRole.py +0 -23
  114. phoenix/utilities/json.py +0 -61
  115. phoenix/utilities/re.py +0 -50
  116. {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.5.0.dist-info}/licenses/IP_NOTICE +0 -0
  117. {arize_phoenix-4.4.4rc5.dist-info → arize_phoenix-4.5.0.dist-info}/licenses/LICENSE +0 -0
  118. /phoenix/server/api/{helpers/__init__.py → helpers.py} +0 -0
phoenix/session/session.py CHANGED
@@ -37,16 +37,10 @@ from phoenix.config import (
     get_exported_files,
     get_working_dir,
 )
-from phoenix.core.model_schema_adapter import create_model_from_inferences
+from phoenix.core.model_schema_adapter import create_model_from_datasets
 from phoenix.inferences.inferences import EMPTY_INFERENCES, Inferences
 from phoenix.pointcloud.umap_parameters import get_umap_parameters
-from phoenix.server.app import (
-    SessionFactory,
-    _db,
-    create_app,
-    create_engine_and_run_migrations,
-    instrument_engine_if_enabled,
-)
+from phoenix.server.app import create_app
 from phoenix.server.thread_server import ThreadServer
 from phoenix.services import AppService
 from phoenix.session.client import Client
@@ -114,9 +108,9 @@ class Session(TraceDataExtractor, ABC):
     def __init__(
         self,
         database_url: str,
-        primary_inferences: Inferences,
-        reference_inferences: Optional[Inferences] = None,
-        corpus_inferences: Optional[Inferences] = None,
+        primary_dataset: Inferences,
+        reference_dataset: Optional[Inferences] = None,
+        corpus_dataset: Optional[Inferences] = None,
         trace_dataset: Optional[TraceDataset] = None,
         default_umap_parameters: Optional[Mapping[str, Any]] = None,
         host: Optional[str] = None,
@@ -124,9 +118,9 @@ class Session(TraceDataExtractor, ABC):
         notebook_env: Optional[NotebookEnvironment] = None,
     ):
         self._database_url = database_url
-        self.primary_inferences = primary_inferences
-        self.reference_inferences = reference_inferences
-        self.corpus_inferences = corpus_inferences
+        self.primary_dataset = primary_dataset
+        self.reference_dataset = reference_dataset
+        self.corpus_dataset = corpus_dataset
         self.trace_dataset = trace_dataset
         self.umap_parameters = get_umap_parameters(default_umap_parameters)
         self.host = host or get_env_host()
@@ -270,9 +264,9 @@ class ProcessSession(Session):
     def __init__(
         self,
         database_url: str,
-        primary_inferences: Inferences,
-        reference_inferences: Optional[Inferences] = None,
-        corpus_inferences: Optional[Inferences] = None,
+        primary_dataset: Inferences,
+        reference_dataset: Optional[Inferences] = None,
+        corpus_dataset: Optional[Inferences] = None,
         trace_dataset: Optional[TraceDataset] = None,
         default_umap_parameters: Optional[Mapping[str, Any]] = None,
         host: Optional[str] = None,
@@ -282,20 +276,20 @@ class ProcessSession(Session):
     ) -> None:
         super().__init__(
             database_url=database_url,
-            primary_inferences=primary_inferences,
-            reference_inferences=reference_inferences,
-            corpus_inferences=corpus_inferences,
+            primary_dataset=primary_dataset,
+            reference_dataset=reference_dataset,
+            corpus_dataset=corpus_dataset,
             trace_dataset=trace_dataset,
             default_umap_parameters=default_umap_parameters,
             host=host,
             port=port,
             notebook_env=notebook_env,
         )
-        primary_inferences.to_disc()
-        if isinstance(reference_inferences, Inferences):
-            reference_inferences.to_disc()
-        if isinstance(corpus_inferences, Inferences):
-            corpus_inferences.to_disc()
+        primary_dataset.to_disc()
+        if isinstance(reference_dataset, Inferences):
+            reference_dataset.to_disc()
+        if isinstance(corpus_dataset, Inferences):
+            corpus_dataset.to_disc()
         if isinstance(trace_dataset, TraceDataset):
             trace_dataset.to_disc()
         umap_params_str = (
@@ -310,13 +304,13 @@ class ProcessSession(Session):
             host=self.host,
             port=self.port,
             root_path=self.root_path,
-            primary_inferences_name=self.primary_inferences.name,
+            primary_dataset_name=self.primary_dataset.name,
             umap_params=umap_params_str,
-            reference_inferences_name=(
-                self.reference_inferences.name if self.reference_inferences is not None else None
+            reference_dataset_name=(
+                self.reference_dataset.name if self.reference_dataset is not None else None
             ),
-            corpus_inferences_name=(
-                self.corpus_inferences.name if self.corpus_inferences is not None else None
+            corpus_dataset_name=(
+                self.corpus_dataset.name if self.corpus_dataset is not None else None
             ),
             trace_dataset_name=(
                 self.trace_dataset.name if self.trace_dataset is not None else None
@@ -336,9 +330,9 @@ class ThreadSession(Session):
     def __init__(
         self,
         database_url: str,
-        primary_inferences: Inferences,
-        reference_inferences: Optional[Inferences] = None,
-        corpus_inferences: Optional[Inferences] = None,
+        primary_dataset: Inferences,
+        reference_dataset: Optional[Inferences] = None,
+        corpus_dataset: Optional[Inferences] = None,
         trace_dataset: Optional[TraceDataset] = None,
         default_umap_parameters: Optional[Mapping[str, Any]] = None,
         host: Optional[str] = None,
@@ -348,32 +342,29 @@ class ThreadSession(Session):
     ):
         super().__init__(
             database_url=database_url,
-            primary_inferences=primary_inferences,
-            reference_inferences=reference_inferences,
-            corpus_inferences=corpus_inferences,
+            primary_dataset=primary_dataset,
+            reference_dataset=reference_dataset,
+            corpus_dataset=corpus_dataset,
             trace_dataset=trace_dataset,
             default_umap_parameters=default_umap_parameters,
             host=host,
             port=port,
             notebook_env=notebook_env,
         )
-        self.model = create_model_from_inferences(
-            primary_inferences,
-            reference_inferences,
+        self.model = create_model_from_datasets(
+            primary_dataset,
+            reference_dataset,
         )
         self.corpus = (
-            create_model_from_inferences(
-                corpus_inferences,
+            create_model_from_datasets(
+                corpus_dataset,
             )
-            if corpus_inferences is not None
+            if corpus_dataset is not None
             else None
        )
         # Initialize an app service that keeps the server running
-        engine = create_engine_and_run_migrations(database_url)
-        instrumentation_cleanups = instrument_engine_if_enabled(engine)
-        factory = SessionFactory(session_factory=_db(engine), dialect=engine.dialect.name)
         self.app = create_app(
-            db=factory,
+            database_url=database_url,
             export_path=self.export_path,
             model=self.model,
             corpus=self.corpus,
@@ -384,7 +375,6 @@ class ThreadSession(Session):
                 if (trace_dataset and (initial_evaluations := trace_dataset.evaluations))
                 else None
             ),
-            clean_up_callbacks=instrumentation_cleanups,
         )
         self.server = ThreadServer(
             app=self.app,
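
For code that constructs these session objects directly, the keyword renames above are the visible API change. A minimal, hypothetical sketch (not taken from the diff; the SQLite URL is an example value, and EMPTY_INFERENCES is the sentinel imported at the top of session.py):

    from phoenix.inferences.inferences import EMPTY_INFERENCES
    from phoenix.session.session import ThreadSession

    session = ThreadSession(
        database_url="sqlite:///:memory:",  # example value only
        primary_dataset=EMPTY_INFERENCES,   # was primary_inferences in 4.4.4rc5
        reference_dataset=None,             # was reference_inferences
        corpus_dataset=None,                # was corpus_inferences
    )
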
phoenix/trace/exporter.py CHANGED
@@ -7,17 +7,13 @@ from types import MethodType
 from typing import Any, Optional
 from urllib.parse import urljoin
 
-import httpx
+import requests
 from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
+from requests import Session
 from typing_extensions import TypeAlias, assert_never
 
 import phoenix.trace.v1 as pb
-from phoenix.config import (
-    get_env_client_headers,
-    get_env_collector_endpoint,
-    get_env_host,
-    get_env_port,
-)
+from phoenix.config import get_env_collector_endpoint, get_env_host, get_env_port
 
 logger = logging.getLogger(__name__)
 logger.addHandler(logging.NullHandler())
@@ -79,10 +75,9 @@ class HttpExporter:
         )
         self._base_url = base_url if base_url.endswith("/") else base_url + "/"
         _warn_if_phoenix_is_not_running(self._base_url)
-        headers = get_env_client_headers()
-        self._client = httpx.Client(headers=headers)
-        weakref.finalize(self, self._client.close)
-        self._client.headers.update(
+        self._session = Session()
+        weakref.finalize(self, self._session.close)
+        self._session.headers.update(
             {
                 "content-type": "application/x-protobuf",
                 "content-encoding": "gzip",
@@ -115,9 +110,9 @@ class HttpExporter:
 
     def _send(self, message: Message) -> None:
         serialized = message.SerializeToString()
-        content = gzip.compress(serialized)
+        data = gzip.compress(serialized)
         try:
-            self._client.post(self._url(message), content=content).raise_for_status()
+            self._session.post(self._url(message), data=data).raise_for_status()
         except Exception as e:
             logger.exception(e)
 
@@ -130,7 +125,7 @@ class HttpExporter:
 
 def _warn_if_phoenix_is_not_running(base_url: str) -> None:
     try:
-        httpx.get(urljoin(base_url, "arize_phoenix_version")).raise_for_status()
+        requests.get(urljoin(base_url, "arize_phoenix_version")).raise_for_status()
     except Exception:
         logger.warning(
             f"Arize Phoenix is not running on {base_url}. Launch Phoenix "
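
For context, the send path after the httpx → requests switch amounts to gzipping a serialized protobuf and POSTing it with the headers set in HttpExporter.__init__. A minimal sketch, not the library's code; the real exporter derives the target URL per message type via self._url(message):

    import gzip

    import requests

    session = requests.Session()
    session.headers.update(
        {
            "content-type": "application/x-protobuf",
            "content-encoding": "gzip",
        }
    )

    def send(serialized: bytes, url: str) -> None:
        # requests takes the raw body via data=, where httpx used content=
        session.post(url, data=gzip.compress(serialized)).raise_for_status()
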
phoenix/trace/fixtures.py CHANGED
@@ -1,29 +1,18 @@
-import logging
-import shutil
 from binascii import hexlify
 from dataclasses import dataclass, field, replace
 from datetime import datetime, timezone
-from io import StringIO
 from random import getrandbits
-from tempfile import NamedTemporaryFile
-from time import sleep, time
-from typing import Dict, Iterable, Iterator, List, NamedTuple, Optional, Sequence, Tuple, cast
+from typing import Dict, Iterable, Iterator, List, NamedTuple, Optional, Tuple, cast
 from urllib import request
-from urllib.parse import urljoin
 
-import httpx
 import pandas as pd
 from google.protobuf.wrappers_pb2 import DoubleValue, StringValue
-from httpx import ConnectError, HTTPStatusError
 
 import phoenix.trace.v1 as pb
-from phoenix import Client
 from phoenix.trace.schemas import Span
 from phoenix.trace.trace_dataset import TraceDataset
 from phoenix.trace.utils import json_lines_to_df
 
-logger = logging.getLogger(__name__)
-
 
 class EvaluationResultSchema(NamedTuple):
     label: Optional[str] = "label"
@@ -43,44 +32,12 @@ class DocumentEvaluationFixture(EvaluationFixture):
     document_position: str = "document_position"
 
 
-@dataclass(frozen=True)
-class DatasetFixture:
-    file_name: str
-    name: str
-    input_keys: Sequence[str]
-    output_keys: Sequence[str]
-    metadata_keys: Sequence[str] = ()
-    description: Optional[str] = field(default=None)
-    _df: Optional[pd.DataFrame] = field(default=None, init=False, repr=False)
-    _csv: Optional[str] = field(default=None, init=False, repr=False)
-
-    def load(self) -> "DatasetFixture":
-        if self._df is None:
-            df = pd.read_csv(_url(self.file_name))
-            object.__setattr__(self, "_df", df)
-        return self
-
-    @property
-    def dataframe(self) -> pd.DataFrame:
-        self.load()
-        return cast(pd.DataFrame, self._df).copy(deep=False)
-
-    @property
-    def csv(self) -> StringIO:
-        if self._csv is None:
-            with StringIO() as buffer:
-                self.dataframe.to_csv(buffer, index=False)
-                object.__setattr__(self, "_csv", buffer.getvalue())
-        return StringIO(self._csv)
-
-
 @dataclass(frozen=True)
 class TracesFixture:
     name: str
     description: str
     file_name: str
     evaluation_fixtures: Iterable[EvaluationFixture] = ()
-    dataset_fixtures: Iterable[DatasetFixture] = ()
 
 
 llama_index_rag_fixture = TracesFixture(
@@ -101,36 +58,6 @@ llama_index_rag_fixture = TracesFixture(
             file_name="llama_index_rag_v8.retrieved_documents_eval.parquet",
         ),
     ),
-    dataset_fixtures=(
-        DatasetFixture(
-            file_name="hybridial_samples.csv.gz",
-            input_keys=("messages", "ctxs"),
-            output_keys=("answers",),
-            name="ChatRAG-Bench: Hybrid Dialogue (samples)",
-            description="https://huggingface.co/datasets/nvidia/ChatRAG-Bench/viewer/hybridial",
-        ),
-        DatasetFixture(
-            file_name="sqa_samples.csv.gz",
-            input_keys=("messages", "ctxs"),
-            output_keys=("answers",),
-            name="ChatRAG-Bench: SQA (samples)",
-            description="https://huggingface.co/datasets/nvidia/ChatRAG-Bench/viewer/sqa",
-        ),
-        DatasetFixture(
-            file_name="doqa_cooking_samples.csv.gz",
-            input_keys=("messages", "ctxs"),
-            output_keys=("answers",),
-            name="ChatRAG-Bench: DoQA Cooking (samples)",
-            description="https://huggingface.co/datasets/nvidia/ChatRAG-Bench/viewer/doqa_cooking",
-        ),
-        DatasetFixture(
-            file_name="synthetic_convqa_samples.csv.gz",
-            input_keys=("messages", "document"),
-            output_keys=("answers",),
-            name="ChatQA-Train: Synthetic ConvQA (samples)",
-            description="https://huggingface.co/datasets/nvidia/ChatQA-Training-Data/viewer/synthetic_convqa",
-        ),
-    ),
 )
 
 llama_index_calculator_agent_fixture = TracesFixture(
@@ -211,69 +138,16 @@ def download_traces_fixture(
         return cast(List[str], f.readlines())
 
 
-def load_example_traces(fixture_name: str) -> TraceDataset:
+def load_example_traces(use_case: str) -> TraceDataset:
     """
     Loads a trace dataframe by name.
     """
-    fixture = get_trace_fixture_by_name(fixture_name)
+    fixture = get_trace_fixture_by_name(use_case)
     return TraceDataset(json_lines_to_df(download_traces_fixture(fixture)))
 
 
-def get_dataset_fixtures(fixture_name: str) -> Iterable[DatasetFixture]:
-    return (fixture.load() for fixture in get_trace_fixture_by_name(fixture_name).dataset_fixtures)
-
-
-def send_dataset_fixtures(
-    endpoint: str,
-    fixtures: Iterable[DatasetFixture],
-) -> None:
-    expiration = time() + 5
-    while time() < expiration:
-        try:
-            url = urljoin(endpoint, "/healthz")
-            httpx.get(url=url).raise_for_status()
-        except ConnectError:
-            sleep(0.1)
-            continue
-        except Exception as e:
-            print(str(e))
-            raise
-        break
-    client = Client(endpoint=endpoint)
-    for i, fixture in enumerate(fixtures):
-        try:
-            if i % 2:
-                client.upload_dataset(
-                    fixture.dataframe,
-                    name=fixture.name,
-                    input_keys=fixture.input_keys,
-                    output_keys=fixture.output_keys,
-                    metadata_keys=fixture.metadata_keys,
-                    description=fixture.description,
-                )
-            else:
-                with NamedTemporaryFile() as tf:
-                    with open(tf.name, "w") as f:
-                        shutil.copyfileobj(fixture.csv, f)
-                        f.flush()
-                    client.upload_dataset(
-                        tf.name,
-                        name=fixture.name,
-                        input_keys=fixture.input_keys,
-                        output_keys=fixture.output_keys,
-                        metadata_keys=fixture.metadata_keys,
-                        description=fixture.description,
-                    )
-        except HTTPStatusError as e:
-            print(e.response.content.decode())
-            pass
-        else:
-            name, df = fixture.name, fixture.dataframe
-            print(f"Dataset sent: {name=}, {len(df)=}")
-
-
-def get_evals_from_fixture(fixture_name: str) -> Iterator[pb.Evaluation]:
-    fixture = get_trace_fixture_by_name(fixture_name)
+def get_evals_from_fixture(use_case: str) -> Iterator[pb.Evaluation]:
+    fixture = get_trace_fixture_by_name(use_case)
     for eval_fixture in fixture.evaluation_fixtures:
         yield from _read_eval_fixture(eval_fixture)
 
@@ -321,8 +195,8 @@ def _read_eval_fixture(eval_fixture: EvaluationFixture) -> Iterator[pb.Evaluatio
 def _url(
     file_name: str,
     host: Optional[str] = "https://storage.googleapis.com/",
-    bucket: Optional[str] = "arize-phoenix-assets",
-    prefix: Optional[str] = "traces/",
+    bucket: Optional[str] = "arize-assets",
+    prefix: Optional[str] = "phoenix/traces/",
 ) -> str:
     return f"{host}{bucket}/{prefix}{file_name}"

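
The net effect on fixture downloads is the reverted bucket/prefix defaults in _url. A standalone sketch mirroring that helper (the parquet file name is one of the evaluation fixtures defined above):

    def fixture_url(
        file_name: str,
        host: str = "https://storage.googleapis.com/",
        bucket: str = "arize-assets",
        prefix: str = "phoenix/traces/",
    ) -> str:
        return f"{host}{bucket}/{prefix}{file_name}"

    print(fixture_url("llama_index_rag_v8.retrieved_documents_eval.parquet"))
    # https://storage.googleapis.com/arize-assets/phoenix/traces/llama_index_rag_v8.retrieved_documents_eval.parquet
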
phoenix/trace/schemas.py CHANGED
@@ -29,6 +29,8 @@ class SpanKind(Enum):
     """
     SpanKind is loosely inspired by OpenTelemetry's SpanKind
     It captures the type of work that a Span encapsulates.
+
+    NB: this is actively under construction
     """
 
     TOOL = "TOOL"
@@ -38,7 +40,6 @@ class SpanKind(Enum):
     EMBEDDING = "EMBEDDING"
     AGENT = "AGENT"
     RERANKER = "RERANKER"
-    EVALUATOR = "EVALUATOR"
     UNKNOWN = "UNKNOWN"
 
     def __str__(self) -> str:
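
At the enum level, dropping EVALUATOR means that member no longer exists on SpanKind; the members shown above are untouched. A small sketch of how downstream code can check for this (hypothetical usage, not from the diff):

    from phoenix.trace.schemas import SpanKind

    assert SpanKind.TOOL.value == "TOOL"
    assert "EVALUATOR" not in SpanKind.__members__  # removed in 4.5.0
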
phoenix/trace/span_evaluations.py CHANGED
@@ -11,7 +11,7 @@ import pandas as pd
 from pandas.api.types import is_integer_dtype, is_numeric_dtype, is_string_dtype
 from pyarrow import RecordBatchStreamReader, Schema, Table, parquet
 
-from phoenix.config import TRACE_DATASETS_DIR
+from phoenix.config import TRACE_DATASET_DIR
 from phoenix.exceptions import PhoenixEvaluationNameIsMissing
 from phoenix.trace.errors import InvalidParquetMetadataError
 
@@ -201,7 +201,7 @@ class Evaluations(NeedsNamedIndex, NeedsResultColumns, ABC):
             UUID: The ID of the evaluations, which can be used as a key to load
             the evaluations from disk using `load`.
         """
-        directory = Path(directory) if directory else TRACE_DATASETS_DIR
+        directory = Path(directory) if directory else TRACE_DATASET_DIR
         path = directory / EVAL_PARQUET_FILE_NAME.format(id=self.id)
         table = self.to_pyarrow_table()
         parquet.write_table(table, path)
@@ -229,7 +229,7 @@ class Evaluations(NeedsNamedIndex, NeedsResultColumns, ABC):
         """
         if not isinstance(id, UUID):
             id = UUID(id)
-        path = Path(directory or TRACE_DATASETS_DIR) / EVAL_PARQUET_FILE_NAME.format(id=id)
+        path = Path(directory or TRACE_DATASET_DIR) / EVAL_PARQUET_FILE_NAME.format(id=id)
         schema = parquet.read_schema(path)
         eval_id, eval_name, evaluations_cls = _parse_schema_metadata(schema)
         if id != eval_id:
phoenix/trace/trace_dataset.py CHANGED
@@ -14,7 +14,7 @@ from openinference.semconv.trace import (
 from pandas import DataFrame, read_parquet
 from pyarrow import Schema, Table, parquet
 
-from phoenix.config import GENERATED_INFERENCES_NAME_PREFIX, INFERENCES_DIR, TRACE_DATASETS_DIR
+from phoenix.config import DATASET_DIR, GENERATED_DATASET_NAME_PREFIX, TRACE_DATASET_DIR
 from phoenix.datetime_utils import normalize_timestamps
 from phoenix.trace.attributes import unflatten
 from phoenix.trace.errors import InvalidParquetMetadataError
@@ -138,7 +138,7 @@ class TraceDataset:
         self._id = uuid4()
         self.dataframe = normalize_dataframe(dataframe)
         # TODO: This is not used in any meaningful way. Should remove
-        self.name = name or f"{GENERATED_INFERENCES_NAME_PREFIX}{str(self._id)}"
+        self.name = name or f"{GENERATED_DATASET_NAME_PREFIX}{str(self._id)}"
         self.evaluations = list(evaluations)
 
     @classmethod
@@ -201,13 +201,13 @@ class TraceDataset:
     @classmethod
     def from_name(cls, name: str) -> "TraceDataset":
         """Retrieves a dataset by name from the file system"""
-        directory = INFERENCES_DIR / name
+        directory = DATASET_DIR / name
         df = read_parquet(directory / cls._data_file_name)
         return cls(df, name)
 
     def to_disc(self) -> None:
         """writes the data to disc"""
-        directory = INFERENCES_DIR / self.name
+        directory = DATASET_DIR / self.name
         directory.mkdir(parents=True, exist_ok=True)
         get_serializable_spans_dataframe(self.dataframe).to_parquet(
             directory / self._data_file_name,
@@ -230,7 +230,7 @@ class TraceDataset:
             UUID: The id of the trace dataset, which can be used as key to load
             the dataset from disk using `load`.
         """
-        directory = Path(directory or TRACE_DATASETS_DIR)
+        directory = Path(directory or TRACE_DATASET_DIR)
         for evals in self.evaluations:
             evals.save(directory)
         path = directory / TRACE_DATASET_PARQUET_FILE_NAME.format(id=self._id)
@@ -280,7 +280,7 @@ class TraceDataset:
         """
         if not isinstance(id, UUID):
             id = UUID(id)
-        path = Path(directory or TRACE_DATASETS_DIR) / TRACE_DATASET_PARQUET_FILE_NAME.format(id=id)
+        path = Path(directory or TRACE_DATASET_DIR) / TRACE_DATASET_PARQUET_FILE_NAME.format(id=id)
         schema = parquet.read_schema(path)
         dataset_id, dataset_name, eval_ids = _parse_schema_metadata(schema)
         if id != dataset_id:
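
The changes in this file are driven by config-constant renames. A sketch of the import-level impact for code that referenced them (this assumes the old names are gone from phoenix.config, which the diff does not show directly):

    from phoenix.config import (
        DATASET_DIR,                    # used here where INFERENCES_DIR was before
        GENERATED_DATASET_NAME_PREFIX,  # was GENERATED_INFERENCES_NAME_PREFIX
        TRACE_DATASET_DIR,              # was TRACE_DATASETS_DIR
    )

    print(DATASET_DIR, TRACE_DATASET_DIR)  # locations depend on the configured Phoenix working directory
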
phoenix/version.py CHANGED
@@ -1 +1 @@
-__version__ = "4.4.4rc5"
+__version__ = "4.5.0"
phoenix/datasets/evaluators/__init__.py DELETED
@@ -1,18 +0,0 @@
-from phoenix.datasets.evaluators.code_evaluators import ContainsKeyword, JSONParsable
-from phoenix.datasets.evaluators.llm_evaluators import (
-    CoherenceEvaluator,
-    ConcisenessEvaluator,
-    HelpfulnessEvaluator,
-    LLMCriteriaEvaluator,
-    RelevanceEvaluator,
-)
-
-__all__ = [
-    "ContainsKeyword",
-    "JSONParsable",
-    "CoherenceEvaluator",
-    "ConcisenessEvaluator",
-    "LLMCriteriaEvaluator",
-    "HelpfulnessEvaluator",
-    "RelevanceEvaluator",
-]
phoenix/datasets/evaluators/code_evaluators.py DELETED
@@ -1,99 +0,0 @@
-from __future__ import annotations
-
-import json
-import re
-from typing import Any, List, Optional, Union
-
-from phoenix.datasets.evaluators.utils import Evaluator
-from phoenix.datasets.types import EvaluationResult, TaskOutput
-
-
-class JSONParsable(Evaluator):
-    def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
-        assert isinstance(output, str), "Experiment run output must be a string"
-        try:
-            json.loads(output)
-            json_parsable = True
-        except BaseException:
-            json_parsable = False
-        return EvaluationResult(
-            score=int(json_parsable),
-        )
-
-
-class ContainsKeyword(Evaluator):
-    def __init__(self, keyword: str, name: Optional[str] = None) -> None:
-        self.keyword = keyword
-        self._name = name or f"Contains({repr(keyword)})"
-
-    def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
-        assert isinstance(output, str), "Experiment run output must be a string"
-        found = self.keyword in output
-        return EvaluationResult(
-            score=float(found),
-            explanation=(
-                f"the string {repr(self.keyword)} was "
-                f"{'found' if found else 'not found'} in the output"
-            ),
-        )
-
-
-class ContainsAnyKeyword(Evaluator):
-    def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
-        self.keywords = keywords
-        self._name = name or f"ContainsAny({keywords})"
-
-    def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
-        assert isinstance(output, str), "Experiment run output must be a string"
-        found = [keyword for keyword in self.keywords if keyword in output]
-        if found:
-            explanation = f"the keywords {found} were found in the output"
-        else:
-            explanation = f"none of the keywords {self.keywords} were found in the output"
-        return EvaluationResult(
-            score=float(bool(found)),
-            explanation=explanation,
-        )
-
-
-class ContainsAllKeywords(Evaluator):
-    def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
-        self.keywords = keywords
-        self._name = name or f"ContainsAll({keywords})"
-
-    def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
-        assert isinstance(output, str), "Experiment run output must be a string"
-        not_found = [keyword for keyword in self.keywords if keyword not in output]
-        if not_found:
-            contains_all = False
-            explanation = f"the keywords {not_found} were not found in the output"
-        else:
-            contains_all = True
-            explanation = f"all of the keywords {self.keywords} were found in the output"
-        return EvaluationResult(
-            score=float(contains_all),
-            explanation=explanation,
-        )
-
-
-class MatchesRegex(Evaluator):
-    def __init__(self, pattern: Union[str, re.Pattern[str]], name: Optional[str] = None) -> None:
-        if isinstance(pattern, str):
-            pattern = re.compile(pattern)
-        self.pattern = pattern
-        assert isinstance(pattern, re.Pattern)
-        self._name = name or f"matches_({pattern})"
-
-    def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
-        assert isinstance(output, str), "Experiment run output must be a string"
-        matches = self.pattern.findall(output)
-        if matches:
-            explanation = (
-                f"the substrings {matches} matched the regex pattern {self.pattern.pattern}"
-            )
-        else:
-            explanation = f"no substrings matched the regex pattern {self.pattern.pattern}"
-        return EvaluationResult(
-            score=float(bool(matches)),
-            explanation=explanation,
-        )
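
For reference, usage of these removed code evaluators on 4.4.4rc5 looked roughly like the sketch below (hypothetical, inferred only from the evaluate(*, output=...) signature above; the sample output string is made up). The import path comes from the deleted __init__.py; none of this exists in 4.5.0:

    from phoenix.datasets.evaluators import ContainsKeyword

    evaluator = ContainsKeyword("phoenix")
    result = evaluator.evaluate(output="arize-phoenix traces LLM applications")
    print(result.score, result.explanation)  # 1.0 plus an explanation that the keyword was found
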