arize-phoenix 2.5.0__py3-none-any.whl → 2.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of arize-phoenix might be problematic.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: arize-phoenix
3
- Version: 2.5.0
3
+ Version: 2.7.0
4
4
  Summary: ML Observability in your notebook
5
5
  Project-URL: Documentation, https://docs.arize.com/phoenix/
6
6
  Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -1,13 +1,13 @@
1
1
  phoenix/__init__.py,sha256=EEh0vZGRQS8686h34GQ64OjQoZ7neKYO_iO5j6Oa9Jw,1402
2
- phoenix/config.py,sha256=wYmvT1I3wad8YIunbsiJ0nBu4-W8pUZwwbs5CxJKRs8,2442
2
+ phoenix/config.py,sha256=RbQw8AkVyI4SSo5CD520AjUNcwkDNOGZA6_ErE48R7A,3454
3
3
  phoenix/datetime_utils.py,sha256=D955QLrkgrrSdUM6NyqbCeAu2SMsjhR5rHVQEsVUdng,2773
4
4
  phoenix/exceptions.py,sha256=igIWGAg3m8jm5YwQDeCY1p8ml_60A7zaGVXJ1yZhY9s,44
5
5
  phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
6
6
  phoenix/services.py,sha256=f6AeyKTuOpy9RCcTCjVH3gx5nYZhbTMFOuv1WSUOB5o,4992
7
- phoenix/version.py,sha256=fMbNgIJqxiZEaSBLadLBt4rZpCHqarzb4Okt-aWsp2E,22
7
+ phoenix/version.py,sha256=EtKWW0Hnl5oWglRNH0HZigvcDT2FEs58ek8buJdwW1E,22
8
8
  phoenix/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  phoenix/core/embedding_dimension.py,sha256=zKGbcvwOXgLf-yrJBpQyKtd-LEOPRKHnUToyAU8Owis,87
10
- phoenix/core/evals.py,sha256=OrHeYlh804rpcZIXTA6kan2mzSZMfgpphNNQdPMpNoM,7597
10
+ phoenix/core/evals.py,sha256=gJyqQzpud5YjtoY8h4pgXvHDsdubGfqmEewLuZHPPmQ,10224
11
11
  phoenix/core/model.py,sha256=vQ6RxpUPlncezJvur5u6xBN0Lkrk2gW0cTyb-qqaSqA,4713
12
12
  phoenix/core/model_schema.py,sha256=rR9VdhL_oXxbprDTPQJBXs5hw5sMPQmzx__m6Kwsxug,50394
13
13
  phoenix/core/model_schema_adapter.py,sha256=3GkyzqUST4fYi-Bgs8qAam5hwMCdQRZTDLjZ9Bnzdm4,8268
@@ -15,29 +15,29 @@ phoenix/core/traces.py,sha256=O01L6qwQfHxHUHNZemKBBsAgqDo1tAIO5-1fK2g0NwE,14618
15
15
  phoenix/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  phoenix/datasets/dataset.py,sha256=scKVZ7zc6Dpc_ntt-pWhzY-KWqOJEwKePuyNnKSVTGE,30515
17
17
  phoenix/datasets/errors.py,sha256=cGp9vxnw4SewFoWBV3ZGMkhE0Kh73lPIv3Ppz_H_RoA,8261
18
- phoenix/datasets/fixtures.py,sha256=0_PacL3dw49zulKpFpPdhvxJxeGmHTguqIyf2VXkBkk,19158
18
+ phoenix/datasets/fixtures.py,sha256=rGnVnufPvt25cyrlat0vKKtlu08olOuZvbp7EnR33aU,20668
19
19
  phoenix/datasets/schema.py,sha256=bF1d2Md6NyqQZuC4Ym5A52f2_IcazkyxGFZ11HPqSg0,6668
20
20
  phoenix/datasets/validation.py,sha256=dZ9lCFUV0EY7HCkQkQBrs-GLAEIZdpOqUxwD5l4dp88,8294
21
21
  phoenix/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  phoenix/experimental/evals/__init__.py,sha256=q96YKLMt2GJD9zL8sjugvWx1INfw40Wa7E9OsHo2S4s,1885
23
- phoenix/experimental/evals/evaluators.py,sha256=b-nudXEa8A9DF0HuiBQSvMmLvOOxaT-2k3UN8moQ-zE,15852
23
+ phoenix/experimental/evals/evaluators.py,sha256=r7fXrS-l4gn58SUhLAZSfY3P8lxysouSVJwHddrZJ_Q,15956
24
24
  phoenix/experimental/evals/retrievals.py,sha256=o3fqrsYbYZjyGj_jWkN_9VQVyXjLkDKDw5Ws7l8bwdI,3828
25
25
  phoenix/experimental/evals/functions/__init__.py,sha256=NNd0-_cmIopdV7vm3rspjfgM726qoQJ4DPq_vqbnaxQ,180
26
- phoenix/experimental/evals/functions/classify.py,sha256=pcAJjkSGiE6zusOMvfcSCnNNjabs7O-Cs0C0h-6Y76Y,18985
26
+ phoenix/experimental/evals/functions/classify.py,sha256=A-seuYrwiNFdc4IK9WJkQVKY78YdBHxaCMSDPL4_SXE,19523
27
27
  phoenix/experimental/evals/functions/executor.py,sha256=bM7PI2rcPukQQzZ2rWqN_-Kfo_a935YJj0bh1Red8Ps,13406
28
- phoenix/experimental/evals/functions/generate.py,sha256=E_yt3Td_LpNWQOSR-jYCxNG7vVfe7fxX9WYeBhApMzc,5412
28
+ phoenix/experimental/evals/functions/generate.py,sha256=8LnnPAjBM9yxitdkaGZ67OabuDTOWBF3fvinJ_uCFRg,5584
29
29
  phoenix/experimental/evals/functions/processing.py,sha256=F4xtLsulLV4a8CkuLldRddsCim75dSTIShEJUYN6I6w,1823
30
30
  phoenix/experimental/evals/models/__init__.py,sha256=j1N7DhiOPbcaemtVBONcQ0miNnGQwEXz4u3P3Vwe6-4,320
31
- phoenix/experimental/evals/models/anthropic.py,sha256=Tcv8R-vTyY8sLAv1wIHeZdMCBtqhyayqMPJXRDc7blI,6267
32
- phoenix/experimental/evals/models/base.py,sha256=tutL6WOe0rM1-xywVOX8VyYKxgfpxH3oylm-BWdjV3M,8052
33
- phoenix/experimental/evals/models/bedrock.py,sha256=CRPmBuSLc_nRnKKWLHhGMxdWEISIKUJM1tzIlOQ_qWM,7927
31
+ phoenix/experimental/evals/models/anthropic.py,sha256=VRYYbZr8ZFvC-19VxScMNux_Yp_9DzSRXiSmWUuhlOc,6309
32
+ phoenix/experimental/evals/models/base.py,sha256=z8xB18s6JI_Weihq2yG22Rte2RBde_cdHq9rINAXHYw,8086
33
+ phoenix/experimental/evals/models/bedrock.py,sha256=VrLNifBxmgHVMFqp6j9d1aGQIvDDuw8yjBM8CdIZCH4,8009
34
34
  phoenix/experimental/evals/models/litellm.py,sha256=YvlYeAV-gG0IxFoVJ_OuRYwVwQ0LEtYBuWmp-uPGrNU,4368
35
- phoenix/experimental/evals/models/openai.py,sha256=EcTkv1DqdrtMNhyfiUTzD5gDbEZVUI_zQyrDgsnuYig,17168
35
+ phoenix/experimental/evals/models/openai.py,sha256=Yht-AZDq2iiwMUlkG3ghv3tCxZY8p-L7xxhSeGPtfaM,17238
36
36
  phoenix/experimental/evals/models/rate_limiters.py,sha256=5GVN0RQKt36Przg3-9jLgocRmyg-tbeO-cdbuLIx89w,10160
37
- phoenix/experimental/evals/models/vertex.py,sha256=SflOzop3iPGqKT7UoDW4dMqTzNDlLa5Q8I-HBhjFW1c,6418
37
+ phoenix/experimental/evals/models/vertex.py,sha256=52A1g8j54_VkahjQmLj0eguPKJdQj0xtI4dAlrLsgtY,6592
38
38
  phoenix/experimental/evals/models/vertexai.py,sha256=NfBpQq0l7XzP-wDEDsK27IRiQBzA1GXEdfwlAf8leX4,5609
39
39
  phoenix/experimental/evals/templates/__init__.py,sha256=GSJSoWJ4jwyoUANniidmWMUtXQhNQYbTJbfFqCvuYuo,1470
40
- phoenix/experimental/evals/templates/default_templates.py,sha256=QzPhILbgpOd8NJquSep60nWNMWVXqsDTGDmf3nYlvjc,21201
40
+ phoenix/experimental/evals/templates/default_templates.py,sha256=dVKmoLwqgAyGcRuezz9WKnXSHhw7-qk1R8j6wSmqh0s,20722
41
41
  phoenix/experimental/evals/templates/template.py,sha256=ImFSaTPo9oalPNwq7cNdOCndrvuwLuIyIFKsgDVcoJE,6715
42
42
  phoenix/experimental/evals/utils/__init__.py,sha256=608EX7sG0f5oDG__II16J8xnFJiNpY9dI9AC8vXwR00,5601
43
43
  phoenix/experimental/evals/utils/threads.py,sha256=ksI-egarPnlxit0qKKjtjZ2L82qGLxqxZ6s92O0eBA4,1005
@@ -125,24 +125,25 @@ phoenix/server/static/apple-touch-icon-76x76.png,sha256=CT_xT12I0u2i0WU8JzBZBuOQ
125
125
  phoenix/server/static/apple-touch-icon.png,sha256=fOfpjqGpWYbJ0eAurKsyoZP1EAs6ZVooBJ_SGk2ZkDs,3801
126
126
  phoenix/server/static/favicon.ico,sha256=bY0vvCKRftemZfPShwZtE93DiiQdaYaozkPGwNFr6H8,34494
127
127
  phoenix/server/static/index.css,sha256=KKGpx4iwF91VGRm0YN-4cn8oC-oIqC6HecoPf0x3ZM8,1885
128
- phoenix/server/static/index.js,sha256=fmq95YEmj2bkWIA2x96bf7iV5ZisW1k8EtwAIsy4jD4,3259870
128
+ phoenix/server/static/index.js,sha256=4MEBiTUm4u7QrSnPE7OJrBEYSkFjmyZPugfrowtQOCI,3259882
129
129
  phoenix/server/static/modernizr.js,sha256=mvK-XtkNqjOral-QvzoqsyOMECXIMu5BQwSVN_wcU9c,2564
130
130
  phoenix/server/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
131
131
  phoenix/server/templates/index.html,sha256=DlfcGoq1V5C2QkJWqP1j4Nu6_kPfsOzOrtzYF3ogghE,1900
132
132
  phoenix/session/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
133
- phoenix/session/evaluation.py,sha256=s5OivScAMSj8qfU4IexpmbyKvcGBj_nt-GP_13_o-iY,4843
134
- phoenix/session/session.py,sha256=uZnBRmqITp694-B0UahBYHodfcJNKp1FdvrVAuu5jeI,20007
133
+ phoenix/session/evaluation.py,sha256=DaAtA0XYJbXRJO_StGywa-9APlz2ORSmCXzxrtn3rvI,4997
134
+ phoenix/session/session.py,sha256=94hRilOwlEWo6npLNjutaYRCevDPLPnAQdnuP07qeGc,20826
135
135
  phoenix/trace/__init__.py,sha256=4d_MqzUIFmlY9WWcFeTONJ4xL5mPGoWZaPM2TJ0ZDBQ,266
136
+ phoenix/trace/errors.py,sha256=DbXSJnNErV7305tKv7pUWLD6jcVHJ6EBdSu4mZJ6IM4,112
136
137
  phoenix/trace/evaluation_conventions.py,sha256=t8jydM3U0-T5YpiQKRJ3tWdWGlHtzKyttYdw-ddvPOk,1048
137
138
  phoenix/trace/exporter.py,sha256=z3xrGJhIRh7XMy4Q1FkR3KmFZym-GX0XxLTZ6eSnN0Q,4347
138
139
  phoenix/trace/fixtures.py,sha256=GGNOVi8Cjj9eduxOenyYLF8mhl-XTbXHtnraP5vLlxQ,6341
139
140
  phoenix/trace/otel.py,sha256=Efc6S0IuvI-NEJ_Mv1VWEzQS94-lR_6nJ3ecTzwmyQ4,13933
140
141
  phoenix/trace/schemas.py,sha256=m1wVlYFT6qL3FovD3TtTYsEgN6OHvv52gNdJkoPCmuY,5400
141
142
  phoenix/trace/semantic_conventions.py,sha256=u6NG85ZhbreriZr8cqJaddldM_jUcew7JilszY7JUk8,4652
142
- phoenix/trace/span_evaluations.py,sha256=k6bwsa040AihvxTpve33MpkPN3gT8z_kSROpmJwOeCs,12579
143
+ phoenix/trace/span_evaluations.py,sha256=asGug9lUHUufBwK1nL_PnHIDKsOc5X4ws7cur9lfoyI,12421
143
144
  phoenix/trace/span_json_decoder.py,sha256=Xv-0uCsHgwzQb0dqTa7CuuDeXAPaXjQICyCFK3ZQaSs,3089
144
145
  phoenix/trace/span_json_encoder.py,sha256=C5y7rkyOcV08oJC5t8TZqVxsKCZMJKad7bBQzAgLoDs,1763
145
- phoenix/trace/trace_dataset.py,sha256=nFclw-wuY_q7hqpqe7fEVVH67yAku9qJ5EiJ61lz0WM,8691
146
+ phoenix/trace/trace_dataset.py,sha256=KW0TzmhlKuX8PUPLV172iTK08myYE0QXUC75KiIqJ7k,13204
146
147
  phoenix/trace/tracer.py,sha256=S8UfhI4Qhl_uulD9bj9qFdSB5vwcB42hXd8-qURGcmo,3662
147
148
  phoenix/trace/utils.py,sha256=7LurVGXn245cjj4MJsc7v6jq4DSJkpK6YGBfIaSywuw,1307
148
149
  phoenix/trace/dsl/__init__.py,sha256=WIQIjJg362XD3s50OsPJJ0xbDsGp41bSv7vDllLrPuA,144
@@ -165,8 +166,8 @@ phoenix/trace/v1/evaluation_pb2.pyi,sha256=cCbbx06gwQmaH14s3J1X25TtaARh-k1abbxQd
165
166
  phoenix/utilities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
166
167
  phoenix/utilities/error_handling.py,sha256=7b5rpGFj9EWZ8yrZK1IHvxB89suWk3lggDayUQcvZds,1946
167
168
  phoenix/utilities/logging.py,sha256=lDXd6EGaamBNcQxL4vP1au9-i_SXe0OraUDiJOcszSw,222
168
- arize_phoenix-2.5.0.dist-info/METADATA,sha256=RvT2Wd4rmB9cLgrtDE6ccxlRP16z6kq9dc5-S2ikSEw,26479
169
- arize_phoenix-2.5.0.dist-info/WHEEL,sha256=mRYSEL3Ih6g5a_CVMIcwiF__0Ae4_gLYh01YFNwiq1k,87
170
- arize_phoenix-2.5.0.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
171
- arize_phoenix-2.5.0.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
172
- arize_phoenix-2.5.0.dist-info/RECORD,,
169
+ arize_phoenix-2.7.0.dist-info/METADATA,sha256=G2XhPSpRh7gJHrTc5_MhOvrpFBTWv0_mjb_mZueDuWI,26479
170
+ arize_phoenix-2.7.0.dist-info/WHEEL,sha256=mRYSEL3Ih6g5a_CVMIcwiF__0Ae4_gLYh01YFNwiq1k,87
171
+ arize_phoenix-2.7.0.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
172
+ arize_phoenix-2.7.0.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
173
+ arize_phoenix-2.7.0.dist-info/RECORD,,
phoenix/config.py CHANGED
@@ -12,6 +12,11 @@ ENV_PHOENIX_COLLECTOR_ENDPOINT = "PHOENIX_COLLECTOR_ENDPOINT"
12
12
  The endpoint traces and evals are sent to. This must be set if the Phoenix
13
13
  server is running on a remote instance.
14
14
  """
15
+ ENV_WORKING_DIR = "PHOENIX_WORKING_DIR"
16
+ """
17
+ The directory in which to save, load, and export datasets. This directory must
18
+ be accessible by both the Phoenix server and the notebook environment.
19
+ """
15
20
 
16
21
 
17
22
  def _get_temp_path() -> Path:
@@ -36,13 +41,16 @@ def get_running_pid() -> Optional[int]:
36
41
  return None
37
42
 
38
43
 
39
- for path in (
40
- ROOT_DIR := Path.home().resolve() / ".phoenix",
41
- EXPORT_DIR := ROOT_DIR / "exports",
42
- DATASET_DIR := ROOT_DIR / "datasets",
43
- TRACE_DATASET_DIR := ROOT_DIR / "trace_datasets",
44
- ):
45
- path.mkdir(parents=True, exist_ok=True)
44
+ def get_working_dir() -> Path:
45
+ """
46
+ Get the working directory for saving, loading, and exporting datasets.
47
+ """
48
+ working_dir_str = os.getenv(ENV_WORKING_DIR)
49
+ if working_dir_str is not None:
50
+ return Path(working_dir_str)
51
+ # Fall back to ~/.phoenix if PHOENIX_WORKING_DIR is not set
52
+ return Path.home().resolve() / ".phoenix"
53
+
46
54
 
47
55
  PHOENIX_DIR = Path(__file__).resolve().parent
48
56
  # Server config
@@ -53,6 +61,23 @@ HOST = "0.0.0.0"
53
61
  PORT = 6006
54
62
  # The prefix of datasets that are auto-assigned a name
55
63
  GENERATED_DATASET_NAME_PREFIX = "phoenix_dataset_"
64
+ # The work directory for saving, loading, and exporting datasets
65
+ WORKING_DIR = get_working_dir()
66
+
67
+ try:
68
+ for path in (
69
+ ROOT_DIR := WORKING_DIR,
70
+ EXPORT_DIR := ROOT_DIR / "exports",
71
+ DATASET_DIR := ROOT_DIR / "datasets",
72
+ TRACE_DATASET_DIR := ROOT_DIR / "trace_datasets",
73
+ ):
74
+ path.mkdir(parents=True, exist_ok=True)
75
+ except Exception as e:
76
+ print(
77
+ f"⚠️ Failed to initialize the working directory at {WORKING_DIR} due to an error: {str(e)}"
78
+ )
79
+ print("⚠️ While phoenix will still run, you will not be able to save, load, or export data")
80
+ print("ℹ️ To change, set the `{ENV_WORKING_DIR}` environment variable before importing phoenix.")
56
81
 
57
82
 
58
83
  def get_exported_files(directory: Path) -> List[Path]:
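The new PHOENIX_WORKING_DIR variable is read when phoenix.config is first imported, so it has to be set before phoenix is imported. A minimal sketch (the path is illustrative):

    import os

    # must be set before importing phoenix, since the directories are
    # created at import time by phoenix.config
    os.environ["PHOENIX_WORKING_DIR"] = "/mnt/shared/phoenix"

    from phoenix.config import DATASET_DIR, WORKING_DIR

    print(WORKING_DIR)   # /mnt/shared/phoenix
    print(DATASET_DIR)   # /mnt/shared/phoenix/datasets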
phoenix/core/evals.py CHANGED
@@ -9,10 +9,12 @@ from typing import DefaultDict, Dict, List, Optional, Set, Tuple
9
9
 
10
10
  import numpy as np
11
11
  from google.protobuf.json_format import MessageToDict
12
+ from pandas import DataFrame, Index, MultiIndex
12
13
  from typing_extensions import TypeAlias, assert_never
13
14
 
14
15
  import phoenix.trace.v1 as pb
15
16
  from phoenix.trace.schemas import SpanID, TraceID
17
+ from phoenix.trace.span_evaluations import DocumentEvaluations, Evaluations, SpanEvaluations
16
18
 
17
19
  logger = logging.getLogger(__name__)
18
20
  logger.addHandler(logging.NullHandler())
@@ -171,3 +173,54 @@ class Evals:
171
173
  if result.HasField("score") and document_position < num_documents:
172
174
  scores[document_position] = result.score.value
173
175
  return scores
176
+
177
+ def export_evaluations(self) -> List[Evaluations]:
178
+ evaluations: List[Evaluations] = []
179
+ evaluations.extend(self._export_span_evaluations())
180
+ evaluations.extend(self._export_document_evaluations())
181
+ return evaluations
182
+
183
+ def _export_span_evaluations(self) -> List[SpanEvaluations]:
184
+ span_evaluations = []
185
+ with self._lock:
186
+ span_evaluations_by_name = tuple(self._span_evaluations_by_name.items())
187
+ for eval_name, _span_evaluations_by_id in span_evaluations_by_name:
188
+ span_ids = []
189
+ rows = []
190
+ with self._lock:
191
+ span_evaluations_by_id = tuple(_span_evaluations_by_id.items())
192
+ for span_id, pb_eval in span_evaluations_by_id:
193
+ span_ids.append(span_id)
194
+ rows.append(MessageToDict(pb_eval.result))
195
+ dataframe = DataFrame(rows, index=Index(span_ids, name="context.span_id"))
196
+ span_evaluations.append(SpanEvaluations(eval_name, dataframe))
197
+ return span_evaluations
198
+
199
+ def _export_document_evaluations(self) -> List[DocumentEvaluations]:
200
+ evaluations = []
201
+ with self._lock:
202
+ document_evaluations_by_name = tuple(self._document_evaluations_by_name.items())
203
+ for eval_name, _document_evaluations_by_id in document_evaluations_by_name:
204
+ span_ids = []
205
+ document_positions = []
206
+ rows = []
207
+ with self._lock:
208
+ document_evaluations_by_id = tuple(_document_evaluations_by_id.items())
209
+ for span_id, _document_evaluations_by_position in document_evaluations_by_id:
210
+ with self._lock:
211
+ document_evaluations_by_position = sorted(
212
+ _document_evaluations_by_position.items()
213
+ ) # ensure the evals are sorted by document position
214
+ for document_position, pb_eval in document_evaluations_by_position:
215
+ span_ids.append(span_id)
216
+ document_positions.append(document_position)
217
+ rows.append(MessageToDict(pb_eval.result))
218
+ dataframe = DataFrame(
219
+ rows,
220
+ index=MultiIndex.from_arrays(
221
+ (span_ids, document_positions),
222
+ names=("context.span_id", "document_position"),
223
+ ),
224
+ )
225
+ evaluations.append(DocumentEvaluations(eval_name, dataframe))
226
+ return evaluations
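For reference, the DataFrames built by these export helpers use the same index shapes that SpanEvaluations and DocumentEvaluations expect; a minimal sketch of the two shapes (the span id and column values are illustrative):

    import pandas as pd

    # span-level evals: one row per span, indexed by span id
    span_df = pd.DataFrame(
        {"label": ["factual"], "score": [1.0]},
        index=pd.Index(["7e2f08cb43bbad5f"], name="context.span_id"),
    )

    # document-level evals: one row per (span id, document position) pair
    doc_df = pd.DataFrame(
        {"label": ["relevant"], "score": [1.0]},
        index=pd.MultiIndex.from_arrays(
            (["7e2f08cb43bbad5f"], [0]),
            names=("context.span_id", "document_position"),
        ),
    )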
phoenix/datasets/fixtures.py CHANGED
@@ -240,6 +240,51 @@ click_through_rate_fixture = Fixture(
240
240
  reference_file_name="click_through_rate_train.parquet",
241
241
  )
242
242
 
243
+ chatbot_queries_schema = Schema(
244
+ prediction_id_column_name="id",
245
+ prompt_column_names=RetrievalEmbeddingColumnNames(
246
+ vector_column_name="prompt",
247
+ raw_data_column_name="prompt_text",
248
+ context_retrieval_ids_column_name="document_ids",
249
+ context_retrieval_scores_column_name="document_scores",
250
+ ),
251
+ response_column_names="response",
252
+ tag_column_names=[
253
+ "answer_relevancy",
254
+ "context_relevancy",
255
+ "faithfulness",
256
+ "document_similarity_0",
257
+ "document_similarity_1",
258
+ "openai_relevance_0",
259
+ "openai_relevance_1",
260
+ "user_feedback",
261
+ ],
262
+ )
263
+
264
+ chatbot_database_schema = Schema(
265
+ prediction_id_column_name="document_id",
266
+ prompt_column_names=EmbeddingColumnNames(
267
+ vector_column_name="text_vector",
268
+ raw_data_column_name="text",
269
+ ),
270
+ )
271
+
272
+ chatbot_fixture = Fixture(
273
+ name="chatbot",
274
+ description="""
275
+ Investigate RAG performance for a chatbot built on top of Arize's documentation.
276
+ This use-case highlights how embedding visualizations for a RAG application can
277
+ highlight issues with the application's retrieval and performance.
278
+
279
+ The data contains relevance metrics generated by LLM Evals as well as RAGAS.
280
+ """,
281
+ primary_schema=chatbot_queries_schema,
282
+ corpus_schema=chatbot_database_schema,
283
+ prefix="unstructured/llm/chatbot",
284
+ primary_file_name="chatbot_queries_with_ragas.parquet",
285
+ corpus_file_name="chatbot_database_ds.parquet",
286
+ )
287
+
243
288
  wide_data_primary_schema = Schema(
244
289
  actual_label_column_name="actual_label",
245
290
  prediction_label_column_name="predicted_label",
@@ -363,6 +408,7 @@ FIXTURES: Tuple[Fixture, ...] = (
363
408
  deep_data_fixture,
364
409
  llm_summarization_fixture,
365
410
  wikipedia_fixture,
411
+ chatbot_fixture,
366
412
  )
367
413
  NAME_TO_FIXTURE = {fixture.name: fixture for fixture in FIXTURES}
368
414
 
phoenix/experimental/evals/evaluators.py CHANGED
@@ -36,6 +36,10 @@ class LLMEvaluator:
36
36
  self._model = model
37
37
  self._template = template
38
38
 
39
+ @property
40
+ def default_concurrency(self) -> int:
41
+ return self._model.default_concurrency
42
+
39
43
  def reload_client(self) -> None:
40
44
  self._model.reload_client()
41
45
 
phoenix/experimental/evals/functions/classify.py CHANGED
@@ -73,7 +73,7 @@ def llm_classify(
73
73
  include_prompt: bool = False,
74
74
  include_response: bool = False,
75
75
  run_sync: bool = False,
76
- concurrency: int = 20,
76
+ concurrency: Optional[int] = None,
77
77
  ) -> pd.DataFrame:
78
78
  """Classifies each input row of the dataframe using an LLM. Returns a pandas.DataFrame
79
79
  where the first column is named `label` and contains the classification labels. An optional
@@ -116,8 +116,9 @@ def llm_classify(
116
116
  run_sync (bool, default=False): If True, forces synchronous request submission. Otherwise
117
117
  evaluations will be run asynchronously if possible.
118
118
 
119
- concurrency (int, default=20): The number of concurrent evals if async submission is
120
- possible.
119
+ concurrency (Optional[int], default=None): The number of concurrent evals if async
120
+ submission is possible. If not provided, a recommended default concurrency is set on a
121
+ per-model basis.
121
122
 
122
123
  Returns:
123
124
  pandas.DataFrame: A dataframe where the `label` column (at column position 0) contains
@@ -127,6 +128,7 @@ def llm_classify(
127
128
  from the entries in the rails argument or "NOT_PARSABLE" if the model's output could
128
129
  not be parsed.
129
130
  """
131
+ concurrency = concurrency or model.default_concurrency
130
132
  # clients need to be reloaded to ensure that async evals work properly
131
133
  model.reload_client()
132
134
 
@@ -353,7 +355,7 @@ def run_evals(
353
355
  provide_explanation: bool = False,
354
356
  use_function_calling_if_available: bool = True,
355
357
  verbose: bool = False,
356
- concurrency: int = 20,
358
+ concurrency: Optional[int] = None,
357
359
  ) -> List[DataFrame]:
358
360
  """
359
361
  Applies a list of evaluators to a dataframe. Outputs a list of dataframes in
@@ -381,13 +383,21 @@ def run_evals(
381
383
  as model invocation parameters and details about retries and snapping to
382
384
  rails.
383
385
 
384
- concurrency (int, optional): The number of concurrent evals if async
385
- submission is possible.
386
+ concurrency (Optional[int], default=None): The number of concurrent evals if async
387
+ submission is possible. If not provided, a recommended default concurrency is set on a
388
+ per-model basis.
386
389
 
387
390
  Returns:
388
391
  List[DataFrame]: A list of dataframes, one for each evaluator, all of
389
392
  which have the same number of rows as the input dataframe.
390
393
  """
394
+ # use the minimum default concurrency of all the models
395
+ if concurrency is None:
396
+ if len(evaluators) == 0:
397
+ concurrency = 1
398
+ else:
399
+ concurrency = min(evaluator.default_concurrency for evaluator in evaluators)
400
+
391
401
  # clients need to be reloaded to ensure that async evals work properly
392
402
  for evaluator in evaluators:
393
403
  evaluator.reload_client()
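With this change, omitting concurrency lets llm_classify fall back to the model's default_concurrency, and run_evals takes the minimum default across its evaluators' models. A hedged sketch of the call; the template text and rails below are illustrative rather than the packaged defaults:

    import pandas as pd
    from phoenix.experimental.evals import OpenAIModel, llm_classify

    df = pd.DataFrame(
        {
            "query": ["What is Phoenix?"],
            "reference": ["Phoenix is an ML observability library."],
        }
    )

    model = OpenAIModel()  # needs OPENAI_API_KEY; inherits default_concurrency=20 from BaseEvalModel

    template = (
        "Is the reference relevant to the query?\n"
        "Query: {query}\n"
        "Reference: {reference}\n"
        "Respond with a single word: relevant or irrelevant."
    )

    # concurrency is omitted, so the per-model recommended default applies;
    # pass e.g. concurrency=5 to override it explicitly
    result = llm_classify(
        dataframe=df,
        model=model,
        template=template,
        rails=["relevant", "irrelevant"],
    )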
phoenix/experimental/evals/functions/generate.py CHANGED
@@ -31,7 +31,7 @@ def llm_generate(
31
31
  include_prompt: bool = False,
32
32
  include_response: bool = False,
33
33
  run_sync: bool = False,
34
- concurrency: int = 20,
34
+ concurrency: Optional[int] = None,
35
35
  ) -> pd.DataFrame:
36
36
  """
37
37
  Generates a text using a template using an LLM. This function is useful
@@ -70,14 +70,17 @@ def llm_generate(
70
70
  run_sync (bool, default=False): If True, forces synchronous request submission. Otherwise
71
71
  evaluations will be run asynchronously if possible.
72
72
 
73
- concurrency (int, default=20): The number of concurrent evals if async submission is
74
- possible.
73
+ concurrency (Optional[int], default=None): The number of concurrent evals if async
74
+ submission is possible. If not provided, a recommended default concurrency is set on a
75
+ per-model basis.
75
76
 
76
77
  Returns:
77
78
  generations_dataframe (pandas.DataFrame): A dataframe where each row
78
79
  represents the generated output
79
80
 
80
81
  """
82
+ concurrency = concurrency or model.default_concurrency
83
+
81
84
  # clients need to be reloaded to ensure that async evals work properly
82
85
  model.reload_client()
83
86
 
phoenix/experimental/evals/models/anthropic.py CHANGED
@@ -1,4 +1,3 @@
1
- import logging
2
1
  from dataclasses import dataclass, field
3
2
  from typing import TYPE_CHECKING, Any, Dict, List, Optional
4
3
 
@@ -8,8 +7,6 @@ from phoenix.experimental.evals.models.rate_limiters import RateLimiter
8
7
  if TYPE_CHECKING:
9
8
  from tiktoken import Encoding
10
9
 
11
- logger = logging.getLogger(__name__)
12
-
13
10
  MODEL_TOKEN_LIMIT_MAPPING = {
14
11
  "claude-2.1": 200000,
15
12
  "claude-2.0": 100000,
@@ -80,7 +77,6 @@ class AnthropicModel(BaseEvalModel):
80
77
  try:
81
78
  encoding = self._tiktoken.encoding_for_model(self.model)
82
79
  except KeyError:
83
- logger.warning("Warning: model not found. Using cl100k_base encoding.")
84
80
  encoding = self._tiktoken.get_encoding("cl100k_base")
85
81
  self._tiktoken_encoding = encoding
86
82
 
@@ -149,6 +145,9 @@ class AnthropicModel(BaseEvalModel):
149
145
  return _completion_with_retry(**kwargs)
150
146
 
151
147
  async def _async_generate(self, prompt: str, **kwargs: Dict[str, Any]) -> str:
148
+ # instruction is an invalid input to Anthropic models, it is passed in by
149
+ # BaseEvalModel.__call__ and needs to be removed
150
+ kwargs.pop("instruction", None)
152
151
  invocation_parameters = self.invocation_parameters()
153
152
  invocation_parameters.update(kwargs)
154
153
  response = await self._async_generate_with_retry(
phoenix/experimental/evals/models/base.py CHANGED
@@ -58,6 +58,7 @@ def set_verbosity(
58
58
 
59
59
  @dataclass
60
60
  class BaseEvalModel(ABC):
61
+ default_concurrency: int = 20
61
62
  _verbose: bool = False
62
63
  _rate_limiter: RateLimiter = field(default_factory=RateLimiter)
63
64
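Because default_concurrency is an ordinary dataclass field on BaseEvalModel, it should also be overridable per instance when constructing a concrete model; a hedged one-liner, assuming keyword construction is accepted:

    from phoenix.experimental.evals import OpenAIModel

    # lower the recommended async concurrency for this particular model instance
    model = OpenAIModel(default_concurrency=8)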
 
phoenix/experimental/evals/models/bedrock.py CHANGED
@@ -87,7 +87,6 @@ class BedrockModel(BaseEvalModel):
87
87
  try:
88
88
  encoding = self._tiktoken.encoding_for_model(self.model_id)
89
89
  except KeyError:
90
- logger.warning("Warning: model not found. Using cl100k_base encoding.")
91
90
  encoding = self._tiktoken.get_encoding("cl100k_base")
92
91
  self._tiktoken_encoding = encoding
93
92
 
@@ -165,7 +164,7 @@ class BedrockModel(BaseEvalModel):
165
164
  "temperature": self.temperature,
166
165
  "topP": self.top_p,
167
166
  "maxTokens": self.max_tokens,
168
- "stopSequences": [self.stop_sequences],
167
+ "stopSequences": self.stop_sequences,
169
168
  },
170
169
  **self.extra_parameters,
171
170
  }
@@ -204,6 +203,9 @@ class BedrockModel(BaseEvalModel):
204
203
  elif self.model_id.startswith("anthropic"):
205
204
  body = json.loads(response.get("body").read().decode())
206
205
  return body.get("completion")
206
+ elif self.model_id.startswith("amazon"):
207
+ body = json.loads(response.get("body").read())
208
+ return body.get("results")[0].get("outputText")
207
209
  else:
208
210
  body = json.loads(response.get("body").read())
209
211
  return body.get("results")[0].get("data").get("outputText")
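The new branch lets Amazon-prefixed (Titan) model ids be used with BedrockModel; a hedged sketch, with AWS credentials assumed to be configured however the model class normally picks them up:

    from phoenix.experimental.evals import BedrockModel

    # model ids beginning with "amazon" now have their completions parsed
    # from results[0].outputText
    model = BedrockModel(model_id="amazon.titan-text-express-v1")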
phoenix/experimental/evals/models/openai.py CHANGED
@@ -31,6 +31,8 @@ MODEL_TOKEN_LIMIT_MAPPING = {
31
31
  "gpt-4-0613": 8192, # Current gpt-4 default
32
32
  "gpt-4-32k-0314": 32768,
33
33
  "gpt-4-32k-0613": 32768,
34
+ "gpt-4-1106-preview": 128000,
35
+ "gpt-4-vision-preview": 128000,
34
36
  }
35
37
  LEGACY_COMPLETION_API_MODELS = ("gpt-3.5-turbo-instruct",)
36
38
  logger = logging.getLogger(__name__)
phoenix/experimental/evals/models/vertex.py CHANGED
@@ -21,6 +21,9 @@ MODEL_TOKEN_LIMIT_MAPPING = {
21
21
 
22
22
  @dataclass
23
23
  class GeminiModel(BaseEvalModel):
24
+ # The vertex SDK runs into connection pool limits at high concurrency
25
+ default_concurrency: int = 5
26
+
24
27
  model: str = "gemini-pro"
25
28
  """The model name to use."""
26
29
  temperature: float = 0.0
@@ -50,6 +53,9 @@ class GeminiModel(BaseEvalModel):
50
53
  max_retries=self.max_retries,
51
54
  )
52
55
 
56
+ def reload_client(self) -> None:
57
+ self._init_client()
58
+
53
59
  def _init_client(self) -> None:
54
60
  try:
55
61
  from google.api_core import exceptions # type:ignore
phoenix/experimental/evals/templates/default_templates.py CHANGED
@@ -73,13 +73,6 @@ your response.
73
73
  [END DATA]
74
74
 
75
75
  Is the answer above factual or hallucinated based on the query and reference text?
76
-
77
- Your response should be a single word: either "factual" or "hallucinated", and
78
- it should not include any other text or characters. "hallucinated" indicates that the answer
79
- provides factually inaccurate information to the query based on the reference text. "factual"
80
- indicates that the answer to the question is correct relative to the reference text, and does not
81
- contain made up information. Please read the query and reference text carefully before determining
82
- your response.
83
76
  """
84
77
  HALLUCINATION_PROMPT_TEMPLATE_WITH_EXPLANATION = """
85
78
  In this task, you will be presented with a query, a reference text and an answer. The answer is
phoenix/server/static/index.js CHANGED
@@ -6717,7 +6717,7 @@ fragment SpanEvaluationsTable_evals on Span {
6717
6717
  gap: var(--ac-global-dimension-static-size-200);
6718
6718
  `,children:i.map((o,l)=>x("li",{children:_(ft,{padding:"size-200",backgroundColor:"purple-100",borderColor:"purple-700",borderWidth:"thin",borderRadius:"medium",children:[x(Me,{color:"text-700",fontStyle:"italic",children:"embedded text"}),x("pre",{css:ee`
6719
6719
  margin: var(--ac-global-dimension-static-size-100) 0;
6720
- `,children:o[mtt]})]})},l))})}):null})}function Xxn(t){let{spanAttributes:e}=t,n=(0,br.useMemo)(()=>{let l=e[wr.tool];return typeof l=="object"?l:{}},[e]);if(!(Object.keys(n).length>0))return null;let r=n[vB.name],a=n[vB.description],o=n[vB.parameters];return x(Be,{direction:"column",gap:"size-200",children:x(uu,{title:"Tool"+(typeof r=="string"?`: ${r}`:""),...eg,children:_(Be,{direction:"column",children:[a!=null?x(ft,{paddingStart:"size-200",paddingEnd:"size-200",paddingTop:"size-100",paddingBottom:"size-100",borderBottomColor:"dark",borderBottomWidth:"thin",backgroundColor:"light",children:_(Be,{direction:"column",alignItems:"start",gap:"size-50",children:[x(Me,{color:"text-700",fontStyle:"italic",children:"Description"}),x(Me,{children:a})]})}):null,o!=null?x(ft,{paddingStart:"size-200",paddingEnd:"size-200",paddingTop:"size-100",paddingBottom:"size-100",borderBottomColor:"dark",borderBottomWidth:"thin",children:_(Be,{direction:"column",alignItems:"start",width:"100%",children:[x(Me,{color:"text-700",fontStyle:"italic",children:"Parameters"}),x(Tc,{value:JSON.stringify(o),mimeType:"json"})]})}):null]})})})}var Sxn=["irrelevant"];function Gse({document:t,documentEvaluations:e,backgroundColor:n,borderColor:i,labelColor:r}){let a=t[htt],o=e&&e.length;return x(ft,{borderRadius:"medium",backgroundColor:n,borderColor:i,borderWidth:"thin",children:_(Be,{direction:"column",children:[x(ft,{width:"100%",borderBottomWidth:"thin",borderBottomColor:i,children:_(Be,{direction:"row",justifyContent:"space-between",margin:"size-200",alignItems:"center",children:[_(Be,{direction:"row",gap:"size-50",alignItems:"center",children:[x(pt,{svg:x(Et.FileOutline,{})}),_(Nn,{level:4,children:["document ",t[Itt]]})]}),typeof t[Wse]=="number"&&x(Zs,{color:r,children:`score ${mh(t[Wse])}`})]})}),x("pre",{css:ee`
6720
+ `,children:o[mtt]})]})},l))})}):null})}function Xxn(t){let{spanAttributes:e}=t,n=(0,br.useMemo)(()=>{let l=e[wr.tool];return typeof l=="object"?l:{}},[e]);if(!(Object.keys(n).length>0))return null;let r=n[vB.name],a=n[vB.description],o=n[vB.parameters];return x(Be,{direction:"column",gap:"size-200",children:x(uu,{title:"Tool"+(typeof r=="string"?`: ${r}`:""),...eg,children:_(Be,{direction:"column",children:[a!=null?x(ft,{paddingStart:"size-200",paddingEnd:"size-200",paddingTop:"size-100",paddingBottom:"size-100",borderBottomColor:"dark",borderBottomWidth:"thin",backgroundColor:"light",children:_(Be,{direction:"column",alignItems:"start",gap:"size-50",children:[x(Me,{color:"text-700",fontStyle:"italic",children:"Description"}),x(Me,{children:a})]})}):null,o!=null?x(ft,{paddingStart:"size-200",paddingEnd:"size-200",paddingTop:"size-100",paddingBottom:"size-100",borderBottomColor:"dark",borderBottomWidth:"thin",children:_(Be,{direction:"column",alignItems:"start",width:"100%",children:[x(Me,{color:"text-700",fontStyle:"italic",children:"Parameters"}),x(Tc,{value:JSON.stringify(o),mimeType:"json"})]})}):null]})})})}var Sxn=["irrelevant","unrelated"];function Gse({document:t,documentEvaluations:e,backgroundColor:n,borderColor:i,labelColor:r}){let a=t[htt],o=e&&e.length;return x(ft,{borderRadius:"medium",backgroundColor:n,borderColor:i,borderWidth:"thin",children:_(Be,{direction:"column",children:[x(ft,{width:"100%",borderBottomWidth:"thin",borderBottomColor:i,children:_(Be,{direction:"row",justifyContent:"space-between",margin:"size-200",alignItems:"center",children:[_(Be,{direction:"row",gap:"size-50",alignItems:"center",children:[x(pt,{svg:x(Et.FileOutline,{})}),_(Nn,{level:4,children:["document ",t[Itt]]})]}),typeof t[Wse]=="number"&&x(Zs,{color:r,children:`score ${mh(t[Wse])}`})]})}),x("pre",{css:ee`
6721
6721
  padding: var(--ac-global-dimension-static-size-200);
6722
6722
  white-space: normal;
6723
6723
  margin: 0;
phoenix/session/evaluation.py CHANGED
@@ -9,6 +9,7 @@ import math
9
9
  from time import sleep
10
10
  from typing import (
11
11
  Any,
12
+ Iterator,
12
13
  Optional,
13
14
  Sequence,
14
15
  Tuple,
@@ -33,24 +34,29 @@ __all__ = [
33
34
  from phoenix.trace.span_evaluations import Evaluations
34
35
 
35
36
 
36
- def add_evaluations(
37
- exporter: HttpExporter,
38
- evaluations: pd.DataFrame,
39
- evaluation_name: str,
40
- ) -> None:
41
- index_names = evaluations.index.names
42
- for index, row in evaluations.iterrows():
37
+ def encode_evaluations(evaluations: Evaluations) -> Iterator[pb.Evaluation]:
38
+ dataframe = evaluations.dataframe
39
+ eval_name = evaluations.eval_name
40
+ index_names = dataframe.index.names
41
+ for index, row in dataframe.iterrows():
43
42
  subject_id = _extract_subject_id_from_index(
44
43
  index_names,
45
44
  cast(Union[str, Tuple[Any]], index),
46
45
  )
47
46
  if (result := _extract_result(row)) is None:
48
47
  continue
49
- evaluation = pb.Evaluation(
50
- name=evaluation_name,
48
+ yield pb.Evaluation(
49
+ name=eval_name,
51
50
  result=result,
52
51
  subject_id=subject_id,
53
52
  )
53
+
54
+
55
+ def add_evaluations(
56
+ exporter: HttpExporter,
57
+ evaluations: Evaluations,
58
+ ) -> None:
59
+ for evaluation in encode_evaluations(evaluations):
54
60
  exporter.export(evaluation)
55
61
 
56
62
 
@@ -130,7 +136,7 @@ def log_evaluations(
130
136
  return
131
137
  exporter = HttpExporter(endpoint=endpoint, host=host, port=port)
132
138
  for eval in filter(bool, evals):
133
- add_evaluations(exporter, eval.dataframe, eval.eval_name)
139
+ add_evaluations(exporter, eval)
134
140
  with tqdm(total=n, desc="Sending Evaluations") as pbar:
135
141
  while n:
136
142
  sleep(0.1)
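Since add_evaluations now takes an Evaluations object directly, logging evals that were computed elsewhere looks roughly like this (the span id and values are illustrative):

    import pandas as pd
    from phoenix.session.evaluation import log_evaluations
    from phoenix.trace.span_evaluations import SpanEvaluations

    eval_df = pd.DataFrame(
        {"label": ["factual"], "score": [1.0]},
        index=pd.Index(["7e2f08cb43bbad5f"], name="context.span_id"),
    )

    # sends the encoded evaluations to the running Phoenix collector
    log_evaluations(SpanEvaluations("hallucination", eval_df))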
phoenix/session/session.py CHANGED
@@ -30,6 +30,7 @@ from phoenix.pointcloud.umap_parameters import get_umap_parameters
30
30
  from phoenix.server.app import create_app
31
31
  from phoenix.server.thread_server import ThreadServer
32
32
  from phoenix.services import AppService
33
+ from phoenix.session.evaluation import encode_evaluations
33
34
  from phoenix.trace.dsl import SpanFilter
34
35
  from phoenix.trace.dsl.query import SpanQuery
35
36
  from phoenix.trace.otel import encode
@@ -46,6 +47,8 @@ logger = logging.getLogger(__name__)
46
47
  # type workaround
47
48
  # https://github.com/python/mypy/issues/5264#issuecomment-399407428
48
49
  if TYPE_CHECKING:
50
+ from phoenix.trace import Evaluations
51
+
49
52
  _BaseList = UserList[pd.DataFrame]
50
53
  else:
51
54
  _BaseList = UserList
@@ -123,6 +126,10 @@ class Session(ABC):
123
126
  self.traces.put(encode(span))
124
127
 
125
128
  self.evals: Evals = Evals()
129
+ if trace_dataset:
130
+ for evaluations in trace_dataset.evaluations:
131
+ for pb_evaluation in encode_evaluations(evaluations):
132
+ self.evals.put(pb_evaluation)
126
133
 
127
134
  self.host = host or get_env_host()
128
135
  self.port = port or get_env_port()
@@ -213,6 +220,15 @@ class Session(ABC):
213
220
  return None
214
221
  return pd.json_normalize(data, max_level=1).set_index("context.span_id", drop=False)
215
222
 
223
+ def get_evaluations(self) -> List["Evaluations"]:
224
+ return self.evals.export_evaluations()
225
+
226
+ def get_trace_dataset(self) -> Optional[TraceDataset]:
227
+ if (dataframe := self.get_spans_dataframe()) is None:
228
+ return None
229
+ evaluations = self.get_evaluations()
230
+ return TraceDataset(dataframe=dataframe, evaluations=evaluations)
231
+
216
232
 
217
233
  _session: Optional[Session] = None
218
234
 
@@ -479,6 +495,9 @@ def _get_url(host: str, port: int, notebook_env: NotebookEnvironment) -> str:
479
495
  if notebook_env == NotebookEnvironment.DATABRICKS:
480
496
  context = _get_databricks_context()
481
497
  return f"{_get_databricks_notebook_base_url(context)}/{port}/"
498
+ if host == "0.0.0.0" or host == "127.0.0.1":
499
+ # The app is running locally, so use localhost
500
+ return f"http://localhost:{port}/"
482
501
  return f"http://{host}:{port}/"
483
502
 
484
503
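Taken together, these additions let everything collected by a running app be pulled back out of the session; a minimal sketch:

    import phoenix as px

    session = px.active_session()  # or the value returned by px.launch_app()

    # all evals currently held by the app, as SpanEvaluations / DocumentEvaluations
    evaluations = session.get_evaluations()

    # spans plus their attached evaluations, bundled for persistence
    trace_dataset = session.get_trace_dataset()
    if trace_dataset is not None:
        dataset_id = trace_dataset.save()  # written under the Phoenix working directory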
 
phoenix/trace/errors.py ADDED
@@ -0,0 +1,5 @@
1
+ from phoenix.exceptions import PhoenixException
2
+
3
+
4
+ class InvalidParquetMetadataError(PhoenixException):
5
+ pass
phoenix/trace/span_evaluations.py CHANGED
@@ -4,21 +4,18 @@ from dataclasses import dataclass, field
4
4
  from itertools import product
5
5
  from pathlib import Path
6
6
  from types import MappingProxyType
7
- from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Set, Tuple, Type, Union
7
+ from typing import Any, Callable, List, Mapping, Optional, Sequence, Set, Tuple, Type, Union
8
8
  from uuid import UUID, uuid4
9
9
 
10
10
  import pandas as pd
11
11
  from pandas.api.types import is_integer_dtype, is_numeric_dtype, is_string_dtype
12
- from pyarrow import Table, parquet
12
+ from pyarrow import Schema, Table, parquet
13
13
 
14
14
  from phoenix.config import TRACE_DATASET_DIR
15
- from phoenix.exceptions import PhoenixException
15
+ from phoenix.trace.errors import InvalidParquetMetadataError
16
16
 
17
17
  EVAL_NAME_COLUMN_PREFIX = "eval."
18
-
19
-
20
- class InvalidParquetMetadataError(PhoenixException):
21
- pass
18
+ EVAL_PARQUET_FILE_NAME = "evaluations-{id}.parquet"
22
19
 
23
20
 
24
21
  class NeedsNamedIndex(ABC):
@@ -164,20 +161,21 @@ class Evaluations(NeedsNamedIndex, NeedsResultColumns, ABC):
164
161
  tuple(sorted(prod)) for prod in product(*cls.index_names.keys())
165
162
  )
166
163
 
167
- def to_parquet(self, directory: Optional[Union[str, Path]] = None) -> Path:
168
- """Persists the evaluations to a parquet file.
164
+ def save(self, directory: Optional[Union[str, Path]] = None) -> UUID:
165
+ """
166
+ Persists the evaluations to disk.
169
167
 
170
168
  Args:
171
169
  directory (Optional[Union[str, Path]], optional): An optional path
172
- to a directory where the parquet file will be saved. If not
173
- provided, the parquet file will be saved to a default location.
170
+ to a directory where the data will be saved. If not provided, the
171
+ data will be saved to a default location.
174
172
 
175
173
  Returns:
176
- Path: The path to the parquet file, including a randomly generated
177
- filename.
174
+ UUID: The ID of the evaluations, which can be used as a key to load
175
+ the evaluations from disk using `load`.
178
176
  """
179
177
  directory = Path(directory) if directory else TRACE_DATASET_DIR
180
- path = directory / f"evaluations-{self.id}.parquet"
178
+ path = directory / EVAL_PARQUET_FILE_NAME.format(id=self.id)
181
179
  table = Table.from_pandas(self.dataframe)
182
180
  table = table.replace_schema_metadata(
183
181
  {
@@ -193,23 +191,38 @@ class Evaluations(NeedsNamedIndex, NeedsResultColumns, ABC):
193
191
  }
194
192
  )
195
193
  parquet.write_table(table, path)
196
- return path
194
+ return self.id
197
195
 
198
196
  @classmethod
199
- def from_parquet(cls, path: Union[str, Path]) -> "Evaluations":
200
- """Loads the evaluations from a parquet file.
197
+ def load(
198
+ cls, id: Union[str, UUID], directory: Optional[Union[str, Path]] = None
199
+ ) -> "Evaluations":
200
+ """
201
+ Loads the evaluations from disk.
201
202
 
202
203
  Args:
203
- path (Union[str, Path]): Path to a persisted evaluations parquet
204
- file.
204
+ id (Union[str, UUID]): The ID of the evaluations to load.
205
+
206
+ directory(Optional[Union[str, Path]], optional): The path to the
207
+ directory containing the persisted evaluations. If not provided, the
208
+ parquet file will be loaded from the same default location used by
209
+ `save`.
205
210
 
206
211
  Returns:
207
212
  Evaluations: The loaded evaluations. The type of the returned
208
213
  evaluations will be the same as the type of the evaluations that
209
214
  were originally persisted.
210
215
  """
216
+ if not isinstance(id, UUID):
217
+ id = UUID(id)
218
+ path = Path(directory or TRACE_DATASET_DIR) / EVAL_PARQUET_FILE_NAME.format(id=id)
211
219
  schema = parquet.read_schema(path)
212
- eval_id, eval_name, evaluations_cls = _parse_schema_metadata(schema.metadata)
220
+ eval_id, eval_name, evaluations_cls = _parse_schema_metadata(schema)
221
+ if id != eval_id:
222
+ raise InvalidParquetMetadataError(
223
+ f"The input id {id} does not match the id {eval_id} in the parquet metadata. "
224
+ "Ensure that you have not renamed the parquet file."
225
+ )
213
226
  table = parquet.read_table(path)
214
227
  dataframe = table.to_pandas()
215
228
  evaluations = evaluations_cls(eval_name=eval_name, dataframe=dataframe)
@@ -301,48 +314,20 @@ class TraceEvaluations(
301
314
  ...
302
315
 
303
316
 
304
- def _parse_schema_metadata(metadata: Dict[bytes, Any]) -> Tuple[UUID, str, Type[Evaluations]]:
305
- """Validates and parses the schema metadata. Raises an exception if the
306
- metadata is invalid.
307
-
308
- Args:
309
- metadata (Dict[bytes, Any]): A dictionary of schema metadata from a
310
- parquet file.
311
-
312
- Returns:
313
- Tuple[str, ModuleType]: The evaluation name and the evaluations class.
317
+ def _parse_schema_metadata(schema: Schema) -> Tuple[UUID, str, Type[Evaluations]]:
318
+ """
319
+ Validates and parses the pyarrow schema metadata.
314
320
  """
315
- if not (arize_metadata_json := metadata.get(b"arize")):
316
- raise InvalidParquetMetadataError('Schema metadata is missing "arize" key')
317
321
  try:
318
- arize_metadata = json.loads(arize_metadata_json)
319
- except json.JSONDecodeError as err:
322
+ metadata = schema.metadata
323
+ arize_metadata = json.loads(metadata[b"arize"])
324
+ eval_classes = {subclass.__name__: subclass for subclass in Evaluations.__subclasses__()}
325
+ eval_id = UUID(arize_metadata["eval_id"])
326
+ if not isinstance((eval_name := arize_metadata["eval_name"]), str):
327
+ raise ValueError('Arize metadata must contain a string value for key "eval_name"')
328
+ evaluations_cls = eval_classes[arize_metadata["eval_type"]]
329
+ return eval_id, eval_name, evaluations_cls
330
+ except Exception as err:
320
331
  raise InvalidParquetMetadataError(
321
- 'Encountered invalid JSON string under "arize" key'
332
+ "An error occurred while parsing parquet schema metadata"
322
333
  ) from err
323
- evaluations_classes = {subclass.__name__: subclass for subclass in Evaluations.__subclasses__()}
324
- if not (
325
- isinstance(arize_metadata, dict)
326
- and (eval_id := _to_uuid(arize_metadata.get("eval_id")))
327
- and isinstance(eval_name := arize_metadata.get("eval_name"), str)
328
- and (eval_type := arize_metadata.get("eval_type"))
329
- and (evaluations_cls := evaluations_classes.get(eval_type))
330
- ):
331
- raise InvalidParquetMetadataError(f"Invalid Arize metadata: {arize_metadata}")
332
- return eval_id, eval_name, evaluations_cls
333
-
334
-
335
- def _to_uuid(value: Any) -> Optional[UUID]:
336
- """
337
- Converts an input to a UUID if possible, otherwise returns None.
338
-
339
- Args:
340
- value (Any): The value to convert to a UUID.
341
-
342
- Returns:
343
- Optional[UUID]: A UUID if the value could be converted, otherwise None.
344
- """
345
- try:
346
- return UUID(value)
347
- except Exception:
348
- return None
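The new save/load pair keys persisted evals on their UUID instead of a caller-chosen file path; a minimal round-trip sketch (the span id and values are illustrative):

    import pandas as pd
    from phoenix.trace.span_evaluations import SpanEvaluations

    evals = SpanEvaluations(
        "relevance",
        pd.DataFrame(
            {"label": ["relevant"], "score": [1.0]},
            index=pd.Index(["7e2f08cb43bbad5f"], name="context.span_id"),
        ),
    )

    eval_id = evals.save()  # writes evaluations-<id>.parquet under TRACE_DATASET_DIR

    # later, possibly in another process: the concrete subclass is recovered
    # from the parquet metadata, so loading via the base class works too
    restored = SpanEvaluations.load(eval_id)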
phoenix/trace/trace_dataset.py CHANGED
@@ -1,14 +1,18 @@
1
1
  import json
2
- import uuid
3
2
  from datetime import datetime
4
- from typing import Any, Iterable, Iterator, List, Optional, cast
3
+ from pathlib import Path
4
+ from typing import Any, Iterable, Iterator, List, Optional, Tuple, Union, cast
5
+ from uuid import UUID, uuid4
6
+ from warnings import warn
5
7
 
6
8
  import pandas as pd
7
9
  from pandas import DataFrame, read_parquet
10
+ from pyarrow import Schema, Table, parquet
8
11
 
9
12
  from phoenix.datetime_utils import normalize_timestamps
13
+ from phoenix.trace.errors import InvalidParquetMetadataError
10
14
 
11
- from ..config import DATASET_DIR, GENERATED_DATASET_NAME_PREFIX
15
+ from ..config import DATASET_DIR, GENERATED_DATASET_NAME_PREFIX, TRACE_DATASET_DIR
12
16
  from .schemas import ATTRIBUTE_PREFIX, CONTEXT_PREFIX, Span
13
17
  from .semantic_conventions import (
14
18
  DOCUMENT_METADATA,
@@ -43,6 +47,8 @@ DOCUMENT_COLUMNS = [
43
47
  RERANKER_OUTPUT_DOCUMENTS_COLUMN_NAME,
44
48
  ]
45
49
 
50
+ TRACE_DATASET_PARQUET_FILE_NAME = "trace_dataset-{id}.parquet"
51
+
46
52
 
47
53
  def normalize_dataframe(dataframe: DataFrame) -> "DataFrame":
48
54
  """Makes the dataframe have appropriate data types"""
@@ -94,6 +100,7 @@ class TraceDataset:
94
100
  name: str
95
101
  dataframe: pd.DataFrame
96
102
  evaluations: List[Evaluations] = []
103
+ _id: UUID = uuid4()
97
104
  _data_file_name: str = "data.parquet"
98
105
 
99
106
  def __init__(
@@ -122,7 +129,7 @@ class TraceDataset:
122
129
  f"The dataframe is missing some required columns: {', '.join(missing_columns)}"
123
130
  )
124
131
  self.dataframe = normalize_dataframe(dataframe)
125
- self.name = name or f"{GENERATED_DATASET_NAME_PREFIX}{str(uuid.uuid4())}"
132
+ self.name = name or f"{GENERATED_DATASET_NAME_PREFIX}{str(uuid4())}"
126
133
  self.evaluations = list(evaluations)
127
134
 
128
135
  @classmethod
@@ -199,6 +206,89 @@ class TraceDataset:
199
206
  coerce_timestamps="ms",
200
207
  )
201
208
 
209
+ def save(self, directory: Optional[Union[str, Path]] = None) -> UUID:
210
+ """
211
+ Writes the trace dataset to disk. If any evaluations have been appended
212
+ to the dataset, those evaluations will be saved to separate files within
213
+ the same directory.
214
+
215
+ Args:
216
+ directory (Optional[Union[str, Path]], optional): An optional path
217
+ to a directory where the data will be written. If not provided, the
218
+ data will be written to a default location.
219
+
220
+ Returns:
221
+ UUID: The id of the trace dataset, which can be used as key to load
222
+ the dataset from disk using `load`.
223
+ """
224
+ directory = Path(directory or TRACE_DATASET_DIR)
225
+ for evals in self.evaluations:
226
+ evals.save(directory)
227
+ path = directory / TRACE_DATASET_PARQUET_FILE_NAME.format(id=self._id)
228
+ dataframe = get_serializable_spans_dataframe(self.dataframe)
229
+ dataframe.to_parquet(
230
+ path,
231
+ allow_truncated_timestamps=True,
232
+ coerce_timestamps="ms",
233
+ )
234
+ table = Table.from_pandas(self.dataframe)
235
+ table = table.replace_schema_metadata(
236
+ {
237
+ **(table.schema.metadata or {}),
238
+ # explicitly encode keys and values, which are automatically encoded regardless
239
+ b"arize": json.dumps(
240
+ {
241
+ "dataset_id": str(self._id),
242
+ "dataset_name": self.name,
243
+ "eval_ids": [str(evals.id) for evals in self.evaluations],
244
+ }
245
+ ).encode("utf-8"),
246
+ }
247
+ )
248
+ parquet.write_table(table, path)
249
+ return self._id
250
+
251
+ @classmethod
252
+ def load(
253
+ cls, id: Union[str, UUID], directory: Optional[Union[str, Path]] = None
254
+ ) -> "TraceDataset":
255
+ """
256
+ Reads in a trace dataset from disk. Any associated evaluations will
257
+ automatically be read from disk and attached to the trace dataset.
258
+
259
+ Args:
260
+ id (Union[str, UUID]): The ID of the trace dataset to be loaded.
261
+
262
+ directory (Optional[Union[str, Path]], optional): The path to the
263
+ directory containing the persisted trace dataset parquet file. If
264
+ not provided, the parquet file will be loaded from the same default
265
+ location used by `save`.
266
+
267
+ Returns:
268
+ TraceDataset: The loaded trace dataset.
269
+ """
270
+ if not isinstance(id, UUID):
271
+ id = UUID(id)
272
+ path = Path(directory or TRACE_DATASET_DIR) / TRACE_DATASET_PARQUET_FILE_NAME.format(id=id)
273
+ schema = parquet.read_schema(path)
274
+ dataset_id, dataset_name, eval_ids = _parse_schema_metadata(schema)
275
+ if id != dataset_id:
276
+ raise InvalidParquetMetadataError(
277
+ f"The input id {id} does not match the id {dataset_id} in the parquet metadata. "
278
+ "Ensure that you have not renamed the parquet file."
279
+ )
280
+ evaluations = []
281
+ for eval_id in eval_ids:
282
+ try:
283
+ evaluations.append(Evaluations.load(eval_id, path.parent))
284
+ except Exception:
285
+ warn(f'Failed to load evaluations with id: "{eval_id}"')
286
+ table = parquet.read_table(path)
287
+ dataframe = table.to_pandas()
288
+ ds = cls(dataframe, dataset_name, evaluations)
289
+ ds._id = dataset_id
290
+ return ds
291
+
202
292
  def append_evaluations(self, evaluations: Evaluations) -> None:
203
293
  """adds an evaluation to the traces"""
204
294
  # Append the evaluations to the list of evaluations
@@ -233,3 +323,20 @@ class TraceDataset:
233
323
  # Make sure the index is set to the span_id
234
324
  df = self.dataframe.set_index("context.span_id", drop=False)
235
325
  return pd.concat([df, evals_df], axis=1)
326
+
327
+
328
+ def _parse_schema_metadata(schema: Schema) -> Tuple[UUID, str, List[UUID]]:
329
+ """
330
+ Returns parsed metadata from a parquet schema or raises an exception if the
331
+ metadata is invalid.
332
+ """
333
+ try:
334
+ metadata = schema.metadata
335
+ arize_metadata = json.loads(metadata[b"arize"])
336
+ dataset_id = UUID(arize_metadata["dataset_id"])
337
+ if not isinstance(dataset_name := arize_metadata["dataset_name"], str):
338
+ raise ValueError("Arize metadata must contain a dataset_name key with string value")
339
+ eval_ids = [UUID(eval_id) for eval_id in arize_metadata["eval_ids"]]
340
+ return dataset_id, dataset_name, eval_ids
341
+ except Exception as err:
342
+ raise InvalidParquetMetadataError("Unable to parse parquet metadata") from err
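A trace dataset and any appended evaluations can now be round-tripped by id as well; a minimal sketch that builds on the session example above (launch_app's trace parameter is assumed for the reload step):

    import phoenix as px
    from phoenix.trace.trace_dataset import TraceDataset

    # persist the current session's spans together with their evals
    ds = px.active_session().get_trace_dataset()  # assumes a session with collected spans
    dataset_id = ds.save()

    # later: reload by id and relaunch the app with the same data
    restored = TraceDataset.load(dataset_id)
    px.launch_app(trace=restored)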
phoenix/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "2.5.0"
1
+ __version__ = "2.7.0"