arize-phoenix 4.7.1__py3-none-any.whl → 4.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: arize-phoenix
3
- Version: 4.7.1
3
+ Version: 4.7.2
4
4
  Summary: AI Observability and Evaluation
5
5
  Project-URL: Documentation, https://docs.arize.com/phoenix/
6
6
  Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
@@ -5,7 +5,7 @@ phoenix/exceptions.py,sha256=n2L2KKuecrdflB9MsCdAYCiSEvGJptIsfRkXMoJle7A,169
5
5
  phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
6
6
  phoenix/services.py,sha256=aTxhcOA1pZHB6U-B3TEcp6fqDF5oT0xCUvEUNMZVTUQ,5175
7
7
  phoenix/settings.py,sha256=cO-qgis_S27nHirTobYI9hHPfZH18R--WMmxNdsVUwc,273
8
- phoenix/version.py,sha256=7suIQcI50zQCokBqNExDKrJDN157VogyLAp_ZZ4xqCM,22
8
+ phoenix/version.py,sha256=cmBVf5yWtMWpLVrALEkcO90h-q8n53EpZSufTL15jyo,22
9
9
  phoenix/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  phoenix/core/embedding_dimension.py,sha256=zKGbcvwOXgLf-yrJBpQyKtd-LEOPRKHnUToyAU8Owis,87
11
11
  phoenix/core/model.py,sha256=km_a--PBHOuA337ClRw9xqhOHhrUT6Rl9pz_zV0JYkQ,4843
@@ -31,9 +31,9 @@ phoenix/db/migrations/types.py,sha256=Frq1AKSyBKQQ0FLzON-EmgTqE4kNkOpHMsbWnI-WgC
31
31
  phoenix/db/migrations/versions/10460e46d750_datasets.py,sha256=l69yZfScFrjfZZpY0gnqwhsDUEctLeo02qMgA_aOGDg,8155
32
32
  phoenix/db/migrations/versions/cf03bd6bae1d_init.py,sha256=CbWT3ZTR0CZqeT3zWLoTWhboFmnOy3Ju1z6Ztpq8WIM,8122
33
33
  phoenix/experiments/__init__.py,sha256=6JGwgUd7xCbGpuHqYZlsmErmYvVgv7N_j43bn3dUqsk,123
34
- phoenix/experiments/functions.py,sha256=m4PlpkTLoZxmt7ZV2KqwcWY5q18Yu3ZTrcEzAqLN-o4,31656
34
+ phoenix/experiments/functions.py,sha256=2ZWCcIs0dh_UNB21t0J_PzWcYhu_32Ai3J6sEMjbBGE,32310
35
35
  phoenix/experiments/tracing.py,sha256=wVpt8Ie9WNPoi1djJdcrkwCokHdTO0bicXViLg3O-1Y,2831
36
- phoenix/experiments/types.py,sha256=HQ9k7dUTlOLZl0iGtZOnToUtZBYGos6afwvO44subAM,24035
36
+ phoenix/experiments/types.py,sha256=VuvDCcvUGeHIQuXS_xpz7Jq5xHdt3qu-O_C7IQ3DvF8,23397
37
37
  phoenix/experiments/utils.py,sha256=wLu5Kvt1b4a8rGPRWq5G8RQ9XSiV8fCIVm51zWBI3-g,758
38
38
  phoenix/experiments/evaluators/__init__.py,sha256=j63fi3fa3U7-itVPHa82GowhjQRU-wO6yhO34u_lhsA,714
39
39
  phoenix/experiments/evaluators/base.py,sha256=ani0F2TN7DMN0KLhV89LIr9-W4g-ccEl2YQJgfp44Js,5325
@@ -135,7 +135,7 @@ phoenix/server/api/routers/v1/dataset_examples.py,sha256=XfqOvDKF1oxb0pkeYfBycww
135
135
  phoenix/server/api/routers/v1/datasets.py,sha256=f2gLG-geu-_wtEw4mKSzNWK2cFb5TYOyRL3tQ7Fl7Es,31544
136
136
  phoenix/server/api/routers/v1/evaluations.py,sha256=8g6P_e2BweV3RDU0esFmpkb0L5fCwonQPXiJ0y6HLwg,9126
137
137
  phoenix/server/api/routers/v1/experiment_evaluations.py,sha256=H_psVyuGUQImo0oxdEAKAMQ-oyVwkVIq5yaMHzHIiPc,5455
138
- phoenix/server/api/routers/v1/experiment_runs.py,sha256=u4Kgz1i5AffmCF2LHtC9Oo1hlGscZ3Dm8JlTRhM55yU,8307
138
+ phoenix/server/api/routers/v1/experiment_runs.py,sha256=jy4SynmzdtQMoUzlowmG6wsVU14SsLAzfcW4JOhXjeQ,8154
139
139
  phoenix/server/api/routers/v1/experiments.py,sha256=cG-LyIGRdB1jVTL42Xi2__nsXibVe9Up7m3hFiTIYYY,11886
140
140
  phoenix/server/api/routers/v1/spans.py,sha256=PFeS3ayKj4cUle0CH-f-CpM1fRi-JicEG7BEtkANzAo,4074
141
141
  phoenix/server/api/routers/v1/traces.py,sha256=dYEf5pThenAQCgfQljHdrnwd4tC_tAXm6Kvk6GphPYs,2774
@@ -202,7 +202,7 @@ phoenix/server/static/apple-touch-icon-76x76.png,sha256=CT_xT12I0u2i0WU8JzBZBuOQ
202
202
  phoenix/server/static/apple-touch-icon.png,sha256=fOfpjqGpWYbJ0eAurKsyoZP1EAs6ZVooBJ_SGk2ZkDs,3801
203
203
  phoenix/server/static/favicon.ico,sha256=bY0vvCKRftemZfPShwZtE93DiiQdaYaozkPGwNFr6H8,34494
204
204
  phoenix/server/static/index.css,sha256=KKGpx4iwF91VGRm0YN-4cn8oC-oIqC6HecoPf0x3ZM8,1885
205
- phoenix/server/static/index.js,sha256=tmyBBBaYS-HaFhd0dqqXiIsy5Lnvf-klOxQ0gRHAoDM,3521766
205
+ phoenix/server/static/index.js,sha256=LmaCH-kjVuEBKvMhK3pOUxK12Wv26htH5juAzju9VWE,3524823
206
206
  phoenix/server/static/modernizr.js,sha256=mvK-XtkNqjOral-QvzoqsyOMECXIMu5BQwSVN_wcU9c,2564
207
207
  phoenix/server/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
208
208
  phoenix/server/templates/index.html,sha256=S4z7qSoNSwnKFAH9r96AR-YJEyoKMd-VMWVlJ_IdzME,2039
@@ -247,8 +247,8 @@ phoenix/utilities/logging.py,sha256=lDXd6EGaamBNcQxL4vP1au9-i_SXe0OraUDiJOcszSw,
247
247
  phoenix/utilities/project.py,sha256=qWsvKnG1oKhOFUowXf9qiOL2ia7jaFe_ijFFHEt8GJo,431
248
248
  phoenix/utilities/re.py,sha256=PDve_OLjRTM8yQQJHC8-n3HdIONi7aNils3ZKRZ5uBM,2045
249
249
  phoenix/utilities/span_store.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
250
- arize_phoenix-4.7.1.dist-info/METADATA,sha256=4l6GZ3dH0ALP9ifyZ7mSk32mjC_i7rWj_ahg2Mrvf-c,11451
251
- arize_phoenix-4.7.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
252
- arize_phoenix-4.7.1.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
253
- arize_phoenix-4.7.1.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
254
- arize_phoenix-4.7.1.dist-info/RECORD,,
250
+ arize_phoenix-4.7.2.dist-info/METADATA,sha256=m2l2uSMpqkXD-NpYdOBqsKlNF5jMUoLoSjnxo9is9h0,11451
251
+ arize_phoenix-4.7.2.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
252
+ arize_phoenix-4.7.2.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
253
+ arize_phoenix-4.7.2.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
254
+ arize_phoenix-4.7.2.dist-info/RECORD,,
@@ -1,3 +1,4 @@
1
+ import asyncio
1
2
  import functools
2
3
  import inspect
3
4
  import json
@@ -62,7 +63,6 @@ from phoenix.experiments.types import (
62
63
  ExperimentEvaluationRun,
63
64
  ExperimentParameters,
64
65
  ExperimentRun,
65
- ExperimentRunOutput,
66
66
  ExperimentTask,
67
67
  RanExperiment,
68
68
  TaskSummary,
@@ -284,7 +284,7 @@ def run_experiment(
284
284
  experiment_id=experiment.id,
285
285
  dataset_example_id=example.id,
286
286
  repetition_number=repetition_number,
287
- experiment_run_output=ExperimentRunOutput(task_output=output),
287
+ output=output,
288
288
  error=repr(error) if error else None,
289
289
  trace_id=_str_trace_id(span.get_span_context().trace_id), # type: ignore[no-untyped-call]
290
290
  )
@@ -345,13 +345,21 @@ def run_experiment(
345
345
  experiment_id=experiment.id,
346
346
  dataset_example_id=example.id,
347
347
  repetition_number=repetition_number,
348
- experiment_run_output=ExperimentRunOutput(task_output=output),
348
+ output=output,
349
349
  error=repr(error) if error else None,
350
350
  trace_id=_str_trace_id(span.get_span_context().trace_id), # type: ignore[no-untyped-call]
351
351
  )
352
352
  if not dry_run:
353
- resp = await async_client.post(
354
- f"/v1/experiments/{experiment.id}/runs", json=jsonify(exp_run)
353
+ # Below is a workaround to avoid timeout errors sometimes
354
+ # encountered when the task is a synchronous function that
355
+ # blocks for too long.
356
+ resp = await asyncio.get_running_loop().run_in_executor(
357
+ None,
358
+ functools.partial(
359
+ sync_client.post,
360
+ url=f"/v1/experiments/{experiment.id}/runs",
361
+ json=jsonify(exp_run),
362
+ ),
355
363
  )
356
364
  resp.raise_for_status()
357
365
  exp_run = replace(exp_run, id=resp.json()["data"]["id"])
@@ -393,7 +401,7 @@ def run_experiment(
393
401
  ran_experiment.__init__( # type: ignore[misc]
394
402
  params=params,
395
403
  dataset=dataset,
396
- runs={r.id: r for r in task_runs},
404
+ runs={r.id: r for r in task_runs if r is not None},
397
405
  task_summary=task_summary,
398
406
  **_asdict(experiment),
399
407
  )
@@ -498,7 +506,7 @@ def evaluate_experiment(
498
506
  stack.enter_context(capture_spans(resource))
499
507
  try:
500
508
  result = evaluator.evaluate(
501
- output=experiment_run.output,
509
+ output=deepcopy(experiment_run.output),
502
510
  expected=example.output,
503
511
  reference=example.output,
504
512
  input=example.input,
@@ -550,7 +558,7 @@ def evaluate_experiment(
550
558
  stack.enter_context(capture_spans(resource))
551
559
  try:
552
560
  result = await evaluator.async_evaluate(
553
- output=experiment_run.output,
561
+ output=deepcopy(experiment_run.output),
554
562
  expected=example.output,
555
563
  reference=example.output,
556
564
  input=example.input,
@@ -582,7 +590,17 @@ def evaluate_experiment(
582
590
  trace_id=_str_trace_id(span.get_span_context().trace_id), # type: ignore[no-untyped-call]
583
591
  )
584
592
  if not dry_run:
585
- resp = await async_client.post("/v1/experiment_evaluations", json=jsonify(eval_run))
593
+ # Below is a workaround to avoid timeout errors sometimes
594
+ # encountered when the evaluator is a synchronous function
595
+ # that blocks for too long.
596
+ resp = await asyncio.get_running_loop().run_in_executor(
597
+ None,
598
+ functools.partial(
599
+ sync_client.post,
600
+ url="/v1/experiment_evaluations",
601
+ json=jsonify(eval_run),
602
+ ),
603
+ )
586
604
  resp.raise_for_status()
587
605
  eval_run = replace(eval_run, id=resp.json()["data"]["id"])
588
606
  return eval_run
@@ -198,20 +198,6 @@ class Experiment:
198
198
  )
199
199
 
200
200
 
201
- @dataclass(frozen=True)
202
- class ExperimentRunOutput:
203
- task_output: TaskOutput
204
-
205
- def __post_init__(self) -> None:
206
- object.__setattr__(self, "task_output", _make_read_only(self.task_output))
207
-
208
- @classmethod
209
- def from_dict(cls, obj: Optional[Mapping[str, Any]]) -> ExperimentRunOutput:
210
- if not obj:
211
- return cls(task_output=None)
212
- return cls(task_output=obj["task_output"])
213
-
214
-
215
201
  @dataclass(frozen=True)
216
202
  class ExperimentRun:
217
203
  start_time: datetime
@@ -219,15 +205,11 @@ class ExperimentRun:
219
205
  experiment_id: ExperimentId
220
206
  dataset_example_id: ExampleId
221
207
  repetition_number: RepetitionNumber
222
- experiment_run_output: ExperimentRunOutput
208
+ output: JSONSerializable
223
209
  error: Optional[str] = None
224
210
  id: ExperimentRunId = field(default_factory=_dry_run_id)
225
211
  trace_id: Optional[TraceId] = None
226
212
 
227
- @property
228
- def output(self) -> Optional[TaskOutput]:
229
- return deepcopy(self.experiment_run_output.task_output)
230
-
231
213
  @classmethod
232
214
  def from_dict(cls, obj: Mapping[str, Any]) -> ExperimentRun:
233
215
  return cls(
@@ -236,14 +218,14 @@ class ExperimentRun:
236
218
  experiment_id=obj["experiment_id"],
237
219
  dataset_example_id=obj["dataset_example_id"],
238
220
  repetition_number=obj.get("repetition_number") or 1,
239
- experiment_run_output=ExperimentRunOutput.from_dict(obj["experiment_run_output"]),
221
+ output=_make_read_only(obj.get("output")),
240
222
  error=obj.get("error"),
241
223
  id=obj["id"],
242
224
  trace_id=obj.get("trace_id"),
243
225
  )
244
226
 
245
227
  def __post_init__(self) -> None:
246
- if bool(self.experiment_run_output) == bool(self.error):
228
+ if bool(self.output) == bool(self.error):
247
229
  ValueError("Must specify exactly one of experiment_run_output or error")
248
230
 
249
231
 
@@ -381,7 +363,7 @@ class EvaluationSummary(_HasStats):
381
363
  if df.empty:
382
364
  df = pd.DataFrame.from_records(
383
365
  [
384
- {"evaluator": name, "error": True, "score": None, "label": None}
366
+ {"evaluator": name, "error": None, "score": None, "label": None}
385
367
  for name in params.eval_names
386
368
  ]
387
369
  )
@@ -571,7 +553,7 @@ class RanExperiment(Experiment):
571
553
  {
572
554
  "run_id": run.id,
573
555
  "error": run.error,
574
- "output": deepcopy(run.experiment_run_output.task_output),
556
+ "output": deepcopy(run.output),
575
557
  "input": deepcopy((ex := self.dataset.examples[run.dataset_example_id]).input),
576
558
  "expected": deepcopy(ex.output),
577
559
  "metadata": deepcopy(ex.metadata),
@@ -7,9 +7,8 @@ from starlette.status import HTTP_404_NOT_FOUND
7
7
  from strawberry.relay import GlobalID
8
8
 
9
9
  from phoenix.db import models
10
- from phoenix.experiments.types import ExperimentRun, ExperimentRunOutput
10
+ from phoenix.db.models import ExperimentRunOutput
11
11
  from phoenix.server.api.types.node import from_global_id_with_expected_type
12
- from phoenix.utilities.json import jsonify
13
12
 
14
13
 
15
14
  async def create_experiment_run(request: Request) -> Response:
@@ -39,9 +38,8 @@ async def create_experiment_run(request: Request) -> Response:
39
38
  trace_id:
40
39
  type: string
41
40
  description: Optional trace ID for tracking
42
- experiment_run_output:
43
- type: object
44
- description: The output of the experiment run
41
+ output:
42
+ description: The output of the experiment task
45
43
  repetition_number:
46
44
  type: integer
47
45
  description: The repetition number of the experiment run
@@ -101,7 +99,7 @@ async def create_experiment_run(request: Request) -> Response:
101
99
  )
102
100
 
103
101
  trace_id = payload.get("trace_id", None)
104
- output = payload["experiment_run_output"]
102
+ task_output = payload["output"]
105
103
  repetition_number = payload["repetition_number"]
106
104
  start_time = payload["start_time"]
107
105
  end_time = payload["end_time"]
@@ -112,7 +110,7 @@ async def create_experiment_run(request: Request) -> Response:
112
110
  experiment_id=experiment_id,
113
111
  dataset_example_id=dataset_example_id,
114
112
  trace_id=trace_id,
115
- output=output,
113
+ output=ExperimentRunOutput(task_output=task_output),
116
114
  repetition_number=repetition_number,
117
115
  start_time=datetime.fromisoformat(start_time),
118
116
  end_time=datetime.fromisoformat(end_time),
@@ -170,9 +168,8 @@ async def list_experiment_runs(request: Request) -> Response:
170
168
  type: string
171
169
  format: date-time
172
170
  description: The end time of the experiment run in ISO format
173
- experiment_run_output:
174
- type: object
175
- description: The output of the experiment run
171
+ output:
172
+ description: The output of the experiment task
176
173
  error:
177
174
  type: string
178
175
  description: Error message if the experiment run encountered an error
@@ -205,16 +202,16 @@ async def list_experiment_runs(request: Request) -> Response:
205
202
  experiment_gid = GlobalID("Experiment", str(exp_run.experiment_id))
206
203
  example_gid = GlobalID("DatasetExample", str(exp_run.dataset_example_id))
207
204
  runs.append(
208
- ExperimentRun(
209
- start_time=exp_run.start_time,
210
- end_time=exp_run.end_time,
205
+ dict(
206
+ start_time=exp_run.start_time.isoformat(),
207
+ end_time=exp_run.end_time.isoformat(),
211
208
  experiment_id=str(experiment_gid),
212
209
  dataset_example_id=str(example_gid),
213
210
  repetition_number=exp_run.repetition_number,
214
- experiment_run_output=ExperimentRunOutput.from_dict(exp_run.output),
211
+ output=exp_run.output.get("task_output"),
215
212
  error=exp_run.error,
216
213
  id=str(run_gid),
217
214
  trace_id=exp_run.trace_id,
218
215
  )
219
216
  )
220
- return JSONResponse(content={"data": jsonify(runs)}, status_code=200)
217
+ return JSONResponse(content={"data": runs}, status_code=200)