arize-phoenix 10.0.3__py3-none-any.whl → 10.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- {arize_phoenix-10.0.3.dist-info → arize_phoenix-10.1.0.dist-info}/METADATA +3 -3
- {arize_phoenix-10.0.3.dist-info → arize_phoenix-10.1.0.dist-info}/RECORD +20 -20
- phoenix/experiments/functions.py +132 -18
- phoenix/server/api/helpers/playground_clients.py +4 -0
- phoenix/server/api/routers/v1/experiment_runs.py +16 -4
- phoenix/server/api/routers/v1/spans.py +521 -4
- phoenix/server/static/.vite/manifest.json +44 -44
- phoenix/server/static/assets/{components-DULKeDfL.js → components-BHJEWQsc.js} +384 -336
- phoenix/server/static/assets/{index-E0M82BdE.js → index-CAt4FOOX.js} +6 -2
- phoenix/server/static/assets/{pages-Cl0A-0U2.js → pages-kpw8RDmo.js} +535 -545
- phoenix/server/static/assets/{vendor-oB4u9zuV.js → vendor-DOUbLVp5.js} +1 -1
- phoenix/server/static/assets/{vendor-arizeai-Dy-0mSNw.js → vendor-arizeai-DHqMQzfV.js} +6 -6
- phoenix/server/static/assets/{vendor-codemirror-DBtifKNr.js → vendor-codemirror-DWdZV1Is.js} +1 -1
- phoenix/server/static/assets/{vendor-recharts-D-T4KPz2.js → vendor-recharts-BfHdRd1Y.js} +1 -1
- phoenix/server/static/assets/{vendor-shiki-BMn4O_9F.js → vendor-shiki-CHu75YVL.js} +1 -1
- phoenix/version.py +1 -1
- {arize_phoenix-10.0.3.dist-info → arize_phoenix-10.1.0.dist-info}/WHEEL +0 -0
- {arize_phoenix-10.0.3.dist-info → arize_phoenix-10.1.0.dist-info}/entry_points.txt +0 -0
- {arize_phoenix-10.0.3.dist-info → arize_phoenix-10.1.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-10.0.3.dist-info → arize_phoenix-10.1.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: arize-phoenix
|
|
3
|
-
Version: 10.0.3
|
|
3
|
+
Version: 10.1.0
|
|
4
4
|
Summary: AI Observability and Evaluation
|
|
5
5
|
Project-URL: Documentation, https://docs.arize.com/phoenix/
|
|
6
6
|
Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
|
|
@@ -21,8 +21,8 @@ Requires-Dist: aioitertools
|
|
|
21
21
|
Requires-Dist: aiosqlite
|
|
22
22
|
Requires-Dist: alembic<2,>=1.3.0
|
|
23
23
|
Requires-Dist: arize-phoenix-client
|
|
24
|
-
Requires-Dist: arize-phoenix-evals>=0.
|
|
25
|
-
Requires-Dist: arize-phoenix-otel>=0.
|
|
24
|
+
Requires-Dist: arize-phoenix-evals>=0.20.6
|
|
25
|
+
Requires-Dist: arize-phoenix-otel>=0.9.2
|
|
26
26
|
Requires-Dist: authlib
|
|
27
27
|
Requires-Dist: cachetools
|
|
28
28
|
Requires-Dist: email-validator
|
|
@@ -6,7 +6,7 @@ phoenix/exceptions.py,sha256=n2L2KKuecrdflB9MsCdAYCiSEvGJptIsfRkXMoJle7A,169
|
|
|
6
6
|
phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
7
7
|
phoenix/services.py,sha256=ngkyKGVatX3cO2WJdo2hKdaVKP-xJCMvqthvga6kJss,5196
|
|
8
8
|
phoenix/settings.py,sha256=x87BX7hWGQQZbrW_vrYqFR_izCGfO9gFc--JXUG4Tdk,754
|
|
9
|
-
phoenix/version.py,sha256=
|
|
9
|
+
phoenix/version.py,sha256=OL-z73Pbatjb7iB0kk-omeb_qR2_5zFbHqhV-TRrbvM,23
|
|
10
10
|
phoenix/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
11
|
phoenix/core/embedding_dimension.py,sha256=zKGbcvwOXgLf-yrJBpQyKtd-LEOPRKHnUToyAU8Owis,87
|
|
12
12
|
phoenix/core/model.py,sha256=qBFraOtmwCCnWJltKNP18DDG0mULXigytlFsa6YOz6k,4837
|
|
@@ -56,7 +56,7 @@ phoenix/db/types/identifier.py,sha256=Opr3_1di6e5ncrBDn30WfBSr-jN_VGBnkkA4BMuSoy
|
|
|
56
56
|
phoenix/db/types/model_provider.py,sha256=96UMeqiy5X9PmYMOWA6dZAmI_BSV3yVxt9HEVYGe5Ns,157
|
|
57
57
|
phoenix/db/types/trace_retention.py,sha256=UoLVX4efhv-mcF32zHpMX_hXWe8474cmhEyPmYqeMfI,9573
|
|
58
58
|
phoenix/experiments/__init__.py,sha256=6JGwgUd7xCbGpuHqYZlsmErmYvVgv7N_j43bn3dUqsk,123
|
|
59
|
-
phoenix/experiments/functions.py,sha256=
|
|
59
|
+
phoenix/experiments/functions.py,sha256=6cSS_5O5V1EZ_5CIQ6lGOUwFyNNsADP7Uu__GmuOz4A,37983
|
|
60
60
|
phoenix/experiments/tracing.py,sha256=seNh9rBH-rtQe8_FPI_VJj1rbo3ADcP6wDvERkMoxNc,2858
|
|
61
61
|
phoenix/experiments/types.py,sha256=yntt6fnAny1U4Q9Y5Mm4ZYIb9319OaJovl-kyXFtGQE,23475
|
|
62
62
|
phoenix/experiments/utils.py,sha256=MZ1-OnTcavk_KUtbfGqt55Fk9TGtJpYG_K71WsN-zDk,785
|
|
@@ -158,7 +158,7 @@ phoenix/server/api/helpers/__init__.py,sha256=m2-xaSPqUiSs91k62JaRDjFNfl-1byxBfY
|
|
|
158
158
|
phoenix/server/api/helpers/annotations.py,sha256=9gMXKpMTfWEChoSCnvdWYuyB0hlSnNOp-qUdar9Vono,262
|
|
159
159
|
phoenix/server/api/helpers/dataset_helpers.py,sha256=DoMBTg-qXTnC_K4Evx1WKpCCYgRbITpVqyY-8efJRf0,8984
|
|
160
160
|
phoenix/server/api/helpers/experiment_run_filters.py,sha256=DOnVwrmn39eAkk2mwuZP8kIcAnR5jrOgllEwWSjsw94,29893
|
|
161
|
-
phoenix/server/api/helpers/playground_clients.py,sha256
|
|
161
|
+
phoenix/server/api/helpers/playground_clients.py,sha256=-9lbPHTiYgRa1KdzdM1pRJvISvRRgEfhn0jdfoWUWHU,42014
|
|
162
162
|
phoenix/server/api/helpers/playground_registry.py,sha256=CPLMziFB2wmr-dfbx7VbzO2f8YIG_k5RftzvGXYGQ1w,2570
|
|
163
163
|
phoenix/server/api/helpers/playground_spans.py,sha256=ObAhvV_yNwEQDkjzgU5G73wfIisc8q4cpB0OFH5cd24,16974
|
|
164
164
|
phoenix/server/api/helpers/prompts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -235,12 +235,12 @@ phoenix/server/api/routers/v1/annotations.py,sha256=oeafR2tCLu-uIwM9J72gN3MX5WDh
|
|
|
235
235
|
phoenix/server/api/routers/v1/datasets.py,sha256=Wqiy6ZKqn4BZSFyn93gzuhWx3mGn7kOkNncHzCWuBq8,37325
|
|
236
236
|
phoenix/server/api/routers/v1/evaluations.py,sha256=GFTo42aIEX0Htn0EjjoE1JZDYlvryeZ_CK9kowhwzGw,12830
|
|
237
237
|
phoenix/server/api/routers/v1/experiment_evaluations.py,sha256=xSs004jNYsOl3eg-6Zjo2tt9TefTd7WR3twWYrsNQNk,4828
|
|
238
|
-
phoenix/server/api/routers/v1/experiment_runs.py,sha256=
|
|
238
|
+
phoenix/server/api/routers/v1/experiment_runs.py,sha256=FreGzzprPpJ_DBHUkdUckca6EGCrnvgVpqk3CLT7wRc,7000
|
|
239
239
|
phoenix/server/api/routers/v1/experiments.py,sha256=V9_sxqLTE1MKGFu9H3FEdGKr70lYMbGZx813MGaavfQ,20430
|
|
240
240
|
phoenix/server/api/routers/v1/models.py,sha256=r0nM2kFJ3mxDqgc5vFr1cjNuyOPs3RIKE_DS2VMdF48,1749
|
|
241
241
|
phoenix/server/api/routers/v1/projects.py,sha256=RVOAWW8RQIqaebQFcIbk1OGxO7B1BITtUewlcSUhasg,12615
|
|
242
242
|
phoenix/server/api/routers/v1/prompts.py,sha256=aBOUBwLDzZDIzJQkxJcR8ZKnakNJOLMwzsLKINSs1mA,26545
|
|
243
|
-
phoenix/server/api/routers/v1/spans.py,sha256=
|
|
243
|
+
phoenix/server/api/routers/v1/spans.py,sha256=qJVN0pVgZM5cMXQoNrCwmFjKDm_7-JHKdt_KU9IDFsA,32121
|
|
244
244
|
phoenix/server/api/routers/v1/traces.py,sha256=DfzeszQvtlrVxvurJLaWJJAhkCZ4BodLwpFuBYPwN5Q,8206
|
|
245
245
|
phoenix/server/api/routers/v1/utils.py,sha256=oXIOGPzPTkE0ZWUTRCoRIQQ7wTzoSwtWFaUSjlGBqts,4960
|
|
246
246
|
phoenix/server/api/types/Annotation.py,sha256=gsl8CwjIbDUbZRj4d9USwZ_w_Tkz4i7zuZh9ftV80jA,1132
|
|
@@ -347,16 +347,16 @@ phoenix/server/static/apple-touch-icon-76x76.png,sha256=CT_xT12I0u2i0WU8JzBZBuOQ
|
|
|
347
347
|
phoenix/server/static/apple-touch-icon.png,sha256=fOfpjqGpWYbJ0eAurKsyoZP1EAs6ZVooBJ_SGk2ZkDs,3801
|
|
348
348
|
phoenix/server/static/favicon.ico,sha256=bY0vvCKRftemZfPShwZtE93DiiQdaYaozkPGwNFr6H8,34494
|
|
349
349
|
phoenix/server/static/modernizr.js,sha256=mvK-XtkNqjOral-QvzoqsyOMECXIMu5BQwSVN_wcU9c,2564
|
|
350
|
-
phoenix/server/static/.vite/manifest.json,sha256=
|
|
351
|
-
phoenix/server/static/assets/components-
|
|
352
|
-
phoenix/server/static/assets/index-
|
|
353
|
-
phoenix/server/static/assets/pages-
|
|
350
|
+
phoenix/server/static/.vite/manifest.json,sha256=uZhT5WYds65YqNtUyHWehErQIV4tFNEoiyZahFTFA4Y,2165
|
|
351
|
+
phoenix/server/static/assets/components-BHJEWQsc.js,sha256=ekj4iMOzo2et9hLV5-gp70qytp--8V7mXpFWbQ-Ov1k,544202
|
|
352
|
+
phoenix/server/static/assets/index-CAt4FOOX.js,sha256=R9LYPWBofyISq2QdzxT80OiIbsv99gncJ1XdqhrYrr8,60432
|
|
353
|
+
phoenix/server/static/assets/pages-kpw8RDmo.js,sha256=NU__puRtkOse1X6iDEVq2_v-lWMOlTMcR6c8fkTZFes,1033323
|
|
354
|
+
phoenix/server/static/assets/vendor-DOUbLVp5.js,sha256=AN12odor8w2dKOG4LokzFITR1h52TP1qnBDQl1u67Dc,2744392
|
|
354
355
|
phoenix/server/static/assets/vendor-WIZid84E.css,sha256=spZD2r7XL5GfLO13ln-IuXfnjAref8l6g_n_AvxxOlI,5517
|
|
355
|
-
phoenix/server/static/assets/vendor-arizeai-
|
|
356
|
-
phoenix/server/static/assets/vendor-codemirror-
|
|
357
|
-
phoenix/server/static/assets/vendor-
|
|
358
|
-
phoenix/server/static/assets/vendor-
|
|
359
|
-
phoenix/server/static/assets/vendor-shiki-BMn4O_9F.js,sha256=LIz0aw3E5-PMDE-1oyAG08B_By2OKHMZ74AvuWJU0ug,8980312
|
|
356
|
+
phoenix/server/static/assets/vendor-arizeai-DHqMQzfV.js,sha256=zaVJbapZYf9-reZp549G232mEi7MY7yZR2cBbchRISs,193248
|
|
357
|
+
phoenix/server/static/assets/vendor-codemirror-DWdZV1Is.js,sha256=S0-OzJX5aZayqOQS3KcC-ztH5EITxeCHnOA2nk5li8A,781264
|
|
358
|
+
phoenix/server/static/assets/vendor-recharts-BfHdRd1Y.js,sha256=YdjHHSsRjbhTkgi_NdynIGnwqX1YibrXZhGgbewsOfo,282150
|
|
359
|
+
phoenix/server/static/assets/vendor-shiki-CHu75YVL.js,sha256=FaxGIHyaAJFTau_Jt8GlW41aKphdTaEur_nSd6UrbpU,8980312
|
|
360
360
|
phoenix/server/static/assets/vendor-three-C5WAXd5r.js,sha256=ELkg06u70N7h8oFmvqdoHyPuUf9VgGEWeT4LKFx4VWo,620975
|
|
361
361
|
phoenix/server/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
362
362
|
phoenix/server/templates/index.html,sha256=NpJ83DULqcStXFbShNamX4_NPDtnnucuBxppvUYjJa8,4409
|
|
@@ -397,9 +397,9 @@ phoenix/utilities/project.py,sha256=auVpARXkDb-JgeX5f2aStyFIkeKvGwN9l7qrFeJMVxI,
|
|
|
397
397
|
phoenix/utilities/re.py,sha256=6YyUWIkv0zc2SigsxfOWIHzdpjKA_TZo2iqKq7zJKvw,2081
|
|
398
398
|
phoenix/utilities/span_store.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
399
399
|
phoenix/utilities/template_formatters.py,sha256=gh9PJD6WEGw7TEYXfSst1UR4pWWwmjxMLrDVQ_CkpkQ,2779
|
|
400
|
-
arize_phoenix-10.0.
|
|
401
|
-
arize_phoenix-10.0.
|
|
402
|
-
arize_phoenix-10.0.
|
|
403
|
-
arize_phoenix-10.0.
|
|
404
|
-
arize_phoenix-10.0.
|
|
405
|
-
arize_phoenix-10.0.3.dist-info/RECORD,,
|
|
400
|
+
arize_phoenix-10.1.0.dist-info/METADATA,sha256=LQdP2QH9SNHA7uH0XeEJq1Mkf3l3JG0s8qcg0WRTK1Q,25591
|
|
401
|
+
arize_phoenix-10.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
402
|
+
arize_phoenix-10.1.0.dist-info/entry_points.txt,sha256=Pgpn8Upxx9P8z8joPXZWl2LlnAlGc3gcQoVchb06X1Q,94
|
|
403
|
+
arize_phoenix-10.1.0.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
|
|
404
|
+
arize_phoenix-10.1.0.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
|
|
405
|
+
arize_phoenix-10.1.0.dist-info/RECORD,,
|
phoenix/experiments/functions.py
CHANGED
|
@@ -16,6 +16,7 @@ from urllib.parse import urljoin
|
|
|
16
16
|
import httpx
|
|
17
17
|
import opentelemetry.sdk.trace as trace_sdk
|
|
18
18
|
import pandas as pd
|
|
19
|
+
from httpx import HTTPStatusError
|
|
19
20
|
from openinference.semconv.resource import ResourceAttributes
|
|
20
21
|
from openinference.semconv.trace import (
|
|
21
22
|
OpenInferenceMimeTypeValues,
|
|
@@ -232,8 +233,43 @@ def run_experiment(
|
|
|
232
233
|
print(f"📺 View dataset experiments: {dataset_experiments_url}")
|
|
233
234
|
print(f"🔗 View this experiment: {experiment_compare_url}")
|
|
234
235
|
|
|
235
|
-
|
|
236
|
+
# Create a cache for task results
|
|
237
|
+
task_result_cache: dict[tuple[str, int], Any] = {}
|
|
238
|
+
|
|
239
|
+
def sync_run_experiment(test_case: TestCase) -> Optional[ExperimentRun]:
|
|
236
240
|
example, repetition_number = test_case.example, test_case.repetition_number
|
|
241
|
+
cache_key = (example.id, repetition_number)
|
|
242
|
+
|
|
243
|
+
# Check if we have a cached result
|
|
244
|
+
if cache_key in task_result_cache:
|
|
245
|
+
output = task_result_cache[cache_key]
|
|
246
|
+
exp_run = ExperimentRun(
|
|
247
|
+
start_time=datetime.now(
|
|
248
|
+
timezone.utc
|
|
249
|
+
), # Use current time since we don't have the original span
|
|
250
|
+
end_time=datetime.now(timezone.utc),
|
|
251
|
+
experiment_id=experiment.id,
|
|
252
|
+
dataset_example_id=example.id,
|
|
253
|
+
repetition_number=repetition_number,
|
|
254
|
+
output=output,
|
|
255
|
+
error=None,
|
|
256
|
+
trace_id=None, # No trace ID since we don't have the original span
|
|
257
|
+
)
|
|
258
|
+
if not dry_run:
|
|
259
|
+
try:
|
|
260
|
+
# Try to create the run directly
|
|
261
|
+
resp = sync_client.post(
|
|
262
|
+
f"/v1/experiments/{experiment.id}/runs", json=jsonify(exp_run)
|
|
263
|
+
)
|
|
264
|
+
resp.raise_for_status()
|
|
265
|
+
exp_run = replace(exp_run, id=resp.json()["data"]["id"])
|
|
266
|
+
except HTTPStatusError as e:
|
|
267
|
+
if e.response.status_code == 409:
|
|
268
|
+
# Ignore duplicate runs - we'll get the final state from the database
|
|
269
|
+
return None
|
|
270
|
+
raise
|
|
271
|
+
return exp_run
|
|
272
|
+
|
|
237
273
|
output = None
|
|
238
274
|
error: Optional[BaseException] = None
|
|
239
275
|
status = Status(StatusCode.OK)
|
|
@@ -283,6 +319,7 @@ def run_experiment(
|
|
|
283
319
|
assert isinstance(
|
|
284
320
|
output, (dict, list, str, int, float, bool, type(None))
|
|
285
321
|
), "Output must be JSON serializable"
|
|
322
|
+
|
|
286
323
|
exp_run = ExperimentRun(
|
|
287
324
|
start_time=_decode_unix_nano(cast(int, span.start_time)),
|
|
288
325
|
end_time=_decode_unix_nano(cast(int, span.end_time)),
|
|
@@ -294,13 +331,62 @@ def run_experiment(
|
|
|
294
331
|
trace_id=_str_trace_id(span.get_span_context().trace_id), # type: ignore[no-untyped-call]
|
|
295
332
|
)
|
|
296
333
|
if not dry_run:
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
334
|
+
try:
|
|
335
|
+
# Try to create the run directly
|
|
336
|
+
resp = sync_client.post(
|
|
337
|
+
f"/v1/experiments/{experiment.id}/runs", json=jsonify(exp_run)
|
|
338
|
+
)
|
|
339
|
+
resp.raise_for_status()
|
|
340
|
+
exp_run = replace(exp_run, id=resp.json()["data"]["id"])
|
|
341
|
+
if error is None:
|
|
342
|
+
task_result_cache[cache_key] = output
|
|
343
|
+
except HTTPStatusError as e:
|
|
344
|
+
if e.response.status_code == 409:
|
|
345
|
+
# 409 conflict errors are caused by submitting duplicate runs
|
|
346
|
+
return None
|
|
347
|
+
raise
|
|
300
348
|
return exp_run
|
|
301
349
|
|
|
302
|
-
async def async_run_experiment(test_case: TestCase) -> ExperimentRun:
|
|
350
|
+
async def async_run_experiment(test_case: TestCase) -> Optional[ExperimentRun]:
|
|
303
351
|
example, repetition_number = test_case.example, test_case.repetition_number
|
|
352
|
+
cache_key = (example.id, repetition_number)
|
|
353
|
+
|
|
354
|
+
# Check if we have a cached result
|
|
355
|
+
if cache_key in task_result_cache:
|
|
356
|
+
output = task_result_cache[cache_key]
|
|
357
|
+
exp_run = ExperimentRun(
|
|
358
|
+
start_time=datetime.now(
|
|
359
|
+
timezone.utc
|
|
360
|
+
), # Use current time since we don't have the original span
|
|
361
|
+
end_time=datetime.now(timezone.utc),
|
|
362
|
+
experiment_id=experiment.id,
|
|
363
|
+
dataset_example_id=example.id,
|
|
364
|
+
repetition_number=repetition_number,
|
|
365
|
+
output=output,
|
|
366
|
+
error=None,
|
|
367
|
+
trace_id=None, # No trace ID since we don't have the original span
|
|
368
|
+
)
|
|
369
|
+
if not dry_run:
|
|
370
|
+
try:
|
|
371
|
+
# Try to create the run directly
|
|
372
|
+
future = asyncio.get_running_loop().run_in_executor(
|
|
373
|
+
None,
|
|
374
|
+
functools.partial(
|
|
375
|
+
sync_client.post,
|
|
376
|
+
url=f"/v1/experiments/{experiment.id}/runs",
|
|
377
|
+
json=jsonify(exp_run),
|
|
378
|
+
),
|
|
379
|
+
)
|
|
380
|
+
resp = await future
|
|
381
|
+
resp.raise_for_status()
|
|
382
|
+
exp_run = replace(exp_run, id=resp.json()["data"]["id"])
|
|
383
|
+
except HTTPStatusError as e:
|
|
384
|
+
if e.response.status_code == 409:
|
|
385
|
+
# 409 conflict errors are caused by submitting duplicate runs
|
|
386
|
+
return None
|
|
387
|
+
raise
|
|
388
|
+
return exp_run
|
|
389
|
+
|
|
304
390
|
output = None
|
|
305
391
|
error: Optional[BaseException] = None
|
|
306
392
|
status = Status(StatusCode.OK)
|
|
@@ -344,6 +430,7 @@ def run_experiment(
|
|
|
344
430
|
assert isinstance(
|
|
345
431
|
output, (dict, list, str, int, float, bool, type(None))
|
|
346
432
|
), "Output must be JSON serializable"
|
|
433
|
+
|
|
347
434
|
exp_run = ExperimentRun(
|
|
348
435
|
start_time=_decode_unix_nano(cast(int, span.start_time)),
|
|
349
436
|
end_time=_decode_unix_nano(cast(int, span.end_time)),
|
|
@@ -355,19 +442,26 @@ def run_experiment(
|
|
|
355
442
|
trace_id=_str_trace_id(span.get_span_context().trace_id), # type: ignore[no-untyped-call]
|
|
356
443
|
)
|
|
357
444
|
if not dry_run:
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
)
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
445
|
+
try:
|
|
446
|
+
# Try to create the run directly
|
|
447
|
+
future = asyncio.get_running_loop().run_in_executor(
|
|
448
|
+
None,
|
|
449
|
+
functools.partial(
|
|
450
|
+
sync_client.post,
|
|
451
|
+
url=f"/v1/experiments/{experiment.id}/runs",
|
|
452
|
+
json=jsonify(exp_run),
|
|
453
|
+
),
|
|
454
|
+
)
|
|
455
|
+
resp = await future
|
|
456
|
+
resp.raise_for_status()
|
|
457
|
+
exp_run = replace(exp_run, id=resp.json()["data"]["id"])
|
|
458
|
+
if error is None:
|
|
459
|
+
task_result_cache[cache_key] = output
|
|
460
|
+
except HTTPStatusError as e:
|
|
461
|
+
if e.response.status_code == 409:
|
|
462
|
+
# Ignore duplicate runs - we'll get the final state from the database
|
|
463
|
+
return None
|
|
464
|
+
raise
|
|
371
465
|
return exp_run
|
|
372
466
|
|
|
373
467
|
_errors: tuple[type[BaseException], ...]
|
|
@@ -401,6 +495,26 @@ def run_experiment(
|
|
|
401
495
|
]
|
|
402
496
|
task_runs, _execution_details = executor.run(test_cases)
|
|
403
497
|
print("✅ Task runs completed.")
|
|
498
|
+
|
|
499
|
+
# Get the final state of runs from the database
|
|
500
|
+
if not dry_run:
|
|
501
|
+
all_runs = sync_client.get(f"/v1/experiments/{experiment.id}/runs").json()["data"]
|
|
502
|
+
task_runs = []
|
|
503
|
+
for run in all_runs:
|
|
504
|
+
# Parse datetime strings
|
|
505
|
+
run["start_time"] = datetime.fromisoformat(run["start_time"])
|
|
506
|
+
run["end_time"] = datetime.fromisoformat(run["end_time"])
|
|
507
|
+
task_runs.append(ExperimentRun.from_dict(run))
|
|
508
|
+
|
|
509
|
+
# Check if we got all expected runs
|
|
510
|
+
expected_runs = len(dataset.examples) * repetitions
|
|
511
|
+
actual_runs = len(task_runs)
|
|
512
|
+
if actual_runs < expected_runs:
|
|
513
|
+
print(
|
|
514
|
+
f"⚠️ Warning: Only {actual_runs} out of {expected_runs} expected runs were "
|
|
515
|
+
"completed successfully."
|
|
516
|
+
)
|
|
517
|
+
|
|
404
518
|
params = ExperimentParameters(n_examples=len(dataset.examples), n_repetitions=repetitions)
|
|
405
519
|
task_summary = TaskSummary.from_task_runs(params, task_runs)
|
|
406
520
|
ran_experiment: RanExperiment = object.__new__(RanExperiment)
|
|
@@ -701,6 +701,10 @@ class AzureOpenAIStreamingClient(OpenAIBaseStreamingClient):
|
|
|
701
701
|
provider_key=GenerativeProviderKey.ANTHROPIC,
|
|
702
702
|
model_names=[
|
|
703
703
|
PROVIDER_DEFAULT,
|
|
704
|
+
"claude-sonnet-4-0",
|
|
705
|
+
"claude-sonnet-4-20250514",
|
|
706
|
+
"claude-opus-4-0",
|
|
707
|
+
"claude-opus-4-20250514",
|
|
704
708
|
"claude-3-7-sonnet-latest",
|
|
705
709
|
"claude-3-7-sonnet-20250219",
|
|
706
710
|
"claude-3-5-sonnet-latest",
|
|
@@ -4,8 +4,10 @@ from typing import Any, Optional
|
|
|
4
4
|
from fastapi import APIRouter, HTTPException
|
|
5
5
|
from pydantic import Field
|
|
6
6
|
from sqlalchemy import select
|
|
7
|
+
from sqlalchemy.exc import IntegrityError as PostgreSQLIntegrityError
|
|
8
|
+
from sqlean.dbapi2 import IntegrityError as SQLiteIntegrityError # type: ignore[import-untyped]
|
|
7
9
|
from starlette.requests import Request
|
|
8
|
-
from starlette.status import HTTP_404_NOT_FOUND
|
|
10
|
+
from starlette.status import HTTP_404_NOT_FOUND, HTTP_409_CONFLICT
|
|
9
11
|
from strawberry.relay import GlobalID
|
|
10
12
|
|
|
11
13
|
from phoenix.db import models
|
|
@@ -58,7 +60,11 @@ class CreateExperimentRunResponseBody(ResponseBody[CreateExperimentRunResponseBo
|
|
|
58
60
|
{
|
|
59
61
|
"status_code": HTTP_404_NOT_FOUND,
|
|
60
62
|
"description": "Experiment or dataset example not found",
|
|
61
|
-
}
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
"status_code": HTTP_409_CONFLICT,
|
|
66
|
+
"description": "This experiment run has already been submitted",
|
|
67
|
+
},
|
|
62
68
|
]
|
|
63
69
|
),
|
|
64
70
|
)
|
|
@@ -101,8 +107,14 @@ async def create_experiment_run(
|
|
|
101
107
|
end_time=end_time,
|
|
102
108
|
error=error,
|
|
103
109
|
)
|
|
104
|
-
|
|
105
|
-
|
|
110
|
+
try:
|
|
111
|
+
session.add(exp_run)
|
|
112
|
+
await session.flush()
|
|
113
|
+
except (PostgreSQLIntegrityError, SQLiteIntegrityError):
|
|
114
|
+
raise HTTPException(
|
|
115
|
+
detail="This experiment run has already been submitted",
|
|
116
|
+
status_code=HTTP_409_CONFLICT,
|
|
117
|
+
)
|
|
106
118
|
request.state.event_queue.put(ExperimentRunInsertEvent((exp_run.id,)))
|
|
107
119
|
run_gid = GlobalID("ExperimentRun", str(exp_run.id))
|
|
108
120
|
return CreateExperimentRunResponseBody(
|