arize-phoenix 4.12.1rc1__py3-none-any.whl → 4.14.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.14.1.dist-info}/METADATA +12 -9
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.14.1.dist-info}/RECORD +48 -49
- phoenix/db/bulk_inserter.py +3 -1
- phoenix/experiments/evaluators/base.py +4 -0
- phoenix/experiments/evaluators/code_evaluators.py +80 -0
- phoenix/experiments/evaluators/llm_evaluators.py +77 -1
- phoenix/experiments/evaluators/utils.py +70 -21
- phoenix/experiments/functions.py +14 -14
- phoenix/server/api/context.py +7 -3
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +23 -23
- phoenix/server/api/dataloaders/experiment_error_rates.py +30 -10
- phoenix/server/api/dataloaders/experiment_run_counts.py +18 -5
- phoenix/server/api/input_types/{CreateSpanAnnotationsInput.py → CreateSpanAnnotationInput.py} +4 -2
- phoenix/server/api/input_types/{CreateTraceAnnotationsInput.py → CreateTraceAnnotationInput.py} +4 -2
- phoenix/server/api/input_types/{PatchAnnotationsInput.py → PatchAnnotationInput.py} +4 -2
- phoenix/server/api/mutations/span_annotations_mutations.py +12 -6
- phoenix/server/api/mutations/trace_annotations_mutations.py +12 -6
- phoenix/server/api/openapi/main.py +2 -18
- phoenix/server/api/openapi/schema.py +12 -12
- phoenix/server/api/routers/v1/__init__.py +83 -36
- phoenix/server/api/routers/v1/dataset_examples.py +123 -102
- phoenix/server/api/routers/v1/datasets.py +506 -390
- phoenix/server/api/routers/v1/evaluations.py +66 -73
- phoenix/server/api/routers/v1/experiment_evaluations.py +91 -68
- phoenix/server/api/routers/v1/experiment_runs.py +155 -98
- phoenix/server/api/routers/v1/experiments.py +181 -132
- phoenix/server/api/routers/v1/spans.py +173 -144
- phoenix/server/api/routers/v1/traces.py +128 -115
- phoenix/server/api/types/Experiment.py +2 -2
- phoenix/server/api/types/Inferences.py +1 -2
- phoenix/server/api/types/Model.py +1 -2
- phoenix/server/app.py +177 -152
- phoenix/server/openapi/docs.py +221 -0
- phoenix/server/static/.vite/manifest.json +31 -31
- phoenix/server/static/assets/{components-C8sm_r1F.js → components-DeS0YEmv.js} +2 -2
- phoenix/server/static/assets/index-CQgXRwU0.js +100 -0
- phoenix/server/static/assets/{pages-bN7juCjh.js → pages-hdjlFZhO.js} +275 -198
- phoenix/server/static/assets/{vendor-CUDAPm8e.js → vendor-DPvSDRn3.js} +1 -1
- phoenix/server/static/assets/{vendor-arizeai-Do2HOmcL.js → vendor-arizeai-CkvPT67c.js} +2 -2
- phoenix/server/static/assets/{vendor-codemirror-CrdxOlMs.js → vendor-codemirror-Cqwpwlua.js} +1 -1
- phoenix/server/static/assets/{vendor-recharts-PKRvByVe.js → vendor-recharts-5jlNaZuF.js} +1 -1
- phoenix/server/thread_server.py +2 -2
- phoenix/session/client.py +9 -8
- phoenix/trace/dsl/filter.py +40 -25
- phoenix/version.py +1 -1
- phoenix/server/api/routers/v1/pydantic_compat.py +0 -78
- phoenix/server/api/routers/v1/utils.py +0 -95
- phoenix/server/static/assets/index-BEKPzgQs.js +0 -100
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.14.1.dist-info}/WHEEL +0 -0
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.14.1.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-4.12.1rc1.dist-info → arize_phoenix-4.14.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: arize-phoenix
|
|
3
|
-
Version: 4.
|
|
3
|
+
Version: 4.14.1
|
|
4
4
|
Summary: AI Observability and Evaluation
|
|
5
5
|
Project-URL: Documentation, https://docs.arize.com/phoenix/
|
|
6
6
|
Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
|
|
@@ -22,7 +22,6 @@ Requires-Dist: aiosqlite
|
|
|
22
22
|
Requires-Dist: alembic<2,>=1.3.0
|
|
23
23
|
Requires-Dist: arize-phoenix-evals>=0.13.1
|
|
24
24
|
Requires-Dist: cachetools
|
|
25
|
-
Requires-Dist: fastapi
|
|
26
25
|
Requires-Dist: grpcio
|
|
27
26
|
Requires-Dist: hdbscan>=0.8.33
|
|
28
27
|
Requires-Dist: httpx
|
|
@@ -41,14 +40,14 @@ Requires-Dist: pandas>=1.0
|
|
|
41
40
|
Requires-Dist: protobuf<6.0,>=3.20
|
|
42
41
|
Requires-Dist: psutil
|
|
43
42
|
Requires-Dist: pyarrow
|
|
44
|
-
Requires-Dist: pydantic!=2.0.*,<3,>=1.0
|
|
45
43
|
Requires-Dist: python-multipart
|
|
44
|
+
Requires-Dist: pyyaml
|
|
46
45
|
Requires-Dist: scikit-learn
|
|
47
46
|
Requires-Dist: scipy
|
|
48
47
|
Requires-Dist: sqlalchemy[asyncio]<3,>=2.0.4
|
|
49
48
|
Requires-Dist: sqlean-py>=3.45.1
|
|
50
49
|
Requires-Dist: starlette
|
|
51
|
-
Requires-Dist: strawberry-graphql==0.
|
|
50
|
+
Requires-Dist: strawberry-graphql==0.236.0
|
|
52
51
|
Requires-Dist: tqdm
|
|
53
52
|
Requires-Dist: typing-extensions>=4.5; python_version < '3.12'
|
|
54
53
|
Requires-Dist: typing-extensions>=4.6; python_version >= '3.12'
|
|
@@ -57,19 +56,20 @@ Requires-Dist: uvicorn
|
|
|
57
56
|
Requires-Dist: wrapt
|
|
58
57
|
Provides-Extra: container
|
|
59
58
|
Requires-Dist: opentelemetry-exporter-otlp; extra == 'container'
|
|
60
|
-
Requires-Dist: opentelemetry-instrumentation-fastapi; extra == 'container'
|
|
61
59
|
Requires-Dist: opentelemetry-instrumentation-grpc; extra == 'container'
|
|
62
60
|
Requires-Dist: opentelemetry-instrumentation-sqlalchemy; extra == 'container'
|
|
61
|
+
Requires-Dist: opentelemetry-instrumentation-starlette; extra == 'container'
|
|
63
62
|
Requires-Dist: opentelemetry-proto>=1.12.0; extra == 'container'
|
|
64
63
|
Requires-Dist: opentelemetry-sdk; extra == 'container'
|
|
65
64
|
Requires-Dist: opentelemetry-semantic-conventions; extra == 'container'
|
|
66
65
|
Requires-Dist: prometheus-client; extra == 'container'
|
|
67
66
|
Requires-Dist: py-grpc-prometheus; extra == 'container'
|
|
68
|
-
Requires-Dist: strawberry-graphql[opentelemetry]==0.
|
|
67
|
+
Requires-Dist: strawberry-graphql[opentelemetry]==0.236.0; extra == 'container'
|
|
69
68
|
Requires-Dist: uvloop; (platform_system != 'Windows') and extra == 'container'
|
|
70
69
|
Provides-Extra: dev
|
|
71
70
|
Requires-Dist: anthropic; extra == 'dev'
|
|
72
71
|
Requires-Dist: arize[autoembeddings,llm-evaluation]; extra == 'dev'
|
|
72
|
+
Requires-Dist: asgi-lifespan; extra == 'dev'
|
|
73
73
|
Requires-Dist: asyncpg; extra == 'dev'
|
|
74
74
|
Requires-Dist: gcsfs; extra == 'dev'
|
|
75
75
|
Requires-Dist: google-cloud-aiplatform>=1.3; extra == 'dev'
|
|
@@ -78,6 +78,7 @@ Requires-Dist: jupyter; extra == 'dev'
|
|
|
78
78
|
Requires-Dist: langchain>=0.0.334; extra == 'dev'
|
|
79
79
|
Requires-Dist: litellm>=1.0.3; extra == 'dev'
|
|
80
80
|
Requires-Dist: llama-index>=0.10.3; extra == 'dev'
|
|
81
|
+
Requires-Dist: mypy==1.11.0; extra == 'dev'
|
|
81
82
|
Requires-Dist: nbqa; extra == 'dev'
|
|
82
83
|
Requires-Dist: pandas-stubs==2.0.3.230814; (python_version < '3.9') and extra == 'dev'
|
|
83
84
|
Requires-Dist: pandas-stubs==2.2.2.240603; (python_version >= '3.9') and extra == 'dev'
|
|
@@ -88,9 +89,9 @@ Requires-Dist: psycopg[binary]; extra == 'dev'
|
|
|
88
89
|
Requires-Dist: pytest-asyncio; extra == 'dev'
|
|
89
90
|
Requires-Dist: pytest-cov; extra == 'dev'
|
|
90
91
|
Requires-Dist: pytest-postgresql; extra == 'dev'
|
|
91
|
-
Requires-Dist: pytest==8.
|
|
92
|
-
Requires-Dist: ruff==0.4
|
|
93
|
-
Requires-Dist: strawberry-graphql[debug-server,opentelemetry]==0.
|
|
92
|
+
Requires-Dist: pytest==8.3.1; extra == 'dev'
|
|
93
|
+
Requires-Dist: ruff==0.5.4; extra == 'dev'
|
|
94
|
+
Requires-Dist: strawberry-graphql[debug-server,opentelemetry]==0.236.0; extra == 'dev'
|
|
94
95
|
Requires-Dist: tabulate; extra == 'dev'
|
|
95
96
|
Requires-Dist: types-tabulate; extra == 'dev'
|
|
96
97
|
Provides-Extra: evals
|
|
@@ -138,6 +139,8 @@ Phoenix is an open-source AI observability platform designed for experimentation
|
|
|
138
139
|
|
|
139
140
|
- **_Tracing_** - Trace your LLM application's runtime using OpenTelemetry-based instrumentation.
|
|
140
141
|
- **_Evaluation_** - Leverage LLMs to benchmark your application's performance using response and retrieval evals.
|
|
142
|
+
- **_Datasets_** - Create versioned datasets of examples for experimentation, evaluation, and fine-tuning.
|
|
143
|
+
- **_Experiments_** - Track and evaluate changes to prompts, LLMs, and retrieval.
|
|
141
144
|
- **_Inference Analysis_** - Visualize inferences and embeddings using dimensionality reduction and clustering to identify drift and performance degradation.
|
|
142
145
|
|
|
143
146
|
Phoenix is vendor and language agnostic with out-of-the-box support for popular frameworks (🦙LlamaIndex, 🦜⛓LangChain, 🧩DSPy) and LLM providers (OpenAI, Bedrock, and more). For details on auto-instrumentation, check out the [OpenInference](https://github.com/Arize-ai/openinference) project.
|
|
@@ -5,7 +5,7 @@ phoenix/exceptions.py,sha256=n2L2KKuecrdflB9MsCdAYCiSEvGJptIsfRkXMoJle7A,169
|
|
|
5
5
|
phoenix/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
6
6
|
phoenix/services.py,sha256=aTxhcOA1pZHB6U-B3TEcp6fqDF5oT0xCUvEUNMZVTUQ,5175
|
|
7
7
|
phoenix/settings.py,sha256=cO-qgis_S27nHirTobYI9hHPfZH18R--WMmxNdsVUwc,273
|
|
8
|
-
phoenix/version.py,sha256=
|
|
8
|
+
phoenix/version.py,sha256=9Wn8BwD7EU7A8hupiqYQO3QNgiGcpbdgKgkCwHlUb-o,23
|
|
9
9
|
phoenix/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
phoenix/core/embedding_dimension.py,sha256=zKGbcvwOXgLf-yrJBpQyKtd-LEOPRKHnUToyAU8Owis,87
|
|
11
11
|
phoenix/core/model.py,sha256=km_a--PBHOuA337ClRw9xqhOHhrUT6Rl9pz_zV0JYkQ,4843
|
|
@@ -14,7 +14,7 @@ phoenix/core/model_schema_adapter.py,sha256=0Tm_Y_gV-WED8fKBCaFXAEFwE3CTEZS1dowq
|
|
|
14
14
|
phoenix/db/README.md,sha256=IvKaZyf9ECbGBYYePaRhBveKZwDbxAc-c7BMxJYZh6Q,595
|
|
15
15
|
phoenix/db/__init__.py,sha256=pDjEFXukHmJBM-1D8RjmXkvLsz85YWNxMQczt81ec3A,118
|
|
16
16
|
phoenix/db/alembic.ini,sha256=p8DjVqGUs_tTx8oU56JP7qj-rMUebNFizItUSv_hPhs,3763
|
|
17
|
-
phoenix/db/bulk_inserter.py,sha256=
|
|
17
|
+
phoenix/db/bulk_inserter.py,sha256=pqyfgwBHU7as5ll56q-NEzADuqYzQ2P-Z7-X9JHM35U,11339
|
|
18
18
|
phoenix/db/engines.py,sha256=vLWaZlToMtDI7rJDxSidYkfOoojamxaZxaz8ND3zTus,4770
|
|
19
19
|
phoenix/db/helpers.py,sha256=mTBhPzdy_aU9gD7hNzUZJkAnV77ko5CdaXyoWH3snPA,2982
|
|
20
20
|
phoenix/db/migrate.py,sha256=MuhtNWnR24riROvarvKfbRb4_D5xuQi6P760vBUKl1E,2270
|
|
@@ -31,15 +31,15 @@ phoenix/db/migrations/types.py,sha256=Frq1AKSyBKQQ0FLzON-EmgTqE4kNkOpHMsbWnI-WgC
|
|
|
31
31
|
phoenix/db/migrations/versions/10460e46d750_datasets.py,sha256=l69yZfScFrjfZZpY0gnqwhsDUEctLeo02qMgA_aOGDg,8155
|
|
32
32
|
phoenix/db/migrations/versions/cf03bd6bae1d_init.py,sha256=CbWT3ZTR0CZqeT3zWLoTWhboFmnOy3Ju1z6Ztpq8WIM,8122
|
|
33
33
|
phoenix/experiments/__init__.py,sha256=6JGwgUd7xCbGpuHqYZlsmErmYvVgv7N_j43bn3dUqsk,123
|
|
34
|
-
phoenix/experiments/functions.py,sha256=
|
|
34
|
+
phoenix/experiments/functions.py,sha256=lz5Add19Hf8EQnfkTLfHFtRJsXfruPC4tXhXUpglGMc,32128
|
|
35
35
|
phoenix/experiments/tracing.py,sha256=wVpt8Ie9WNPoi1djJdcrkwCokHdTO0bicXViLg3O-1Y,2831
|
|
36
36
|
phoenix/experiments/types.py,sha256=VuvDCcvUGeHIQuXS_xpz7Jq5xHdt3qu-O_C7IQ3DvF8,23397
|
|
37
37
|
phoenix/experiments/utils.py,sha256=wLu5Kvt1b4a8rGPRWq5G8RQ9XSiV8fCIVm51zWBI3-g,758
|
|
38
38
|
phoenix/experiments/evaluators/__init__.py,sha256=j63fi3fa3U7-itVPHa82GowhjQRU-wO6yhO34u_lhsA,714
|
|
39
|
-
phoenix/experiments/evaluators/base.py,sha256=
|
|
40
|
-
phoenix/experiments/evaluators/code_evaluators.py,sha256=
|
|
41
|
-
phoenix/experiments/evaluators/llm_evaluators.py,sha256=
|
|
42
|
-
phoenix/experiments/evaluators/utils.py,sha256=
|
|
39
|
+
phoenix/experiments/evaluators/base.py,sha256=jAwJs-V7jCp2UBChL0S3813Xyd9GN4rU4IEhX0nkFGs,5549
|
|
40
|
+
phoenix/experiments/evaluators/code_evaluators.py,sha256=O7ZtFk7ZEf3OjgrZeJTIDKeYfcQet8omlGG0s9vEywQ,6683
|
|
41
|
+
phoenix/experiments/evaluators/llm_evaluators.py,sha256=zyGhxXBDNi1qoj_8I95PRSwjfVaCzpFoAVUQeFT0XSM,13176
|
|
42
|
+
phoenix/experiments/evaluators/utils.py,sha256=XYqB0bOljyR0GewmR_mm9Ndl_q95EkjjDqfXd7YVqTk,9303
|
|
43
43
|
phoenix/inferences/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
44
44
|
phoenix/inferences/errors.py,sha256=cGp9vxnw4SewFoWBV3ZGMkhE0Kh73lPIv3Ppz_H_RoA,8261
|
|
45
45
|
phoenix/inferences/fixtures.py,sha256=FC2eRL4dpobKQHYOilFtDexUWFkMZ_w6jun_4WkbMk0,20792
|
|
@@ -60,20 +60,20 @@ phoenix/pointcloud/pointcloud.py,sha256=4zAIkKs2xOUbchpj4XDAV-iPMXrfAJ15TG6rlIYG
|
|
|
60
60
|
phoenix/pointcloud/projectors.py,sha256=zO_RrtDYSv2rqVOfIP2_9Cv11Dc8EmcZR94xhFcBYPU,1057
|
|
61
61
|
phoenix/pointcloud/umap_parameters.py,sha256=3UQSjrysVOvq2V4KNpTMqNqNiK0BsTZnPBHWZ4fyJtQ,1708
|
|
62
62
|
phoenix/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
63
|
-
phoenix/server/app.py,sha256=
|
|
63
|
+
phoenix/server/app.py,sha256=Agr0XLJGAEyBtV34qbsdBhabHxpPIxZL9SCaoFMeh2g,19479
|
|
64
64
|
phoenix/server/grpc_server.py,sha256=faktLxEtWGlCB1bPR4QwwTsRoQloahKMx0hAWqRGI5s,3379
|
|
65
65
|
phoenix/server/main.py,sha256=dRyODpwkNi_3as14fnZ8LWW_JLWtpXHldRy9SNjNtws,11251
|
|
66
66
|
phoenix/server/prometheus.py,sha256=j9DHB2fERuq_ZKmwVaqR-9wx5WcPPuU1Cm5Bhg5241Y,2996
|
|
67
67
|
phoenix/server/telemetry.py,sha256=T_2OKrxNViAeaANlNspEekg_Y5uZIFWvKAnpz8Aoqvk,2762
|
|
68
|
-
phoenix/server/thread_server.py,sha256=
|
|
68
|
+
phoenix/server/thread_server.py,sha256=dP6cm6Cf08jNhDA1TRlVZpziu1YgtPDmaeIJMm725eI,2154
|
|
69
69
|
phoenix/server/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
70
|
-
phoenix/server/api/context.py,sha256=
|
|
70
|
+
phoenix/server/api/context.py,sha256=DiK2IRMBbMvBF0uK20YBftApJXau4GSDHltVZX2yERQ,2957
|
|
71
71
|
phoenix/server/api/interceptor.py,sha256=ykDnoC_apUd-llVli3m1CW18kNSIgjz2qZ6m5JmPDu8,1294
|
|
72
72
|
phoenix/server/api/queries.py,sha256=eq2xHaQF-x4k6AGSY6b6mU2pie9bj-AJML6P2Mr0_DM,19886
|
|
73
73
|
phoenix/server/api/schema.py,sha256=BcxdqO5CSGqpKd-AAJHMjFlzaK9oJA8GJuxmMfcdjn4,434
|
|
74
74
|
phoenix/server/api/utils.py,sha256=Y1lGu8J8r8BSBX9OzffgewI8QMziovbG-ePDvZrrwGI,949
|
|
75
75
|
phoenix/server/api/dataloaders/__init__.py,sha256=F6-8dwb-aQ_T8LeRYg3LPR7T94__8Xe4ysM0VGQeQUQ,4936
|
|
76
|
-
phoenix/server/api/dataloaders/average_experiment_run_latency.py,sha256=
|
|
76
|
+
phoenix/server/api/dataloaders/average_experiment_run_latency.py,sha256=ITbbwEWuFqqAxY1CLuuG7VtZYfNXxjjibigNDYf7Yl0,1887
|
|
77
77
|
phoenix/server/api/dataloaders/dataset_example_revisions.py,sha256=Vpr5IEKSR4QnAVxE5NM7u92fPNgeHQV2ieYc6JakCj0,3788
|
|
78
78
|
phoenix/server/api/dataloaders/dataset_example_spans.py,sha256=_jLlo0KdUS65d4PNTtE9aXVyG_NZWgA7VcpNC9udQ8U,1484
|
|
79
79
|
phoenix/server/api/dataloaders/document_evaluation_summaries.py,sha256=dgAAlD0n8X6oAPLaD-czoefNkDqP338MouWsKaW8bOY,5684
|
|
@@ -81,8 +81,8 @@ phoenix/server/api/dataloaders/document_evaluations.py,sha256=V-y8eyAA0sZpQTjHvm
|
|
|
81
81
|
phoenix/server/api/dataloaders/document_retrieval_metrics.py,sha256=8tZYMNLZ7zxUmyTHHZRUTZTumvw6lK2tYOpFbATIPdI,4270
|
|
82
82
|
phoenix/server/api/dataloaders/evaluation_summaries.py,sha256=z9aal3IQL_t30aNqpAS7x4tjq0xNkuEG8dWW-bhqZmo,5724
|
|
83
83
|
phoenix/server/api/dataloaders/experiment_annotation_summaries.py,sha256=RsQ-o84kWVTYgIlh9VKkyw2kDMWIlHCRpS7RE2aw9vs,2881
|
|
84
|
-
phoenix/server/api/dataloaders/experiment_error_rates.py,sha256=
|
|
85
|
-
phoenix/server/api/dataloaders/experiment_run_counts.py,sha256=
|
|
84
|
+
phoenix/server/api/dataloaders/experiment_error_rates.py,sha256=Q7Cga0IRnzJy1IW26LWQmUu8pdLlBVYj3p6CJL7fcIk,2017
|
|
85
|
+
phoenix/server/api/dataloaders/experiment_run_counts.py,sha256=lnsX4GYll1EXaGYHxRL0HJol9DtqYYwLcMnoh-h994w,1729
|
|
86
86
|
phoenix/server/api/dataloaders/experiment_sequence_number.py,sha256=Va1KuoHOd-wzvrlKykoV4kLRFW4JsJvGp_DUI4HYZX4,1631
|
|
87
87
|
phoenix/server/api/dataloaders/latency_ms_quantile.py,sha256=pEc7QjB2iiNOQm_Fmo99F5O_DKOJWgGmcnT0OADJzYE,7423
|
|
88
88
|
phoenix/server/api/dataloaders/min_start_or_max_end_times.py,sha256=IoFX5PtSpvQdMk_7-oB8TpIse3Q4PMxep4qKggkHpzo,2902
|
|
@@ -105,8 +105,8 @@ phoenix/server/api/input_types/ClearProjectInput.py,sha256=cpPFRyQ3ffy2dLbCZgYpw
|
|
|
105
105
|
phoenix/server/api/input_types/ClusterInput.py,sha256=EL4ftvZxQ8mVdruUPcdhMhByORmSmM8S-X6RPqU6GX0,179
|
|
106
106
|
phoenix/server/api/input_types/Coordinates.py,sha256=meTwbIjwTfqx5DGD2DBlH9wQzdQVNM5a8x9dp1FfIgA,173
|
|
107
107
|
phoenix/server/api/input_types/CreateDatasetInput.py,sha256=Q3MwouIx9jTQBRWDju75iMQXEGJCrL4aD4ESQp771nc,248
|
|
108
|
-
phoenix/server/api/input_types/
|
|
109
|
-
phoenix/server/api/input_types/
|
|
108
|
+
phoenix/server/api/input_types/CreateSpanAnnotationInput.py,sha256=bKgT7bdA9-gYpJmqnMq9TEfjNDEYoldc17EjAglXVlU,474
|
|
109
|
+
phoenix/server/api/input_types/CreateTraceAnnotationInput.py,sha256=iSukKAxt-gTTykpkttse3MVOTD3AOk6fWD-N5PaZ2yY,476
|
|
110
110
|
phoenix/server/api/input_types/DataQualityMetricInput.py,sha256=LazvmQCCM5m9SDZTpyxQXO1rYF4cmsc3lsR2S9S65X4,1292
|
|
111
111
|
phoenix/server/api/input_types/DatasetExampleInput.py,sha256=9oJ6pCFxFd02IWJuK4YAUvz-jCgFGDUCDDb2--GAzCw,289
|
|
112
112
|
phoenix/server/api/input_types/DatasetSort.py,sha256=KDKjx5L8WFNwx7O-g1pDzCMMwY-ErgDd1_HkkZBAvCY,333
|
|
@@ -118,7 +118,7 @@ phoenix/server/api/input_types/DeleteExperimentsInput.py,sha256=yUbwMckIBvIL-R9t
|
|
|
118
118
|
phoenix/server/api/input_types/DimensionFilter.py,sha256=vcXgglSnZcB5pGh-6oEtRmGx95hISgFUR7BEPw01g7U,3143
|
|
119
119
|
phoenix/server/api/input_types/DimensionInput.py,sha256=Vfx5FmiMKey4-EHDQsQRPzSAMRJMN5oVMLDUl4NKAa8,164
|
|
120
120
|
phoenix/server/api/input_types/Granularity.py,sha256=6SVfZ5yTZYq1PI6vdpjfkBUc4YilLSkF-k6okuSNbbQ,2301
|
|
121
|
-
phoenix/server/api/input_types/
|
|
121
|
+
phoenix/server/api/input_types/PatchAnnotationInput.py,sha256=NWhkcbcGNPwfOYsN3wm5YFNNrSc5T-8Y5my74RK99HE,520
|
|
122
122
|
phoenix/server/api/input_types/PatchDatasetExamplesInput.py,sha256=E86aBGXDBC83jiEGwV5rilnoeQf6eqCfZ0aAVeIt2VI,890
|
|
123
123
|
phoenix/server/api/input_types/PatchDatasetInput.py,sha256=OURtTVY8Z_oFEDtKwT1LCMaOK5D4QYo5TVQ6mDrex-g,328
|
|
124
124
|
phoenix/server/api/input_types/PerformanceMetricInput.py,sha256=fElsLTSEYYgGFGMYTEGcYid39tXUKFdV_JkdHavMcbA,591
|
|
@@ -131,24 +131,22 @@ phoenix/server/api/mutations/dataset_mutations.py,sha256=CuKhxsYfvwVcdN_9EXhKxB6
|
|
|
131
131
|
phoenix/server/api/mutations/experiment_mutations.py,sha256=vV2lbJ7ccXZqe-LY7nXx6QxWqhKQE4UNZAFcML-KQ8I,3011
|
|
132
132
|
phoenix/server/api/mutations/export_events_mutations.py,sha256=t_wYBxaqvBJYRoHslh3Bmoxmwlzoy0u8SsBKWIKN5hE,4028
|
|
133
133
|
phoenix/server/api/mutations/project_mutations.py,sha256=d_xtYkYfZ5flpVgEkGknKB8rsEux-zZraczzqAs4e8A,2255
|
|
134
|
-
phoenix/server/api/mutations/span_annotations_mutations.py,sha256=
|
|
135
|
-
phoenix/server/api/mutations/trace_annotations_mutations.py,sha256=
|
|
134
|
+
phoenix/server/api/mutations/span_annotations_mutations.py,sha256=Kig5hdH-Jw0UZBhQAqyHvF7HdCHCqKZaoLR-jCOVJUA,5197
|
|
135
|
+
phoenix/server/api/mutations/trace_annotations_mutations.py,sha256=X0k49Ysu-su_hJbfjb_q3-G7qB9o_mpC6UXrDYhV5Sw,5237
|
|
136
136
|
phoenix/server/api/openapi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
137
|
-
phoenix/server/api/openapi/main.py,sha256=
|
|
138
|
-
phoenix/server/api/openapi/schema.py,sha256=
|
|
137
|
+
phoenix/server/api/openapi/main.py,sha256=WY0pj3B7siQyyYqKyhqnzWC7P8MtEtiukOBUjGwLXfw,153
|
|
138
|
+
phoenix/server/api/openapi/schema.py,sha256=uuSYe1Ecu72aXRgTNjyMu-9ZPE13DAHJPKtedS-MsSs,451
|
|
139
139
|
phoenix/server/api/routers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
140
140
|
phoenix/server/api/routers/utils.py,sha256=M41BoH-fl37izhRuN2aX7lWm7jOC20A_3uClv9TVUUY,583
|
|
141
|
-
phoenix/server/api/routers/v1/__init__.py,sha256=
|
|
142
|
-
phoenix/server/api/routers/v1/dataset_examples.py,sha256=
|
|
143
|
-
phoenix/server/api/routers/v1/datasets.py,sha256
|
|
144
|
-
phoenix/server/api/routers/v1/evaluations.py,sha256=
|
|
145
|
-
phoenix/server/api/routers/v1/experiment_evaluations.py,sha256=
|
|
146
|
-
phoenix/server/api/routers/v1/experiment_runs.py,sha256=
|
|
147
|
-
phoenix/server/api/routers/v1/experiments.py,sha256=
|
|
148
|
-
phoenix/server/api/routers/v1/
|
|
149
|
-
phoenix/server/api/routers/v1/
|
|
150
|
-
phoenix/server/api/routers/v1/traces.py,sha256=Zl_hGHd-4rA0tXegH_GVoN9Ij84vbPB8oHu28fzGHA8,8029
|
|
151
|
-
phoenix/server/api/routers/v1/utils.py,sha256=xvl2v-BKUkqmFVMmgmmWGFKuRBTrUdoiAeT3mCYEE68,3086
|
|
141
|
+
phoenix/server/api/routers/v1/__init__.py,sha256=D1EFRWG4PcsTubeF3A3ENlNatCRq26AA52FxW11BGjM,3048
|
|
142
|
+
phoenix/server/api/routers/v1/dataset_examples.py,sha256=XfqOvDKF1oxb0pkeYfBycwwGt3LnSyyGdMLKC5VKoGQ,6690
|
|
143
|
+
phoenix/server/api/routers/v1/datasets.py,sha256=r0WcNxF8SKVa3-4rrTIg4Andwr4NmRmW1ybpKuxR9qw,33639
|
|
144
|
+
phoenix/server/api/routers/v1/evaluations.py,sha256=8g6P_e2BweV3RDU0esFmpkb0L5fCwonQPXiJ0y6HLwg,9126
|
|
145
|
+
phoenix/server/api/routers/v1/experiment_evaluations.py,sha256=TE1GMSOLN_96uAaJpnRpIH2u9x6_ebtkECgZRHvqt-w,5098
|
|
146
|
+
phoenix/server/api/routers/v1/experiment_runs.py,sha256=jy4SynmzdtQMoUzlowmG6wsVU14SsLAzfcW4JOhXjeQ,8154
|
|
147
|
+
phoenix/server/api/routers/v1/experiments.py,sha256=uVdmhyJgYI-UqOiRSJ-8OcVpL8a6Z02B5H2Rt_7yboY,11829
|
|
148
|
+
phoenix/server/api/routers/v1/spans.py,sha256=tryWFoJVFRLALzt6dfPmBBhKMS0s3hhlYdTathxVEU4,9638
|
|
149
|
+
phoenix/server/api/routers/v1/traces.py,sha256=PBIrpdJHVJ9gyiukCy1Ck1w0xts0VEHtRKaF7Noa248,8434
|
|
152
150
|
phoenix/server/api/types/Annotation.py,sha256=7Ym7iuVcbwHlw2yIRylz4nATAF_Cm-Z17qcjiooj1cc,751
|
|
153
151
|
phoenix/server/api/types/AnnotatorKind.py,sha256=rPgGdbN1Gvc109sGQ_ZH-gfJbp93V9wlarzTEJNtUwI,236
|
|
154
152
|
phoenix/server/api/types/Cluster.py,sha256=ac4YfT1OH3xLVmex7EUmB6b9IpULnhLTt554LR0jglE,5689
|
|
@@ -173,17 +171,17 @@ phoenix/server/api/types/EvaluationSummary.py,sha256=EFucuzAhcxR9sdEn6WNAtmAGJk-
|
|
|
173
171
|
phoenix/server/api/types/Event.py,sha256=XdYgaIxcVIW-YFViCkxj5l9OaVNepyIrCtm5Iqg2le8,3989
|
|
174
172
|
phoenix/server/api/types/EventMetadata.py,sha256=-J0tYF9eZTHwCjwxQHY7Gckr2_MNW5OoWT1mydweZNM,635
|
|
175
173
|
phoenix/server/api/types/ExampleRevisionInterface.py,sha256=gV3Gt9-3Oi5wjaVtepC6nOt3FzTzZFD1KebNnqiw56E,294
|
|
176
|
-
phoenix/server/api/types/Experiment.py,sha256=
|
|
174
|
+
phoenix/server/api/types/Experiment.py,sha256=K-3w6dniPRSMO4v-4ToDRwH2xr4fPaDumoyeT4We7g4,5228
|
|
177
175
|
phoenix/server/api/types/ExperimentAnnotationSummary.py,sha256=Uk3JtxIrsMoZT5tqc4nJdUOM3XegVzjUyoV3pkjNotE,256
|
|
178
176
|
phoenix/server/api/types/ExperimentComparison.py,sha256=0sFz6MoBDw39dds0qVyaqhVs9qqO5rkG1FMSjmfBeCc,441
|
|
179
177
|
phoenix/server/api/types/ExperimentRun.py,sha256=122_SID7SLKPUq2dJ2Y4BBw40DNUtcxo6QCZuO8UbBs,2997
|
|
180
178
|
phoenix/server/api/types/ExperimentRunAnnotation.py,sha256=iBxDaD9DgiF-Qymp5QyxWfJRGYXM1_CeWA_qzsZBqkI,1812
|
|
181
179
|
phoenix/server/api/types/ExportedFile.py,sha256=e3GTn7B5LgsTbqiwjhMCQH7VsiqXitrBO4aCMS1lHsg,163
|
|
182
180
|
phoenix/server/api/types/Functionality.py,sha256=tzV9xdhB8zqfsjWxP66NDC7EZsplYkYO7jRbLWJIeeg,382
|
|
183
|
-
phoenix/server/api/types/Inferences.py,sha256=
|
|
181
|
+
phoenix/server/api/types/Inferences.py,sha256=BOMlOSsRtUV9XQxpnjmZxdBcJ1w-t9PiFfVOSTS160E,3367
|
|
184
182
|
phoenix/server/api/types/InferencesRole.py,sha256=Kj9aiXOpGhpeg9PHd9MDU7aXVIT28EjJxr4P6xybfzc,601
|
|
185
183
|
phoenix/server/api/types/MimeType.py,sha256=Zpi6zCalkSFgsvhzvOs-O1gYA04usAi9H__QZUmFlO0,365
|
|
186
|
-
phoenix/server/api/types/Model.py,sha256=
|
|
184
|
+
phoenix/server/api/types/Model.py,sha256=BRIzH5xSGiDrAUYvhwDpwxT6--ddS3Xr3vCvP8_vzdo,8051
|
|
187
185
|
phoenix/server/api/types/NumericRange.py,sha256=afEjgF97Go_OvmjMggbPBt-zGM8IONewAyEiKEHRds0,192
|
|
188
186
|
phoenix/server/api/types/PerformanceMetric.py,sha256=W92B7OghEOgzFvmY0LCqpgavHaQggTGshdgfD0yqHX4,350
|
|
189
187
|
phoenix/server/api/types/Project.py,sha256=R2_nart3H4m8QYRbBe-SRnYvIjH4BCipcj_tKe6GaC8,14516
|
|
@@ -204,6 +202,7 @@ phoenix/server/api/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ
|
|
|
204
202
|
phoenix/server/api/types/node.py,sha256=V0Xh9U4cGkz3iMg-vzEXtcs6cumU29JFPiU-JuGzjWI,848
|
|
205
203
|
phoenix/server/api/types/pagination.py,sha256=PcaJ0s4exsTKgCZC4aFm1cgZNrGpHSdo6PbkWzPcweg,9077
|
|
206
204
|
phoenix/server/openapi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
205
|
+
phoenix/server/openapi/docs.py,sha256=fTb9q2oOSKC5bLVQy2Dsg3Bs0mGkCOKX1ypX7731sE0,7044
|
|
207
206
|
phoenix/server/static/apple-touch-icon-114x114.png,sha256=xtFVXAYQnJkpUApg2D1hltSTuyO4Is4sD4A0ZkikiVU,9486
|
|
208
207
|
phoenix/server/static/apple-touch-icon-120x120.png,sha256=iqZVAk634BbjJMozA8aHYyw15JjhIlIrG41FA2DFFaE,9957
|
|
209
208
|
phoenix/server/static/apple-touch-icon-144x144.png,sha256=VgARtkHKoU8zikb3_G83h_cb02kpPcoJqO78yRh1AfU,10047
|
|
@@ -214,20 +213,20 @@ phoenix/server/static/apple-touch-icon-76x76.png,sha256=CT_xT12I0u2i0WU8JzBZBuOQ
|
|
|
214
213
|
phoenix/server/static/apple-touch-icon.png,sha256=fOfpjqGpWYbJ0eAurKsyoZP1EAs6ZVooBJ_SGk2ZkDs,3801
|
|
215
214
|
phoenix/server/static/favicon.ico,sha256=bY0vvCKRftemZfPShwZtE93DiiQdaYaozkPGwNFr6H8,34494
|
|
216
215
|
phoenix/server/static/modernizr.js,sha256=mvK-XtkNqjOral-QvzoqsyOMECXIMu5BQwSVN_wcU9c,2564
|
|
217
|
-
phoenix/server/static/.vite/manifest.json,sha256=
|
|
218
|
-
phoenix/server/static/assets/components-
|
|
219
|
-
phoenix/server/static/assets/index-
|
|
220
|
-
phoenix/server/static/assets/pages-
|
|
221
|
-
phoenix/server/static/assets/vendor-
|
|
216
|
+
phoenix/server/static/.vite/manifest.json,sha256=10o8Ytfii6SAgHd91u81MQsDMIwhoNgzav27nED77Ow,1929
|
|
217
|
+
phoenix/server/static/assets/components-DeS0YEmv.js,sha256=eE0JsxZZ0MI9DS9b8nDmBpmzCXNxqBGy8qVssaeHdj4,160991
|
|
218
|
+
phoenix/server/static/assets/index-CQgXRwU0.js,sha256=hzyJ3S10rmimJlZny1IO3l0_noAurwHdJw0Nc38B1Bc,6342
|
|
219
|
+
phoenix/server/static/assets/pages-hdjlFZhO.js,sha256=CRfgXRtrvdut-V-KmrKFtezUb_azfs-eCIbTrIPKX04,422269
|
|
220
|
+
phoenix/server/static/assets/vendor-DPvSDRn3.js,sha256=Hc-RClavSPr5CtZbFYbQNrIPim9LJrD4e51QMOxF3Io,1355429
|
|
222
221
|
phoenix/server/static/assets/vendor-DxkFTwjz.css,sha256=nZrkr0u6NNElFGvpWHk9GTHeGoibCXCli1bE7mXZGZg,1816
|
|
223
|
-
phoenix/server/static/assets/vendor-arizeai-
|
|
224
|
-
phoenix/server/static/assets/vendor-codemirror-
|
|
225
|
-
phoenix/server/static/assets/vendor-recharts-
|
|
222
|
+
phoenix/server/static/assets/vendor-arizeai-CkvPT67c.js,sha256=sD4eqJrzqLBhFUAe2TNFUkoGv2nJl_gv3cK3Lo_Iiqk,290966
|
|
223
|
+
phoenix/server/static/assets/vendor-codemirror-Cqwpwlua.js,sha256=RSF9c9RG5ol0VSYXxItT5llkabRIUO1gIZVrF4-nX1o,357576
|
|
224
|
+
phoenix/server/static/assets/vendor-recharts-5jlNaZuF.js,sha256=3LZ6-as7BKqU7ulx_f_2wE8LVc8IjFrIxJNTxrkPfps,282859
|
|
226
225
|
phoenix/server/static/assets/vendor-three-DwGkEfCM.js,sha256=0D12ZgKzfKCTSdSTKJBFR2RZO_xxeMXrqDp0AszZqHY,620972
|
|
227
226
|
phoenix/server/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
228
227
|
phoenix/server/templates/index.html,sha256=gVpjB8pCMiubdMh2DA9mTCtV5AVTXJH_9u5PmG2t7Vk,4238
|
|
229
228
|
phoenix/session/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
230
|
-
phoenix/session/client.py,sha256=
|
|
229
|
+
phoenix/session/client.py,sha256=AEqBnjWl1D1KounuUj5T269cqPAZIV_KdWezknj6nC0,32577
|
|
231
230
|
phoenix/session/data_extractor.py,sha256=gkEM3WWZAlWGMfRgQopAQlid4cSi6GNco-sdrGir0qc,2788
|
|
232
231
|
phoenix/session/evaluation.py,sha256=aKeV8UVOyq3b7CYOwt3cWuLz0xzvMjX7vlEPILJ_fcs,5311
|
|
233
232
|
phoenix/session/session.py,sha256=1ZGR0pBmah8bqX353MDf4sq7XuK904EfxNLo0B9z_sU,26714
|
|
@@ -247,7 +246,7 @@ phoenix/trace/trace_dataset.py,sha256=Wq89jJ4hYQ1Qt-Uj11ZNzKQYQeKmGY6NqWStQiiTlM
|
|
|
247
246
|
phoenix/trace/utils.py,sha256=7LurVGXn245cjj4MJsc7v6jq4DSJkpK6YGBfIaSywuw,1307
|
|
248
247
|
phoenix/trace/dsl/README.md,sha256=ihmP9zGUC5V-TDbzKla76LuyDqPDQIBUH2BORwxNI68,2902
|
|
249
248
|
phoenix/trace/dsl/__init__.py,sha256=WIQIjJg362XD3s50OsPJJ0xbDsGp41bSv7vDllLrPuA,144
|
|
250
|
-
phoenix/trace/dsl/filter.py,sha256=
|
|
249
|
+
phoenix/trace/dsl/filter.py,sha256=DTDERSAexxDbTy5QvC48NSKhnr2pfIRDZ4PS-s3ZW80,32642
|
|
251
250
|
phoenix/trace/dsl/helpers.py,sha256=ULAhqWULPqYWCSNX7y50DVKIqfySx86nqb6hDvZPnVk,3896
|
|
252
251
|
phoenix/trace/dsl/query.py,sha256=W0t-tiXh2WIVb96lzFAGQOQ-U46uKux78d4KL3rW-PE,30316
|
|
253
252
|
phoenix/trace/langchain/__init__.py,sha256=F37GfD1pd5Kuw7R7iRUM1zXXpO8xEcycNZh5dwqBXNk,109
|
|
@@ -267,8 +266,8 @@ phoenix/utilities/logging.py,sha256=lDXd6EGaamBNcQxL4vP1au9-i_SXe0OraUDiJOcszSw,
|
|
|
267
266
|
phoenix/utilities/project.py,sha256=qWsvKnG1oKhOFUowXf9qiOL2ia7jaFe_ijFFHEt8GJo,431
|
|
268
267
|
phoenix/utilities/re.py,sha256=PDve_OLjRTM8yQQJHC8-n3HdIONi7aNils3ZKRZ5uBM,2045
|
|
269
268
|
phoenix/utilities/span_store.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
270
|
-
arize_phoenix-4.
|
|
271
|
-
arize_phoenix-4.
|
|
272
|
-
arize_phoenix-4.
|
|
273
|
-
arize_phoenix-4.
|
|
274
|
-
arize_phoenix-4.
|
|
269
|
+
arize_phoenix-4.14.1.dist-info/METADATA,sha256=FgEpDDxRhJOIbBjm9IWUQxHltEu7XB1XjtsEHMl23W8,11736
|
|
270
|
+
arize_phoenix-4.14.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
271
|
+
arize_phoenix-4.14.1.dist-info/licenses/IP_NOTICE,sha256=JBqyyCYYxGDfzQ0TtsQgjts41IJoa-hiwDrBjCb9gHM,469
|
|
272
|
+
arize_phoenix-4.14.1.dist-info/licenses/LICENSE,sha256=HFkW9REuMOkvKRACuwLPT0hRydHb3zNg-fdFt94td18,3794
|
|
273
|
+
arize_phoenix-4.14.1.dist-info/RECORD,,
|
phoenix/db/bulk_inserter.py
CHANGED
|
@@ -105,8 +105,10 @@ class BulkInserter:
|
|
|
105
105
|
)
|
|
106
106
|
|
|
107
107
|
async def __aexit__(self, *args: Any) -> None:
|
|
108
|
-
self._operations = None
|
|
109
108
|
self._running = False
|
|
109
|
+
if self._task:
|
|
110
|
+
self._task.cancel()
|
|
111
|
+
self._task = None
|
|
110
112
|
|
|
111
113
|
def _enqueue_operation(self, operation: DataManipulation) -> None:
|
|
112
114
|
cast("Queue[DataManipulation]", self._operations).put_nowait(operation)
|
|
@@ -90,11 +90,15 @@ class Evaluator(ABC):
|
|
|
90
90
|
if super_cls in (LLMEvaluator, Evaluator):
|
|
91
91
|
break
|
|
92
92
|
if evaluate := super_cls.__dict__.get(Evaluator.evaluate.__name__):
|
|
93
|
+
if isinstance(evaluate, classmethod):
|
|
94
|
+
evaluate = evaluate.__func__
|
|
93
95
|
assert callable(evaluate), "`evaluate()` method should be callable"
|
|
94
96
|
# need to remove the first param, i.e. `self`
|
|
95
97
|
_validate_sig(functools.partial(evaluate, None), "evaluate")
|
|
96
98
|
return
|
|
97
99
|
if async_evaluate := super_cls.__dict__.get(Evaluator.async_evaluate.__name__):
|
|
100
|
+
if isinstance(async_evaluate, classmethod):
|
|
101
|
+
async_evaluate = async_evaluate.__func__
|
|
98
102
|
assert callable(async_evaluate), "`async_evaluate()` method should be callable"
|
|
99
103
|
# need to remove the first param, i.e. `self`
|
|
100
104
|
_validate_sig(functools.partial(async_evaluate, None), "async_evaluate")
|
|
@@ -9,6 +9,19 @@ from phoenix.experiments.types import EvaluationResult, TaskOutput
|
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class JSONParsable(CodeEvaluator):
|
|
12
|
+
"""
|
|
13
|
+
An evaluator that checks if the output of an experiment run is a JSON-parsable string.
|
|
14
|
+
|
|
15
|
+
Example:
|
|
16
|
+
|
|
17
|
+
.. code-block:: python
|
|
18
|
+
from phoenix.experiments import run_experiment
|
|
19
|
+
from phoenix.experiments.evaluators import JSONParsable
|
|
20
|
+
|
|
21
|
+
run_experiment(dataset, task, evaluators=[JSONParsable])
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
@classmethod
|
|
12
25
|
def evaluate(self, *, output: Optional[TaskOutput] = None, **_: Any) -> EvaluationResult:
|
|
13
26
|
assert isinstance(output, str), "Experiment run output must be a string"
|
|
14
27
|
try:
|
|
@@ -22,6 +35,22 @@ class JSONParsable(CodeEvaluator):
|
|
|
22
35
|
|
|
23
36
|
|
|
24
37
|
class ContainsKeyword(CodeEvaluator):
|
|
38
|
+
"""
|
|
39
|
+
An evaluator that checks if a keyword is present in the output of an experiment run.
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
keyword (str): The keyword to search for in the output.
|
|
43
|
+
name (str, optional): An optional name for the evaluator. Defaults to "Contains(<keyword>)".
|
|
44
|
+
|
|
45
|
+
Example:
|
|
46
|
+
|
|
47
|
+
.. code-block:: python
|
|
48
|
+
from phoenix.experiments import run_experiment
|
|
49
|
+
from phoenix.experiments.evaluators import ContainsKeyword
|
|
50
|
+
|
|
51
|
+
run_experiment(dataset, task, evaluators=[ContainsKeyword("foo")])
|
|
52
|
+
"""
|
|
53
|
+
|
|
25
54
|
def __init__(self, keyword: str, name: Optional[str] = None) -> None:
|
|
26
55
|
self.keyword = keyword
|
|
27
56
|
self._name = name or f"Contains({repr(keyword)})"
|
|
@@ -39,6 +68,23 @@ class ContainsKeyword(CodeEvaluator):
|
|
|
39
68
|
|
|
40
69
|
|
|
41
70
|
class ContainsAnyKeyword(CodeEvaluator):
|
|
71
|
+
"""
|
|
72
|
+
An evaluator that checks if any of the keywords are present in the output of an experiment run.
|
|
73
|
+
|
|
74
|
+
Args:
|
|
75
|
+
keywords (List[str]): The keywords to search for in the output.
|
|
76
|
+
name (str, optional): An optional name for the evaluator. Defaults to
|
|
77
|
+
"ContainsAny(<keywords>)".
|
|
78
|
+
|
|
79
|
+
Example:
|
|
80
|
+
|
|
81
|
+
.. code-block:: python
|
|
82
|
+
from phoenix.experiments import run_experiment
|
|
83
|
+
from phoenix.experiments.evaluators import ContainsAnyKeyword
|
|
84
|
+
|
|
85
|
+
run_experiment(dataset, task, evaluators=[ContainsAnyKeyword(["foo", "bar"])])
|
|
86
|
+
"""
|
|
87
|
+
|
|
42
88
|
def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
|
|
43
89
|
self.keywords = keywords
|
|
44
90
|
self._name = name or f"ContainsAny({keywords})"
|
|
@@ -57,6 +103,23 @@ class ContainsAnyKeyword(CodeEvaluator):
|
|
|
57
103
|
|
|
58
104
|
|
|
59
105
|
class ContainsAllKeywords(CodeEvaluator):
|
|
106
|
+
"""
|
|
107
|
+
An evaluator that checks if all of the keywords are present in the output of an experiment run.
|
|
108
|
+
|
|
109
|
+
Args:
|
|
110
|
+
keywords (List[str]): The keywords to search for in the output.
|
|
111
|
+
name (str, optional): An optional name for the evaluator. Defaults to
|
|
112
|
+
"ContainsAll(<keywords>)".
|
|
113
|
+
|
|
114
|
+
Example:
|
|
115
|
+
.. code-block:: python
|
|
116
|
+
|
|
117
|
+
from phoenix.experiments import run_experiment
|
|
118
|
+
from phoenix.experiments.evaluators import ContainsAllKeywords
|
|
119
|
+
|
|
120
|
+
run_experiment(dataset, task, evaluators=[ContainsAllKeywords(["foo", "bar"])])
|
|
121
|
+
"""
|
|
122
|
+
|
|
60
123
|
def __init__(self, keywords: List[str], name: Optional[str] = None) -> None:
|
|
61
124
|
self.keywords = keywords
|
|
62
125
|
self._name = name or f"ContainsAll({keywords})"
|
|
@@ -77,6 +140,23 @@ class ContainsAllKeywords(CodeEvaluator):
|
|
|
77
140
|
|
|
78
141
|
|
|
79
142
|
class MatchesRegex(CodeEvaluator):
|
|
143
|
+
r"""
|
|
144
|
+
An experiment evaluator that checks if the output of an experiment run matches a regex pattern.
|
|
145
|
+
|
|
146
|
+
Args:
|
|
147
|
+
pattern (Union[str, re.Pattern[str]]): The regex pattern to match the output against.
|
|
148
|
+
name (str, optional): An optional name for the evaluator. Defaults to "matches_({pattern})".
|
|
149
|
+
|
|
150
|
+
Example:
|
|
151
|
+
.. code-block:: python
|
|
152
|
+
|
|
153
|
+
from phoenix.experiments import run_experiment
|
|
154
|
+
from phoenix.experiments.evaluators import MatchesRegex
|
|
155
|
+
|
|
156
|
+
phone_number_evaluator = MatchesRegex(r"\d{3}-\d{3}-\d{4}", name="valid-phone-number")
|
|
157
|
+
run_experiment(dataset, task, evaluators=[phone_number_evaluator])
|
|
158
|
+
"""
|
|
159
|
+
|
|
80
160
|
def __init__(self, pattern: Union[str, re.Pattern[str]], name: Optional[str] = None) -> None:
|
|
81
161
|
if isinstance(pattern, str):
|
|
82
162
|
pattern = re.compile(pattern)
|
|
@@ -18,6 +18,31 @@ from phoenix.experiments.types import (
|
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
class LLMCriteriaEvaluator(LLMEvaluator):
|
|
21
|
+
"""
|
|
22
|
+
An experiment evaluator that uses an LLM to evaluate whether the text meets a custom criterion.
|
|
23
|
+
|
|
24
|
+
This evaluator uses the chain-of-thought technique to perform a binary evaluation of text based
|
|
25
|
+
on a custom criterion and description. When used as an experiment evaluator,
|
|
26
|
+
`LLMCriteriaEvaluator` will return a score of 1.0 if the text meets the criteria and a score of
|
|
27
|
+
0.0 if not. The explanation produced by the chain-of-thought technique will be included in the
|
|
28
|
+
experiment evaluation as well.
|
|
29
|
+
|
|
30
|
+
Example criteria and descriptions:
|
|
31
|
+
- "thoughtfulness" - "shows careful consideration and fair judgement"
|
|
32
|
+
- "clarity" - "is easy to understand and follow"
|
|
33
|
+
- "professionalism" - "is respectful and appropriate for a formal setting"
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
|
|
37
|
+
the `phoenix.evals` module.
|
|
38
|
+
criteria: The criteria to evaluate the text against. The criteria should be able to be used
|
|
39
|
+
as a noun in a sentence.
|
|
40
|
+
description (str): A description of the criteria, used to clarify instructions to the LLM.
|
|
41
|
+
The description should complete this sentence: "{criteria} means the text
|
|
42
|
+
{description}".
|
|
43
|
+
name (str): The name of the evaluator
|
|
44
|
+
"""
|
|
45
|
+
|
|
21
46
|
_base_template = (
|
|
22
47
|
"Determine if the following text is {criteria}. {description}"
|
|
23
48
|
"First, explain step-by-step why you think the text is or is not {criteria}. Then provide "
|
|
@@ -117,6 +142,14 @@ ConcisenessEvaluator = criteria_evaluator_factory(
|
|
|
117
142
|
description="is just a few sentences and easy to follow",
|
|
118
143
|
default_name="Conciseness",
|
|
119
144
|
)
|
|
145
|
+
"""
|
|
146
|
+
An experiment evaluator that uses an LLM to evaluate whether the text is concise.
|
|
147
|
+
|
|
148
|
+
Args:
|
|
149
|
+
model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
|
|
150
|
+
the `phoenix.evals` module.
|
|
151
|
+
name (str, optional): The name of the evaluator, defaults to "Conciseness".
|
|
152
|
+
"""
|
|
120
153
|
|
|
121
154
|
|
|
122
155
|
HelpfulnessEvaluator = criteria_evaluator_factory(
|
|
@@ -125,6 +158,14 @@ HelpfulnessEvaluator = criteria_evaluator_factory(
|
|
|
125
158
|
description="provides useful information",
|
|
126
159
|
default_name="Helpfulness",
|
|
127
160
|
)
|
|
161
|
+
"""
|
|
162
|
+
An experiment evaluator that uses an LLM to evaluate whether the text is helpful.
|
|
163
|
+
|
|
164
|
+
Args:
|
|
165
|
+
model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
|
|
166
|
+
the `phoenix.evals` module.
|
|
167
|
+
name (str, optional): The name of the evaluator, defaults to "Helpfulness".
|
|
168
|
+
"""
|
|
128
169
|
|
|
129
170
|
|
|
130
171
|
CoherenceEvaluator = criteria_evaluator_factory(
|
|
@@ -133,6 +174,14 @@ CoherenceEvaluator = criteria_evaluator_factory(
|
|
|
133
174
|
description="is coherent, well-structured, and logically sound",
|
|
134
175
|
default_name="Coherence",
|
|
135
176
|
)
|
|
177
|
+
"""
|
|
178
|
+
An experiment evaluator that uses an LLM to evaluate whether the text is coherent.
|
|
179
|
+
|
|
180
|
+
Args:
|
|
181
|
+
model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
|
|
182
|
+
the `phoenix.evals` module.
|
|
183
|
+
name (str, optional): The name of the evaluator, defaults to "Coherence".
|
|
184
|
+
"""
|
|
136
185
|
|
|
137
186
|
|
|
138
187
|
def _parse_label_from_explanation(raw_string: str) -> str:
|
|
@@ -149,6 +198,33 @@ def _parse_label_from_explanation(raw_string: str) -> str:
|
|
|
149
198
|
|
|
150
199
|
|
|
151
200
|
class RelevanceEvaluator(LLMEvaluator):
|
|
201
|
+
"""
|
|
202
|
+
An experiment evaluator that uses an LLM to evaluate whether a response is relevant to a query.
|
|
203
|
+
|
|
204
|
+
This evaluator uses the chain-of-thought technique to perform a binary evaluation of whether
|
|
205
|
+
the output "response" of an experiment is relevant to its input "query". When used as an
|
|
206
|
+
experiment evaluator, `RelevanceEvaluator` will return a score of 1.0 if the response is
|
|
207
|
+
relevant to the query and a score of 0.0 if not. The explanation produced by the
|
|
208
|
+
chain-of-thought technique will be included in the experiment evaluation as well.
|
|
209
|
+
|
|
210
|
+
Optionally, you can provide custom functions to extract the query and response from the input
|
|
211
|
+
and output of the experiment task. By default, the evaluator will use the dataset example's
|
|
212
|
+
input as the query and the output of the experiment task as the response.
|
|
213
|
+
|
|
214
|
+
Args:
|
|
215
|
+
model: The LLM model wrapper to use for evaluation. Compatible models can be imported from
|
|
216
|
+
the `phoenix.evals` module.
|
|
217
|
+
get_query (callable, optional): A function that extracts the query from the input of the
|
|
218
|
+
experiment task. The function should take the input and metadata of the dataset example
|
|
219
|
+
and return a string. By default, the function will return the string representation of
|
|
220
|
+
the input.
|
|
221
|
+
get_response (callable, optional): A function that extracts the response from the output of
|
|
222
|
+
the experiment task. The function should take the output and metadata of the experiment
|
|
223
|
+
task and return a string. By default, the function will return the string representation
|
|
224
|
+
of the output.
|
|
225
|
+
name (str, optional): The name of the evaluator. Defaults to "Relevance".
|
|
226
|
+
"""
|
|
227
|
+
|
|
152
228
|
template = (
|
|
153
229
|
"Determine if the following response is relevant to the query. In this context, "
|
|
154
230
|
"'relevance' means that the response directly addresses the core question or topic of the "
|
|
@@ -174,7 +250,7 @@ class RelevanceEvaluator(LLMEvaluator):
|
|
|
174
250
|
model: LLMBaseModel,
|
|
175
251
|
get_query: Optional[Callable[[ExampleInput, ExampleMetadata], str]] = None,
|
|
176
252
|
get_response: Optional[Callable[[Optional[TaskOutput], ExampleMetadata], str]] = None,
|
|
177
|
-
name: str = "
|
|
253
|
+
name: str = "Relevance",
|
|
178
254
|
):
|
|
179
255
|
self.model = model
|
|
180
256
|
self._name = name
|