arize-phoenix 4.4.4rc4__tar.gz → 4.4.4rc6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/PKG-INFO +12 -6
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/README.md +1 -1
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/examples/manually-instrumented-chatbot/frontend/requirements.txt +2 -2
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/pyproject.toml +15 -4
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/config.py +21 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/datetime_utils.py +4 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/db/insertion/dataset.py +19 -16
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/db/insertion/evaluation.py +4 -4
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/db/insertion/helpers.py +4 -12
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/db/insertion/span.py +3 -3
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/db/migrations/versions/10460e46d750_datasets.py +2 -2
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/db/models.py +8 -3
- arize_phoenix-4.4.4rc6/src/phoenix/experiments/__init__.py +6 -0
- arize_phoenix-4.4.4rc6/src/phoenix/experiments/evaluators/__init__.py +29 -0
- arize_phoenix-4.4.4rc6/src/phoenix/experiments/evaluators/base.py +153 -0
- {arize_phoenix-4.4.4rc4/src/phoenix/datasets → arize_phoenix-4.4.4rc6/src/phoenix/experiments}/evaluators/code_evaluators.py +25 -53
- {arize_phoenix-4.4.4rc4/src/phoenix/datasets → arize_phoenix-4.4.4rc6/src/phoenix/experiments}/evaluators/llm_evaluators.py +62 -31
- arize_phoenix-4.4.4rc6/src/phoenix/experiments/evaluators/utils.py +189 -0
- arize_phoenix-4.4.4rc6/src/phoenix/experiments/functions.py +616 -0
- {arize_phoenix-4.4.4rc4/src/phoenix/datasets → arize_phoenix-4.4.4rc6/src/phoenix/experiments}/tracing.py +19 -0
- arize_phoenix-4.4.4rc6/src/phoenix/experiments/types.py +722 -0
- arize_phoenix-4.4.4rc6/src/phoenix/experiments/utils.py +9 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/context.py +4 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/dataloaders/__init__.py +4 -0
- arize_phoenix-4.4.4rc6/src/phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
- arize_phoenix-4.4.4rc6/src/phoenix/server/api/dataloaders/experiment_run_counts.py +42 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/helpers/dataset_helpers.py +8 -7
- arize_phoenix-4.4.4rc6/src/phoenix/server/api/input_types/ClearProjectInput.py +15 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/mutations/project_mutations.py +9 -4
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/routers/v1/__init__.py +1 -1
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/routers/v1/dataset_examples.py +10 -10
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/routers/v1/datasets.py +152 -48
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/routers/v1/evaluations.py +4 -11
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/routers/v1/experiment_evaluations.py +23 -23
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/routers/v1/experiment_runs.py +5 -17
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/routers/v1/experiments.py +5 -5
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/routers/v1/spans.py +6 -4
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/Experiment.py +12 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/ExperimentRun.py +1 -1
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/ExperimentRunAnnotation.py +1 -1
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/app.py +4 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/static/index.js +712 -588
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/session/client.py +321 -28
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/fixtures.py +6 -6
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/utilities/json.py +8 -8
- arize_phoenix-4.4.4rc6/src/phoenix/version.py +1 -0
- arize_phoenix-4.4.4rc4/src/phoenix/datasets/evaluators/__init__.py +0 -18
- arize_phoenix-4.4.4rc4/src/phoenix/datasets/evaluators/_utils.py +0 -13
- arize_phoenix-4.4.4rc4/src/phoenix/datasets/experiments.py +0 -485
- arize_phoenix-4.4.4rc4/src/phoenix/datasets/types.py +0 -212
- arize_phoenix-4.4.4rc4/src/phoenix/utilities/__init__.py +0 -0
- arize_phoenix-4.4.4rc4/src/phoenix/version.py +0 -1
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/.gitignore +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/IP_NOTICE +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/LICENSE +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/examples/manually-instrumented-chatbot/chat-service/chat/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/examples/manually-instrumented-chatbot/chat-service/chat/app.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/examples/manually-instrumented-chatbot/chat-service/chat/types.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/examples/manually-instrumented-chatbot/frontend/Dockerfile +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/examples/manually-instrumented-chatbot/frontend/Makefile +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/examples/manually-instrumented-chatbot/frontend/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/examples/manually-instrumented-chatbot/frontend/pyproject.toml +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/examples/manually-instrumented-chatbot/frontend/schema.json +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/core/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/core/embedding_dimension.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/core/model.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/core/model_schema.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/core/model_schema_adapter.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/db/README.md +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/db/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/db/alembic.ini +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/db/bulk_inserter.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/db/engines.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/db/helpers.py +0 -0
- {arize_phoenix-4.4.4rc4/src/phoenix/datasets → arize_phoenix-4.4.4rc6/src/phoenix/db/insertion}/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/db/migrate.py +0 -0
- {arize_phoenix-4.4.4rc4/src/phoenix/db/insertion → arize_phoenix-4.4.4rc6/src/phoenix/db/migrations}/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/db/migrations/env.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/db/migrations/script.py.mako +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/db/migrations/types.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/exceptions.py +0 -0
- {arize_phoenix-4.4.4rc4/src/phoenix/db/migrations → arize_phoenix-4.4.4rc6/src/phoenix/inferences}/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/inferences/errors.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/inferences/fixtures.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/inferences/inferences.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/inferences/schema.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/inferences/validation.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/metrics/README.md +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/metrics/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/metrics/binning.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/metrics/metrics.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/metrics/mixins.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/metrics/retrieval_metrics.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/metrics/timeseries.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/metrics/wrappers.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/pointcloud/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/pointcloud/clustering.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/pointcloud/pointcloud.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/pointcloud/projectors.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/pointcloud/umap_parameters.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/py.typed +0 -0
- {arize_phoenix-4.4.4rc4/src/phoenix/inferences → arize_phoenix-4.4.4rc6/src/phoenix/server}/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4/src/phoenix/server → arize_phoenix-4.4.4rc6/src/phoenix/server/api}/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/dataloaders/cache/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/dataloaders/cache/two_tier_cache.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/dataloaders/dataset_example_spans.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/dataloaders/document_evaluation_summaries.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/dataloaders/document_evaluations.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/dataloaders/document_retrieval_metrics.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/dataloaders/evaluation_summaries.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/dataloaders/experiment_annotation_summaries.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/dataloaders/experiment_error_rates.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/dataloaders/experiment_sequence_number.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/dataloaders/latency_ms_quantile.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/dataloaders/min_start_or_max_end_times.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/dataloaders/project_by_name.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/dataloaders/record_counts.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/dataloaders/span_descendants.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/dataloaders/span_evaluations.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/dataloaders/span_projects.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/dataloaders/token_counts.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/dataloaders/trace_evaluations.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/dataloaders/trace_row_ids.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/helpers/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/input_types/AddExamplesToDatasetInput.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/input_types/AddSpansToDatasetInput.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/input_types/ClusterInput.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/input_types/Coordinates.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/input_types/CreateDatasetInput.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/input_types/DataQualityMetricInput.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/input_types/DatasetExampleInput.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/input_types/DatasetSort.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/input_types/DatasetVersionSort.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/input_types/DeleteDatasetInput.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/input_types/DeleteExperimentsInput.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/input_types/DimensionFilter.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/input_types/DimensionInput.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/input_types/Granularity.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/input_types/PatchDatasetExamplesInput.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/input_types/PatchDatasetInput.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/input_types/PerformanceMetricInput.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/input_types/SpanSort.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/input_types/TimeRange.py +0 -0
- {arize_phoenix-4.4.4rc4/src/phoenix/server/api → arize_phoenix-4.4.4rc6/src/phoenix/server/api/input_types}/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/interceptor.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/mutations/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/mutations/auth.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/mutations/dataset_mutations.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/mutations/experiment_mutations.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/mutations/export_events_mutations.py +0 -0
- {arize_phoenix-4.4.4rc4/src/phoenix/server/api/input_types → arize_phoenix-4.4.4rc6/src/phoenix/server/api/openapi}/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/openapi/main.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/openapi/schema.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/queries.py +0 -0
- {arize_phoenix-4.4.4rc4/src/phoenix/server/api/openapi → arize_phoenix-4.4.4rc6/src/phoenix/server/api/routers}/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/routers/utils.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/routers/v1/traces.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/schema.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/AnnotatorKind.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/Cluster.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/CreateDatasetPayload.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/DataQualityMetric.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/Dataset.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/DatasetExample.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/DatasetExampleRevision.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/DatasetValues.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/DatasetVersion.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/Dimension.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/DimensionDataType.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/DimensionShape.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/DimensionType.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/DimensionWithValue.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/DocumentEvaluationSummary.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/DocumentRetrievalMetrics.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/EmbeddingDimension.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/EmbeddingMetadata.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/Evaluation.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/EvaluationSummary.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/Event.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/EventMetadata.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/ExampleRevisionInterface.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/ExperimentAnnotationSummary.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/ExperimentComparison.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/ExportedFile.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/Functionality.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/Inferences.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/InferencesRole.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/MimeType.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/Model.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/NumericRange.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/PerformanceMetric.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/Project.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/PromptResponse.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/Retrieval.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/ScalarDriftMetricEnum.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/Segments.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/SortDir.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/Span.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/TimeSeries.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/Trace.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/UMAPPoints.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/ValidationResult.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/VectorDriftMetricEnum.py +0 -0
- {arize_phoenix-4.4.4rc4/src/phoenix/server/api/routers → arize_phoenix-4.4.4rc6/src/phoenix/server/api/types}/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/node.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/api/types/pagination.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/grpc_server.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/main.py +0 -0
- {arize_phoenix-4.4.4rc4/src/phoenix/server/api/types → arize_phoenix-4.4.4rc6/src/phoenix/server/openapi}/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/openapi/docs.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/prometheus.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/static/apple-touch-icon-114x114.png +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/static/apple-touch-icon-120x120.png +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/static/apple-touch-icon-144x144.png +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/static/apple-touch-icon-152x152.png +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/static/apple-touch-icon-180x180.png +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/static/apple-touch-icon-72x72.png +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/static/apple-touch-icon-76x76.png +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/static/apple-touch-icon.png +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/static/favicon.ico +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/static/index.css +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/static/modernizr.js +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/telemetry.py +0 -0
- {arize_phoenix-4.4.4rc4/src/phoenix/server/openapi → arize_phoenix-4.4.4rc6/src/phoenix/server/templates}/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/templates/index.html +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/server/thread_server.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/services.py +0 -0
- {arize_phoenix-4.4.4rc4/src/phoenix/server/templates → arize_phoenix-4.4.4rc6/src/phoenix/session}/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/session/data_extractor.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/session/evaluation.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/session/session.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/settings.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/attributes.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/dsl/README.md +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/dsl/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/dsl/filter.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/dsl/helpers.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/dsl/query.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/errors.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/evaluation_conventions.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/exporter.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/langchain/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/langchain/instrumentor.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/llama_index/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/llama_index/callback.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/openai/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/openai/instrumentor.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/otel.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/projects.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/schemas.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/span_evaluations.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/span_json_decoder.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/span_json_encoder.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/trace_dataset.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/utils.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/v1/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/v1/evaluation_pb2.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/trace/v1/evaluation_pb2.pyi +0 -0
- {arize_phoenix-4.4.4rc4/src/phoenix/session → arize_phoenix-4.4.4rc6/src/phoenix/utilities}/__init__.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/utilities/deprecation.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/utilities/error_handling.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/utilities/logging.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/utilities/project.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/utilities/re.py +0 -0
- {arize_phoenix-4.4.4rc4 → arize_phoenix-4.4.4rc6}/src/phoenix/utilities/span_store.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: arize-phoenix
|
|
3
|
-
Version: 4.4.4rc4
|
|
3
|
+
Version: 4.4.4rc6
|
|
4
4
|
Summary: AI Observability and Evaluation
|
|
5
5
|
Project-URL: Documentation, https://docs.arize.com/phoenix/
|
|
6
6
|
Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
|
|
@@ -20,7 +20,7 @@ Requires-Python: <3.13,>=3.8
|
|
|
20
20
|
Requires-Dist: aioitertools
|
|
21
21
|
Requires-Dist: aiosqlite
|
|
22
22
|
Requires-Dist: alembic<2,>=1.3.0
|
|
23
|
-
Requires-Dist: arize-phoenix-evals>=0.
|
|
23
|
+
Requires-Dist: arize-phoenix-evals>=0.13.1
|
|
24
24
|
Requires-Dist: cachetools
|
|
25
25
|
Requires-Dist: grpcio
|
|
26
26
|
Requires-Dist: hdbscan>=0.8.33
|
|
@@ -31,12 +31,12 @@ Requires-Dist: openinference-instrumentation
|
|
|
31
31
|
Requires-Dist: openinference-instrumentation-langchain>=0.1.12
|
|
32
32
|
Requires-Dist: openinference-instrumentation-llama-index>=1.2.0
|
|
33
33
|
Requires-Dist: openinference-instrumentation-openai>=0.1.4
|
|
34
|
-
Requires-Dist: openinference-semantic-conventions>=0.1.
|
|
34
|
+
Requires-Dist: openinference-semantic-conventions>=0.1.9
|
|
35
35
|
Requires-Dist: opentelemetry-exporter-otlp
|
|
36
36
|
Requires-Dist: opentelemetry-proto>=1.12.0
|
|
37
37
|
Requires-Dist: opentelemetry-sdk
|
|
38
38
|
Requires-Dist: opentelemetry-semantic-conventions
|
|
39
|
-
Requires-Dist: pandas
|
|
39
|
+
Requires-Dist: pandas>=1.0
|
|
40
40
|
Requires-Dist: protobuf<6.0,>=3.20
|
|
41
41
|
Requires-Dist: psutil
|
|
42
42
|
Requires-Dist: pyarrow
|
|
@@ -79,6 +79,7 @@ Requires-Dist: llama-index>=0.10.3; extra == 'dev'
|
|
|
79
79
|
Requires-Dist: nbqa; extra == 'dev'
|
|
80
80
|
Requires-Dist: pandas-stubs==2.0.3.230814; (python_version < '3.9') and extra == 'dev'
|
|
81
81
|
Requires-Dist: pandas-stubs==2.2.2.240603; (python_version >= '3.9') and extra == 'dev'
|
|
82
|
+
Requires-Dist: pandas>=1.0; extra == 'dev'
|
|
82
83
|
Requires-Dist: pre-commit; extra == 'dev'
|
|
83
84
|
Requires-Dist: prometheus-client; extra == 'dev'
|
|
84
85
|
Requires-Dist: psycopg[binary]; extra == 'dev'
|
|
@@ -88,10 +89,15 @@ Requires-Dist: pytest-postgresql; extra == 'dev'
|
|
|
88
89
|
Requires-Dist: pytest==8.2.2; extra == 'dev'
|
|
89
90
|
Requires-Dist: ruff==0.4.9; extra == 'dev'
|
|
90
91
|
Requires-Dist: strawberry-graphql[debug-server,opentelemetry]==0.235.0; extra == 'dev'
|
|
92
|
+
Requires-Dist: tabulate; extra == 'dev'
|
|
93
|
+
Requires-Dist: types-tabulate; extra == 'dev'
|
|
91
94
|
Provides-Extra: evals
|
|
92
95
|
Provides-Extra: experimental
|
|
93
96
|
Provides-Extra: llama-index
|
|
94
|
-
Requires-Dist: llama-index
|
|
97
|
+
Requires-Dist: llama-index-embeddings-openai; extra == 'llama-index'
|
|
98
|
+
Requires-Dist: llama-index-llms-openai; extra == 'llama-index'
|
|
99
|
+
Requires-Dist: llama-index-readers-file; extra == 'llama-index'
|
|
100
|
+
Requires-Dist: llama-index==0.10.51; extra == 'llama-index'
|
|
95
101
|
Provides-Extra: pg
|
|
96
102
|
Requires-Dist: asyncpg; extra == 'pg'
|
|
97
103
|
Description-Content-Type: text/markdown
|
|
@@ -127,7 +133,7 @@ Description-Content-Type: text/markdown
|
|
|
127
133
|
|
|
128
134
|
Phoenix is an open-source AI observability platform designed for experimentation, evaluation, and troubleshooting. It provides:
|
|
129
135
|
|
|
130
|
-
- **_Tracing_** - Trace your LLM application's runtime using
|
|
136
|
+
- **_Tracing_** - Trace your LLM application's runtime using OpenTelemetry-based instrumentation.
|
|
131
137
|
- **_Evaluation_** - Leverage LLMs to benchmark your application's performance using response and retrieval evals.
|
|
132
138
|
- **_Inference Analysis_** - Visualize inferences and embeddings using dimensionality reduction and clustering to identify drift and performance degradation.
|
|
133
139
|
|
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
|
|
30
30
|
Phoenix is an open-source AI observability platform designed for experimentation, evaluation, and troubleshooting. It provides:
|
|
31
31
|
|
|
32
|
-
- **_Tracing_** - Trace your LLM application's runtime using
|
|
32
|
+
- **_Tracing_** - Trace your LLM application's runtime using OpenTelemetry-based instrumentation.
|
|
33
33
|
- **_Evaluation_** - Leverage LLMs to benchmark your application's performance using response and retrieval evals.
|
|
34
34
|
- **_Inference Analysis_** - Visualize inferences and embeddings using dimensionality reduction and clustering to identify drift and performance degradation.
|
|
35
35
|
|
|
@@ -87,7 +87,7 @@ referencing==0.34.0
|
|
|
87
87
|
# via
|
|
88
88
|
# jsonschema
|
|
89
89
|
# jsonschema-specifications
|
|
90
|
-
requests==2.
|
|
90
|
+
requests==2.32.2
|
|
91
91
|
# via streamlit
|
|
92
92
|
rich==13.7.1
|
|
93
93
|
# via streamlit
|
|
@@ -110,7 +110,7 @@ toml==0.10.2
|
|
|
110
110
|
# via streamlit
|
|
111
111
|
toolz==0.12.1
|
|
112
112
|
# via altair
|
|
113
|
-
tornado==6.4
|
|
113
|
+
tornado==6.4.1
|
|
114
114
|
# via streamlit
|
|
115
115
|
typing-extensions==4.11.0
|
|
116
116
|
# via
|
|
@@ -24,7 +24,7 @@ classifiers = [
|
|
|
24
24
|
dependencies = [
|
|
25
25
|
"scikit-learn",
|
|
26
26
|
"numpy<2", # https://github.com/scikit-learn-contrib/hdbscan/issues/642
|
|
27
|
-
"pandas",
|
|
27
|
+
"pandas>=1.0",
|
|
28
28
|
"jinja2",
|
|
29
29
|
"umap-learn",
|
|
30
30
|
"hdbscan>=0.8.33",
|
|
@@ -46,7 +46,7 @@ dependencies = [
|
|
|
46
46
|
"opentelemetry-proto>=1.12.0", # needed to avoid this issue: https://github.com/Arize-ai/phoenix/issues/2695
|
|
47
47
|
"opentelemetry-exporter-otlp",
|
|
48
48
|
"opentelemetry-semantic-conventions",
|
|
49
|
-
"openinference-semantic-conventions>=0.1.
|
|
49
|
+
"openinference-semantic-conventions>=0.1.9",
|
|
50
50
|
"openinference-instrumentation",
|
|
51
51
|
"openinference-instrumentation-langchain>=0.1.12",
|
|
52
52
|
"openinference-instrumentation-llama-index>=1.2.0",
|
|
@@ -58,7 +58,7 @@ dependencies = [
|
|
|
58
58
|
"sqlean.py>=3.45.1",
|
|
59
59
|
"cachetools",
|
|
60
60
|
"python-multipart", # see https://www.starlette.io/#dependencies
|
|
61
|
-
"arize-phoenix-evals>=0.
|
|
61
|
+
"arize-phoenix-evals>=0.13.1",
|
|
62
62
|
]
|
|
63
63
|
dynamic = ["version"]
|
|
64
64
|
|
|
@@ -69,6 +69,9 @@ dev = [
|
|
|
69
69
|
"jupyter",
|
|
70
70
|
"nbqa",
|
|
71
71
|
"ruff==0.4.9",
|
|
72
|
+
"pandas>=1.0",
|
|
73
|
+
"tabulate", # used by DataFrame.to_markdown()
|
|
74
|
+
"types-tabulate",
|
|
72
75
|
"pandas-stubs==2.2.2.240603; python_version>='3.9'",
|
|
73
76
|
"pandas-stubs==2.0.3.230814; python_version<'3.9'",
|
|
74
77
|
"pytest==8.2.2",
|
|
@@ -90,7 +93,10 @@ dev = [
|
|
|
90
93
|
evals = []
|
|
91
94
|
experimental = []
|
|
92
95
|
llama-index = [
|
|
93
|
-
"llama-index==0.10.
|
|
96
|
+
"llama-index==0.10.51", # always pin to a version that keeps our notebooks working
|
|
97
|
+
"llama-index-readers-file",
|
|
98
|
+
"llama-index-llms-openai",
|
|
99
|
+
"llama-index-embeddings-openai",
|
|
94
100
|
]
|
|
95
101
|
pg = [
|
|
96
102
|
"asyncpg",
|
|
@@ -163,7 +169,9 @@ dependencies = [
|
|
|
163
169
|
dependencies = [
|
|
164
170
|
"mypy==1.10.0",
|
|
165
171
|
"tenacity",
|
|
172
|
+
"pandas>=1.0",
|
|
166
173
|
"pandas-stubs==2.0.3.230814",
|
|
174
|
+
"types-tabulate",
|
|
167
175
|
"types-psutil",
|
|
168
176
|
"types-tqdm",
|
|
169
177
|
"types-protobuf",
|
|
@@ -206,6 +214,7 @@ dependencies = [
|
|
|
206
214
|
[tool.hatch.envs.docs]
|
|
207
215
|
detached = true
|
|
208
216
|
dependencies = [
|
|
217
|
+
"pyment",
|
|
209
218
|
"interrogate",
|
|
210
219
|
]
|
|
211
220
|
|
|
@@ -336,6 +345,7 @@ disallow_untyped_defs = true
|
|
|
336
345
|
disallow_incomplete_defs = true
|
|
337
346
|
strict = true
|
|
338
347
|
exclude = [
|
|
348
|
+
"api_reference",
|
|
339
349
|
"packages/",
|
|
340
350
|
"src/phoenix/evals/",
|
|
341
351
|
"dist/",
|
|
@@ -357,6 +367,7 @@ module = [
|
|
|
357
367
|
"wrapt",
|
|
358
368
|
"langchain.*",
|
|
359
369
|
"litellm",
|
|
370
|
+
"litellm.*",
|
|
360
371
|
"nest_asyncio",
|
|
361
372
|
"opentelemetry.*",
|
|
362
373
|
"pyarrow",
|
|
@@ -233,4 +233,25 @@ def get_env_client_headers() -> Optional[Dict[str, str]]:
|
|
|
233
233
|
return None
|
|
234
234
|
|
|
235
235
|
|
|
236
|
+
def get_base_url() -> str:
    """Return the base URL used to reach the server, always ending in ``/``.

    The value of ``get_env_collector_endpoint()`` is used when it is truthy.
    Otherwise the URL is assembled as ``http://{host}:{port}`` from
    ``get_env_host()`` and ``get_env_port()``, with the host ``0.0.0.0``
    replaced by ``127.0.0.1``.

    Returns:
        str: the base URL with a trailing slash
    """
    configured_host = get_env_host()
    # NOTE(review): presumably "0.0.0.0" is a bind-all address that a client
    # cannot connect to, hence the loopback substitution -- confirm.
    reachable_host = "127.0.0.1" if configured_host == "0.0.0.0" else configured_host
    url = get_env_collector_endpoint()
    if not url:
        url = f"http://{reachable_host}:{get_env_port()}"
    if not url.endswith("/"):
        url += "/"
    return url
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def get_web_base_url() -> str:
    """Return the web UI base URL.

    When ``active_session()`` yields a truthy session, that session's ``url``
    is returned; otherwise the result of ``get_base_url()`` is used.

    Returns:
        str: the web UI base URL
    """
    # Imported inside the function rather than at module top.
    # NOTE(review): presumably to avoid a circular import -- confirm.
    from phoenix.session.session import active_session

    current_session = active_session()
    if not current_session:
        return get_base_url()
    return current_session.url
|
|
255
|
+
|
|
256
|
+
|
|
236
257
|
DEFAULT_PROJECT_NAME = "default"
|
|
@@ -14,6 +14,10 @@ from pandas.core.dtypes.common import (
|
|
|
14
14
|
_LOCAL_TIMEZONE = datetime.now(timezone.utc).astimezone().tzinfo
|
|
15
15
|
|
|
16
16
|
|
|
17
|
+
def local_now() -> datetime:
    """Return the current time as an aware datetime in ``_LOCAL_TIMEZONE``."""
    # Take the instant in UTC first, then convert it to the local timezone
    # captured at import time in `_LOCAL_TIMEZONE`.
    utc_now = datetime.now(timezone.utc)
    return utc_now.astimezone(tz=_LOCAL_TIMEZONE)
|
|
19
|
+
|
|
20
|
+
|
|
17
21
|
def normalize_datetime(
|
|
18
22
|
dt: Optional[datetime],
|
|
19
23
|
tz: Optional[tzinfo] = None,
|
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from dataclasses import dataclass
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
3
|
from datetime import datetime, timezone
|
|
4
4
|
from enum import Enum
|
|
5
5
|
from itertools import chain
|
|
6
6
|
from typing import (
|
|
7
7
|
Any,
|
|
8
8
|
Awaitable,
|
|
9
|
+
Dict,
|
|
9
10
|
FrozenSet,
|
|
10
11
|
Iterable,
|
|
11
12
|
Iterator,
|
|
12
13
|
Mapping,
|
|
13
14
|
Optional,
|
|
14
|
-
Sequence,
|
|
15
15
|
Union,
|
|
16
16
|
cast,
|
|
17
17
|
)
|
|
@@ -30,7 +30,16 @@ DatasetVersionId: TypeAlias = int
|
|
|
30
30
|
DatasetExampleId: TypeAlias = int
|
|
31
31
|
DatasetExampleRevisionId: TypeAlias = int
|
|
32
32
|
SpanRowId: TypeAlias = int
|
|
33
|
-
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass(frozen=True)
class ExampleContent:
    """Immutable container for the three JSON-like parts of a dataset example.

    Every field defaults to its own fresh empty dict, so instances created
    without arguments never share a mutable default.
    """

    # The example's input payload.
    input: Dict[str, Any] = field(default_factory=dict)
    # The example's output payload.
    output: Dict[str, Any] = field(default_factory=dict)
    # Additional metadata stored alongside the example.
    metadata: Dict[str, Any] = field(default_factory=dict)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
Examples: TypeAlias = Iterable[ExampleContent]
|
|
34
43
|
|
|
35
44
|
|
|
36
45
|
@dataclass(frozen=True)
|
|
@@ -149,14 +158,10 @@ async def add_dataset_examples(
|
|
|
149
158
|
session: AsyncSession,
|
|
150
159
|
name: str,
|
|
151
160
|
examples: Union[Examples, Awaitable[Examples]],
|
|
152
|
-
input_keys: Sequence[str],
|
|
153
|
-
output_keys: Sequence[str],
|
|
154
|
-
metadata_keys: Sequence[str] = (),
|
|
155
161
|
description: Optional[str] = None,
|
|
156
162
|
metadata: Optional[Mapping[str, Any]] = None,
|
|
157
163
|
action: DatasetAction = DatasetAction.CREATE,
|
|
158
164
|
) -> Optional[DatasetExampleAdditionEvent]:
|
|
159
|
-
keys = DatasetKeys(frozenset(input_keys), frozenset(output_keys), frozenset(metadata_keys))
|
|
160
165
|
created_at = datetime.now(timezone.utc)
|
|
161
166
|
dataset_id: Optional[DatasetId] = None
|
|
162
167
|
if action is DatasetAction.APPEND and name:
|
|
@@ -173,9 +178,7 @@ async def add_dataset_examples(
|
|
|
173
178
|
created_at=created_at,
|
|
174
179
|
)
|
|
175
180
|
except Exception:
|
|
176
|
-
logger.exception(
|
|
177
|
-
f"Fail to insert dataset: {input_keys=}, {output_keys=}, {metadata_keys=}"
|
|
178
|
-
)
|
|
181
|
+
logger.exception(f"Failed to insert dataset: {name=}")
|
|
179
182
|
raise
|
|
180
183
|
try:
|
|
181
184
|
dataset_version_id = await insert_dataset_version(
|
|
@@ -184,7 +187,7 @@ async def add_dataset_examples(
|
|
|
184
187
|
created_at=created_at,
|
|
185
188
|
)
|
|
186
189
|
except Exception:
|
|
187
|
-
logger.exception(f"
|
|
190
|
+
logger.exception(f"Failed to insert dataset version for {dataset_id=}")
|
|
188
191
|
raise
|
|
189
192
|
for example in (await examples) if isinstance(examples, Awaitable) else examples:
|
|
190
193
|
try:
|
|
@@ -194,21 +197,21 @@ async def add_dataset_examples(
|
|
|
194
197
|
created_at=created_at,
|
|
195
198
|
)
|
|
196
199
|
except Exception:
|
|
197
|
-
logger.exception(f"
|
|
200
|
+
logger.exception(f"Failed to insert dataset example for {dataset_id=}")
|
|
198
201
|
raise
|
|
199
202
|
try:
|
|
200
203
|
await insert_dataset_example_revision(
|
|
201
204
|
session=session,
|
|
202
205
|
dataset_version_id=dataset_version_id,
|
|
203
206
|
dataset_example_id=dataset_example_id,
|
|
204
|
-
input=
|
|
205
|
-
output=
|
|
206
|
-
metadata=
|
|
207
|
+
input=example.input,
|
|
208
|
+
output=example.output,
|
|
209
|
+
metadata=example.metadata,
|
|
207
210
|
created_at=created_at,
|
|
208
211
|
)
|
|
209
212
|
except Exception:
|
|
210
213
|
logger.exception(
|
|
211
|
-
f"
|
|
214
|
+
f"Failed to insert dataset example revision for {dataset_version_id=}, "
|
|
212
215
|
f"{dataset_example_id=}"
|
|
213
216
|
)
|
|
214
217
|
raise
|
|
@@ -6,7 +6,7 @@ from typing_extensions import assert_never
|
|
|
6
6
|
|
|
7
7
|
from phoenix.db import models
|
|
8
8
|
from phoenix.db.helpers import SupportedSQLDialect, num_docs_col
|
|
9
|
-
from phoenix.db.insertion.helpers import OnConflict,
|
|
9
|
+
from phoenix.db.insertion.helpers import OnConflict, insert_on_conflict
|
|
10
10
|
from phoenix.exceptions import PhoenixException
|
|
11
11
|
from phoenix.trace import v1 as pb
|
|
12
12
|
|
|
@@ -91,7 +91,7 @@ async def _insert_trace_evaluation(
|
|
|
91
91
|
set_.pop("metadata_")
|
|
92
92
|
set_["metadata"] = values["metadata_"] # `metadata` must match database
|
|
93
93
|
await session.execute(
|
|
94
|
-
|
|
94
|
+
insert_on_conflict(
|
|
95
95
|
dialect=dialect,
|
|
96
96
|
table=models.TraceAnnotation,
|
|
97
97
|
values=values,
|
|
@@ -139,7 +139,7 @@ async def _insert_span_evaluation(
|
|
|
139
139
|
set_.pop("metadata_")
|
|
140
140
|
set_["metadata"] = values["metadata_"] # `metadata` must match database
|
|
141
141
|
await session.execute(
|
|
142
|
-
|
|
142
|
+
insert_on_conflict(
|
|
143
143
|
dialect=dialect,
|
|
144
144
|
table=models.SpanAnnotation,
|
|
145
145
|
values=values,
|
|
@@ -196,7 +196,7 @@ async def _insert_document_evaluation(
|
|
|
196
196
|
set_.pop("metadata_")
|
|
197
197
|
set_["metadata"] = values["metadata_"] # `metadata` must match database
|
|
198
198
|
await session.execute(
|
|
199
|
-
|
|
199
|
+
insert_on_conflict(
|
|
200
200
|
dialect=dialect,
|
|
201
201
|
table=models.DocumentAnnotation,
|
|
202
202
|
values=values,
|
|
@@ -2,7 +2,7 @@ from abc import ABC
|
|
|
2
2
|
from enum import Enum, auto
|
|
3
3
|
from typing import Any, Awaitable, Callable, Mapping, Optional, Sequence
|
|
4
4
|
|
|
5
|
-
from sqlalchemy import Insert
|
|
5
|
+
from sqlalchemy import Insert
|
|
6
6
|
from sqlalchemy.dialects.postgresql import insert as insert_postgresql
|
|
7
7
|
from sqlalchemy.dialects.sqlite import insert as insert_sqlite
|
|
8
8
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
@@ -25,26 +25,18 @@ class OnConflict(Enum):
|
|
|
25
25
|
DO_UPDATE = auto()
|
|
26
26
|
|
|
27
27
|
|
|
28
|
-
def
|
|
28
|
+
def insert_on_conflict(
|
|
29
29
|
dialect: SupportedSQLDialect,
|
|
30
30
|
table: Any,
|
|
31
31
|
values: Mapping[str, Any],
|
|
32
|
-
constraint:
|
|
33
|
-
column_names: Sequence[str]
|
|
32
|
+
constraint: str,
|
|
33
|
+
column_names: Sequence[str],
|
|
34
34
|
on_conflict: OnConflict = OnConflict.DO_NOTHING,
|
|
35
35
|
set_: Optional[Mapping[str, Any]] = None,
|
|
36
36
|
) -> Insert:
|
|
37
37
|
"""
|
|
38
38
|
Dialect specific insertion statement using ON CONFLICT DO syntax.
|
|
39
39
|
"""
|
|
40
|
-
if bool(constraint) != bool(column_names):
|
|
41
|
-
raise ValueError(
|
|
42
|
-
"Both `constraint` and `column_names` must be provided or omitted at the same time."
|
|
43
|
-
)
|
|
44
|
-
if (dialect is SupportedSQLDialect.POSTGRESQL and constraint is None) or (
|
|
45
|
-
dialect is SupportedSQLDialect.SQLITE and not column_names
|
|
46
|
-
):
|
|
47
|
-
return insert(table).values(values)
|
|
48
40
|
if dialect is SupportedSQLDialect.POSTGRESQL:
|
|
49
41
|
stmt_postgresql = insert_postgresql(table).values(values)
|
|
50
42
|
if on_conflict is OnConflict.DO_NOTHING or not set_:
|
|
@@ -7,7 +7,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
7
7
|
|
|
8
8
|
from phoenix.db import models
|
|
9
9
|
from phoenix.db.helpers import SupportedSQLDialect
|
|
10
|
-
from phoenix.db.insertion.helpers import OnConflict,
|
|
10
|
+
from phoenix.db.insertion.helpers import OnConflict, insert_on_conflict
|
|
11
11
|
from phoenix.trace.attributes import get_attribute_value
|
|
12
12
|
from phoenix.trace.schemas import Span, SpanStatusCode
|
|
13
13
|
|
|
@@ -27,7 +27,7 @@ async def insert_span(
|
|
|
27
27
|
) -> Optional[SpanInsertionEvent]:
|
|
28
28
|
dialect = SupportedSQLDialect(session.bind.dialect.name)
|
|
29
29
|
project_rowid = await session.scalar(
|
|
30
|
-
|
|
30
|
+
insert_on_conflict(
|
|
31
31
|
dialect=dialect,
|
|
32
32
|
table=models.Project,
|
|
33
33
|
constraint="uq_projects_name",
|
|
@@ -87,7 +87,7 @@ async def insert_span(
|
|
|
87
87
|
cumulative_llm_token_count_prompt += cast(int, accumulation[1] or 0)
|
|
88
88
|
cumulative_llm_token_count_completion += cast(int, accumulation[2] or 0)
|
|
89
89
|
span_rowid = await session.scalar(
|
|
90
|
-
|
|
90
|
+
insert_on_conflict(
|
|
91
91
|
dialect=dialect,
|
|
92
92
|
table=models.Span,
|
|
93
93
|
constraint="uq_spans_span_id",
|
|
@@ -72,7 +72,7 @@ def upgrade() -> None:
|
|
|
72
72
|
sa.Column(
|
|
73
73
|
"span_rowid",
|
|
74
74
|
sa.Integer,
|
|
75
|
-
sa.ForeignKey("spans.id"),
|
|
75
|
+
sa.ForeignKey("spans.id", ondelete="SET NULL"),
|
|
76
76
|
nullable=True,
|
|
77
77
|
index=True,
|
|
78
78
|
),
|
|
@@ -198,7 +198,7 @@ def upgrade() -> None:
|
|
|
198
198
|
sa.String,
|
|
199
199
|
nullable=True,
|
|
200
200
|
),
|
|
201
|
-
sa.Column("output", JSON_, nullable=
|
|
201
|
+
sa.Column("output", JSON_, nullable=False),
|
|
202
202
|
sa.Column("start_time", sa.TIMESTAMP(timezone=True), nullable=False),
|
|
203
203
|
sa.Column("end_time", sa.TIMESTAMP(timezone=True), nullable=False),
|
|
204
204
|
sa.Column(
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from datetime import datetime, timezone
|
|
2
|
-
from typing import Any, Dict, List, Optional
|
|
2
|
+
from typing import Any, Dict, List, Optional, TypedDict
|
|
3
3
|
|
|
4
4
|
from sqlalchemy import (
|
|
5
5
|
JSON,
|
|
@@ -91,6 +91,10 @@ class UtcTimeStamp(TypeDecorator[datetime]):
|
|
|
91
91
|
return normalize_datetime(value, timezone.utc)
|
|
92
92
|
|
|
93
93
|
|
|
94
|
+
class ExperimentResult(TypedDict, total=False):
    """Dictionary shape for an experiment result; the ``result`` key is optional."""

    # `total=False` makes this key optional; its value may be of any type.
    result: Any
|
|
96
|
+
|
|
97
|
+
|
|
94
98
|
class Base(DeclarativeBase):
|
|
95
99
|
# Enforce best practices for naming constraints
|
|
96
100
|
# https://alembic.sqlalchemy.org/en/latest/naming.html#integration-of-naming-conventions-into-operations-autogenerate
|
|
@@ -106,6 +110,7 @@ class Base(DeclarativeBase):
|
|
|
106
110
|
type_annotation_map = {
|
|
107
111
|
Dict[str, Any]: JsonDict,
|
|
108
112
|
List[Dict[str, Any]]: JsonList,
|
|
113
|
+
ExperimentResult: JsonDict,
|
|
109
114
|
}
|
|
110
115
|
|
|
111
116
|
|
|
@@ -483,7 +488,7 @@ class DatasetExample(Base):
|
|
|
483
488
|
index=True,
|
|
484
489
|
)
|
|
485
490
|
span_rowid: Mapped[Optional[int]] = mapped_column(
|
|
486
|
-
ForeignKey("spans.id"),
|
|
491
|
+
ForeignKey("spans.id", ondelete="SET NULL"),
|
|
487
492
|
index=True,
|
|
488
493
|
nullable=True,
|
|
489
494
|
)
|
|
@@ -556,7 +561,7 @@ class ExperimentRun(Base):
|
|
|
556
561
|
)
|
|
557
562
|
repetition_number: Mapped[int]
|
|
558
563
|
trace_id: Mapped[Optional[str]]
|
|
559
|
-
output: Mapped[
|
|
564
|
+
output: Mapped[ExperimentResult]
|
|
560
565
|
start_time: Mapped[datetime] = mapped_column(UtcTimeStamp)
|
|
561
566
|
end_time: Mapped[datetime] = mapped_column(UtcTimeStamp)
|
|
562
567
|
prompt_token_count: Mapped[Optional[int]]
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from phoenix.experiments.evaluators.code_evaluators import (
|
|
2
|
+
ContainsAllKeywords,
|
|
3
|
+
ContainsAnyKeyword,
|
|
4
|
+
ContainsKeyword,
|
|
5
|
+
JSONParsable,
|
|
6
|
+
MatchesRegex,
|
|
7
|
+
)
|
|
8
|
+
from phoenix.experiments.evaluators.llm_evaluators import (
|
|
9
|
+
CoherenceEvaluator,
|
|
10
|
+
ConcisenessEvaluator,
|
|
11
|
+
HelpfulnessEvaluator,
|
|
12
|
+
LLMCriteriaEvaluator,
|
|
13
|
+
RelevanceEvaluator,
|
|
14
|
+
)
|
|
15
|
+
from phoenix.experiments.evaluators.utils import create_evaluator
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"create_evaluator",
|
|
19
|
+
"ContainsAllKeywords",
|
|
20
|
+
"ContainsAnyKeyword",
|
|
21
|
+
"ContainsKeyword",
|
|
22
|
+
"JSONParsable",
|
|
23
|
+
"MatchesRegex",
|
|
24
|
+
"CoherenceEvaluator",
|
|
25
|
+
"ConcisenessEvaluator",
|
|
26
|
+
"LLMCriteriaEvaluator",
|
|
27
|
+
"HelpfulnessEvaluator",
|
|
28
|
+
"RelevanceEvaluator",
|
|
29
|
+
]
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
import inspect
|
|
3
|
+
from abc import ABC
|
|
4
|
+
from types import MappingProxyType
|
|
5
|
+
from typing import Any, Awaitable, Callable, Optional, Union
|
|
6
|
+
|
|
7
|
+
from typing_extensions import TypeAlias
|
|
8
|
+
|
|
9
|
+
from phoenix.experiments.evaluators.utils import validate_signature
|
|
10
|
+
from phoenix.experiments.types import (
|
|
11
|
+
AnnotatorKind,
|
|
12
|
+
EvaluationResult,
|
|
13
|
+
EvaluatorKind,
|
|
14
|
+
EvaluatorName,
|
|
15
|
+
EvaluatorOutput,
|
|
16
|
+
ExampleInput,
|
|
17
|
+
ExampleMetadata,
|
|
18
|
+
ExampleOutput,
|
|
19
|
+
TaskOutput,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Evaluator(ABC):
    """
    A helper super class to guide the implementation of an `Evaluator` object.
    Subclasses must implement either the `evaluate` or `async_evaluate` method.
    Implementing both methods is recommended, but not required.

    This class is intended to be subclassed, and should not be instantiated directly.

    Unless a subclass is declared with `is_abstract=True`, the presence and the
    signature of its `evaluate` / `async_evaluate` implementation are checked
    when the subclass is defined (see `__init_subclass__`).
    """

    _kind: AnnotatorKind
    _name: EvaluatorName

    @functools.cached_property
    def name(self) -> EvaluatorName:
        """Return `_name` when it is set, otherwise the name of the class."""
        if hasattr(self, "_name"):
            return self._name
        return self.__class__.__name__

    @functools.cached_property
    def kind(self) -> EvaluatorKind:
        """Return the value of `_kind` when set, else that of `AnnotatorKind.CODE`."""
        if hasattr(self, "_kind"):
            return self._kind.value
        return AnnotatorKind.CODE.value

    def __new__(cls, *args: Any, **kwargs: Any) -> "Evaluator":
        # Only subclasses may be instantiated; the base class itself is abstract.
        if cls is Evaluator:
            raise TypeError(f"{cls.__name__} is an abstract class and should not be instantiated.")
        return object.__new__(cls)

    def evaluate(
        self,
        *,
        output: Optional[TaskOutput] = None,
        expected: Optional[ExampleOutput] = None,
        metadata: ExampleMetadata = MappingProxyType({}),
        input: ExampleInput = MappingProxyType({}),
        **kwargs: Any,
    ) -> EvaluationResult:
        """Synchronous evaluation hook; raises `NotImplementedError` unless overridden."""
        # For subclassing, one should implement either this sync method or the
        # async version. Implementing both is recommended but not required.
        raise NotImplementedError

    async def async_evaluate(
        self,
        *,
        output: Optional[TaskOutput] = None,
        expected: Optional[ExampleOutput] = None,
        metadata: ExampleMetadata = MappingProxyType({}),
        input: ExampleInput = MappingProxyType({}),
        **kwargs: Any,
    ) -> EvaluationResult:
        """Asynchronous evaluation hook; by default delegates to `evaluate`."""
        # For subclassing, one should implement either this async method or the
        # sync version. Implementing both is recommended but not required.
        return self.evaluate(
            output=output,
            expected=expected,
            metadata=metadata,
            input=input,
            **kwargs,
        )

    def __init_subclass__(cls, is_abstract: bool = False, **kwargs: Any) -> None:
        """
        Check, at class-definition time, that a concrete subclass implements
        `evaluate` or `async_evaluate` with an acceptable signature.

        Args:
            is_abstract: when True, the subclass is treated as an intermediate
                base class and no check is performed.
            **kwargs: forwarded to `super().__init_subclass__`.

        Raises:
            TypeError: if the `evaluate` / `async_evaluate` attribute found on
                the class is not callable.
            ValueError: if neither method is defined between the subclass and
                the `Evaluator` / `LLMEvaluator` base classes in the MRO.
        """
        super().__init_subclass__(**kwargs)
        if is_abstract:
            return
        evaluate_fn_signature = inspect.signature(Evaluator.evaluate)
        for super_cls in inspect.getmro(cls):
            # Stop at the framework base classes: their own `evaluate` /
            # `async_evaluate` are placeholders, not user implementations.
            if super_cls in (LLMEvaluator, Evaluator):
                break
            if evaluate := super_cls.__dict__.get(Evaluator.evaluate.__name__):
                # Explicit check instead of `assert`, which is removed under `-O`.
                if not callable(evaluate):
                    raise TypeError("`evaluate()` method should be callable")
                # need to remove the first param, i.e. `self`
                _validate_sig(functools.partial(evaluate, None), "evaluate")
                return
            if async_evaluate := super_cls.__dict__.get(Evaluator.async_evaluate.__name__):
                # Explicit check instead of `assert`, which is removed under `-O`.
                if not callable(async_evaluate):
                    raise TypeError("`async_evaluate()` method should be callable")
                # need to remove the first param, i.e. `self`
                _validate_sig(functools.partial(async_evaluate, None), "async_evaluate")
                return
        raise ValueError(
            f"Evaluator must implement either "
            f"`def evaluate{evaluate_fn_signature}` or "
            f"`async def async_evaluate{evaluate_fn_signature}`"
        )
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _validate_sig(fn: Callable[..., Any], fn_name: str) -> None:
    """Validate the signature of `fn` and require that it accepts `**kwargs`.

    Args:
        fn: the callable whose signature is inspected.
        fn_name: the name used for `fn` in the error message.

    Raises:
        ValueError: if `fn` has no variadic keyword parameter.
    """
    signature = inspect.signature(fn)
    validate_signature(signature)
    accepts_var_keyword = any(
        parameter.kind is inspect.Parameter.VAR_KEYWORD
        for parameter in signature.parameters.values()
    )
    if not accepts_var_keyword:
        raise ValueError(f"`{fn_name}` should allow variadic keyword arguments `**kwargs`")
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class CodeEvaluator(Evaluator, ABC, is_abstract=True):
    """
    Base class for evaluators whose kind is `AnnotatorKind.CODE`.

    Derive concrete evaluators from this class; constructing `CodeEvaluator`
    itself raises `TypeError`.
    """

    _kind = AnnotatorKind.CODE

    def __new__(cls, *args: Any, **kwargs: Any) -> "CodeEvaluator":
        # Only subclasses may be constructed.
        if cls is not CodeEvaluator:
            return object.__new__(cls)
        raise TypeError(f"{cls.__name__} is an abstract class and should not be instantiated.")
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class LLMEvaluator(Evaluator, ABC, is_abstract=True):
    """
    Base class for evaluators whose kind is `AnnotatorKind.LLM`.

    Derive concrete evaluators from this class; constructing `LLMEvaluator`
    itself raises `TypeError`.
    """

    _kind = AnnotatorKind.LLM

    def __new__(cls, *args: Any, **kwargs: Any) -> "LLMEvaluator":
        # Only subclasses may be constructed.
        if cls is not LLMEvaluator:
            return object.__new__(cls)
        raise TypeError(f"{cls.__name__} is an abstract class and should not be instantiated.")
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
ExperimentEvaluator: TypeAlias = Union[
|
|
150
|
+
Evaluator,
|
|
151
|
+
Callable[..., EvaluatorOutput],
|
|
152
|
+
Callable[..., Awaitable[EvaluatorOutput]],
|
|
153
|
+
]
|