arize-phoenix 4.4.4rc6__tar.gz → 4.6.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/PKG-INFO +6 -4
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/pyproject.toml +5 -3
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/db/models.py +4 -4
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/experiments/evaluators/base.py +2 -2
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/experiments/evaluators/utils.py +9 -12
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/experiments/functions.py +166 -25
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/experiments/types.py +60 -29
- arize_phoenix-4.6.1/src/phoenix/experiments/utils.py +24 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/routers/v1/experiment_evaluations.py +78 -0
- arize_phoenix-4.6.1/src/phoenix/server/api/routers/v1/experiment_runs.py +220 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/routers/v1/experiments.py +128 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/ExperimentRun.py +1 -1
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/session/client.py +2 -31
- arize_phoenix-4.6.1/src/phoenix/version.py +1 -0
- arize_phoenix-4.4.4rc6/src/phoenix/experiments/utils.py +0 -9
- arize_phoenix-4.4.4rc6/src/phoenix/server/api/routers/v1/experiment_runs.py +0 -96
- arize_phoenix-4.4.4rc6/src/phoenix/version.py +0 -1
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/.gitignore +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/IP_NOTICE +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/LICENSE +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/README.md +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/examples/manually-instrumented-chatbot/chat-service/chat/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/examples/manually-instrumented-chatbot/chat-service/chat/app.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/examples/manually-instrumented-chatbot/chat-service/chat/types.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/examples/manually-instrumented-chatbot/frontend/Dockerfile +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/examples/manually-instrumented-chatbot/frontend/Makefile +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/examples/manually-instrumented-chatbot/frontend/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/examples/manually-instrumented-chatbot/frontend/pyproject.toml +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/examples/manually-instrumented-chatbot/frontend/requirements.txt +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/examples/manually-instrumented-chatbot/frontend/schema.json +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/config.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/core/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/core/embedding_dimension.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/core/model.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/core/model_schema.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/core/model_schema_adapter.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/datetime_utils.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/db/README.md +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/db/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/db/alembic.ini +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/db/bulk_inserter.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/db/engines.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/db/helpers.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/db/insertion/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/db/insertion/dataset.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/db/insertion/evaluation.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/db/insertion/helpers.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/db/insertion/span.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/db/migrate.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/db/migrations/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/db/migrations/env.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/db/migrations/script.py.mako +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/db/migrations/types.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/db/migrations/versions/10460e46d750_datasets.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/db/migrations/versions/cf03bd6bae1d_init.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/exceptions.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/experiments/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/experiments/evaluators/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/experiments/evaluators/code_evaluators.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/experiments/evaluators/llm_evaluators.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/experiments/tracing.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/inferences/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/inferences/errors.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/inferences/fixtures.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/inferences/inferences.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/inferences/schema.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/inferences/validation.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/metrics/README.md +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/metrics/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/metrics/binning.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/metrics/metrics.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/metrics/mixins.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/metrics/retrieval_metrics.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/metrics/timeseries.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/metrics/wrappers.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/pointcloud/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/pointcloud/clustering.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/pointcloud/pointcloud.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/pointcloud/projectors.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/pointcloud/umap_parameters.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/py.typed +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/context.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/average_experiment_run_latency.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/cache/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/cache/two_tier_cache.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/dataset_example_revisions.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/dataset_example_spans.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/document_evaluation_summaries.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/document_evaluations.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/document_retrieval_metrics.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/evaluation_summaries.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/experiment_annotation_summaries.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/experiment_error_rates.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/experiment_run_counts.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/experiment_sequence_number.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/latency_ms_quantile.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/min_start_or_max_end_times.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/project_by_name.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/record_counts.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/span_descendants.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/span_evaluations.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/span_projects.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/token_counts.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/trace_evaluations.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/dataloaders/trace_row_ids.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/helpers/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/helpers/dataset_helpers.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/input_types/AddExamplesToDatasetInput.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/input_types/AddSpansToDatasetInput.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/input_types/ClearProjectInput.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/input_types/ClusterInput.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/input_types/Coordinates.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/input_types/CreateDatasetInput.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/input_types/DataQualityMetricInput.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/input_types/DatasetExampleInput.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/input_types/DatasetSort.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/input_types/DatasetVersionSort.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/input_types/DeleteDatasetInput.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/input_types/DeleteExperimentsInput.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/input_types/DimensionFilter.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/input_types/DimensionInput.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/input_types/Granularity.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/input_types/PatchDatasetExamplesInput.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/input_types/PatchDatasetInput.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/input_types/PerformanceMetricInput.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/input_types/SpanSort.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/input_types/TimeRange.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/input_types/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/interceptor.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/mutations/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/mutations/auth.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/mutations/dataset_mutations.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/mutations/experiment_mutations.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/mutations/export_events_mutations.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/mutations/project_mutations.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/openapi/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/openapi/main.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/openapi/schema.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/queries.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/routers/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/routers/utils.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/routers/v1/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/routers/v1/dataset_examples.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/routers/v1/datasets.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/routers/v1/evaluations.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/routers/v1/spans.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/routers/v1/traces.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/schema.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/AnnotatorKind.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/Cluster.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/CreateDatasetPayload.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/DataQualityMetric.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/Dataset.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/DatasetExample.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/DatasetExampleRevision.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/DatasetValues.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/DatasetVersion.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/Dimension.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/DimensionDataType.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/DimensionShape.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/DimensionType.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/DimensionWithValue.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/DocumentEvaluationSummary.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/DocumentRetrievalMetrics.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/EmbeddingDimension.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/EmbeddingMetadata.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/Evaluation.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/EvaluationSummary.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/Event.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/EventMetadata.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/ExampleRevisionInterface.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/Experiment.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/ExperimentAnnotationSummary.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/ExperimentComparison.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/ExperimentRunAnnotation.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/ExportedFile.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/Functionality.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/Inferences.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/InferencesRole.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/MimeType.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/Model.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/NumericRange.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/PerformanceMetric.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/Project.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/PromptResponse.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/Retrieval.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/ScalarDriftMetricEnum.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/Segments.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/SortDir.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/Span.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/TimeSeries.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/Trace.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/UMAPPoints.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/ValidationResult.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/VectorDriftMetricEnum.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/node.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/api/types/pagination.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/app.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/grpc_server.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/main.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/openapi/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/openapi/docs.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/prometheus.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/static/apple-touch-icon-114x114.png +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/static/apple-touch-icon-120x120.png +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/static/apple-touch-icon-144x144.png +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/static/apple-touch-icon-152x152.png +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/static/apple-touch-icon-180x180.png +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/static/apple-touch-icon-72x72.png +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/static/apple-touch-icon-76x76.png +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/static/apple-touch-icon.png +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/static/favicon.ico +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/static/index.css +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/static/index.js +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/static/modernizr.js +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/telemetry.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/templates/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/templates/index.html +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/server/thread_server.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/services.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/session/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/session/data_extractor.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/session/evaluation.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/session/session.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/settings.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/attributes.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/dsl/README.md +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/dsl/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/dsl/filter.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/dsl/helpers.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/dsl/query.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/errors.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/evaluation_conventions.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/exporter.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/fixtures.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/langchain/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/langchain/instrumentor.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/llama_index/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/llama_index/callback.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/openai/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/openai/instrumentor.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/otel.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/projects.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/schemas.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/span_evaluations.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/span_json_decoder.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/span_json_encoder.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/trace_dataset.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/utils.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/v1/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/v1/evaluation_pb2.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/trace/v1/evaluation_pb2.pyi +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/utilities/__init__.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/utilities/deprecation.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/utilities/error_handling.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/utilities/json.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/utilities/logging.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/utilities/project.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/utilities/re.py +0 -0
- {arize_phoenix-4.4.4rc6 → arize_phoenix-4.6.1}/src/phoenix/utilities/span_store.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: arize-phoenix
|
|
3
|
-
Version: 4.4.4rc6
|
|
3
|
+
Version: 4.6.1
|
|
4
4
|
Summary: AI Observability and Evaluation
|
|
5
5
|
Project-URL: Documentation, https://docs.arize.com/phoenix/
|
|
6
6
|
Project-URL: Issues, https://github.com/Arize-ai/phoenix/issues
|
|
@@ -41,6 +41,7 @@ Requires-Dist: protobuf<6.0,>=3.20
|
|
|
41
41
|
Requires-Dist: psutil
|
|
42
42
|
Requires-Dist: pyarrow
|
|
43
43
|
Requires-Dist: python-multipart
|
|
44
|
+
Requires-Dist: pyyaml
|
|
44
45
|
Requires-Dist: scikit-learn
|
|
45
46
|
Requires-Dist: scipy
|
|
46
47
|
Requires-Dist: sqlalchemy[asyncio]<3,>=2.0.4
|
|
@@ -94,9 +95,10 @@ Requires-Dist: types-tabulate; extra == 'dev'
|
|
|
94
95
|
Provides-Extra: evals
|
|
95
96
|
Provides-Extra: experimental
|
|
96
97
|
Provides-Extra: llama-index
|
|
97
|
-
Requires-Dist: llama-index-embeddings-openai; extra == 'llama-index'
|
|
98
|
-
Requires-Dist: llama-index-llms-openai; extra == 'llama-index'
|
|
99
|
-
Requires-Dist: llama-index-readers-file; extra == 'llama-index'
|
|
98
|
+
Requires-Dist: llama-index-agent-openai==0.2.7; extra == 'llama-index'
|
|
99
|
+
Requires-Dist: llama-index-embeddings-openai==0.1.10; extra == 'llama-index'
|
|
100
|
+
Requires-Dist: llama-index-llms-openai==0.1.24; extra == 'llama-index'
|
|
101
|
+
Requires-Dist: llama-index-readers-file==0.1.25; extra == 'llama-index'
|
|
100
102
|
Requires-Dist: llama-index==0.10.51; extra == 'llama-index'
|
|
101
103
|
Provides-Extra: pg
|
|
102
104
|
Requires-Dist: asyncpg; extra == 'pg'
|
|
@@ -59,6 +59,7 @@ dependencies = [
|
|
|
59
59
|
"cachetools",
|
|
60
60
|
"python-multipart", # see https://www.starlette.io/#dependencies
|
|
61
61
|
"arize-phoenix-evals>=0.13.1",
|
|
62
|
+
"pyyaml", # for OpenAPI
|
|
62
63
|
]
|
|
63
64
|
dynamic = ["version"]
|
|
64
65
|
|
|
@@ -94,9 +95,10 @@ evals = []
|
|
|
94
95
|
experimental = []
|
|
95
96
|
llama-index = [
|
|
96
97
|
"llama-index==0.10.51", # always pin to a version that keeps our notebooks working
|
|
97
|
-
"llama-index-readers-file",
|
|
98
|
-
"llama-index-llms-openai",
|
|
99
|
-
"llama-index-embeddings-openai",
|
|
98
|
+
"llama-index-readers-file==0.1.25",
|
|
99
|
+
"llama-index-llms-openai==0.1.24",
|
|
100
|
+
"llama-index-embeddings-openai==0.1.10",
|
|
101
|
+
"llama-index-agent-openai==0.2.7",
|
|
100
102
|
]
|
|
101
103
|
pg = [
|
|
102
104
|
"asyncpg",
|
|
@@ -91,8 +91,8 @@ class UtcTimeStamp(TypeDecorator[datetime]):
|
|
|
91
91
|
return normalize_datetime(value, timezone.utc)
|
|
92
92
|
|
|
93
93
|
|
|
94
|
-
class
|
|
95
|
-
|
|
94
|
+
class ExperimentRunOutput(TypedDict, total=False):
|
|
95
|
+
task_output: Any
|
|
96
96
|
|
|
97
97
|
|
|
98
98
|
class Base(DeclarativeBase):
|
|
@@ -110,7 +110,7 @@ class Base(DeclarativeBase):
|
|
|
110
110
|
type_annotation_map = {
|
|
111
111
|
Dict[str, Any]: JsonDict,
|
|
112
112
|
List[Dict[str, Any]]: JsonList,
|
|
113
|
-
|
|
113
|
+
ExperimentRunOutput: JsonDict,
|
|
114
114
|
}
|
|
115
115
|
|
|
116
116
|
|
|
@@ -561,7 +561,7 @@ class ExperimentRun(Base):
|
|
|
561
561
|
)
|
|
562
562
|
repetition_number: Mapped[int]
|
|
563
563
|
trace_id: Mapped[Optional[str]]
|
|
564
|
-
output: Mapped[
|
|
564
|
+
output: Mapped[ExperimentRunOutput]
|
|
565
565
|
start_time: Mapped[datetime] = mapped_column(UtcTimeStamp)
|
|
566
566
|
end_time: Mapped[datetime] = mapped_column(UtcTimeStamp)
|
|
567
567
|
prompt_token_count: Mapped[Optional[int]]
|
|
@@ -6,7 +6,7 @@ from typing import Any, Awaitable, Callable, Optional, Union
|
|
|
6
6
|
|
|
7
7
|
from typing_extensions import TypeAlias
|
|
8
8
|
|
|
9
|
-
from phoenix.experiments.evaluators.utils import validate_signature
|
|
9
|
+
from phoenix.experiments.evaluators.utils import validate_evaluator_signature
|
|
10
10
|
from phoenix.experiments.types import (
|
|
11
11
|
AnnotatorKind,
|
|
12
12
|
EvaluationResult,
|
|
@@ -108,7 +108,7 @@ class Evaluator(ABC):
|
|
|
108
108
|
|
|
109
109
|
def _validate_sig(fn: Callable[..., Any], fn_name: str) -> None:
|
|
110
110
|
sig = inspect.signature(fn)
|
|
111
|
-
|
|
111
|
+
validate_evaluator_signature(sig)
|
|
112
112
|
for param in sig.parameters.values():
|
|
113
113
|
if param.kind is inspect.Parameter.VAR_KEYWORD:
|
|
114
114
|
return
|
|
@@ -8,6 +8,7 @@ from phoenix.experiments.types import (
|
|
|
8
8
|
EvaluationResult,
|
|
9
9
|
JSONSerializable,
|
|
10
10
|
)
|
|
11
|
+
from phoenix.experiments.utils import get_func_name
|
|
11
12
|
|
|
12
13
|
if TYPE_CHECKING:
|
|
13
14
|
from phoenix.experiments.evaluators.base import Evaluator
|
|
@@ -25,11 +26,11 @@ def unwrap_json(obj: JSONSerializable) -> JSONSerializable:
|
|
|
25
26
|
return obj
|
|
26
27
|
|
|
27
28
|
|
|
28
|
-
def validate_signature(sig: inspect.Signature) -> None:
|
|
29
|
+
def validate_evaluator_signature(sig: inspect.Signature) -> None:
|
|
29
30
|
# Check that the wrapped function has a valid signature for use as an evaluator
|
|
30
31
|
# If it does not, raise an error to exit early before running evaluations
|
|
31
32
|
params = sig.parameters
|
|
32
|
-
valid_named_params = {"input", "output", "expected", "metadata"}
|
|
33
|
+
valid_named_params = {"input", "output", "expected", "reference", "metadata"}
|
|
33
34
|
if len(params) == 0:
|
|
34
35
|
raise ValueError("Evaluation function must have at least one parameter.")
|
|
35
36
|
if len(params) > 1:
|
|
@@ -49,11 +50,12 @@ def validate_signature(sig: inspect.Signature) -> None:
|
|
|
49
50
|
)
|
|
50
51
|
|
|
51
52
|
|
|
52
|
-
def
|
|
53
|
+
def _bind_evaluator_signature(sig: inspect.Signature, **kwargs: Any) -> inspect.BoundArguments:
|
|
53
54
|
parameter_mapping = {
|
|
54
55
|
"input": kwargs.get("input"),
|
|
55
56
|
"output": kwargs.get("output"),
|
|
56
57
|
"expected": kwargs.get("expected"),
|
|
58
|
+
"reference": kwargs.get("reference"), # `reference` is an alias for `expected`
|
|
57
59
|
"metadata": kwargs.get("metadata"),
|
|
58
60
|
}
|
|
59
61
|
params = sig.parameters
|
|
@@ -82,16 +84,11 @@ def create_evaluator(
|
|
|
82
84
|
def wrapper(func: Callable[..., Any]) -> "Evaluator":
|
|
83
85
|
nonlocal name
|
|
84
86
|
if not name:
|
|
85
|
-
|
|
86
|
-
name = func.__self__.__class__.__name__
|
|
87
|
-
elif hasattr(func, "__name__"):
|
|
88
|
-
name = func.__name__
|
|
89
|
-
else:
|
|
90
|
-
name = str(func)
|
|
87
|
+
name = get_func_name(func)
|
|
91
88
|
assert name is not None
|
|
92
89
|
|
|
93
90
|
wrapped_signature = inspect.signature(func)
|
|
94
|
-
|
|
91
|
+
validate_evaluator_signature(wrapped_signature)
|
|
95
92
|
|
|
96
93
|
if inspect.iscoroutinefunction(func):
|
|
97
94
|
return _wrap_coroutine_evaluation_function(name, kind, wrapped_signature, scorer)(func)
|
|
@@ -120,7 +117,7 @@ def _wrap_coroutine_evaluation_function(
|
|
|
120
117
|
return await func(*args, **kwargs)
|
|
121
118
|
|
|
122
119
|
async def async_evaluate(self, **kwargs: Any) -> EvaluationResult:
|
|
123
|
-
bound_signature =
|
|
120
|
+
bound_signature = _bind_evaluator_signature(sig, **kwargs)
|
|
124
121
|
result = await func(*bound_signature.args, **bound_signature.kwargs)
|
|
125
122
|
return convert_to_score(result)
|
|
126
123
|
|
|
@@ -148,7 +145,7 @@ def _wrap_sync_evaluation_function(
|
|
|
148
145
|
return func(*args, **kwargs)
|
|
149
146
|
|
|
150
147
|
def evaluate(self, **kwargs: Any) -> EvaluationResult:
|
|
151
|
-
bound_signature =
|
|
148
|
+
bound_signature = _bind_evaluator_signature(sig, **kwargs)
|
|
152
149
|
result = func(*bound_signature.args, **bound_signature.kwargs)
|
|
153
150
|
return convert_to_score(result)
|
|
154
151
|
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import functools
|
|
2
|
+
import inspect
|
|
2
3
|
import json
|
|
4
|
+
import traceback
|
|
3
5
|
from binascii import hexlify
|
|
4
6
|
from contextlib import ExitStack
|
|
5
7
|
from copy import deepcopy
|
|
@@ -10,6 +12,7 @@ from typing import (
|
|
|
10
12
|
Any,
|
|
11
13
|
Awaitable,
|
|
12
14
|
Dict,
|
|
15
|
+
Literal,
|
|
13
16
|
Mapping,
|
|
14
17
|
Optional,
|
|
15
18
|
Sequence,
|
|
@@ -58,8 +61,8 @@ from phoenix.experiments.types import (
|
|
|
58
61
|
Experiment,
|
|
59
62
|
ExperimentEvaluationRun,
|
|
60
63
|
ExperimentParameters,
|
|
61
|
-
ExperimentResult,
|
|
62
64
|
ExperimentRun,
|
|
65
|
+
ExperimentRunOutput,
|
|
63
66
|
ExperimentTask,
|
|
64
67
|
RanExperiment,
|
|
65
68
|
TaskSummary,
|
|
@@ -67,7 +70,7 @@ from phoenix.experiments.types import (
|
|
|
67
70
|
_asdict,
|
|
68
71
|
_replace,
|
|
69
72
|
)
|
|
70
|
-
from phoenix.experiments.utils import get_dataset_experiments_url, get_experiment_url
|
|
73
|
+
from phoenix.experiments.utils import get_dataset_experiments_url, get_experiment_url, get_func_name
|
|
71
74
|
from phoenix.trace.attributes import flatten
|
|
72
75
|
from phoenix.utilities.json import jsonify
|
|
73
76
|
|
|
@@ -105,6 +108,61 @@ def run_experiment(
|
|
|
105
108
|
dry_run: Union[bool, int] = False,
|
|
106
109
|
print_summary: bool = True,
|
|
107
110
|
) -> RanExperiment:
|
|
111
|
+
"""
|
|
112
|
+
Runs an experiment using a given dataset of examples.
|
|
113
|
+
|
|
114
|
+
An experiment is a user-defined task that runs on each example in a dataset. The results from
|
|
115
|
+
each experiment can be evaluated using any number of evaluators to measure the behavior of the
|
|
116
|
+
task. The experiment and evaluation results are stored in the Phoenix database for comparison
|
|
117
|
+
and analysis.
|
|
118
|
+
|
|
119
|
+
A `task` is either a synchronous or asynchronous function that returns a JSON serializable
|
|
120
|
+
output. If the `task` is a function of one argument then that argument will be bound to the
|
|
121
|
+
`input` field of the dataset example. Alternatively, the `task` can be a function of any
|
|
122
|
+
combination of specific argument names that will be bound to special values:
|
|
123
|
+
`input`: The input field of the dataset example
|
|
124
|
+
`expected`: The expected or reference output of the dataset example
|
|
125
|
+
`reference`: An alias for `expected`
|
|
126
|
+
`metadata`: Metadata associated with the dataset example
|
|
127
|
+
`example`: The dataset `Example` object with all associated fields
|
|
128
|
+
|
|
129
|
+
An `evaluator` is either a synchronous or asynchronous function that returns either a boolean
|
|
130
|
+
or numeric "score". If the `evaluator` is a function of one argument then that argument will be
|
|
131
|
+
bound to the `output` of the task. Alternatively, the `evaluator` can be a function of any
|
|
132
|
+
combination of specific argument names that will be bound to special values:
|
|
133
|
+
`input`: The input field of the dataset example
|
|
134
|
+
`output`: The output of the task
|
|
135
|
+
`expected`: The expected or reference output of the dataset example
|
|
136
|
+
`reference`: An alias for `expected`
|
|
137
|
+
`metadata`: Metadata associated with the dataset example
|
|
138
|
+
|
|
139
|
+
Phoenix also provides pre-built evaluators in the `phoenix.experiments.evaluators` module.
|
|
140
|
+
|
|
141
|
+
Args:
|
|
142
|
+
dataset (Dataset): The dataset on which to run the experiment.
|
|
143
|
+
task (ExperimentTask): The task to run on each example in the dataset.
|
|
144
|
+
evaluators (Optional[Evaluators]): A single evaluator or sequence of evaluators used to
|
|
145
|
+
evaluate the results of the experiment. Defaults to None.
|
|
146
|
+
experiment_name (Optional[str]): The name of the experiment. Defaults to None.
|
|
147
|
+
experiment_description (Optional[str]): A description of the experiment. Defaults to None.
|
|
148
|
+
experiment_metadata (Optional[Mapping[str, Any]]): Metadata to associate with the
|
|
149
|
+
experiment. Defaults to None.
|
|
150
|
+
rate_limit_errors (Optional[BaseException | Sequence[BaseException]]): An exception or
|
|
151
|
+
sequence of exceptions to adaptively throttle on. Defaults to None.
|
|
152
|
+
dry_run (bool | int): Run the experiment in dry-run mode. When set, experiment results will
|
|
153
|
+
not be recorded in Phoenix. If True, the experiment will run on a random dataset
|
|
154
|
+
example. If an integer, the experiment will run on a random sample of the dataset
|
|
155
|
+
examples of the given size. Defaults to False.
|
|
156
|
+
print_summary (bool): Whether to print a summary of the experiment and evaluation results.
|
|
157
|
+
Defaults to True.
|
|
158
|
+
|
|
159
|
+
Returns:
|
|
160
|
+
RanExperiment: The results of the experiment and evaluation. Additional evaluations can be
|
|
161
|
+
added to the experiment using the `evaluate_experiment` function.
|
|
162
|
+
"""
|
|
163
|
+
task_signature = inspect.signature(task)
|
|
164
|
+
_validate_task_signature(task_signature)
|
|
165
|
+
|
|
108
166
|
if not dataset.examples:
|
|
109
167
|
raise ValueError(f"Dataset has no examples: {dataset.id=}, {dataset.version_id=}")
|
|
110
168
|
# Add this to the params once supported in the UI
|
|
@@ -146,7 +204,7 @@ def run_experiment(
|
|
|
146
204
|
)
|
|
147
205
|
|
|
148
206
|
tracer, resource = _get_tracer(experiment.project_name)
|
|
149
|
-
root_span_name = f"Task: {
|
|
207
|
+
root_span_name = f"Task: {get_func_name(task)}"
|
|
150
208
|
root_span_kind = CHAIN
|
|
151
209
|
|
|
152
210
|
print("🧪 Experiment started.")
|
|
@@ -183,25 +241,37 @@ def run_experiment(
|
|
|
183
241
|
# Do not use keyword arguments, which can fail at runtime
|
|
184
242
|
# even when function obeys protocol, because keyword arguments
|
|
185
243
|
# are implementation details.
|
|
186
|
-
|
|
244
|
+
bound_task_args = _bind_task_signature(task_signature, example)
|
|
245
|
+
_output = task(*bound_task_args.args, **bound_task_args.kwargs)
|
|
187
246
|
if isinstance(_output, Awaitable):
|
|
188
|
-
|
|
247
|
+
sync_error_message = (
|
|
248
|
+
"Task is async and cannot be run within an existing event loop. "
|
|
249
|
+
"Consider the following options:\n\n"
|
|
250
|
+
"1. Pass in a synchronous task callable.\n"
|
|
251
|
+
"2. Use `nest_asyncio.apply()` to allow nesting event loops."
|
|
252
|
+
)
|
|
253
|
+
raise RuntimeError(sync_error_message)
|
|
189
254
|
else:
|
|
190
255
|
output = _output
|
|
191
256
|
except BaseException as exc:
|
|
192
257
|
span.record_exception(exc)
|
|
193
258
|
status = Status(StatusCode.ERROR, f"{type(exc).__name__}: {exc}")
|
|
194
259
|
error = exc
|
|
260
|
+
_print_experiment_error(
|
|
261
|
+
exc,
|
|
262
|
+
example_id=example.id,
|
|
263
|
+
repetition_number=repetition_number,
|
|
264
|
+
kind="task",
|
|
265
|
+
)
|
|
195
266
|
output = jsonify(output)
|
|
196
267
|
span.set_attribute(INPUT_VALUE, json.dumps(example.input, ensure_ascii=False))
|
|
197
268
|
span.set_attribute(INPUT_MIME_TYPE, JSON.value)
|
|
198
|
-
if
|
|
269
|
+
if output is not None:
|
|
199
270
|
if isinstance(output, str):
|
|
200
271
|
span.set_attribute(OUTPUT_VALUE, output)
|
|
201
272
|
else:
|
|
202
273
|
span.set_attribute(OUTPUT_VALUE, json.dumps(output, ensure_ascii=False))
|
|
203
274
|
span.set_attribute(OUTPUT_MIME_TYPE, JSON.value)
|
|
204
|
-
span.set_attributes(dict(flatten(jsonify(result), recurse_on_sequence=True)))
|
|
205
275
|
span.set_attribute(SpanAttributes.OPENINFERENCE_SPAN_KIND, root_span_kind)
|
|
206
276
|
span.set_status(status)
|
|
207
277
|
|
|
@@ -214,7 +284,7 @@ def run_experiment(
|
|
|
214
284
|
experiment_id=experiment.id,
|
|
215
285
|
dataset_example_id=example.id,
|
|
216
286
|
repetition_number=repetition_number,
|
|
217
|
-
output
|
|
287
|
+
experiment_run_output=ExperimentRunOutput(task_output=output),
|
|
218
288
|
error=repr(error) if error else None,
|
|
219
289
|
trace_id=_str_trace_id(span.get_span_context().trace_id), # type: ignore[no-untyped-call]
|
|
220
290
|
)
|
|
@@ -238,7 +308,8 @@ def run_experiment(
|
|
|
238
308
|
# Do not use keyword arguments, which can fail at runtime
|
|
239
309
|
# even when function obeys protocol, because keyword arguments
|
|
240
310
|
# are implementation details.
|
|
241
|
-
|
|
311
|
+
bound_task_args = _bind_task_signature(task_signature, example)
|
|
312
|
+
_output = task(*bound_task_args.args, **bound_task_args.kwargs)
|
|
242
313
|
if isinstance(_output, Awaitable):
|
|
243
314
|
output = await _output
|
|
244
315
|
else:
|
|
@@ -247,16 +318,21 @@ def run_experiment(
|
|
|
247
318
|
span.record_exception(exc)
|
|
248
319
|
status = Status(StatusCode.ERROR, f"{type(exc).__name__}: {exc}")
|
|
249
320
|
error = exc
|
|
321
|
+
_print_experiment_error(
|
|
322
|
+
exc,
|
|
323
|
+
example_id=example.id,
|
|
324
|
+
repetition_number=repetition_number,
|
|
325
|
+
kind="task",
|
|
326
|
+
)
|
|
250
327
|
output = jsonify(output)
|
|
251
328
|
span.set_attribute(INPUT_VALUE, json.dumps(example.input, ensure_ascii=False))
|
|
252
329
|
span.set_attribute(INPUT_MIME_TYPE, JSON.value)
|
|
253
|
-
if
|
|
330
|
+
if output is not None:
|
|
254
331
|
if isinstance(output, str):
|
|
255
332
|
span.set_attribute(OUTPUT_VALUE, output)
|
|
256
333
|
else:
|
|
257
334
|
span.set_attribute(OUTPUT_VALUE, json.dumps(output, ensure_ascii=False))
|
|
258
335
|
span.set_attribute(OUTPUT_MIME_TYPE, JSON.value)
|
|
259
|
-
span.set_attributes(dict(flatten(jsonify(result), recurse_on_sequence=True)))
|
|
260
336
|
span.set_attribute(OPENINFERENCE_SPAN_KIND, root_span_kind)
|
|
261
337
|
span.set_status(status)
|
|
262
338
|
|
|
@@ -269,7 +345,7 @@ def run_experiment(
|
|
|
269
345
|
experiment_id=experiment.id,
|
|
270
346
|
dataset_example_id=example.id,
|
|
271
347
|
repetition_number=repetition_number,
|
|
272
|
-
output
|
|
348
|
+
experiment_run_output=ExperimentRunOutput(task_output=output),
|
|
273
349
|
error=repr(error) if error else None,
|
|
274
350
|
trace_id=_str_trace_id(span.get_span_context().trace_id), # type: ignore[no-untyped-call]
|
|
275
351
|
)
|
|
@@ -422,8 +498,9 @@ def evaluate_experiment(
|
|
|
422
498
|
stack.enter_context(capture_spans(resource))
|
|
423
499
|
try:
|
|
424
500
|
result = evaluator.evaluate(
|
|
425
|
-
output=experiment_run.
|
|
501
|
+
output=experiment_run.output,
|
|
426
502
|
expected=example.output,
|
|
503
|
+
reference=example.output,
|
|
427
504
|
input=example.input,
|
|
428
505
|
metadata=example.metadata,
|
|
429
506
|
)
|
|
@@ -431,6 +508,12 @@ def evaluate_experiment(
|
|
|
431
508
|
span.record_exception(exc)
|
|
432
509
|
status = Status(StatusCode.ERROR, f"{type(exc).__name__}: {exc}")
|
|
433
510
|
error = exc
|
|
511
|
+
_print_experiment_error(
|
|
512
|
+
exc,
|
|
513
|
+
example_id=example.id,
|
|
514
|
+
repetition_number=experiment_run.repetition_number,
|
|
515
|
+
kind="evaluator",
|
|
516
|
+
)
|
|
434
517
|
if result:
|
|
435
518
|
span.set_attributes(dict(flatten(jsonify(result), recurse_on_sequence=True)))
|
|
436
519
|
span.set_attribute(OPENINFERENCE_SPAN_KIND, root_span_kind)
|
|
@@ -467,8 +550,9 @@ def evaluate_experiment(
|
|
|
467
550
|
stack.enter_context(capture_spans(resource))
|
|
468
551
|
try:
|
|
469
552
|
result = await evaluator.async_evaluate(
|
|
470
|
-
output=experiment_run.
|
|
553
|
+
output=experiment_run.output,
|
|
471
554
|
expected=example.output,
|
|
555
|
+
reference=example.output,
|
|
472
556
|
input=example.input,
|
|
473
557
|
metadata=example.metadata,
|
|
474
558
|
)
|
|
@@ -476,6 +560,12 @@ def evaluate_experiment(
|
|
|
476
560
|
span.record_exception(exc)
|
|
477
561
|
status = Status(StatusCode.ERROR, f"{type(exc).__name__}: {exc}")
|
|
478
562
|
error = exc
|
|
563
|
+
_print_experiment_error(
|
|
564
|
+
exc,
|
|
565
|
+
example_id=example.id,
|
|
566
|
+
repetition_number=experiment_run.repetition_number,
|
|
567
|
+
kind="evaluator",
|
|
568
|
+
)
|
|
479
569
|
if result:
|
|
480
570
|
span.set_attributes(dict(flatten(jsonify(result), recurse_on_sequence=True)))
|
|
481
571
|
span.set_attribute(OPENINFERENCE_SPAN_KIND, root_span_kind)
|
|
@@ -584,20 +674,71 @@ def _decode_unix_nano(time_unix_nano: int) -> datetime:
|
|
|
584
674
|
return datetime.fromtimestamp(time_unix_nano / 1e9, tz=timezone.utc)
|
|
585
675
|
|
|
586
676
|
|
|
587
|
-
def
|
|
588
|
-
""
|
|
589
|
-
Makes a best-effort attempt to get the name of the task.
|
|
590
|
-
"""
|
|
677
|
+
def _is_dry_run(obj: Any) -> bool:
|
|
678
|
+
return hasattr(obj, "id") and isinstance(obj.id, str) and obj.id.startswith(DRY_RUN)
|
|
591
679
|
|
|
592
|
-
if isinstance(task, functools.partial):
|
|
593
|
-
return task.func.__qualname__
|
|
594
|
-
if hasattr(task, "__qualname__"):
|
|
595
|
-
return task.__qualname__
|
|
596
|
-
return str(task)
|
|
597
680
|
|
|
681
|
+
def _validate_task_signature(sig: inspect.Signature) -> None:
|
|
682
|
+
# Check that the function has a valid signature for use as a task
|
|
683
|
+
# If it does not, raise an error to exit early before running an experiment
|
|
684
|
+
params = sig.parameters
|
|
685
|
+
valid_named_params = {"input", "expected", "reference", "metadata", "example"}
|
|
686
|
+
if len(params) == 0:
|
|
687
|
+
raise ValueError("Task function must have at least one parameter.")
|
|
688
|
+
if len(params) > 1:
|
|
689
|
+
for not_found in set(params) - valid_named_params:
|
|
690
|
+
param = params[not_found]
|
|
691
|
+
if (
|
|
692
|
+
param.kind is inspect.Parameter.VAR_KEYWORD
|
|
693
|
+
or param.default is not inspect.Parameter.empty
|
|
694
|
+
):
|
|
695
|
+
continue
|
|
696
|
+
raise ValueError(
|
|
697
|
+
(
|
|
698
|
+
f"Invalid parameter names in task function: {', '.join(not_found)}. "
|
|
699
|
+
"Parameters names for multi-argument functions must be "
|
|
700
|
+
f"any of: {', '.join(valid_named_params)}."
|
|
701
|
+
)
|
|
702
|
+
)
|
|
598
703
|
|
|
599
|
-
|
|
600
|
-
|
|
704
|
+
|
|
705
|
+
def _bind_task_signature(sig: inspect.Signature, example: Example) -> inspect.BoundArguments:
|
|
706
|
+
parameter_mapping = {
|
|
707
|
+
"input": example.input,
|
|
708
|
+
"expected": example.output,
|
|
709
|
+
"reference": example.output, # Alias for "expected"
|
|
710
|
+
"metadata": example.metadata,
|
|
711
|
+
"example": example,
|
|
712
|
+
}
|
|
713
|
+
params = sig.parameters
|
|
714
|
+
if len(params) == 1:
|
|
715
|
+
parameter_name = next(iter(params))
|
|
716
|
+
if parameter_name in parameter_mapping:
|
|
717
|
+
return sig.bind(parameter_mapping[parameter_name])
|
|
718
|
+
else:
|
|
719
|
+
return sig.bind(parameter_mapping["input"])
|
|
720
|
+
return sig.bind_partial(
|
|
721
|
+
**{name: parameter_mapping[name] for name in set(parameter_mapping).intersection(params)}
|
|
722
|
+
)
|
|
723
|
+
|
|
724
|
+
|
|
725
|
+
def _print_experiment_error(
|
|
726
|
+
error: BaseException,
|
|
727
|
+
/,
|
|
728
|
+
*,
|
|
729
|
+
example_id: str,
|
|
730
|
+
repetition_number: int,
|
|
731
|
+
kind: Literal["evaluator", "task"],
|
|
732
|
+
) -> None:
|
|
733
|
+
"""
|
|
734
|
+
Prints an experiment error.
|
|
735
|
+
"""
|
|
736
|
+
display_error = RuntimeError(
|
|
737
|
+
f"{kind} failed for example id {repr(example_id)}, " f"repetition {repr(repetition_number)}"
|
|
738
|
+
)
|
|
739
|
+
display_error.__cause__ = error
|
|
740
|
+
formatted_exception = "".join(traceback.format_exception(display_error)) # type: ignore[arg-type, call-arg, unused-ignore]
|
|
741
|
+
print("\033[91m" + formatted_exception + "\033[0m") # prints in red
|
|
601
742
|
|
|
602
743
|
|
|
603
744
|
class _NoOpProcessor(trace_sdk.SpanProcessor):
|
|
@@ -103,9 +103,9 @@ class Example:
|
|
|
103
103
|
identifiers = [f'{spaces}id="{self.id}",']
|
|
104
104
|
contents = [
|
|
105
105
|
spaces
|
|
106
|
-
+ f"{
|
|
106
|
+
+ f"{_blue(key)}="
|
|
107
107
|
+ json.dumps(
|
|
108
|
-
_shorten(
|
|
108
|
+
_shorten(value),
|
|
109
109
|
ensure_ascii=False,
|
|
110
110
|
sort_keys=True,
|
|
111
111
|
indent=len(spaces),
|
|
@@ -113,8 +113,8 @@ class Example:
|
|
|
113
113
|
.replace("\n", f"\n{spaces}")
|
|
114
114
|
.replace(' "..."\n', " ...\n")
|
|
115
115
|
+ ","
|
|
116
|
-
for
|
|
117
|
-
if (
|
|
116
|
+
for key in ("input", "output", "metadata")
|
|
117
|
+
if (value := getattr(self, key, None))
|
|
118
118
|
]
|
|
119
119
|
return "\n".join([f"{name}(", *identifiers, *contents, ")"])
|
|
120
120
|
|
|
@@ -199,17 +199,17 @@ class Experiment:
|
|
|
199
199
|
|
|
200
200
|
|
|
201
201
|
@dataclass(frozen=True)
|
|
202
|
-
class
|
|
203
|
-
|
|
202
|
+
class ExperimentRunOutput:
|
|
203
|
+
task_output: TaskOutput
|
|
204
204
|
|
|
205
205
|
def __post_init__(self) -> None:
|
|
206
|
-
object.__setattr__(self, "
|
|
206
|
+
object.__setattr__(self, "task_output", _make_read_only(self.task_output))
|
|
207
207
|
|
|
208
208
|
@classmethod
|
|
209
|
-
def from_dict(cls, obj: Optional[Mapping[str, Any]]) ->
|
|
209
|
+
def from_dict(cls, obj: Optional[Mapping[str, Any]]) -> ExperimentRunOutput:
|
|
210
210
|
if not obj:
|
|
211
|
-
return None
|
|
212
|
-
return cls(
|
|
211
|
+
return cls(task_output=None)
|
|
212
|
+
return cls(task_output=obj["task_output"])
|
|
213
213
|
|
|
214
214
|
|
|
215
215
|
@dataclass(frozen=True)
|
|
@@ -219,14 +219,14 @@ class ExperimentRun:
|
|
|
219
219
|
experiment_id: ExperimentId
|
|
220
220
|
dataset_example_id: ExampleId
|
|
221
221
|
repetition_number: RepetitionNumber
|
|
222
|
-
|
|
222
|
+
experiment_run_output: ExperimentRunOutput
|
|
223
223
|
error: Optional[str] = None
|
|
224
224
|
id: ExperimentRunId = field(default_factory=_dry_run_id)
|
|
225
225
|
trace_id: Optional[TraceId] = None
|
|
226
226
|
|
|
227
227
|
@property
|
|
228
|
-
def
|
|
229
|
-
return deepcopy(self.
|
|
228
|
+
def output(self) -> Optional[TaskOutput]:
|
|
229
|
+
return deepcopy(self.experiment_run_output.task_output)
|
|
230
230
|
|
|
231
231
|
@classmethod
|
|
232
232
|
def from_dict(cls, obj: Mapping[str, Any]) -> ExperimentRun:
|
|
@@ -236,15 +236,15 @@ class ExperimentRun:
|
|
|
236
236
|
experiment_id=obj["experiment_id"],
|
|
237
237
|
dataset_example_id=obj["dataset_example_id"],
|
|
238
238
|
repetition_number=obj.get("repetition_number") or 1,
|
|
239
|
-
|
|
239
|
+
experiment_run_output=ExperimentRunOutput.from_dict(obj["experiment_run_output"]),
|
|
240
240
|
error=obj.get("error"),
|
|
241
241
|
id=obj["id"],
|
|
242
242
|
trace_id=obj.get("trace_id"),
|
|
243
243
|
)
|
|
244
244
|
|
|
245
245
|
def __post_init__(self) -> None:
|
|
246
|
-
if bool(self.
|
|
247
|
-
ValueError("Must specify
|
|
246
|
+
if bool(self.experiment_run_output) == bool(self.error):
|
|
247
|
+
ValueError("Must specify exactly one of experiment_run_output or error")
|
|
248
248
|
|
|
249
249
|
|
|
250
250
|
@dataclass(frozen=True)
|
|
@@ -571,7 +571,7 @@ class RanExperiment(Experiment):
|
|
|
571
571
|
{
|
|
572
572
|
"run_id": run.id,
|
|
573
573
|
"error": run.error,
|
|
574
|
-
"
|
|
574
|
+
"output": deepcopy(run.experiment_run_output.task_output),
|
|
575
575
|
"input": deepcopy((ex := self.dataset.examples[run.dataset_example_id]).input),
|
|
576
576
|
"expected": deepcopy(ex.output),
|
|
577
577
|
"metadata": deepcopy(ex.metadata),
|
|
@@ -688,6 +688,10 @@ class _ExperimentRunWithExample(ObjectProxy): # type: ignore[misc]
|
|
|
688
688
|
def expected(self) -> ExampleOutput:
|
|
689
689
|
return deepcopy(self._self_example.output)
|
|
690
690
|
|
|
691
|
+
@property
|
|
692
|
+
def reference(self) -> ExampleOutput:
|
|
693
|
+
return deepcopy(self._self_example.output)
|
|
694
|
+
|
|
691
695
|
@property
|
|
692
696
|
def input(self) -> ExampleInput:
|
|
693
697
|
return deepcopy(self._self_example.input)
|
|
@@ -703,20 +707,47 @@ class _ExperimentRunWithExample(ObjectProxy): # type: ignore[misc]
|
|
|
703
707
|
f'{spaces}id="{self.id}",',
|
|
704
708
|
f'{spaces}example_id="{self.dataset_example_id}",',
|
|
705
709
|
]
|
|
706
|
-
|
|
710
|
+
outputs = [
|
|
711
|
+
*([f'{spaces}error="{self.error}",'] if self.error else []),
|
|
712
|
+
*(
|
|
713
|
+
[
|
|
714
|
+
f"{spaces}{_blue('output')}="
|
|
715
|
+
+ json.dumps(
|
|
716
|
+
_shorten(self.output),
|
|
717
|
+
ensure_ascii=False,
|
|
718
|
+
sort_keys=True,
|
|
719
|
+
indent=len(spaces),
|
|
720
|
+
)
|
|
721
|
+
.replace("\n", f"\n{spaces}")
|
|
722
|
+
.replace(' "..."\n', " ...\n")
|
|
723
|
+
]
|
|
724
|
+
if not self.error
|
|
725
|
+
else []
|
|
726
|
+
),
|
|
727
|
+
]
|
|
728
|
+
dicts = [
|
|
707
729
|
spaces
|
|
708
|
-
+ f"{
|
|
709
|
-
+
|
|
730
|
+
+ f"{_blue(alias)}={{"
|
|
731
|
+
+ (f" # {comment}" if comment else "")
|
|
732
|
+
+ json.dumps(
|
|
733
|
+
_shorten(value),
|
|
734
|
+
ensure_ascii=False,
|
|
735
|
+
sort_keys=True,
|
|
736
|
+
indent=len(spaces),
|
|
737
|
+
)[1:]
|
|
710
738
|
.replace("\n", f"\n{spaces}")
|
|
711
739
|
.replace(' "..."\n', " ...\n")
|
|
712
740
|
+ ","
|
|
713
|
-
for
|
|
714
|
-
"
|
|
715
|
-
"
|
|
716
|
-
"
|
|
717
|
-
"
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
if v
|
|
741
|
+
for alias, value, comment in (
|
|
742
|
+
("expected", self.expected, f"alias for the example.{_blue('output')} dict"),
|
|
743
|
+
("reference", self.reference, f"alias for the example.{_blue('output')} dict"),
|
|
744
|
+
("input", self.input, f"alias for the example.{_blue('input')} dict"),
|
|
745
|
+
("metadata", self.metadata, f"alias for the example.{_blue('metadata')} dict"),
|
|
746
|
+
)
|
|
747
|
+
if value
|
|
721
748
|
]
|
|
722
|
-
return "\n".join([f"{name}(", *identifiers, *
|
|
749
|
+
return "\n".join([f"{name}(", *identifiers, *outputs, *dicts, ")"])
|
|
750
|
+
|
|
751
|
+
|
|
752
|
+
def _blue(text: str) -> str:
|
|
753
|
+
return f"\033[1m\033[94m{text}\033[0m"
|