arize-phoenix 3.16.1__py3-none-any.whl → 7.7.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries; it is provided for informational purposes only.
- arize_phoenix-7.7.0.dist-info/METADATA +261 -0
- arize_phoenix-7.7.0.dist-info/RECORD +345 -0
- {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.0.dist-info}/WHEEL +1 -1
- arize_phoenix-7.7.0.dist-info/entry_points.txt +3 -0
- phoenix/__init__.py +86 -14
- phoenix/auth.py +309 -0
- phoenix/config.py +675 -45
- phoenix/core/model.py +32 -30
- phoenix/core/model_schema.py +102 -109
- phoenix/core/model_schema_adapter.py +48 -45
- phoenix/datetime_utils.py +24 -3
- phoenix/db/README.md +54 -0
- phoenix/db/__init__.py +4 -0
- phoenix/db/alembic.ini +85 -0
- phoenix/db/bulk_inserter.py +294 -0
- phoenix/db/engines.py +208 -0
- phoenix/db/enums.py +20 -0
- phoenix/db/facilitator.py +113 -0
- phoenix/db/helpers.py +159 -0
- phoenix/db/insertion/constants.py +2 -0
- phoenix/db/insertion/dataset.py +227 -0
- phoenix/db/insertion/document_annotation.py +171 -0
- phoenix/db/insertion/evaluation.py +191 -0
- phoenix/db/insertion/helpers.py +98 -0
- phoenix/db/insertion/span.py +193 -0
- phoenix/db/insertion/span_annotation.py +158 -0
- phoenix/db/insertion/trace_annotation.py +158 -0
- phoenix/db/insertion/types.py +256 -0
- phoenix/db/migrate.py +86 -0
- phoenix/db/migrations/data_migration_scripts/populate_project_sessions.py +199 -0
- phoenix/db/migrations/env.py +114 -0
- phoenix/db/migrations/script.py.mako +26 -0
- phoenix/db/migrations/versions/10460e46d750_datasets.py +317 -0
- phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py +126 -0
- phoenix/db/migrations/versions/4ded9e43755f_create_project_sessions_table.py +66 -0
- phoenix/db/migrations/versions/cd164e83824f_users_and_tokens.py +157 -0
- phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
- phoenix/db/models.py +807 -0
- phoenix/exceptions.py +5 -1
- phoenix/experiments/__init__.py +6 -0
- phoenix/experiments/evaluators/__init__.py +29 -0
- phoenix/experiments/evaluators/base.py +158 -0
- phoenix/experiments/evaluators/code_evaluators.py +184 -0
- phoenix/experiments/evaluators/llm_evaluators.py +473 -0
- phoenix/experiments/evaluators/utils.py +236 -0
- phoenix/experiments/functions.py +772 -0
- phoenix/experiments/tracing.py +86 -0
- phoenix/experiments/types.py +726 -0
- phoenix/experiments/utils.py +25 -0
- phoenix/inferences/__init__.py +0 -0
- phoenix/{datasets → inferences}/errors.py +6 -5
- phoenix/{datasets → inferences}/fixtures.py +49 -42
- phoenix/{datasets/dataset.py → inferences/inferences.py} +121 -105
- phoenix/{datasets → inferences}/schema.py +11 -11
- phoenix/{datasets → inferences}/validation.py +13 -14
- phoenix/logging/__init__.py +3 -0
- phoenix/logging/_config.py +90 -0
- phoenix/logging/_filter.py +6 -0
- phoenix/logging/_formatter.py +69 -0
- phoenix/metrics/__init__.py +5 -4
- phoenix/metrics/binning.py +4 -3
- phoenix/metrics/metrics.py +2 -1
- phoenix/metrics/mixins.py +7 -6
- phoenix/metrics/retrieval_metrics.py +2 -1
- phoenix/metrics/timeseries.py +5 -4
- phoenix/metrics/wrappers.py +9 -3
- phoenix/pointcloud/clustering.py +5 -5
- phoenix/pointcloud/pointcloud.py +7 -5
- phoenix/pointcloud/projectors.py +5 -6
- phoenix/pointcloud/umap_parameters.py +53 -52
- phoenix/server/api/README.md +28 -0
- phoenix/server/api/auth.py +44 -0
- phoenix/server/api/context.py +152 -9
- phoenix/server/api/dataloaders/__init__.py +91 -0
- phoenix/server/api/dataloaders/annotation_summaries.py +139 -0
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
- phoenix/server/api/dataloaders/cache/__init__.py +3 -0
- phoenix/server/api/dataloaders/cache/two_tier_cache.py +68 -0
- phoenix/server/api/dataloaders/dataset_example_revisions.py +131 -0
- phoenix/server/api/dataloaders/dataset_example_spans.py +38 -0
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +144 -0
- phoenix/server/api/dataloaders/document_evaluations.py +31 -0
- phoenix/server/api/dataloaders/document_retrieval_metrics.py +89 -0
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +79 -0
- phoenix/server/api/dataloaders/experiment_error_rates.py +58 -0
- phoenix/server/api/dataloaders/experiment_run_annotations.py +36 -0
- phoenix/server/api/dataloaders/experiment_run_counts.py +49 -0
- phoenix/server/api/dataloaders/experiment_sequence_number.py +44 -0
- phoenix/server/api/dataloaders/latency_ms_quantile.py +188 -0
- phoenix/server/api/dataloaders/min_start_or_max_end_times.py +85 -0
- phoenix/server/api/dataloaders/project_by_name.py +31 -0
- phoenix/server/api/dataloaders/record_counts.py +116 -0
- phoenix/server/api/dataloaders/session_io.py +79 -0
- phoenix/server/api/dataloaders/session_num_traces.py +30 -0
- phoenix/server/api/dataloaders/session_num_traces_with_error.py +32 -0
- phoenix/server/api/dataloaders/session_token_usages.py +41 -0
- phoenix/server/api/dataloaders/session_trace_latency_ms_quantile.py +55 -0
- phoenix/server/api/dataloaders/span_annotations.py +26 -0
- phoenix/server/api/dataloaders/span_dataset_examples.py +31 -0
- phoenix/server/api/dataloaders/span_descendants.py +57 -0
- phoenix/server/api/dataloaders/span_projects.py +33 -0
- phoenix/server/api/dataloaders/token_counts.py +124 -0
- phoenix/server/api/dataloaders/trace_by_trace_ids.py +25 -0
- phoenix/server/api/dataloaders/trace_root_spans.py +32 -0
- phoenix/server/api/dataloaders/user_roles.py +30 -0
- phoenix/server/api/dataloaders/users.py +33 -0
- phoenix/server/api/exceptions.py +48 -0
- phoenix/server/api/helpers/__init__.py +12 -0
- phoenix/server/api/helpers/dataset_helpers.py +217 -0
- phoenix/server/api/helpers/experiment_run_filters.py +763 -0
- phoenix/server/api/helpers/playground_clients.py +948 -0
- phoenix/server/api/helpers/playground_registry.py +70 -0
- phoenix/server/api/helpers/playground_spans.py +455 -0
- phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
- phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
- phoenix/server/api/input_types/ChatCompletionInput.py +38 -0
- phoenix/server/api/input_types/ChatCompletionMessageInput.py +24 -0
- phoenix/server/api/input_types/ClearProjectInput.py +15 -0
- phoenix/server/api/input_types/ClusterInput.py +2 -2
- phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
- phoenix/server/api/input_types/CreateSpanAnnotationInput.py +18 -0
- phoenix/server/api/input_types/CreateTraceAnnotationInput.py +18 -0
- phoenix/server/api/input_types/DataQualityMetricInput.py +5 -2
- phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
- phoenix/server/api/input_types/DatasetSort.py +17 -0
- phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
- phoenix/server/api/input_types/DeleteAnnotationsInput.py +7 -0
- phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
- phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
- phoenix/server/api/input_types/DeleteExperimentsInput.py +7 -0
- phoenix/server/api/input_types/DimensionFilter.py +4 -4
- phoenix/server/api/input_types/GenerativeModelInput.py +17 -0
- phoenix/server/api/input_types/Granularity.py +1 -1
- phoenix/server/api/input_types/InvocationParameters.py +162 -0
- phoenix/server/api/input_types/PatchAnnotationInput.py +19 -0
- phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
- phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
- phoenix/server/api/input_types/PerformanceMetricInput.py +5 -2
- phoenix/server/api/input_types/ProjectSessionSort.py +29 -0
- phoenix/server/api/input_types/SpanAnnotationSort.py +17 -0
- phoenix/server/api/input_types/SpanSort.py +134 -69
- phoenix/server/api/input_types/TemplateOptions.py +10 -0
- phoenix/server/api/input_types/TraceAnnotationSort.py +17 -0
- phoenix/server/api/input_types/UserRoleInput.py +9 -0
- phoenix/server/api/mutations/__init__.py +28 -0
- phoenix/server/api/mutations/api_key_mutations.py +167 -0
- phoenix/server/api/mutations/chat_mutations.py +593 -0
- phoenix/server/api/mutations/dataset_mutations.py +591 -0
- phoenix/server/api/mutations/experiment_mutations.py +75 -0
- phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +21 -18
- phoenix/server/api/mutations/project_mutations.py +57 -0
- phoenix/server/api/mutations/span_annotations_mutations.py +128 -0
- phoenix/server/api/mutations/trace_annotations_mutations.py +127 -0
- phoenix/server/api/mutations/user_mutations.py +329 -0
- phoenix/server/api/openapi/__init__.py +0 -0
- phoenix/server/api/openapi/main.py +17 -0
- phoenix/server/api/openapi/schema.py +16 -0
- phoenix/server/api/queries.py +738 -0
- phoenix/server/api/routers/__init__.py +11 -0
- phoenix/server/api/routers/auth.py +284 -0
- phoenix/server/api/routers/embeddings.py +26 -0
- phoenix/server/api/routers/oauth2.py +488 -0
- phoenix/server/api/routers/v1/__init__.py +64 -0
- phoenix/server/api/routers/v1/datasets.py +1017 -0
- phoenix/server/api/routers/v1/evaluations.py +362 -0
- phoenix/server/api/routers/v1/experiment_evaluations.py +115 -0
- phoenix/server/api/routers/v1/experiment_runs.py +167 -0
- phoenix/server/api/routers/v1/experiments.py +308 -0
- phoenix/server/api/routers/v1/pydantic_compat.py +78 -0
- phoenix/server/api/routers/v1/spans.py +267 -0
- phoenix/server/api/routers/v1/traces.py +208 -0
- phoenix/server/api/routers/v1/utils.py +95 -0
- phoenix/server/api/schema.py +44 -241
- phoenix/server/api/subscriptions.py +597 -0
- phoenix/server/api/types/Annotation.py +21 -0
- phoenix/server/api/types/AnnotationSummary.py +55 -0
- phoenix/server/api/types/AnnotatorKind.py +16 -0
- phoenix/server/api/types/ApiKey.py +27 -0
- phoenix/server/api/types/AuthMethod.py +9 -0
- phoenix/server/api/types/ChatCompletionMessageRole.py +11 -0
- phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +46 -0
- phoenix/server/api/types/Cluster.py +25 -24
- phoenix/server/api/types/CreateDatasetPayload.py +8 -0
- phoenix/server/api/types/DataQualityMetric.py +31 -13
- phoenix/server/api/types/Dataset.py +288 -63
- phoenix/server/api/types/DatasetExample.py +85 -0
- phoenix/server/api/types/DatasetExampleRevision.py +34 -0
- phoenix/server/api/types/DatasetVersion.py +14 -0
- phoenix/server/api/types/Dimension.py +32 -31
- phoenix/server/api/types/DocumentEvaluationSummary.py +9 -8
- phoenix/server/api/types/EmbeddingDimension.py +56 -49
- phoenix/server/api/types/Evaluation.py +25 -31
- phoenix/server/api/types/EvaluationSummary.py +30 -50
- phoenix/server/api/types/Event.py +20 -20
- phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
- phoenix/server/api/types/Experiment.py +152 -0
- phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
- phoenix/server/api/types/ExperimentComparison.py +17 -0
- phoenix/server/api/types/ExperimentRun.py +119 -0
- phoenix/server/api/types/ExperimentRunAnnotation.py +56 -0
- phoenix/server/api/types/GenerativeModel.py +9 -0
- phoenix/server/api/types/GenerativeProvider.py +85 -0
- phoenix/server/api/types/Inferences.py +80 -0
- phoenix/server/api/types/InferencesRole.py +23 -0
- phoenix/server/api/types/LabelFraction.py +7 -0
- phoenix/server/api/types/MimeType.py +2 -2
- phoenix/server/api/types/Model.py +54 -54
- phoenix/server/api/types/PerformanceMetric.py +8 -5
- phoenix/server/api/types/Project.py +407 -142
- phoenix/server/api/types/ProjectSession.py +139 -0
- phoenix/server/api/types/Segments.py +4 -4
- phoenix/server/api/types/Span.py +221 -176
- phoenix/server/api/types/SpanAnnotation.py +43 -0
- phoenix/server/api/types/SpanIOValue.py +15 -0
- phoenix/server/api/types/SystemApiKey.py +9 -0
- phoenix/server/api/types/TemplateLanguage.py +10 -0
- phoenix/server/api/types/TimeSeries.py +19 -15
- phoenix/server/api/types/TokenUsage.py +11 -0
- phoenix/server/api/types/Trace.py +154 -0
- phoenix/server/api/types/TraceAnnotation.py +45 -0
- phoenix/server/api/types/UMAPPoints.py +7 -7
- phoenix/server/api/types/User.py +60 -0
- phoenix/server/api/types/UserApiKey.py +45 -0
- phoenix/server/api/types/UserRole.py +15 -0
- phoenix/server/api/types/node.py +4 -112
- phoenix/server/api/types/pagination.py +156 -57
- phoenix/server/api/utils.py +34 -0
- phoenix/server/app.py +864 -115
- phoenix/server/bearer_auth.py +163 -0
- phoenix/server/dml_event.py +136 -0
- phoenix/server/dml_event_handler.py +256 -0
- phoenix/server/email/__init__.py +0 -0
- phoenix/server/email/sender.py +97 -0
- phoenix/server/email/templates/__init__.py +0 -0
- phoenix/server/email/templates/password_reset.html +19 -0
- phoenix/server/email/types.py +11 -0
- phoenix/server/grpc_server.py +102 -0
- phoenix/server/jwt_store.py +505 -0
- phoenix/server/main.py +305 -116
- phoenix/server/oauth2.py +52 -0
- phoenix/server/openapi/__init__.py +0 -0
- phoenix/server/prometheus.py +111 -0
- phoenix/server/rate_limiters.py +188 -0
- phoenix/server/static/.vite/manifest.json +87 -0
- phoenix/server/static/assets/components-Cy9nwIvF.js +2125 -0
- phoenix/server/static/assets/index-BKvHIxkk.js +113 -0
- phoenix/server/static/assets/pages-CUi2xCVQ.js +4449 -0
- phoenix/server/static/assets/vendor-DvC8cT4X.js +894 -0
- phoenix/server/static/assets/vendor-DxkFTwjz.css +1 -0
- phoenix/server/static/assets/vendor-arizeai-Do1793cv.js +662 -0
- phoenix/server/static/assets/vendor-codemirror-BzwZPyJM.js +24 -0
- phoenix/server/static/assets/vendor-recharts-_Jb7JjhG.js +59 -0
- phoenix/server/static/assets/vendor-shiki-Cl9QBraO.js +5 -0
- phoenix/server/static/assets/vendor-three-DwGkEfCM.js +2998 -0
- phoenix/server/telemetry.py +68 -0
- phoenix/server/templates/index.html +82 -23
- phoenix/server/thread_server.py +3 -3
- phoenix/server/types.py +275 -0
- phoenix/services.py +27 -18
- phoenix/session/client.py +743 -68
- phoenix/session/data_extractor.py +31 -7
- phoenix/session/evaluation.py +3 -9
- phoenix/session/session.py +263 -219
- phoenix/settings.py +22 -0
- phoenix/trace/__init__.py +2 -22
- phoenix/trace/attributes.py +338 -0
- phoenix/trace/dsl/README.md +116 -0
- phoenix/trace/dsl/filter.py +663 -213
- phoenix/trace/dsl/helpers.py +73 -21
- phoenix/trace/dsl/query.py +574 -201
- phoenix/trace/exporter.py +24 -19
- phoenix/trace/fixtures.py +368 -32
- phoenix/trace/otel.py +71 -219
- phoenix/trace/projects.py +3 -2
- phoenix/trace/schemas.py +33 -11
- phoenix/trace/span_evaluations.py +21 -16
- phoenix/trace/span_json_decoder.py +6 -4
- phoenix/trace/span_json_encoder.py +2 -2
- phoenix/trace/trace_dataset.py +47 -32
- phoenix/trace/utils.py +21 -4
- phoenix/utilities/__init__.py +0 -26
- phoenix/utilities/client.py +132 -0
- phoenix/utilities/deprecation.py +31 -0
- phoenix/utilities/error_handling.py +3 -2
- phoenix/utilities/json.py +109 -0
- phoenix/utilities/logging.py +8 -0
- phoenix/utilities/project.py +2 -2
- phoenix/utilities/re.py +49 -0
- phoenix/utilities/span_store.py +0 -23
- phoenix/utilities/template_formatters.py +99 -0
- phoenix/version.py +1 -1
- arize_phoenix-3.16.1.dist-info/METADATA +0 -495
- arize_phoenix-3.16.1.dist-info/RECORD +0 -178
- phoenix/core/project.py +0 -619
- phoenix/core/traces.py +0 -96
- phoenix/experimental/evals/__init__.py +0 -73
- phoenix/experimental/evals/evaluators.py +0 -413
- phoenix/experimental/evals/functions/__init__.py +0 -4
- phoenix/experimental/evals/functions/classify.py +0 -453
- phoenix/experimental/evals/functions/executor.py +0 -353
- phoenix/experimental/evals/functions/generate.py +0 -138
- phoenix/experimental/evals/functions/processing.py +0 -76
- phoenix/experimental/evals/models/__init__.py +0 -14
- phoenix/experimental/evals/models/anthropic.py +0 -175
- phoenix/experimental/evals/models/base.py +0 -170
- phoenix/experimental/evals/models/bedrock.py +0 -221
- phoenix/experimental/evals/models/litellm.py +0 -134
- phoenix/experimental/evals/models/openai.py +0 -448
- phoenix/experimental/evals/models/rate_limiters.py +0 -246
- phoenix/experimental/evals/models/vertex.py +0 -173
- phoenix/experimental/evals/models/vertexai.py +0 -186
- phoenix/experimental/evals/retrievals.py +0 -96
- phoenix/experimental/evals/templates/__init__.py +0 -50
- phoenix/experimental/evals/templates/default_templates.py +0 -472
- phoenix/experimental/evals/templates/template.py +0 -195
- phoenix/experimental/evals/utils/__init__.py +0 -172
- phoenix/experimental/evals/utils/threads.py +0 -27
- phoenix/server/api/helpers.py +0 -11
- phoenix/server/api/routers/evaluation_handler.py +0 -109
- phoenix/server/api/routers/span_handler.py +0 -70
- phoenix/server/api/routers/trace_handler.py +0 -60
- phoenix/server/api/types/DatasetRole.py +0 -23
- phoenix/server/static/index.css +0 -6
- phoenix/server/static/index.js +0 -7447
- phoenix/storage/span_store/__init__.py +0 -23
- phoenix/storage/span_store/text_file.py +0 -85
- phoenix/trace/dsl/missing.py +0 -60
- phoenix/trace/langchain/__init__.py +0 -3
- phoenix/trace/langchain/instrumentor.py +0 -35
- phoenix/trace/llama_index/__init__.py +0 -3
- phoenix/trace/llama_index/callback.py +0 -102
- phoenix/trace/openai/__init__.py +0 -3
- phoenix/trace/openai/instrumentor.py +0 -30
- {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-3.16.1.dist-info → arize_phoenix-7.7.0.dist-info}/licenses/LICENSE +0 -0
- /phoenix/{datasets → db/insertion}/__init__.py +0 -0
- /phoenix/{experimental → db/migrations}/__init__.py +0 -0
- /phoenix/{storage → db/migrations/data_migration_scripts}/__init__.py +0 -0
phoenix/experiments/utils.py (new file)
@@ -0,0 +1,25 @@
+import functools
+from collections.abc import Callable
+from typing import Any
+
+from phoenix.config import get_web_base_url
+
+
+def get_experiment_url(*, dataset_id: str, experiment_id: str) -> str:
+    return f"{get_web_base_url()}datasets/{dataset_id}/compare?experimentId={experiment_id}"
+
+
+def get_dataset_experiments_url(*, dataset_id: str) -> str:
+    return f"{get_web_base_url()}datasets/{dataset_id}/experiments"
+
+
+def get_func_name(fn: Callable[..., Any]) -> str:
+    """
+    Makes a best-effort attempt to get the name of the function.
+    """
+
+    if isinstance(fn, functools.partial):
+        return fn.func.__qualname__
+    if hasattr(fn, "__qualname__") and not fn.__qualname__.endswith("<lambda>"):
+        return fn.__qualname__.split(".<locals>.")[-1]
+    return str(fn)
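The branching in the new get_func_name helper is easiest to see with a few illustrative calls (a sketch, not part of the release; make_adder is a hypothetical helper defined only for this example):

import functools

def make_adder(n):  # hypothetical function, used only to produce a "<locals>" qualname
    def add(x):
        return x + n
    return add

get_func_name(functools.partial(max))  # "max" -- partials report the wrapped callable
get_func_name(make_adder(1))           # "add" -- the "make_adder.<locals>." prefix is stripped
get_func_name(lambda x: x)             # falls through to str(fn), e.g. "<function <lambda> at 0x...>"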
phoenix/{datasets → inferences}/errors.py
@@ -1,5 +1,6 @@
 from abc import abstractmethod
-from
+from collections.abc import Iterable
+from typing import Any, Union
 
 
 class ValidationError(Exception):
@@ -57,8 +58,8 @@ class InvalidSchemaError(ValidationError):
 class DatasetError(Exception):
     """An error raised when the dataset is invalid or incomplete"""
 
-    def __init__(self, errors: Union[ValidationError,
-        self.errors:
+    def __init__(self, errors: Union[ValidationError, list[ValidationError]]):
+        self.errors: list[ValidationError] = errors if isinstance(errors, list) else [errors]
 
     def __str__(self) -> str:
         return "\n".join(map(str, self.errors))
@@ -142,7 +143,7 @@ class EmbeddingVectorSizeMismatch(ValidationError):
     vector lengths"""
 
     def __init__(
-        self, embedding_feature_name: str, vector_column_name: str, vector_lengths:
+        self, embedding_feature_name: str, vector_column_name: str, vector_lengths: list[int]
     ) -> None:
         self.embedding_feature_name = embedding_feature_name
         self.vector_column_name = vector_column_name
@@ -238,5 +239,5 @@ class MissingTimestampColumnName(ValidationError):
 class SchemaError(Exception):
     """An error raised when the Schema is invalid or incomplete"""
 
-    def __init__(self, errors: Union[ValidationError,
+    def __init__(self, errors: Union[ValidationError, list[ValidationError]]):
        self.errors = errors
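The rewritten DatasetError.__init__ normalizes its argument so that .errors is always a list, whether the caller passes one ValidationError or many. A minimal sketch, assuming a concrete subclass (_Dummy is hypothetical and exists only for illustration):

class _Dummy(ValidationError):  # hypothetical subclass for illustration
    def __str__(self) -> str:
        return "dummy validation failure"

single = DatasetError(_Dummy())             # a bare error is wrapped in a one-element list
assert isinstance(single.errors, list)
multi = DatasetError([_Dummy(), _Dummy()])  # a list is stored as-is
print(multi)  # DatasetError.__str__ joins one message per error with newlines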
phoenix/{datasets → inferences}/fixtures.py
@@ -1,17 +1,18 @@
 import json
 import logging
+from collections.abc import Iterator
 from dataclasses import dataclass, replace
 from enum import Enum, auto
 from pathlib import Path
-from typing import
+from typing import NamedTuple, Optional
 from urllib import request
 from urllib.parse import quote, urljoin
 
 from pandas import read_parquet
 
-from phoenix.config import
-from phoenix.
-from phoenix.
+from phoenix.config import INFERENCES_DIR
+from phoenix.inferences.inferences import Inferences
+from phoenix.inferences.schema import (
     EmbeddingColumnNames,
     RetrievalEmbeddingColumnNames,
     Schema,
@@ -20,7 +21,7 @@ from phoenix.datasets.schema import (
 logger = logging.getLogger(__name__)
 
 
-class
+class InferencesRole(Enum):
     PRIMARY = auto()
     REFERENCE = auto()
     CORPUS = auto()
@@ -39,11 +40,11 @@ class Fixture:
     corpus_file_name: Optional[str] = None
     corpus_schema: Optional[Schema] = None
 
-    def paths(self) -> Iterator[
+    def paths(self) -> Iterator[tuple[InferencesRole, Path]]:
         return (
             (role, Path(self.prefix) / name)
             for role, name in zip(
-
+                InferencesRole,
                 (
                     self.primary_file_name,
                     self.reference_file_name,
@@ -397,7 +398,7 @@ wikipedia_fixture = Fixture(
     corpus_file_name="corpus.parquet",
 )
 
-FIXTURES:
+FIXTURES: tuple[Fixture, ...] = (
     sentiment_classification_language_drift_fixture,
     image_classification_fixture,
     fashion_mnist_fixture,
@@ -413,47 +414,53 @@ FIXTURES: Tuple[Fixture, ...] = (
 NAME_TO_FIXTURE = {fixture.name: fixture for fixture in FIXTURES}
 
 
-def
+def get_inferences(
     fixture_name: str,
     no_internet: bool = False,
-) ->
+) -> tuple[Inferences, Optional[Inferences], Optional[Inferences]]:
     """
-    Downloads primary and reference
+    Downloads primary and reference inferences for a fixture if they are not found
     locally.
     """
-    fixture =
+    fixture = get_fixture_by_name(fixture_name=fixture_name)
     if no_internet:
-        paths = {role:
+        paths = {role: INFERENCES_DIR / path for role, path in fixture.paths()}
     else:
-        paths = dict(_download(fixture,
-
-        read_parquet(paths[
+        paths = dict(_download(fixture, INFERENCES_DIR))
+    primary_inferences = Inferences(
+        read_parquet(paths[InferencesRole.PRIMARY]),
         fixture.primary_schema,
         "production",
     )
-
+    reference_inferences = None
     if fixture.reference_file_name is not None:
-
-        read_parquet(paths[
-
-
-
+        reference_inferences = Inferences(
+            read_parquet(paths[InferencesRole.REFERENCE]),
+            (
+                fixture.reference_schema
+                if fixture.reference_schema is not None
+                else fixture.primary_schema
+            ),
             "training",
         )
-
+    corpus_inferences = None
     if fixture.corpus_file_name is not None:
-
-        read_parquet(paths[
+        corpus_inferences = Inferences(
+            read_parquet(paths[InferencesRole.CORPUS]),
             fixture.corpus_schema,
             "knowledge_base",
         )
-    return
+    return primary_inferences, reference_inferences, corpus_inferences
 
 
-def
+def get_fixture_by_name(fixture_name: str) -> Fixture:
     """
-    Returns the fixture whose name matches the input name.
-
+    Returns the fixture whose name matches the input name.
+
+    Raises
+    ------
+    ValueError
+        if the input fixture name does not match any known fixture names.
     """
     if fixture_name not in NAME_TO_FIXTURE:
         valid_fixture_names = ", ".join(NAME_TO_FIXTURE.keys())
@@ -462,17 +469,17 @@ def _get_fixture_by_name(fixture_name: str) -> Fixture:
 
 
 @dataclass
-class
+class ExampleInferences:
     """
     A primary and optional reference dataset pair.
     """
 
-    primary:
-    reference: Optional[
-    corpus: Optional[
+    primary: Inferences
+    reference: Optional[Inferences] = None
+    corpus: Optional[Inferences] = None
 
 
-def load_example(use_case: str) ->
+def load_example(use_case: str) -> ExampleInferences:
     """
     Loads an example primary and reference dataset for a given use-case.
 
@@ -495,15 +502,15 @@ def load_example(use_case: str) -> ExampleDatasets:
     reference).
 
     """
-    fixture =
-
+    fixture = get_fixture_by_name(use_case)
+    primary_inferences, reference_inferences, corpus_inferences = get_inferences(use_case)
     print(f"📥 Loaded {use_case} example datasets.")
    print("ℹ️ About this use-case:")
     print(fixture.description)
-    return
-        primary=
-        reference=
-        corpus=
+    return ExampleInferences(
+        primary=primary_inferences,
+        reference=reference_inferences,
+        corpus=corpus_inferences,
     )
 
 
@@ -544,7 +551,7 @@ class GCSAssets(NamedTuple):
     )
 
 
-def _download(fixture: Fixture, location: Path) -> Iterator[
+def _download(fixture: Fixture, location: Path) -> Iterator[tuple[InferencesRole, Path]]:
     for role, path in fixture.paths():
         yield role, GCSAssets().metadata(path).save_artifact(location)
 
@@ -556,5 +563,5 @@ if __name__ == "__main__":
     for fixture in FIXTURES:
         start_time = time.time()
         print(f"getting {fixture.name}", end="...")
-        dict(_download(fixture,
+        dict(_download(fixture, INFERENCES_DIR))
         print(f"done ({time.time() - start_time:.2f}s)")
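End to end, the renamed fixture loaders are used roughly as follows (a sketch; it assumes load_example and launch_app are re-exported at the phoenix package root, as in earlier releases, and that the fixture is registered under the name shown):

import phoenix as px

# Downloads the fixture's parquet files on first use, then wraps them in Inferences objects.
example = px.load_example("sentiment_classification_language_drift")
px.launch_app(example.primary, example.reference)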
phoenix/{datasets/dataset.py → inferences/inferences.py}
@@ -5,7 +5,7 @@ from copy import deepcopy
 from dataclasses import dataclass, fields, replace
 from enum import Enum
 from itertools import groupby
-from typing import Any,
+from typing import Any, Optional, Union
 
 import numpy as np
 import pandas as pd
@@ -15,8 +15,9 @@ from pandas.api.types import (
 )
 from typing_extensions import TypeAlias
 
-from phoenix.config import
+from phoenix.config import GENERATED_INFERENCES_NAME_PREFIX, INFERENCES_DIR
 from phoenix.datetime_utils import normalize_timestamps
+from phoenix.utilities.deprecation import deprecated
 
 from . import errors as err
 from .schema import (
@@ -30,7 +31,7 @@ from .schema import (
     SchemaFieldName,
     SchemaFieldValue,
 )
-from .validation import
+from .validation import validate_inferences_inputs
 
 logger = logging.getLogger(__name__)
 
@@ -38,10 +39,16 @@ logger = logging.getLogger(__name__)
 SchemaLike: TypeAlias = Any
 
 
-class
+class Inferences:
     """
     A dataset to use for analysis using phoenix.
-    Used to construct a phoenix session via px.launch_app
+    Used to construct a phoenix session via px.launch_app.
+
+    Typical usage example::
+
+        primary_inferences = px.Inferences(
+            dataframe=production_dataframe, schema=schema, name="primary"
+        )
 
     Parameters
     ----------
@@ -61,7 +68,15 @@ class Dataset:
 
     Examples
     --------
-
+    Define inferences ds from a pandas dataframe df and a schema object schema by running::
+
+        ds = px.Inferences(df, schema)
+
+    Alternatively, provide a name for the inferences that will appear in the application::
+
+        ds = px.Inferences(df, schema, name="training")
+
+    ds is then passed as the primary or reference argument to launch_app.
     """
 
     _data_file_name: str = "data.parquet"
@@ -78,7 +93,7 @@ class Dataset:
         # allow for schema like objects
         if not isinstance(schema, Schema):
             schema = _get_schema_from_unknown_schema_param(schema)
-        errors =
+        errors = validate_inferences_inputs(
             dataframe=dataframe,
             schema=schema,
         )
@@ -92,7 +107,7 @@ class Dataset:
         self.__dataframe: DataFrame = dataframe
         self.__schema: Schema = schema
         self.__name: str = (
-            name if name is not None else f"{
+            name if name is not None else f"{GENERATED_INFERENCES_NAME_PREFIX}{str(uuid.uuid4())}"
         )
         self._is_empty = self.dataframe.empty
         logger.info(f"""Dataset: {self.__name} initialized""")
@@ -113,19 +128,33 @@ class Dataset:
         return self.__name
 
     @classmethod
-    def from_name(cls, name: str) -> "
+    def from_name(cls, name: str) -> "Inferences":
         """Retrieves a dataset by name from the file system"""
-        directory =
+        directory = INFERENCES_DIR / name
         df = read_parquet(directory / cls._data_file_name)
         with open(directory / cls._schema_file_name) as schema_file:
             schema_json = schema_file.read()
         schema = Schema.from_json(schema_json)
         return cls(df, schema, name)
 
+    def to_disc(self) -> None:
+        """writes the data and schema to disc"""
+        directory = INFERENCES_DIR / self.name
+        directory.mkdir(parents=True, exist_ok=True)
+        self.dataframe.to_parquet(
+            directory / self._data_file_name,
+            allow_truncated_timestamps=True,
+            coerce_timestamps="ms",
+        )
+        schema_json_data = self.schema.to_json()
+        with open(directory / self._schema_file_name, "w+") as schema_file:
+            schema_file.write(schema_json_data)
+
     @classmethod
-
+    @deprecated("Inferences.from_open_inference is deprecated and will be removed.")
+    def from_open_inference(cls, dataframe: DataFrame) -> "Inferences":
         schema = Schema()
-        column_renaming:
+        column_renaming: dict[str, str] = {}
         for group_name, group in groupby(
             sorted(
                 map(_parse_open_inference_column_name, dataframe.columns),
@@ -276,21 +305,53 @@ class Dataset:
             schema,
         )
 
-
-
-
-
-
-
-
-
+
+class OpenInferenceCategory(Enum):
+    id = "id"
+    timestamp = "timestamp"
+    feature = "feature"
+    tag = "tag"
+    prediction = "prediction"
+    actual = "actual"
+
+
+class OpenInferenceSpecifier(Enum):
+    default = ""
+    score = "score"
+    label = "label"
+    embedding = "embedding"
+    raw_data = "raw_data"
+    link_to_data = "link_to_data"
+    retrieved_document_ids = "retrieved_document_ids"
+    retrieved_document_scores = "retrieved_document_scores"
+
+
+@dataclass(frozen=True)
+class _OpenInferenceColumnName:
+    full_name: str
+    category: OpenInferenceCategory
+    data_type: str
+    specifier: OpenInferenceSpecifier = OpenInferenceSpecifier.default
+    name: str = ""
+
+
+def _parse_open_inference_column_name(column_name: str) -> _OpenInferenceColumnName:
+    pattern = (
+        r"^:(?P<category>\w+)\.(?P<data_type>\[\w+\]|\w+)(\.(?P<specifier>\w+))?:(?P<name>.*)?$"
+    )
+    if match := re.match(pattern, column_name):
+        extract = match.groupdict(default="")
+        return _OpenInferenceColumnName(
+            full_name=column_name,
+            category=OpenInferenceCategory(extract.get("category", "").lower()),
+            data_type=extract.get("data_type", "").lower(),
+            specifier=OpenInferenceSpecifier(extract.get("specifier", "").lower()),
+            name=extract.get("name", ""),
         )
-
-        with open(directory / self._schema_file_name, "w+") as schema_file:
-            schema_file.write(schema_json_data)
+    raise ValueError(f"Invalid format for column name: {column_name}")
 
 
-def _parse_dataframe_and_schema(dataframe: DataFrame, schema: Schema) ->
+def _parse_dataframe_and_schema(dataframe: DataFrame, schema: Schema) -> tuple[DataFrame, Schema]:
     """
     Parses a dataframe according to a schema, infers feature columns names when
     they are not explicitly provided, and removes excluded column names from
@@ -303,12 +364,12 @@ def _parse_dataframe_and_schema(dataframe: DataFrame, schema: Schema) -> Tuple[D
     names present in the dataframe but not included in any other schema fields.
     """
 
-    unseen_excluded_column_names:
+    unseen_excluded_column_names: set[str] = (
        set(schema.excluded_column_names) if schema.excluded_column_names is not None else set()
     )
-    unseen_column_names:
-    column_name_to_include:
-    schema_patch:
+    unseen_column_names: set[str] = set(dataframe.columns.to_list())
+    column_name_to_include: dict[str, bool] = {}
+    schema_patch: dict[SchemaFieldName, SchemaFieldValue] = {}
 
     for schema_field_name in SINGLE_COLUMN_SCHEMA_FIELD_NAMES:
         _check_single_column_schema_field_for_excluded_columns(
@@ -373,10 +434,10 @@ def _parse_dataframe_and_schema(dataframe: DataFrame, schema: Schema) -> Tuple[D
 def _check_single_column_schema_field_for_excluded_columns(
     schema: Schema,
     schema_field_name: str,
-    unseen_excluded_column_names:
-    schema_patch:
-    column_name_to_include:
-    unseen_column_names:
+    unseen_excluded_column_names: set[str],
+    schema_patch: dict[SchemaFieldName, SchemaFieldValue],
+    column_name_to_include: dict[str, bool],
+    unseen_column_names: set[str],
 ) -> None:
     """
     Checks single-column schema fields for excluded column names.
@@ -394,18 +455,18 @@ def _check_single_column_schema_field_for_excluded_columns(
 def _check_multi_column_schema_field_for_excluded_columns(
     schema: Schema,
     schema_field_name: str,
-    unseen_excluded_column_names:
-    schema_patch:
-    column_name_to_include:
-    unseen_column_names:
+    unseen_excluded_column_names: set[str],
+    schema_patch: dict[SchemaFieldName, SchemaFieldValue],
+    column_name_to_include: dict[str, bool],
+    unseen_column_names: set[str],
 ) -> None:
     """
     Checks multi-column schema fields for excluded columns names.
     """
-    column_names: Optional[
+    column_names: Optional[list[str]] = getattr(schema, schema_field_name)
     if column_names:
-        included_column_names:
-        excluded_column_names:
+        included_column_names: list[str] = []
+        excluded_column_names: list[str] = []
         for column_name in column_names:
             is_included_column = column_name not in unseen_excluded_column_names
             column_name_to_include[column_name] = is_included_column
@@ -421,10 +482,10 @@ def _check_multi_column_schema_field_for_excluded_columns(
 
 def _check_embedding_features_schema_field_for_excluded_columns(
     embedding_features: EmbeddingFeatures,
-    unseen_excluded_column_names:
-    schema_patch:
-    column_name_to_include:
-    unseen_column_names:
+    unseen_excluded_column_names: set[str],
+    schema_patch: dict[SchemaFieldName, SchemaFieldValue],
+    column_name_to_include: dict[str, bool],
+    unseen_column_names: set[str],
 ) -> None:
     """
     Check embedding features for excluded column names.
@@ -466,8 +527,8 @@ def _check_embedding_features_schema_field_for_excluded_columns(
 
 def _check_embedding_column_names_for_excluded_columns(
     embedding_column_name_mapping: EmbeddingColumnNames,
-    column_name_to_include:
-    unseen_column_names:
+    column_name_to_include: dict[str, bool],
+    unseen_column_names: set[str],
 ) -> None:
     """
     Check embedding column names for excluded column names.
@@ -481,10 +542,10 @@ def _check_embedding_column_names_for_excluded_columns(
 
 def _discover_feature_columns(
     dataframe: DataFrame,
-    unseen_excluded_column_names:
-    schema_patch:
-    column_name_to_include:
-    unseen_column_names:
+    unseen_excluded_column_names: set[str],
+    schema_patch: dict[SchemaFieldName, SchemaFieldValue],
+    column_name_to_include: dict[str, bool],
+    unseen_column_names: set[str],
 ) -> None:
     """
     Adds unseen and un-excluded columns as features, with the exception of "prediction_id"
@@ -498,10 +559,10 @@ def _discover_feature_columns(
         else:
             unseen_excluded_column_names.discard(column_name)
             logger.debug(f"excluded feature: {column_name}")
-    original_column_positions:
+    original_column_positions: list[int] = dataframe.columns.get_indexer(
         discovered_feature_column_names
     ) # type: ignore
-    feature_column_name_to_position:
+    feature_column_name_to_position: dict[str, int] = dict(
         zip(discovered_feature_column_names, original_column_positions)
     )
     discovered_feature_column_names.sort(key=lambda col: feature_column_name_to_position[col])
@@ -514,16 +575,16 @@ def _discover_feature_columns(
 def _create_and_normalize_dataframe_and_schema(
     dataframe: DataFrame,
     schema: Schema,
-    schema_patch:
-    column_name_to_include:
-) ->
+    schema_patch: dict[SchemaFieldName, SchemaFieldValue],
+    column_name_to_include: dict[str, bool],
+) -> tuple[DataFrame, Schema]:
     """
     Creates new dataframe and schema objects to reflect excluded column names
     and discovered features. This also normalizes dataframe columns to ensure a
     standard set of columns (i.e. timestamp and prediction_id) and datatypes for
     those columns.
     """
-    included_column_names:
+    included_column_names: list[str] = []
     for column_name in dataframe.columns:
         if column_name_to_include.get(str(column_name), False):
             included_column_names.append(str(column_name))
@@ -587,7 +648,7 @@ def _normalize_timestamps(
     dataframe: DataFrame,
     schema: Schema,
     default_timestamp: Timestamp,
-) ->
+) -> tuple[DataFrame, Schema]:
     """
     Ensures that the dataframe has a timestamp column and the schema has a timestamp field. If the
     input dataframe contains a Unix or datetime timestamp or ISO8601 timestamp strings column, it
@@ -614,7 +675,7 @@ def _normalize_timestamps(
 
 def _get_schema_from_unknown_schema_param(schemaLike: SchemaLike) -> Schema:
     """
-    Compatibility function for converting from arize.utils.types.Schema to phoenix.
+    Compatibility function for converting from arize.utils.types.Schema to phoenix.inferences.Schema
     """
     try:
         from arize.utils.types import (
@@ -625,7 +686,7 @@ def _get_schema_from_unknown_schema_param(schemaLike: SchemaLike) -> Schema:
     if not isinstance(schemaLike, ArizeSchema):
         raise ValueError("Unknown schema passed to Dataset. Please pass a phoenix Schema")
 
-    embedding_feature_column_names:
+    embedding_feature_column_names: dict[str, EmbeddingColumnNames] = {}
     if schemaLike.embedding_feature_column_names is not None:
         for (
             embedding_name,
@@ -673,54 +734,9 @@ def _get_schema_from_unknown_schema_param(schemaLike: SchemaLike) -> Schema:
     )
 
 
-def _add_prediction_id(num_rows: int) ->
+def _add_prediction_id(num_rows: int) -> list[str]:
     return [str(uuid.uuid4()) for _ in range(num_rows)]
 
 
-class OpenInferenceCategory(Enum):
-    id = "id"
-    timestamp = "timestamp"
-    feature = "feature"
-    tag = "tag"
-    prediction = "prediction"
-    actual = "actual"
-
-
-class OpenInferenceSpecifier(Enum):
-    default = ""
-    score = "score"
-    label = "label"
-    embedding = "embedding"
-    raw_data = "raw_data"
-    link_to_data = "link_to_data"
-    retrieved_document_ids = "retrieved_document_ids"
-    retrieved_document_scores = "retrieved_document_scores"
-
-
-@dataclass(frozen=True)
-class _OpenInferenceColumnName:
-    full_name: str
-    category: OpenInferenceCategory
-    data_type: str
-    specifier: OpenInferenceSpecifier = OpenInferenceSpecifier.default
-    name: str = ""
-
-
-def _parse_open_inference_column_name(column_name: str) -> _OpenInferenceColumnName:
-    pattern = (
-        r"^:(?P<category>\w+)\.(?P<data_type>\[\w+\]|\w+)(\.(?P<specifier>\w+))?:(?P<name>.*)?$"
-    )
-    if match := re.match(pattern, column_name):
-        extract = match.groupdict(default="")
-        return _OpenInferenceColumnName(
-            full_name=column_name,
-            category=OpenInferenceCategory(extract.get("category", "").lower()),
-            data_type=extract.get("data_type", "").lower(),
-            specifier=OpenInferenceSpecifier(extract.get("specifier", "").lower()),
-            name=extract.get("name", ""),
-        )
-    raise ValueError(f"Invalid format for column name: {column_name}")
-
-
 # A dataset with no data. Useful for stubs
-
+EMPTY_INFERENCES = Inferences(pd.DataFrame(), schema=Schema())