arize-phoenix 3.16.0__py3-none-any.whl → 7.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- arize_phoenix-7.7.0.dist-info/METADATA +261 -0
- arize_phoenix-7.7.0.dist-info/RECORD +345 -0
- {arize_phoenix-3.16.0.dist-info → arize_phoenix-7.7.0.dist-info}/WHEEL +1 -1
- arize_phoenix-7.7.0.dist-info/entry_points.txt +3 -0
- phoenix/__init__.py +86 -14
- phoenix/auth.py +309 -0
- phoenix/config.py +675 -45
- phoenix/core/model.py +32 -30
- phoenix/core/model_schema.py +102 -109
- phoenix/core/model_schema_adapter.py +48 -45
- phoenix/datetime_utils.py +24 -3
- phoenix/db/README.md +54 -0
- phoenix/db/__init__.py +4 -0
- phoenix/db/alembic.ini +85 -0
- phoenix/db/bulk_inserter.py +294 -0
- phoenix/db/engines.py +208 -0
- phoenix/db/enums.py +20 -0
- phoenix/db/facilitator.py +113 -0
- phoenix/db/helpers.py +159 -0
- phoenix/db/insertion/constants.py +2 -0
- phoenix/db/insertion/dataset.py +227 -0
- phoenix/db/insertion/document_annotation.py +171 -0
- phoenix/db/insertion/evaluation.py +191 -0
- phoenix/db/insertion/helpers.py +98 -0
- phoenix/db/insertion/span.py +193 -0
- phoenix/db/insertion/span_annotation.py +158 -0
- phoenix/db/insertion/trace_annotation.py +158 -0
- phoenix/db/insertion/types.py +256 -0
- phoenix/db/migrate.py +86 -0
- phoenix/db/migrations/data_migration_scripts/populate_project_sessions.py +199 -0
- phoenix/db/migrations/env.py +114 -0
- phoenix/db/migrations/script.py.mako +26 -0
- phoenix/db/migrations/versions/10460e46d750_datasets.py +317 -0
- phoenix/db/migrations/versions/3be8647b87d8_add_token_columns_to_spans_table.py +126 -0
- phoenix/db/migrations/versions/4ded9e43755f_create_project_sessions_table.py +66 -0
- phoenix/db/migrations/versions/cd164e83824f_users_and_tokens.py +157 -0
- phoenix/db/migrations/versions/cf03bd6bae1d_init.py +280 -0
- phoenix/db/models.py +807 -0
- phoenix/exceptions.py +5 -1
- phoenix/experiments/__init__.py +6 -0
- phoenix/experiments/evaluators/__init__.py +29 -0
- phoenix/experiments/evaluators/base.py +158 -0
- phoenix/experiments/evaluators/code_evaluators.py +184 -0
- phoenix/experiments/evaluators/llm_evaluators.py +473 -0
- phoenix/experiments/evaluators/utils.py +236 -0
- phoenix/experiments/functions.py +772 -0
- phoenix/experiments/tracing.py +86 -0
- phoenix/experiments/types.py +726 -0
- phoenix/experiments/utils.py +25 -0
- phoenix/inferences/__init__.py +0 -0
- phoenix/{datasets → inferences}/errors.py +6 -5
- phoenix/{datasets → inferences}/fixtures.py +49 -42
- phoenix/{datasets/dataset.py → inferences/inferences.py} +121 -105
- phoenix/{datasets → inferences}/schema.py +11 -11
- phoenix/{datasets → inferences}/validation.py +13 -14
- phoenix/logging/__init__.py +3 -0
- phoenix/logging/_config.py +90 -0
- phoenix/logging/_filter.py +6 -0
- phoenix/logging/_formatter.py +69 -0
- phoenix/metrics/__init__.py +5 -4
- phoenix/metrics/binning.py +4 -3
- phoenix/metrics/metrics.py +2 -1
- phoenix/metrics/mixins.py +7 -6
- phoenix/metrics/retrieval_metrics.py +2 -1
- phoenix/metrics/timeseries.py +5 -4
- phoenix/metrics/wrappers.py +9 -3
- phoenix/pointcloud/clustering.py +5 -5
- phoenix/pointcloud/pointcloud.py +7 -5
- phoenix/pointcloud/projectors.py +5 -6
- phoenix/pointcloud/umap_parameters.py +53 -52
- phoenix/server/api/README.md +28 -0
- phoenix/server/api/auth.py +44 -0
- phoenix/server/api/context.py +152 -9
- phoenix/server/api/dataloaders/__init__.py +91 -0
- phoenix/server/api/dataloaders/annotation_summaries.py +139 -0
- phoenix/server/api/dataloaders/average_experiment_run_latency.py +54 -0
- phoenix/server/api/dataloaders/cache/__init__.py +3 -0
- phoenix/server/api/dataloaders/cache/two_tier_cache.py +68 -0
- phoenix/server/api/dataloaders/dataset_example_revisions.py +131 -0
- phoenix/server/api/dataloaders/dataset_example_spans.py +38 -0
- phoenix/server/api/dataloaders/document_evaluation_summaries.py +144 -0
- phoenix/server/api/dataloaders/document_evaluations.py +31 -0
- phoenix/server/api/dataloaders/document_retrieval_metrics.py +89 -0
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +79 -0
- phoenix/server/api/dataloaders/experiment_error_rates.py +58 -0
- phoenix/server/api/dataloaders/experiment_run_annotations.py +36 -0
- phoenix/server/api/dataloaders/experiment_run_counts.py +49 -0
- phoenix/server/api/dataloaders/experiment_sequence_number.py +44 -0
- phoenix/server/api/dataloaders/latency_ms_quantile.py +188 -0
- phoenix/server/api/dataloaders/min_start_or_max_end_times.py +85 -0
- phoenix/server/api/dataloaders/project_by_name.py +31 -0
- phoenix/server/api/dataloaders/record_counts.py +116 -0
- phoenix/server/api/dataloaders/session_io.py +79 -0
- phoenix/server/api/dataloaders/session_num_traces.py +30 -0
- phoenix/server/api/dataloaders/session_num_traces_with_error.py +32 -0
- phoenix/server/api/dataloaders/session_token_usages.py +41 -0
- phoenix/server/api/dataloaders/session_trace_latency_ms_quantile.py +55 -0
- phoenix/server/api/dataloaders/span_annotations.py +26 -0
- phoenix/server/api/dataloaders/span_dataset_examples.py +31 -0
- phoenix/server/api/dataloaders/span_descendants.py +57 -0
- phoenix/server/api/dataloaders/span_projects.py +33 -0
- phoenix/server/api/dataloaders/token_counts.py +124 -0
- phoenix/server/api/dataloaders/trace_by_trace_ids.py +25 -0
- phoenix/server/api/dataloaders/trace_root_spans.py +32 -0
- phoenix/server/api/dataloaders/user_roles.py +30 -0
- phoenix/server/api/dataloaders/users.py +33 -0
- phoenix/server/api/exceptions.py +48 -0
- phoenix/server/api/helpers/__init__.py +12 -0
- phoenix/server/api/helpers/dataset_helpers.py +217 -0
- phoenix/server/api/helpers/experiment_run_filters.py +763 -0
- phoenix/server/api/helpers/playground_clients.py +948 -0
- phoenix/server/api/helpers/playground_registry.py +70 -0
- phoenix/server/api/helpers/playground_spans.py +455 -0
- phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
- phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
- phoenix/server/api/input_types/ChatCompletionInput.py +38 -0
- phoenix/server/api/input_types/ChatCompletionMessageInput.py +24 -0
- phoenix/server/api/input_types/ClearProjectInput.py +15 -0
- phoenix/server/api/input_types/ClusterInput.py +2 -2
- phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
- phoenix/server/api/input_types/CreateSpanAnnotationInput.py +18 -0
- phoenix/server/api/input_types/CreateTraceAnnotationInput.py +18 -0
- phoenix/server/api/input_types/DataQualityMetricInput.py +5 -2
- phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
- phoenix/server/api/input_types/DatasetSort.py +17 -0
- phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
- phoenix/server/api/input_types/DeleteAnnotationsInput.py +7 -0
- phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
- phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
- phoenix/server/api/input_types/DeleteExperimentsInput.py +7 -0
- phoenix/server/api/input_types/DimensionFilter.py +4 -4
- phoenix/server/api/input_types/GenerativeModelInput.py +17 -0
- phoenix/server/api/input_types/Granularity.py +1 -1
- phoenix/server/api/input_types/InvocationParameters.py +162 -0
- phoenix/server/api/input_types/PatchAnnotationInput.py +19 -0
- phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
- phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
- phoenix/server/api/input_types/PerformanceMetricInput.py +5 -2
- phoenix/server/api/input_types/ProjectSessionSort.py +29 -0
- phoenix/server/api/input_types/SpanAnnotationSort.py +17 -0
- phoenix/server/api/input_types/SpanSort.py +134 -69
- phoenix/server/api/input_types/TemplateOptions.py +10 -0
- phoenix/server/api/input_types/TraceAnnotationSort.py +17 -0
- phoenix/server/api/input_types/UserRoleInput.py +9 -0
- phoenix/server/api/mutations/__init__.py +28 -0
- phoenix/server/api/mutations/api_key_mutations.py +167 -0
- phoenix/server/api/mutations/chat_mutations.py +593 -0
- phoenix/server/api/mutations/dataset_mutations.py +591 -0
- phoenix/server/api/mutations/experiment_mutations.py +75 -0
- phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +21 -18
- phoenix/server/api/mutations/project_mutations.py +57 -0
- phoenix/server/api/mutations/span_annotations_mutations.py +128 -0
- phoenix/server/api/mutations/trace_annotations_mutations.py +127 -0
- phoenix/server/api/mutations/user_mutations.py +329 -0
- phoenix/server/api/openapi/__init__.py +0 -0
- phoenix/server/api/openapi/main.py +17 -0
- phoenix/server/api/openapi/schema.py +16 -0
- phoenix/server/api/queries.py +738 -0
- phoenix/server/api/routers/__init__.py +11 -0
- phoenix/server/api/routers/auth.py +284 -0
- phoenix/server/api/routers/embeddings.py +26 -0
- phoenix/server/api/routers/oauth2.py +488 -0
- phoenix/server/api/routers/v1/__init__.py +64 -0
- phoenix/server/api/routers/v1/datasets.py +1017 -0
- phoenix/server/api/routers/v1/evaluations.py +362 -0
- phoenix/server/api/routers/v1/experiment_evaluations.py +115 -0
- phoenix/server/api/routers/v1/experiment_runs.py +167 -0
- phoenix/server/api/routers/v1/experiments.py +308 -0
- phoenix/server/api/routers/v1/pydantic_compat.py +78 -0
- phoenix/server/api/routers/v1/spans.py +267 -0
- phoenix/server/api/routers/v1/traces.py +208 -0
- phoenix/server/api/routers/v1/utils.py +95 -0
- phoenix/server/api/schema.py +44 -247
- phoenix/server/api/subscriptions.py +597 -0
- phoenix/server/api/types/Annotation.py +21 -0
- phoenix/server/api/types/AnnotationSummary.py +55 -0
- phoenix/server/api/types/AnnotatorKind.py +16 -0
- phoenix/server/api/types/ApiKey.py +27 -0
- phoenix/server/api/types/AuthMethod.py +9 -0
- phoenix/server/api/types/ChatCompletionMessageRole.py +11 -0
- phoenix/server/api/types/ChatCompletionSubscriptionPayload.py +46 -0
- phoenix/server/api/types/Cluster.py +25 -24
- phoenix/server/api/types/CreateDatasetPayload.py +8 -0
- phoenix/server/api/types/DataQualityMetric.py +31 -13
- phoenix/server/api/types/Dataset.py +288 -63
- phoenix/server/api/types/DatasetExample.py +85 -0
- phoenix/server/api/types/DatasetExampleRevision.py +34 -0
- phoenix/server/api/types/DatasetVersion.py +14 -0
- phoenix/server/api/types/Dimension.py +32 -31
- phoenix/server/api/types/DocumentEvaluationSummary.py +9 -8
- phoenix/server/api/types/EmbeddingDimension.py +56 -49
- phoenix/server/api/types/Evaluation.py +25 -31
- phoenix/server/api/types/EvaluationSummary.py +30 -50
- phoenix/server/api/types/Event.py +20 -20
- phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
- phoenix/server/api/types/Experiment.py +152 -0
- phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
- phoenix/server/api/types/ExperimentComparison.py +17 -0
- phoenix/server/api/types/ExperimentRun.py +119 -0
- phoenix/server/api/types/ExperimentRunAnnotation.py +56 -0
- phoenix/server/api/types/GenerativeModel.py +9 -0
- phoenix/server/api/types/GenerativeProvider.py +85 -0
- phoenix/server/api/types/Inferences.py +80 -0
- phoenix/server/api/types/InferencesRole.py +23 -0
- phoenix/server/api/types/LabelFraction.py +7 -0
- phoenix/server/api/types/MimeType.py +2 -2
- phoenix/server/api/types/Model.py +54 -54
- phoenix/server/api/types/PerformanceMetric.py +8 -5
- phoenix/server/api/types/Project.py +407 -142
- phoenix/server/api/types/ProjectSession.py +139 -0
- phoenix/server/api/types/Segments.py +4 -4
- phoenix/server/api/types/Span.py +221 -176
- phoenix/server/api/types/SpanAnnotation.py +43 -0
- phoenix/server/api/types/SpanIOValue.py +15 -0
- phoenix/server/api/types/SystemApiKey.py +9 -0
- phoenix/server/api/types/TemplateLanguage.py +10 -0
- phoenix/server/api/types/TimeSeries.py +19 -15
- phoenix/server/api/types/TokenUsage.py +11 -0
- phoenix/server/api/types/Trace.py +154 -0
- phoenix/server/api/types/TraceAnnotation.py +45 -0
- phoenix/server/api/types/UMAPPoints.py +7 -7
- phoenix/server/api/types/User.py +60 -0
- phoenix/server/api/types/UserApiKey.py +45 -0
- phoenix/server/api/types/UserRole.py +15 -0
- phoenix/server/api/types/node.py +13 -107
- phoenix/server/api/types/pagination.py +156 -57
- phoenix/server/api/utils.py +34 -0
- phoenix/server/app.py +864 -115
- phoenix/server/bearer_auth.py +163 -0
- phoenix/server/dml_event.py +136 -0
- phoenix/server/dml_event_handler.py +256 -0
- phoenix/server/email/__init__.py +0 -0
- phoenix/server/email/sender.py +97 -0
- phoenix/server/email/templates/__init__.py +0 -0
- phoenix/server/email/templates/password_reset.html +19 -0
- phoenix/server/email/types.py +11 -0
- phoenix/server/grpc_server.py +102 -0
- phoenix/server/jwt_store.py +505 -0
- phoenix/server/main.py +305 -116
- phoenix/server/oauth2.py +52 -0
- phoenix/server/openapi/__init__.py +0 -0
- phoenix/server/prometheus.py +111 -0
- phoenix/server/rate_limiters.py +188 -0
- phoenix/server/static/.vite/manifest.json +87 -0
- phoenix/server/static/assets/components-Cy9nwIvF.js +2125 -0
- phoenix/server/static/assets/index-BKvHIxkk.js +113 -0
- phoenix/server/static/assets/pages-CUi2xCVQ.js +4449 -0
- phoenix/server/static/assets/vendor-DvC8cT4X.js +894 -0
- phoenix/server/static/assets/vendor-DxkFTwjz.css +1 -0
- phoenix/server/static/assets/vendor-arizeai-Do1793cv.js +662 -0
- phoenix/server/static/assets/vendor-codemirror-BzwZPyJM.js +24 -0
- phoenix/server/static/assets/vendor-recharts-_Jb7JjhG.js +59 -0
- phoenix/server/static/assets/vendor-shiki-Cl9QBraO.js +5 -0
- phoenix/server/static/assets/vendor-three-DwGkEfCM.js +2998 -0
- phoenix/server/telemetry.py +68 -0
- phoenix/server/templates/index.html +82 -23
- phoenix/server/thread_server.py +3 -3
- phoenix/server/types.py +275 -0
- phoenix/services.py +27 -18
- phoenix/session/client.py +743 -68
- phoenix/session/data_extractor.py +31 -7
- phoenix/session/evaluation.py +3 -9
- phoenix/session/session.py +263 -219
- phoenix/settings.py +22 -0
- phoenix/trace/__init__.py +2 -22
- phoenix/trace/attributes.py +338 -0
- phoenix/trace/dsl/README.md +116 -0
- phoenix/trace/dsl/filter.py +663 -213
- phoenix/trace/dsl/helpers.py +73 -21
- phoenix/trace/dsl/query.py +574 -201
- phoenix/trace/exporter.py +24 -19
- phoenix/trace/fixtures.py +368 -32
- phoenix/trace/otel.py +71 -219
- phoenix/trace/projects.py +3 -2
- phoenix/trace/schemas.py +33 -11
- phoenix/trace/span_evaluations.py +21 -16
- phoenix/trace/span_json_decoder.py +6 -4
- phoenix/trace/span_json_encoder.py +2 -2
- phoenix/trace/trace_dataset.py +47 -32
- phoenix/trace/utils.py +21 -4
- phoenix/utilities/__init__.py +0 -26
- phoenix/utilities/client.py +132 -0
- phoenix/utilities/deprecation.py +31 -0
- phoenix/utilities/error_handling.py +3 -2
- phoenix/utilities/json.py +109 -0
- phoenix/utilities/logging.py +8 -0
- phoenix/utilities/project.py +2 -2
- phoenix/utilities/re.py +49 -0
- phoenix/utilities/span_store.py +0 -23
- phoenix/utilities/template_formatters.py +99 -0
- phoenix/version.py +1 -1
- arize_phoenix-3.16.0.dist-info/METADATA +0 -495
- arize_phoenix-3.16.0.dist-info/RECORD +0 -178
- phoenix/core/project.py +0 -617
- phoenix/core/traces.py +0 -100
- phoenix/experimental/evals/__init__.py +0 -73
- phoenix/experimental/evals/evaluators.py +0 -413
- phoenix/experimental/evals/functions/__init__.py +0 -4
- phoenix/experimental/evals/functions/classify.py +0 -453
- phoenix/experimental/evals/functions/executor.py +0 -353
- phoenix/experimental/evals/functions/generate.py +0 -138
- phoenix/experimental/evals/functions/processing.py +0 -76
- phoenix/experimental/evals/models/__init__.py +0 -14
- phoenix/experimental/evals/models/anthropic.py +0 -175
- phoenix/experimental/evals/models/base.py +0 -170
- phoenix/experimental/evals/models/bedrock.py +0 -221
- phoenix/experimental/evals/models/litellm.py +0 -134
- phoenix/experimental/evals/models/openai.py +0 -448
- phoenix/experimental/evals/models/rate_limiters.py +0 -246
- phoenix/experimental/evals/models/vertex.py +0 -173
- phoenix/experimental/evals/models/vertexai.py +0 -186
- phoenix/experimental/evals/retrievals.py +0 -96
- phoenix/experimental/evals/templates/__init__.py +0 -50
- phoenix/experimental/evals/templates/default_templates.py +0 -472
- phoenix/experimental/evals/templates/template.py +0 -195
- phoenix/experimental/evals/utils/__init__.py +0 -172
- phoenix/experimental/evals/utils/threads.py +0 -27
- phoenix/server/api/helpers.py +0 -11
- phoenix/server/api/routers/evaluation_handler.py +0 -109
- phoenix/server/api/routers/span_handler.py +0 -70
- phoenix/server/api/routers/trace_handler.py +0 -60
- phoenix/server/api/types/DatasetRole.py +0 -23
- phoenix/server/static/index.css +0 -6
- phoenix/server/static/index.js +0 -7447
- phoenix/storage/span_store/__init__.py +0 -23
- phoenix/storage/span_store/text_file.py +0 -85
- phoenix/trace/dsl/missing.py +0 -60
- phoenix/trace/langchain/__init__.py +0 -3
- phoenix/trace/langchain/instrumentor.py +0 -35
- phoenix/trace/llama_index/__init__.py +0 -3
- phoenix/trace/llama_index/callback.py +0 -102
- phoenix/trace/openai/__init__.py +0 -3
- phoenix/trace/openai/instrumentor.py +0 -30
- {arize_phoenix-3.16.0.dist-info → arize_phoenix-7.7.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-3.16.0.dist-info → arize_phoenix-7.7.0.dist-info}/licenses/LICENSE +0 -0
- /phoenix/{datasets → db/insertion}/__init__.py +0 -0
- /phoenix/{experimental → db/migrations}/__init__.py +0 -0
- /phoenix/{storage → db/migrations/data_migration_scripts}/__init__.py +0 -0
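The {datasets → inferences} moves above rename the public phoenix.datasets package to phoenix.inferences, with dataset.py becoming inferences.py. A minimal migration sketch for downstream imports follows; the exact re-export surface is not visible in this file list, so the paths below are inferred from the moves rather than confirmed:

    # arize-phoenix 3.x (old layout):
    #   from phoenix.datasets.dataset import Dataset
    #   from phoenix.datasets.schema import Schema

    # arize-phoenix 7.x (new layout, per the moves above):
    from phoenix.inferences.inferences import Inferences
    from phoenix.inferences.schema import Schema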
phoenix/core/model_schema.py
CHANGED
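Two mechanical changes dominate the patch below: the Dataset/DatasetRole vocabulary is renamed to Inferences/InferencesRole, and capitalized generics from typing (Dict, List, Tuple, Type) are replaced with PEP 585 builtin generics, with the ABCs (Callable, Iterable, Mapping, etc.) now imported from collections.abc. A small self-contained sketch of the annotation style change (str stands in for the module's DimensionRole type):

    # Before: capitalized generics imported from typing (3.x style)
    from typing import Dict, List
    Name = str  # alias, as in model_schema.py
    dim_names_by_role_old: Dict[str, List[Name]] = {}

    # After: builtin generics (PEP 585); ABCs come from collections.abc
    from collections.abc import Iterable  # Callable, Mapping, Sequence likewise
    dim_names_by_role_new: dict[str, list[Name]] = {}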
@@ -4,6 +4,7 @@ import re
 import threading
 from abc import ABC, abstractmethod
 from collections import defaultdict
+from collections.abc import Callable, Hashable, Iterable, Iterator, Mapping, Sequence
 from contextlib import contextmanager
 from dataclasses import dataclass, field, fields, replace
 from datetime import datetime, timedelta, timezone
@@ -14,19 +15,9 @@ from random import random
 from typing import (
     Any,
     BinaryIO,
-    Callable,
-    Dict,
     Generic,
-    Hashable,
-    Iterable,
-    Iterator,
-    List,
-    Mapping,
     NamedTuple,
     Optional,
-    Sequence,
-    Tuple,
-    Type,
     TypeVar,
     Union,
     cast,
@@ -48,7 +39,7 @@ from pandas.core.dtypes.common import (
 from typing_extensions import TypeAlias, TypeGuard
 from wrapt import ObjectProxy
 
-from phoenix.config import GENERATED_DATASET_NAME_PREFIX
+from phoenix.config import GENERATED_INFERENCES_NAME_PREFIX
 from phoenix.datetime_utils import floor_to_minute
 
 
@@ -185,7 +176,7 @@ class RetrievalEmbedding(Embedding):
         yield value
 
 
-class DatasetRole(Enum):
+class InferencesRole(Enum):
    """A dataframe's role in a Model: primary or reference (as
    baseline for drift).
    """
@@ -194,8 +185,8 @@ class DatasetRole(Enum):
    REFERENCE = auto()
 
 
-PRIMARY = DatasetRole.PRIMARY
-REFERENCE = DatasetRole.REFERENCE
+PRIMARY = InferencesRole.PRIMARY
+REFERENCE = InferencesRole.REFERENCE
 
 
 @dataclass(frozen=True, repr=False, eq=False)
@@ -265,7 +256,7 @@ class _Cache(Generic[_Key, _Value]):
        2
    """
 
-    _cache: Dict[_Key, _Value] = field(
+    _cache: dict[_Key, _Value] = field(
        init=False,
        default_factory=dict,
    )
@@ -275,7 +266,7 @@ class _Cache(Generic[_Key, _Value]):
    )
 
    @contextmanager
-    def __call__(self) -> Iterator[Dict[_Key, _Value]]:
+    def __call__(self) -> Iterator[dict[_Key, _Value]]:
        with self._lock:
            yield self._cache
 
@@ -381,7 +372,7 @@ class Dimension(Column, ABC):
        # But we really want the role to be specified for a Dimension.
        raise ValueError("role must be assigned")
 
-    def __getitem__(self, df_role: DatasetRole) -> "pd.Series[Any]":
+    def __getitem__(self, df_role: InferencesRole) -> "pd.Series[Any]":
        if self._model is None:
            return pd.Series(dtype=object)
        model = cast(Model, self._model)
@@ -405,18 +396,18 @@ class Dimension(Column, ABC):
 @dataclass(frozen=True)
 class ScalarDimension(Dimension):
    @property
-    def min_max(self) -> Tuple[Any, Any]:
+    def min_max(self) -> tuple[Any, Any]:
        if self._model is None:
            return np.nan, np.nan
        model = cast(Model, self._model)
        return model.dimension_min_max_from_all_df(self.name)
 
    @property
-    def categories(self) -> Tuple[str, ...]:
+    def categories(self) -> tuple[str, ...]:
        if self._model is None or self.data_type is CONTINUOUS:
            return ()
        model = cast(Model, self._model)
-        return model.dimension_categories_from_all_datasets(self.name)
+        return model.dimension_categories_from_all_inferences(self.name)
 
 
 @dataclass(frozen=True)
@@ -502,7 +493,7 @@ class RetrievalEmbeddingDimension(EmbeddingDimension):
 Name: TypeAlias = str
 ColumnKey: TypeAlias = Union[Name, Column, SingularDimensionalRole]
 MultiDimensionKey: TypeAlias = Union[MultiDimensionalRole, Sequence[DimensionRole]]
-RowNumbering: TypeAlias = Union[int, List[int]]
+RowNumbering: TypeAlias = Union[int, list[int]]
 
 
 def _is_column_key(key: Any) -> TypeGuard[ColumnKey]:
@@ -525,7 +516,7 @@ def _is_multi_dimension_key(
 
 def _is_dimension_type_filter(
    key: Any,
-) -> TypeGuard[Tuple[MultiDimensionKey, Union[Type[ScalarDimension], Type[EmbeddingDimension]]]]:
+) -> TypeGuard[tuple[MultiDimensionKey, Union[type[ScalarDimension], type[EmbeddingDimension]]]]:
    return (
        isinstance(key, tuple)
        and len(key) == 2
@@ -534,7 +525,7 @@ def _is_dimension_type_filter(
    )
 
 
-def _is_named_df(obj: Any) -> TypeGuard[Tuple[Name, pd.DataFrame]]:
+def _is_named_df(obj: Any) -> TypeGuard[tuple[Name, pd.DataFrame]]:
    return (
        isinstance(obj, tuple)
        and len(obj) == 2
@@ -582,7 +573,7 @@ class EventId(NamedTuple):
    """Identifies an event."""
 
    row_id: int = 0
-    dataset_id: DatasetRole = PRIMARY
+    inferences_id: InferencesRole = PRIMARY
 
    def __str__(self) -> str:
        return ":".join(map(str, self))
@@ -625,7 +616,7 @@ class Events(ModelData):
        self,
        df: pd.DataFrame,
        /,
-        role: DatasetRole,
+        role: InferencesRole,
        **kwargs: Any,
    ) -> None:
        super().__init__(df, **kwargs)
@@ -664,7 +655,7 @@ class Events(ModelData):
    def __getitem__(self, key: ColumnKey) -> "pd.Series[Any]": ...
 
    @overload
-    def __getitem__(self, key: List[RowId]) -> "Events": ...
+    def __getitem__(self, key: list[RowId]) -> "Events": ...
 
    def __getitem__(self, key: Any) -> Any:
        if isinstance(key, list):
@@ -676,7 +667,7 @@ class Events(ModelData):
        return super().__getitem__(key)
 
 
-class Dataset(Events):
+class Inferences(Events):
    """pd.DataFrame wrapped with extra functions and metadata."""
 
    def __init__(
@@ -701,13 +692,13 @@ class Dataset(Events):
        friendly. Falls back to the role of the dataset if no name is provided.
        """
        ds_name = self._self_name
-        if ds_name.startswith(GENERATED_DATASET_NAME_PREFIX):
+        if ds_name.startswith(GENERATED_INFERENCES_NAME_PREFIX):
            # The generated names are UUIDs so use the role as the name
-            return "primary" if self.role is DatasetRole.PRIMARY else "reference"
+            return "primary" if self.role is InferencesRole.PRIMARY else "reference"
        return ds_name
 
    @property
-    def role(self) -> DatasetRole:
+    def role(self) -> InferencesRole:
        return self._self_role
 
    @property
@@ -715,14 +706,14 @@ class Dataset(Events):
        return len(self) == 0
 
    @cached_property
-    def primary_key(self) -> pd.Index:
-        return pd.Index(self[PREDICTION_ID])
+    def primary_key(self) -> "pd.Index[Any]":
+        return cast("pd.Index[Any]", pd.Index(self[PREDICTION_ID]))
 
    @overload
    def __getitem__(self, key: ColumnKey) -> "pd.Series[Any]": ...
 
    @overload
-    def __getitem__(self, key: List[RowId]) -> Events: ...
+    def __getitem__(self, key: list[RowId]) -> Events: ...
 
    def __getitem__(self, key: Any) -> Any:
        if isinstance(key, list):
@@ -746,19 +737,19 @@ class Model:
    a column of NaNs.
    """
 
-    _datasets: Dict[DatasetRole, Dataset]
-    _dimensions: Dict[Name, Dimension]
-    _dim_names_by_role: Dict[DimensionRole, List[Name]]
-    _original_columns_by_role: Dict[DatasetRole, pd.Index]
+    _inference_sets: dict[InferencesRole, Inferences]
+    _dimensions: dict[Name, Dimension]
+    _dim_names_by_role: dict[DimensionRole, list[Name]]
+    _original_columns_by_role: dict[InferencesRole, "pd.Index[Any]"]
    _default_timestamps_factory: _ConstantValueSeriesFactory
    _nan_series_factory: _ConstantValueSeriesFactory
-    _dimension_categories_from_all_datasets: _Cache[Name, Tuple[str, ...]]
-    _dimension_min_max_from_all_datasets: _Cache[Name, Tuple[float, float]]
+    _dimension_categories_from_all_inferences: _Cache[Name, tuple[str, ...]]
+    _dimension_min_max_from_all_inferences: _Cache[Name, tuple[float, float]]
 
    def __init__(
        self,
        dimensions: Iterable[Dimension],
-        dataframes: Iterable[Union[pd.DataFrame, Tuple[Name, pd.DataFrame]]],
+        dataframes: Iterable[Union[pd.DataFrame, tuple[Name, pd.DataFrame]]],
        /,
        treat_omitted_columns_as_features: bool = True,
        timestamps_already_normalized: bool = False,
@@ -769,37 +760,37 @@ class Model:
        # memoization
        object.__setattr__(
            self,
-            "_dimension_categories_from_all_datasets",
+            "_dimension_categories_from_all_inferences",
            _Cache[Name, "pd.Series[Any]"](),
        )
        object.__setattr__(
            self,
-            "_dimension_min_max_from_all_datasets",
-            _Cache[Name, Tuple[float, float]](),
+            "_dimension_min_max_from_all_inferences",
+            _Cache[Name, tuple[float, float]](),
        )
 
        df_names, dfs = cast(
-            Tuple[Iterable[Name], Iterable[pd.DataFrame]],
+            tuple[Iterable[Name], Iterable[pd.DataFrame]],
            zip(*_coerce_tuple(dataframes)),
        )
        str_col_dfs = _coerce_str_column_names(dfs)
        padded_dfs = _add_padding(str_col_dfs, pd.DataFrame)
        padded_df_names = _add_padding(df_names, _rand_str)
-        datasets = starmap(
-            self._new_dataset,
-            zip(padded_dfs, padded_df_names, DatasetRole),
+        inference_sets = starmap(
+            self._new_inferences,
+            zip(padded_dfs, padded_df_names, InferencesRole),
        )
-        # Store datasets by role.
+        # Store inferences by role.
        object.__setattr__(
            self,
-            "_datasets",
-            {dataset.role: dataset for dataset in datasets},
+            "_inference_sets",
+            {inferences.role: inferences for inferences in inference_sets},
        )
        # Preserve originals, useful for exporting.
        object.__setattr__(
            self,
            "_original_columns_by_role",
-            {role: dataset.columns for role, dataset in self._datasets.items()},
+            {role: inferences.columns for role, inferences in self._inference_sets.items()},
        )
 
        object.__setattr__(
@@ -828,7 +819,7 @@ class Model:
            (name, self._new_dimension(name, role=FEATURE))
            for name in _get_omitted_column_names(
                self._dimensions.values(),
-                self._datasets.values(),
+                self._inference_sets.values(),
            )
        )
 
@@ -849,7 +840,7 @@ class Model:
                data_type=(
                    _guess_data_type(
                        dataset.loc[:, dim.name]
-                        for dataset in self._datasets.values()
+                        for dataset in self._inference_sets.values()
                        if dim.name in dataset.columns
                    )
                ),
@@ -859,9 +850,9 @@ class Model:
        # Add TIMESTAMP if missing.
        # If needed, normalize the timestamps values.
        # If needed, sort the dataframes by time.
-        for dataset_role, dataset in list(self._datasets.items()):
+        for inferences_role, dataset in list(self._inference_sets.items()):
            df = dataset.__wrapped__
-            df_original_columns = self._original_columns_by_role[dataset_role]
+            df_original_columns = self._original_columns_by_role[inferences_role]
 
            # PREDICTION_ID
            dim_pred_id = self._dimensions.get(
@@ -897,20 +888,20 @@ class Model:
                df = df.set_index(dim_time.name, drop=False)
 
            # Update dataset since its dataframe may have changed.
-            self._datasets[dataset_role] = self._new_dataset(
-                df, name=dataset.name, role=dataset_role
+            self._inference_sets[inferences_role] = self._new_inferences(
+                df, name=dataset.name, role=inferences_role
            )
 
    @cached_property
    def is_empty(self) -> bool:
        """Returns True if the model has no data."""
-        return not any(map(len, self._datasets.values()))
+        return not any(map(len, self._inference_sets.values()))
 
    def export_rows_as_parquet_file(
        self,
-        row_numbers: Mapping[DatasetRole, Iterable[int]],
+        row_numbers: Mapping[InferencesRole, Iterable[int]],
        parquet_file: BinaryIO,
-        cluster_ids: Optional[Mapping[DatasetRole, Mapping[int, str]]] = None,
+        cluster_ids: Optional[Mapping[InferencesRole, Mapping[int, str]]] = None,
    ) -> None:
        """
        Given row numbers, exports dataframe subset into parquet file.
@@ -921,29 +912,31 @@ class Model:
 
        Parameters
        ----------
-        row_numbers: Mapping[DatasetRole, Iterable[int]]
+        row_numbers: Mapping[InferencesRole, Iterable[int]]
            mapping of dataset role to list of row numbers
        parquet_file: file handle
            output parquet file handle
-        cluster_ids: Optional[Mapping[DatasetRole, Mapping[int, str]]]
-            mapping of dataset role to mapping of row number to cluster id.
+        cluster_ids: Optional[Mapping[InferencesRole, Mapping[int, str]]]
+            mapping of inferences role to mapping of row number to cluster id.
            If cluster_ids is non-empty, a new column is inserted to the
            dataframe containing the cluster IDs of each row in the exported
            data. The name of the added column name is `__phoenix_cluster_id__`.
        """
        export_dataframes = [pd.DataFrame()]
-        model_has_multiple_datasets = sum(not df.empty for df in self._datasets.values()) > 1
-        for dataset_role, numbers in row_numbers.items():
-            df = self._datasets[dataset_role]
+        model_has_multiple_inference_sets = (
+            sum(not df.empty for df in self._inference_sets.values()) > 1
+        )
+        for inferences_role, numbers in row_numbers.items():
+            df = self._inference_sets[inferences_role]
            columns = [
                df.columns.get_loc(column_name)
-                for column_name in self._original_columns_by_role[dataset_role]
+                for column_name in self._original_columns_by_role[inferences_role]
            ]
            rows = pd.Series(sorted(set(numbers)))
            filtered_df = df.iloc[rows, columns].reset_index(drop=True)
-            if model_has_multiple_datasets:
+            if model_has_multiple_inference_sets:
                filtered_df["__phoenix_dataset_name__"] = df.display_name
-            if cluster_ids and (ids := cluster_ids.get(dataset_role)):
+            if cluster_ids and (ids := cluster_ids.get(inferences_role)):
                filtered_df["__phoenix_cluster_id__"] = rows.apply(ids.get)
            export_dataframes.append(filtered_df)
        pd.concat(export_dataframes).to_parquet(
@@ -954,7 +947,7 @@ class Model:
        )
 
    @cached_property
-    def scalar_dimensions(self) -> Tuple[ScalarDimension, ...]:
+    def scalar_dimensions(self) -> tuple[ScalarDimension, ...]:
        """Put these in a cached sequence because currently their positions
        in this list also determine their node IDs in graphql.
        """
@@ -967,7 +960,7 @@ class Model:
        )
 
    @cached_property
-    def embedding_dimensions(self) -> Tuple[EmbeddingDimension, ...]:
+    def embedding_dimensions(self) -> tuple[EmbeddingDimension, ...]:
        """Put these in a cached sequence because currently their positions
        in this list also determine their node IDs in graphql.
        """
@@ -977,52 +970,52 @@ class Model:
            if not dim.is_dummy and isinstance(dim, EmbeddingDimension)
        )
 
-    def dimension_categories_from_all_datasets(
+    def dimension_categories_from_all_inferences(
        self,
        dimension_name: Name,
-    ) -> Tuple[str, ...]:
+    ) -> tuple[str, ...]:
        dim = self[dimension_name]
        if dim.data_type is CONTINUOUS:
-            return cast(Tuple[str, ...], ())
-        with self._dimension_categories_from_all_datasets() as cache:
+            return cast(tuple[str, ...], ())
+        with self._dimension_categories_from_all_inferences() as cache:
            try:
                return cache[dimension_name]
            except KeyError:
                pass
        categories_by_dataset = (
-            pd.Series(dim[role].unique()).dropna().astype(str) for role in DatasetRole
+            pd.Series(dim[role].unique()).dropna().astype(str) for role in InferencesRole
        )
        all_values_combined = chain.from_iterable(categories_by_dataset)
        ans = tuple(np.sort(pd.Series(all_values_combined).unique()))
-        with self._dimension_categories_from_all_datasets() as cache:
+        with self._dimension_categories_from_all_inferences() as cache:
            cache[dimension_name] = ans
        return ans
 
    def dimension_min_max_from_all_df(
        self,
        dimension_name: Name,
-    ) -> Tuple[float, float]:
+    ) -> tuple[float, float]:
        dim = self[dimension_name]
        if dim.data_type is not CONTINUOUS:
            return (np.nan, np.nan)
-        with self._dimension_min_max_from_all_datasets() as cache:
+        with self._dimension_min_max_from_all_inferences() as cache:
            try:
                return cache[dimension_name]
            except KeyError:
                pass
-        min_max_by_df = (_agg_min_max(dim[df_role]) for df_role in DatasetRole)
+        min_max_by_df = (_agg_min_max(dim[df_role]) for df_role in InferencesRole)
        all_values_combined = chain.from_iterable(min_max_by_df)
        min_max = _agg_min_max(pd.Series(all_values_combined))
        ans = (min_max.min(), min_max.max())
-        with self._dimension_min_max_from_all_datasets() as cache:
+        with self._dimension_min_max_from_all_inferences() as cache:
            cache[dimension_name] = ans
        return ans
 
    @overload
-    def __getitem__(self, key: Type[Dataset]) -> Iterator[Dataset]: ...
+    def __getitem__(self, key: type[Inferences]) -> Iterator[Inferences]: ...
 
    @overload
-    def __getitem__(self, key: DatasetRole) -> Dataset: ...
+    def __getitem__(self, key: InferencesRole) -> Inferences: ...
 
    @overload
    def __getitem__(self, key: ColumnKey) -> Dimension: ...
@@ -1031,28 +1024,28 @@ class Model:
    def __getitem__(self, key: MultiDimensionKey) -> Iterator[Dimension]: ...
 
    @overload
-    def __getitem__(self, key: Type[ScalarDimension]) -> Iterator[ScalarDimension]: ...
+    def __getitem__(self, key: type[ScalarDimension]) -> Iterator[ScalarDimension]: ...
 
    @overload
-    def __getitem__(self, key: Type[EmbeddingDimension]) -> Iterator[EmbeddingDimension]: ...
+    def __getitem__(self, key: type[EmbeddingDimension]) -> Iterator[EmbeddingDimension]: ...
 
    @overload
-    def __getitem__(self, key: Type[Dimension]) -> Iterator[Dimension]: ...
+    def __getitem__(self, key: type[Dimension]) -> Iterator[Dimension]: ...
 
    @overload
    def __getitem__(
        self,
-        key: Tuple[
+        key: tuple[
            MultiDimensionKey,
-            Union[Type[ScalarDimension], Type[EmbeddingDimension]],
+            Union[type[ScalarDimension], type[EmbeddingDimension]],
        ],
    ) -> Iterator[Dimension]: ...
 
    def __getitem__(self, key: Any) -> Any:
-        if key is Dataset:
-            return self._datasets.values()
-        if isinstance(key, DatasetRole):
-            return self._datasets[key]
+        if key is Inferences:
+            return self._inference_sets.values()
+        if isinstance(key, InferencesRole):
+            return self._inference_sets[key]
        if _is_column_key(key):
            return self._get_dim(key)
        if _is_multi_dimension_key(key):
@@ -1092,9 +1085,9 @@ class Model:
 
    def _get_multi_dims_by_type(
        self,
-        key: Tuple[
+        key: tuple[
            MultiDimensionKey,
-            Union[Type[ScalarDimension], Type[EmbeddingDimension]],
+            Union[type[ScalarDimension], type[EmbeddingDimension]],
        ],
    ) -> Iterator[Dimension]:
        return filter(lambda dim: type(dim) is key[1], self[key[0]])
@@ -1103,7 +1096,7 @@ class Model:
    def _new_dimension(
        self,
        obj: DimensionRole,
-        cls: Type[Dimension] = ScalarDimension,
+        cls: type[Dimension] = ScalarDimension,
        **kwargs: Any,
    ) -> Dimension: ...
 
@@ -1111,7 +1104,7 @@ class Model:
    def _new_dimension(
        self,
        obj: Name,
-        cls: Type[Dimension] = ScalarDimension,
+        cls: type[Dimension] = ScalarDimension,
        **kwargs: Any,
    ) -> Dimension: ...
 
@@ -1123,7 +1116,7 @@ class Model:
    ) -> Dimension: ...
 
    def _new_dimension(
-        self, obj: Any, cls: Type[Dimension] = ScalarDimension, **kwargs: Any
+        self, obj: Any, cls: type[Dimension] = ScalarDimension, **kwargs: Any
    ) -> Dimension:
        """Creates a new Dimension or copies an existing one, setting the
        model weak reference to the `self` Model instance, and sharing the
@@ -1152,17 +1145,17 @@ class Model:
        )
        raise ValueError(f"invalid argument: {repr(obj)}")
 
-    def _new_dataset(
+    def _new_inferences(
        self,
        df: pd.DataFrame,
        /,
        name: str,
-        role: DatasetRole,
-    ) -> Dataset:
-        """Creates a new Dataset, setting the model weak reference to the
+        role: InferencesRole,
+    ) -> Inferences:
+        """Creates a new Inferences, setting the model weak reference to the
        `self` Model instance.
        """
-        return Dataset(df, name=name, role=role, _model=proxy(self))
+        return Inferences(df, name=name, role=role, _model=proxy(self))
 
 
 @dataclass(frozen=True)
@@ -1173,13 +1166,13 @@ class Schema(SchemaSpec):
    prediction_score: Optional[str] = None
    actual_label: Optional[str] = None
    actual_score: Optional[str] = None
-    prompt: Optional[
+    prompt: Optional[Embedding] = None
    response: Optional[Union[str, Embedding]] = None
    features: Iterable[Union[str, CompositeDimensionSpec]] = field(default_factory=list)
    tags: Iterable[Union[str, CompositeDimensionSpec]] = field(default_factory=list)
 
    # internal attribute not exposed to users
-    _dimensions: List[Dimension] = field(
+    _dimensions: list[Dimension] = field(
        init=False, repr=False, hash=False, compare=False, default_factory=list
    )
@@ -1246,7 +1239,7 @@ class Schema(SchemaSpec):
 
    def __call__(
        self,
-        *dataframes: Union[pd.DataFrame, Tuple[Name, pd.DataFrame]],
+        *dataframes: Union[pd.DataFrame, tuple[Name, pd.DataFrame]],
        **kwargs: Any,
    ) -> Model:
        """Dimensions are the "baton" that Schema hands over to Model."""
@@ -1300,7 +1293,7 @@ def _get_omitted_column_names(
 
 def _group_names_by_dim_role(
    dimensions: Iterable[Dimension],
-) -> Iterator[Tuple[DimensionRole, List[str]]]:
+) -> Iterator[tuple[DimensionRole, list[str]]]:
    return (
        (role, [dim.name for dim in dims])
        for role, dims in groupby(
@@ -1344,7 +1337,7 @@ def _series_uuid(length: int) -> "pd.Series[str]":
 
 
 def _raise_if_too_many_dataframes(given: int) -> None:
-    limit = len(DatasetRole)
+    limit = len(InferencesRole)
    if not 0 < given <= limit:
        raise ValueError(f"expected between 1 to {limit} dataframes, but {given} were given")
 
@@ -1354,8 +1347,8 @@ def _coerce_str(obj: Optional[str]) -> str:
 
 
 def _coerce_tuple(
-    dataframes: Iterable[Union[pd.DataFrame, Tuple[Name, pd.DataFrame]]],
-) -> Iterator[Tuple[Name, pd.DataFrame]]:
+    dataframes: Iterable[Union[pd.DataFrame, tuple[Name, pd.DataFrame]]],
+) -> Iterator[tuple[Name, pd.DataFrame]]:
    for dataframe in dataframes:
        if isinstance(dataframe, pd.DataFrame):
            yield (_rand_str(), dataframe)