arize-phoenix 4.4.3__py3-none-any.whl → 4.4.4rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic.
- {arize_phoenix-4.4.3.dist-info → arize_phoenix-4.4.4rc1.dist-info}/METADATA +4 -4
- {arize_phoenix-4.4.3.dist-info → arize_phoenix-4.4.4rc1.dist-info}/RECORD +111 -55
- {arize_phoenix-4.4.3.dist-info → arize_phoenix-4.4.4rc1.dist-info}/WHEEL +1 -1
- phoenix/__init__.py +0 -27
- phoenix/config.py +21 -7
- phoenix/core/model.py +25 -25
- phoenix/core/model_schema.py +64 -62
- phoenix/core/model_schema_adapter.py +27 -25
- phoenix/datasets/__init__.py +0 -0
- phoenix/datasets/evaluators.py +275 -0
- phoenix/datasets/experiments.py +469 -0
- phoenix/datasets/tracing.py +66 -0
- phoenix/datasets/types.py +212 -0
- phoenix/db/bulk_inserter.py +54 -14
- phoenix/db/insertion/dataset.py +234 -0
- phoenix/db/insertion/evaluation.py +6 -6
- phoenix/db/insertion/helpers.py +13 -2
- phoenix/db/migrations/types.py +29 -0
- phoenix/db/migrations/versions/10460e46d750_datasets.py +291 -0
- phoenix/db/migrations/versions/cf03bd6bae1d_init.py +2 -28
- phoenix/db/models.py +230 -3
- phoenix/inferences/fixtures.py +23 -23
- phoenix/inferences/inferences.py +7 -7
- phoenix/inferences/validation.py +1 -1
- phoenix/server/api/context.py +16 -0
- phoenix/server/api/dataloaders/__init__.py +16 -0
- phoenix/server/api/dataloaders/dataset_example_revisions.py +100 -0
- phoenix/server/api/dataloaders/dataset_example_spans.py +43 -0
- phoenix/server/api/dataloaders/experiment_annotation_summaries.py +85 -0
- phoenix/server/api/dataloaders/experiment_error_rates.py +43 -0
- phoenix/server/api/dataloaders/experiment_sequence_number.py +49 -0
- phoenix/server/api/dataloaders/project_by_name.py +31 -0
- phoenix/server/api/dataloaders/span_descendants.py +2 -3
- phoenix/server/api/dataloaders/span_projects.py +33 -0
- phoenix/server/api/dataloaders/trace_row_ids.py +39 -0
- phoenix/server/api/helpers/dataset_helpers.py +178 -0
- phoenix/server/api/input_types/AddExamplesToDatasetInput.py +16 -0
- phoenix/server/api/input_types/AddSpansToDatasetInput.py +14 -0
- phoenix/server/api/input_types/CreateDatasetInput.py +12 -0
- phoenix/server/api/input_types/DatasetExampleInput.py +14 -0
- phoenix/server/api/input_types/DatasetSort.py +17 -0
- phoenix/server/api/input_types/DatasetVersionSort.py +16 -0
- phoenix/server/api/input_types/DeleteDatasetExamplesInput.py +13 -0
- phoenix/server/api/input_types/DeleteDatasetInput.py +7 -0
- phoenix/server/api/input_types/DeleteExperimentsInput.py +9 -0
- phoenix/server/api/input_types/PatchDatasetExamplesInput.py +35 -0
- phoenix/server/api/input_types/PatchDatasetInput.py +14 -0
- phoenix/server/api/mutations/__init__.py +13 -0
- phoenix/server/api/mutations/auth.py +11 -0
- phoenix/server/api/mutations/dataset_mutations.py +520 -0
- phoenix/server/api/mutations/experiment_mutations.py +65 -0
- phoenix/server/api/{types/ExportEventsMutation.py → mutations/export_events_mutations.py} +17 -14
- phoenix/server/api/mutations/project_mutations.py +42 -0
- phoenix/server/api/openapi/__init__.py +0 -0
- phoenix/server/api/openapi/main.py +6 -0
- phoenix/server/api/openapi/schema.py +15 -0
- phoenix/server/api/queries.py +503 -0
- phoenix/server/api/routers/v1/__init__.py +77 -2
- phoenix/server/api/routers/v1/dataset_examples.py +178 -0
- phoenix/server/api/routers/v1/datasets.py +861 -0
- phoenix/server/api/routers/v1/evaluations.py +4 -2
- phoenix/server/api/routers/v1/experiment_evaluations.py +65 -0
- phoenix/server/api/routers/v1/experiment_runs.py +108 -0
- phoenix/server/api/routers/v1/experiments.py +174 -0
- phoenix/server/api/routers/v1/spans.py +3 -1
- phoenix/server/api/routers/v1/traces.py +1 -4
- phoenix/server/api/schema.py +2 -303
- phoenix/server/api/types/AnnotatorKind.py +10 -0
- phoenix/server/api/types/Cluster.py +19 -19
- phoenix/server/api/types/CreateDatasetPayload.py +8 -0
- phoenix/server/api/types/Dataset.py +282 -63
- phoenix/server/api/types/DatasetExample.py +85 -0
- phoenix/server/api/types/DatasetExampleRevision.py +34 -0
- phoenix/server/api/types/DatasetVersion.py +14 -0
- phoenix/server/api/types/Dimension.py +30 -29
- phoenix/server/api/types/EmbeddingDimension.py +40 -34
- phoenix/server/api/types/Event.py +16 -16
- phoenix/server/api/types/ExampleRevisionInterface.py +14 -0
- phoenix/server/api/types/Experiment.py +135 -0
- phoenix/server/api/types/ExperimentAnnotationSummary.py +13 -0
- phoenix/server/api/types/ExperimentComparison.py +19 -0
- phoenix/server/api/types/ExperimentRun.py +91 -0
- phoenix/server/api/types/ExperimentRunAnnotation.py +57 -0
- phoenix/server/api/types/Inferences.py +80 -0
- phoenix/server/api/types/InferencesRole.py +23 -0
- phoenix/server/api/types/Model.py +43 -42
- phoenix/server/api/types/Project.py +26 -12
- phoenix/server/api/types/Span.py +78 -2
- phoenix/server/api/types/TimeSeries.py +6 -6
- phoenix/server/api/types/Trace.py +15 -4
- phoenix/server/api/types/UMAPPoints.py +1 -1
- phoenix/server/api/types/node.py +5 -111
- phoenix/server/api/types/pagination.py +10 -52
- phoenix/server/app.py +99 -49
- phoenix/server/main.py +49 -27
- phoenix/server/openapi/docs.py +3 -0
- phoenix/server/static/index.js +2246 -1368
- phoenix/server/templates/index.html +1 -0
- phoenix/services.py +15 -15
- phoenix/session/client.py +316 -21
- phoenix/session/session.py +47 -37
- phoenix/trace/exporter.py +14 -9
- phoenix/trace/fixtures.py +133 -7
- phoenix/trace/span_evaluations.py +3 -3
- phoenix/trace/trace_dataset.py +6 -6
- phoenix/utilities/json.py +61 -0
- phoenix/utilities/re.py +50 -0
- phoenix/version.py +1 -1
- phoenix/server/api/types/DatasetRole.py +0 -23
- {arize_phoenix-4.4.3.dist-info → arize_phoenix-4.4.4rc1.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-4.4.3.dist-info → arize_phoenix-4.4.4rc1.dist-info}/licenses/LICENSE +0 -0
- /phoenix/server/api/{helpers.py → helpers/__init__.py} +0 -0
phoenix/core/model_schema.py
CHANGED
```diff
@@ -48,7 +48,7 @@ from pandas.core.dtypes.common import (
 from typing_extensions import TypeAlias, TypeGuard
 from wrapt import ObjectProxy
 
-from phoenix.config import GENERATED_DATASET_NAME_PREFIX
+from phoenix.config import GENERATED_INFERENCES_NAME_PREFIX
 from phoenix.datetime_utils import floor_to_minute
 
 
@@ -185,7 +185,7 @@ class RetrievalEmbedding(Embedding):
             yield value
 
 
-class DatasetRole(Enum):
+class InferencesRole(Enum):
     """A dataframe's role in a Model: primary or reference (as
     baseline for drift).
     """
@@ -194,8 +194,8 @@ class DatasetRole(Enum):
     REFERENCE = auto()
 
 
-PRIMARY = DatasetRole.PRIMARY
-REFERENCE = DatasetRole.REFERENCE
+PRIMARY = InferencesRole.PRIMARY
+REFERENCE = InferencesRole.REFERENCE
 
 
 @dataclass(frozen=True, repr=False, eq=False)
@@ -381,7 +381,7 @@ class Dimension(Column, ABC):
             # But we really want the role to be specified for a Dimension.
             raise ValueError("role must be assigned")
 
-    def __getitem__(self, df_role: DatasetRole) -> "pd.Series[Any]":
+    def __getitem__(self, df_role: InferencesRole) -> "pd.Series[Any]":
         if self._model is None:
             return pd.Series(dtype=object)
         model = cast(Model, self._model)
@@ -416,7 +416,7 @@ class ScalarDimension(Dimension):
        if self._model is None or self.data_type is CONTINUOUS:
             return ()
         model = cast(Model, self._model)
-        return model.dimension_categories_from_all_datasets(self.name)
+        return model.dimension_categories_from_all_inferences(self.name)
 
 
 @dataclass(frozen=True)
@@ -582,7 +582,7 @@ class EventId(NamedTuple):
     """Identifies an event."""
 
     row_id: int = 0
-    dataset_id: DatasetRole = PRIMARY
+    inferences_id: InferencesRole = PRIMARY
 
     def __str__(self) -> str:
         return ":".join(map(str, self))
@@ -625,7 +625,7 @@ class Events(ModelData):
         self,
         df: pd.DataFrame,
         /,
-        role: DatasetRole,
+        role: InferencesRole,
         **kwargs: Any,
     ) -> None:
         super().__init__(df, **kwargs)
@@ -676,7 +676,7 @@ class Events(ModelData):
         return super().__getitem__(key)
 
 
-class Dataset(Events):
+class Inferences(Events):
     """pd.DataFrame wrapped with extra functions and metadata."""
 
     def __init__(
@@ -701,13 +701,13 @@ class Dataset(Events):
         friendly. Falls back to the role of the dataset if no name is provided.
         """
         ds_name = self._self_name
-        if ds_name.startswith(GENERATED_DATASET_NAME_PREFIX):
+        if ds_name.startswith(GENERATED_INFERENCES_NAME_PREFIX):
             # The generated names are UUIDs so use the role as the name
-            return "primary" if self.role is DatasetRole.PRIMARY else "reference"
+            return "primary" if self.role is InferencesRole.PRIMARY else "reference"
         return ds_name
 
     @property
-    def role(self) -> DatasetRole:
+    def role(self) -> InferencesRole:
         return self._self_role
 
     @property
@@ -746,14 +746,14 @@ class Model:
     a column of NaNs.
     """
 
-    _datasets: Dict[DatasetRole, Dataset]
+    _inference_sets: Dict[InferencesRole, Inferences]
     _dimensions: Dict[Name, Dimension]
     _dim_names_by_role: Dict[DimensionRole, List[Name]]
-    _original_columns_by_role: Dict[DatasetRole, "pd.Index[Any]"]
+    _original_columns_by_role: Dict[InferencesRole, "pd.Index[Any]"]
     _default_timestamps_factory: _ConstantValueSeriesFactory
     _nan_series_factory: _ConstantValueSeriesFactory
-    _dimension_categories_from_all_datasets: _Cache[Name, Tuple[str, ...]]
-    _dimension_min_max_from_all_datasets: _Cache[Name, Tuple[float, float]]
+    _dimension_categories_from_all_inferences: _Cache[Name, Tuple[str, ...]]
+    _dimension_min_max_from_all_inferences: _Cache[Name, Tuple[float, float]]
 
     def __init__(
         self,
@@ -769,12 +769,12 @@ class Model:
         # memoization
        object.__setattr__(
             self,
-            "_dimension_categories_from_all_datasets",
+            "_dimension_categories_from_all_inferences",
             _Cache[Name, "pd.Series[Any]"](),
         )
         object.__setattr__(
             self,
-            "_dimension_min_max_from_all_datasets",
+            "_dimension_min_max_from_all_inferences",
             _Cache[Name, Tuple[float, float]](),
         )
 
@@ -785,21 +785,21 @@ class Model:
         str_col_dfs = _coerce_str_column_names(dfs)
         padded_dfs = _add_padding(str_col_dfs, pd.DataFrame)
         padded_df_names = _add_padding(df_names, _rand_str)
-        datasets = starmap(
-            self._new_dataset,
-            zip(padded_dfs, padded_df_names, DatasetRole),
+        inference_sets = starmap(
+            self._new_inferences,
+            zip(padded_dfs, padded_df_names, InferencesRole),
         )
-        # Store datasets by role.
+        # Store inferences by role.
         object.__setattr__(
             self,
-            "_datasets",
-            {dataset.role: dataset for dataset in datasets},
+            "_inference_sets",
+            {inferences.role: inferences for inferences in inference_sets},
         )
         # Preserve originals, useful for exporting.
         object.__setattr__(
             self,
             "_original_columns_by_role",
-            {role: dataset.columns for role, dataset in self._datasets.items()},
+            {role: inferences.columns for role, inferences in self._inference_sets.items()},
         )
 
         object.__setattr__(
@@ -828,7 +828,7 @@ class Model:
            (name, self._new_dimension(name, role=FEATURE))
             for name in _get_omitted_column_names(
                 self._dimensions.values(),
-                self._datasets.values(),
+                self._inference_sets.values(),
             )
         )
 
@@ -849,7 +849,7 @@ class Model:
                 data_type=(
                     _guess_data_type(
                         dataset.loc[:, dim.name]
-                        for dataset in self._datasets.values()
+                        for dataset in self._inference_sets.values()
                         if dim.name in dataset.columns
                     )
                 ),
@@ -859,9 +859,9 @@ class Model:
         # Add TIMESTAMP if missing.
         # If needed, normalize the timestamps values.
         # If needed, sort the dataframes by time.
-        for dataset_role, dataset in list(self._datasets.items()):
+        for inferences_role, dataset in list(self._inference_sets.items()):
             df = dataset.__wrapped__
-            df_original_columns = self._original_columns_by_role[dataset_role]
+            df_original_columns = self._original_columns_by_role[inferences_role]
 
             # PREDICTION_ID
             dim_pred_id = self._dimensions.get(
@@ -897,20 +897,20 @@ class Model:
                 df = df.set_index(dim_time.name, drop=False)
 
             # Update dataset since its dataframe may have changed.
-            self._datasets[dataset_role] = self._new_dataset(
-                df, name=dataset.name, role=dataset_role
+            self._inference_sets[inferences_role] = self._new_inferences(
+                df, name=dataset.name, role=inferences_role
             )
 
     @cached_property
     def is_empty(self) -> bool:
         """Returns True if the model has no data."""
-        return not any(map(len, self._datasets.values()))
+        return not any(map(len, self._inference_sets.values()))
 
     def export_rows_as_parquet_file(
         self,
-        row_numbers: Mapping[DatasetRole, Iterable[int]],
+        row_numbers: Mapping[InferencesRole, Iterable[int]],
         parquet_file: BinaryIO,
-        cluster_ids: Optional[Mapping[DatasetRole, Mapping[int, str]]] = None,
+        cluster_ids: Optional[Mapping[InferencesRole, Mapping[int, str]]] = None,
     ) -> None:
         """
         Given row numbers, exports dataframe subset into parquet file.
@@ -921,29 +921,31 @@ class Model:
 
         Parameters
         ----------
-        row_numbers: Mapping[DatasetRole, Iterable[int]]
+        row_numbers: Mapping[InferencesRole, Iterable[int]]
             mapping of dataset role to list of row numbers
         parquet_file: file handle
             output parquet file handle
-        cluster_ids: Optional[Mapping[DatasetRole, Mapping[int, str]]]
-            mapping of dataset role to mapping of row number to cluster id.
+        cluster_ids: Optional[Mapping[InferencesRole, Mapping[int, str]]]
+            mapping of inferences role to mapping of row number to cluster id.
             If cluster_ids is non-empty, a new column is inserted to the
             dataframe containing the cluster IDs of each row in the exported
             data. The name of the added column name is `__phoenix_cluster_id__`.
         """
         export_dataframes = [pd.DataFrame()]
-        model_has_multiple_datasets = sum(not df.empty for df in self._datasets.values()) > 1
-        for dataset_role, numbers in row_numbers.items():
-            df = self._datasets[dataset_role]
+        model_has_multiple_inference_sets = (
+            sum(not df.empty for df in self._inference_sets.values()) > 1
+        )
+        for inferences_role, numbers in row_numbers.items():
+            df = self._inference_sets[inferences_role]
             columns = [
                 df.columns.get_loc(column_name)
-                for column_name in self._original_columns_by_role[dataset_role]
+                for column_name in self._original_columns_by_role[inferences_role]
             ]
             rows = pd.Series(sorted(set(numbers)))
             filtered_df = df.iloc[rows, columns].reset_index(drop=True)
-            if model_has_multiple_datasets:
+            if model_has_multiple_inference_sets:
                 filtered_df["__phoenix_dataset_name__"] = df.display_name
-            if cluster_ids and (ids := cluster_ids.get(dataset_role)):
+            if cluster_ids and (ids := cluster_ids.get(inferences_role)):
                 filtered_df["__phoenix_cluster_id__"] = rows.apply(ids.get)
             export_dataframes.append(filtered_df)
         pd.concat(export_dataframes).to_parquet(
@@ -977,24 +979,24 @@ class Model:
         if not dim.is_dummy and isinstance(dim, EmbeddingDimension)
         )
 
-    def dimension_categories_from_all_datasets(
+    def dimension_categories_from_all_inferences(
         self,
         dimension_name: Name,
     ) -> Tuple[str, ...]:
         dim = self[dimension_name]
         if dim.data_type is CONTINUOUS:
             return cast(Tuple[str, ...], ())
-        with self._dimension_categories_from_all_datasets() as cache:
+        with self._dimension_categories_from_all_inferences() as cache:
             try:
                 return cache[dimension_name]
             except KeyError:
                 pass
         categories_by_dataset = (
-            pd.Series(dim[role].unique()).dropna().astype(str) for role in DatasetRole
+            pd.Series(dim[role].unique()).dropna().astype(str) for role in InferencesRole
        )
         all_values_combined = chain.from_iterable(categories_by_dataset)
         ans = tuple(np.sort(pd.Series(all_values_combined).unique()))
-        with self._dimension_categories_from_all_datasets() as cache:
+        with self._dimension_categories_from_all_inferences() as cache:
             cache[dimension_name] = ans
         return ans
 
@@ -1005,24 +1007,24 @@ class Model:
         dim = self[dimension_name]
         if dim.data_type is not CONTINUOUS:
             return (np.nan, np.nan)
-        with self._dimension_min_max_from_all_datasets() as cache:
+        with self._dimension_min_max_from_all_inferences() as cache:
             try:
                 return cache[dimension_name]
             except KeyError:
                 pass
-        min_max_by_df = (_agg_min_max(dim[df_role]) for df_role in DatasetRole)
+        min_max_by_df = (_agg_min_max(dim[df_role]) for df_role in InferencesRole)
         all_values_combined = chain.from_iterable(min_max_by_df)
         min_max = _agg_min_max(pd.Series(all_values_combined))
         ans = (min_max.min(), min_max.max())
-        with self._dimension_min_max_from_all_datasets() as cache:
+        with self._dimension_min_max_from_all_inferences() as cache:
             cache[dimension_name] = ans
         return ans
 
     @overload
-    def __getitem__(self, key: Type[Dataset]) -> Iterator[Dataset]: ...
+    def __getitem__(self, key: Type[Inferences]) -> Iterator[Inferences]: ...
 
     @overload
-    def __getitem__(self, key: DatasetRole) -> Dataset: ...
+    def __getitem__(self, key: InferencesRole) -> Inferences: ...
 
     @overload
     def __getitem__(self, key: ColumnKey) -> Dimension: ...
@@ -1049,10 +1051,10 @@ class Model:
     ) -> Iterator[Dimension]: ...
 
     def __getitem__(self, key: Any) -> Any:
-        if key is Dataset:
-            return self._datasets.values()
-        if isinstance(key, DatasetRole):
-            return self._datasets[key]
+        if key is Inferences:
+            return self._inference_sets.values()
+        if isinstance(key, InferencesRole):
+            return self._inference_sets[key]
         if _is_column_key(key):
             return self._get_dim(key)
         if _is_multi_dimension_key(key):
@@ -1152,17 +1154,17 @@ class Model:
         )
         raise ValueError(f"invalid argument: {repr(obj)}")
 
-    def _new_dataset(
+    def _new_inferences(
         self,
         df: pd.DataFrame,
         /,
         name: str,
-        role: DatasetRole,
-    ) -> Dataset:
-        """Creates a new Dataset, setting the model weak reference to the
+        role: InferencesRole,
+    ) -> Inferences:
+        """Creates a new Inferences, setting the model weak reference to the
         `self` Model instance.
         """
-        return Dataset(df, name=name, role=role, _model=proxy(self))
+        return Inferences(df, name=name, role=role, _model=proxy(self))
 
 
 @dataclass(frozen=True)
@@ -1344,7 +1346,7 @@ def _series_uuid(length: int) -> "pd.Series[str]":
 
 
 def _raise_if_too_many_dataframes(given: int) -> None:
-    limit = len(DatasetRole)
+    limit = len(InferencesRole)
     if not 0 < given <= limit:
         raise ValueError(f"expected between 1 to {limit} dataframes, but {given} were given")
 
```
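The hunks above amount to one mechanical vocabulary rename with no behavioral change: `Dataset` → `Inferences`, `DatasetRole` → `InferencesRole`, and the matching `_datasets`/`_new_dataset`/`..._from_all_datasets` internals. As a quick, self-contained sketch of the renamed surface (assumes arize-phoenix 4.4.4rc1 is installed; the `row_id` value is arbitrary):

```python
from phoenix.core.model_schema import PRIMARY, EventId, InferencesRole

# The module-level PRIMARY/REFERENCE aliases now point at the renamed enum.
assert PRIMARY is InferencesRole.PRIMARY
assert len(InferencesRole) == 2  # the limit used by _raise_if_too_many_dataframes

# EventId's second field was renamed dataset_id -> inferences_id.
event = EventId(row_id=3, inferences_id=InferencesRole.REFERENCE)
print(str(event))  # joins the fields with ":" per EventId.__str__
```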
phoenix/core/model_schema_adapter.py
CHANGED

```diff
@@ -10,21 +10,21 @@ from phoenix import EmbeddingColumnNames, Inferences
 from phoenix.core.model import _get_embedding_dimensions
 from phoenix.core.model_schema import Embedding, Model, RetrievalEmbedding, Schema
 from phoenix.inferences.schema import RetrievalEmbeddingColumnNames
-from phoenix.inferences.schema import Schema as DatasetSchema
+from phoenix.inferences.schema import Schema as InferencesSchema
 
-DatasetName: TypeAlias = str
+InferencesName: TypeAlias = str
 ColumnName: TypeAlias = str
 DisplayName: TypeAlias = str
 
 
-def create_model_from_datasets(*datasets: Optional[Inferences]) -> Model:
+def create_model_from_inferences(*inference_sets: Optional[Inferences]) -> Model:
     # TODO: move this validation into model_schema.Model.
-    if len(datasets) > 1 and datasets[0] is not None:
+    if len(inference_sets) > 1 and inference_sets[0] is not None:
         # Check that for each embedding dimension all vectors
-        # have the same length between datasets.
-        _ = _get_embedding_dimensions(datasets[0], datasets[1])
+        # have the same length between inferences.
+        _ = _get_embedding_dimensions(inference_sets[0], inference_sets[1])
 
-    named_dataframes: List[Tuple[DatasetName, pd.DataFrame]] = []
+    named_dataframes: List[Tuple[InferencesName, pd.DataFrame]] = []
     prediction_ids: List[ColumnName] = []
     timestamps: List[ColumnName] = []
     prediction_labels: List[ColumnName] = []
@@ -37,33 +37,35 @@ def create_model_from_datasets(*datasets: Optional[Inferences]) -> Model:
     prompts: List[EmbeddingColumnNames] = []
     responses: List[Union[str, EmbeddingColumnNames]] = []
 
-    for dataset in filter(_is_dataset, datasets):
-        df = dataset.dataframe
+    for inferences in filter(_is_inferences, inference_sets):
+        df = inferences.dataframe
         # Coerce string column names at run time.
         df = df.set_axis(
             map(str, df.columns),
             axis=1,
         )
-        named_dataframes.append((dataset.name, df))
-        dataset_schema = dataset.schema if dataset.schema is not None else DatasetSchema()
+        named_dataframes.append((inferences.name, df))
+        inferences_schema = (
+            inferences.schema if inferences.schema is not None else InferencesSchema()
+        )
         for display_name, embedding in (
-            dataset_schema.embedding_feature_column_names or {}
+            inferences_schema.embedding_feature_column_names or {}
         ).items():
             if display_name not in embeddings:
                 embeddings[display_name] = embedding
-        if dataset_schema.prompt_column_names is not None:
-            prompts.append(dataset_schema.prompt_column_names)
-        if dataset_schema.response_column_names is not None:
-            responses.append(dataset_schema.response_column_names)
+        if inferences_schema.prompt_column_names is not None:
+            prompts.append(inferences_schema.prompt_column_names)
+        if inferences_schema.response_column_names is not None:
+            responses.append(inferences_schema.response_column_names)
         for source, sink in (
-            ([dataset_schema.prediction_id_column_name], prediction_ids),
-            ([dataset_schema.timestamp_column_name], timestamps),
-            ([dataset_schema.prediction_label_column_name], prediction_labels),
-            ([dataset_schema.prediction_score_column_name], prediction_scores),
-            ([dataset_schema.actual_label_column_name], actual_labels),
-            ([dataset_schema.actual_score_column_name], actual_scores),
-            (dataset_schema.feature_column_names or (), features),
-            (dataset_schema.tag_column_names or (), tags),
+            ([inferences_schema.prediction_id_column_name], prediction_ids),
+            ([inferences_schema.timestamp_column_name], timestamps),
+            ([inferences_schema.prediction_label_column_name], prediction_labels),
+            ([inferences_schema.prediction_score_column_name], prediction_scores),
+            ([inferences_schema.actual_label_column_name], actual_labels),
+            ([inferences_schema.actual_score_column_name], actual_scores),
+            (inferences_schema.feature_column_names or (), features),
+            (inferences_schema.tag_column_names or (), tags),
         ):
             # Coerce None to "" to simplify type checks.
             sink.extend(map(lambda s: "" if s is None else str(s), source))
@@ -132,7 +134,7 @@ def create_model_from_datasets(*datasets: Optional[Inferences]) -> Model:
     )
 
 
-def _is_dataset(obj: Optional[Inferences]) -> TypeGuard[Inferences]:
+def _is_inferences(obj: Optional[Inferences]) -> TypeGuard[Inferences]:
     return type(obj) is Inferences
 
 
```
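The adapter applies the same rename to its entry point, `create_model_from_datasets` → `create_model_from_inferences`. A minimal usage sketch under stated assumptions (the toy dataframe and its column names are illustrative; `Inferences` and `Schema` are phoenix's public types, as in the import context above):

```python
# Minimal sketch, assuming arize-phoenix 4.4.4rc1; the data is illustrative only.
import pandas as pd

from phoenix import Inferences, Schema
from phoenix.core.model_schema import InferencesRole
from phoenix.core.model_schema_adapter import create_model_from_inferences

df = pd.DataFrame({"prediction_id": ["a", "b"], "label": ["cat", "dog"]})
schema = Schema(
    prediction_id_column_name="prediction_id",
    prediction_label_column_name="label",
)
primary = Inferences(df, schema, name="primary")

model = create_model_from_inferences(primary)  # a reference set is optional
assert not model.is_empty
print(model[InferencesRole.PRIMARY].display_name)  # "primary"
```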
|