arize-phoenix: arize_phoenix-3.19.4-py3-none-any.whl → arize_phoenix-3.21.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- {arize_phoenix-3.19.4.dist-info → arize_phoenix-3.21.0.dist-info}/METADATA +7 -7
- {arize_phoenix-3.19.4.dist-info → arize_phoenix-3.21.0.dist-info}/RECORD +23 -18
- phoenix/__init__.py +7 -3
- phoenix/core/model.py +8 -6
- phoenix/core/model_schema_adapter.py +6 -6
- phoenix/datasets/dataset.py +9 -521
- phoenix/datasets/fixtures.py +16 -552
- phoenix/datasets/schema.py +24 -145
- phoenix/inferences/__init__.py +0 -0
- phoenix/inferences/fixtures.py +560 -0
- phoenix/inferences/inferences.py +730 -0
- phoenix/inferences/schema.py +151 -0
- phoenix/server/app.py +5 -0
- phoenix/server/main.py +8 -8
- phoenix/session/evaluation.py +1 -2
- phoenix/session/session.py +23 -23
- phoenix/utilities/deprecation.py +30 -0
- phoenix/version.py +1 -1
- {arize_phoenix-3.19.4.dist-info → arize_phoenix-3.21.0.dist-info}/WHEEL +0 -0
- {arize_phoenix-3.19.4.dist-info → arize_phoenix-3.21.0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-3.19.4.dist-info → arize_phoenix-3.21.0.dist-info}/licenses/LICENSE +0 -0
- phoenix/{datasets → inferences}/errors.py +0 -0
- phoenix/{datasets → inferences}/validation.py +0 -0
phoenix/datasets/schema.py
CHANGED
|
@@ -1,152 +1,31 @@
|
|
|
1
|
-
import
|
|
2
|
-
|
|
3
|
-
from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
|
|
4
|
-
|
|
5
|
-
EmbeddingFeatures = Dict[str, "EmbeddingColumnNames"]
|
|
6
|
-
SchemaFieldName = str
|
|
7
|
-
SchemaFieldValue = Union[Optional[str], Optional[List[str]], Optional[EmbeddingFeatures]]
|
|
8
|
-
|
|
9
|
-
MULTI_COLUMN_SCHEMA_FIELD_NAMES: Tuple[str, ...] = ("feature_column_names", "tag_column_names")
|
|
10
|
-
SINGLE_COLUMN_SCHEMA_FIELD_NAMES: Tuple[str, ...] = (
|
|
11
|
-
"prediction_id_column_name",
|
|
12
|
-
"timestamp_column_name",
|
|
13
|
-
"prediction_label_column_name",
|
|
14
|
-
"prediction_score_column_name",
|
|
15
|
-
"actual_label_column_name",
|
|
16
|
-
"actual_score_column_name",
|
|
1
|
+
from phoenix.inferences.schema import (
|
|
2
|
+
EmbeddingColumnNames as _EmbeddingColumnNames,
|
|
17
3
|
)
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
An embedding feature is a feature that is represented by a vector.
|
|
26
|
-
The vector is a representation of unstructured data, such as text or an image
|
|
27
|
-
"""
|
|
28
|
-
|
|
29
|
-
vector_column_name: str
|
|
30
|
-
raw_data_column_name: Optional[str] = None
|
|
31
|
-
link_to_data_column_name: Optional[str] = None
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
@dataclass(frozen=True)
|
|
35
|
-
class RetrievalEmbeddingColumnNames(EmbeddingColumnNames):
|
|
36
|
-
"""
|
|
37
|
-
*** Experimental ***
|
|
38
|
-
A relationship is a column that maps a prediction to another record.
|
|
39
|
-
|
|
40
|
-
Example
|
|
41
|
-
-------
|
|
42
|
-
For example, in context retrieval from a vector store, a query is
|
|
43
|
-
embedded and used to search for relevant records in a vector store.
|
|
44
|
-
In this case you would add a column to the dataset that maps the query
|
|
45
|
-
to the vector store records. E.x. [document_1, document_5, document_3]
|
|
46
|
-
|
|
47
|
-
A table view of the primary dataset could look like this:
|
|
48
|
-
|
|
49
|
-
| query | retrieved_document_ids | document_relevance_scores |
|
|
50
|
-
|-------|------------------------|---------------------------|
|
|
51
|
-
| ... | [doc_1, doc_5, doc_3] | [0.4567, 0.3456, 0.2345] |
|
|
52
|
-
| ... | [doc_1, doc_6, doc_2] | [0.7890, 0.6789, 0.5678] |
|
|
53
|
-
| ... | [doc_1, doc_6, doc_9] | [0.9012, 0.8901, 0.0123] |
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
The corresponding vector store dataset would look like this:
|
|
57
|
-
|
|
58
|
-
| id | embedding_vector | document_text |
|
|
59
|
-
|----------|------------------|---------------|
|
|
60
|
-
| doc_1 | ... | lorem ipsum |
|
|
61
|
-
| doc_2 | ... | lorem ipsum |
|
|
62
|
-
| doc_3 | ... | lorem ipsum |
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
To declare this relationship in the schema, you would configure the schema as follows:
|
|
66
|
-
|
|
67
|
-
>>> schema = Schema(
|
|
68
|
-
... prompt_column_names=RetrievalEmbeddingColumnNames(
|
|
69
|
-
... context_retrieval_ids_column_name="retrieved_document_ids",
|
|
70
|
-
... context_retrieval_scores_column_name="document_relevance_scores",
|
|
71
|
-
... )
|
|
72
|
-
...)
|
|
73
|
-
"""
|
|
74
|
-
|
|
75
|
-
context_retrieval_ids_column_name: Optional[str] = None
|
|
76
|
-
context_retrieval_scores_column_name: Optional[str] = None
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
@dataclass(frozen=True)
|
|
80
|
-
class Schema:
|
|
81
|
-
prediction_id_column_name: Optional[str] = None
|
|
82
|
-
id_column_name: Optional[str] = None # Syntax sugar for prediction_id_column_name
|
|
83
|
-
timestamp_column_name: Optional[str] = None
|
|
84
|
-
feature_column_names: Optional[List[str]] = None
|
|
85
|
-
tag_column_names: Optional[List[str]] = None
|
|
86
|
-
prediction_label_column_name: Optional[str] = None
|
|
87
|
-
prediction_score_column_name: Optional[str] = None
|
|
88
|
-
actual_label_column_name: Optional[str] = None
|
|
89
|
-
actual_score_column_name: Optional[str] = None
|
|
90
|
-
prompt_column_names: Optional[Union[EmbeddingColumnNames, RetrievalEmbeddingColumnNames]] = None
|
|
91
|
-
response_column_names: Optional[Union[str, EmbeddingColumnNames]] = None
|
|
92
|
-
# document_column_names is used explicitly when the schema is used to capture a corpus
|
|
93
|
-
document_column_names: Optional[EmbeddingColumnNames] = None
|
|
94
|
-
embedding_feature_column_names: Optional[EmbeddingFeatures] = None
|
|
95
|
-
excluded_column_names: Optional[List[str]] = None
|
|
96
|
-
|
|
97
|
-
def __post_init__(self) -> None:
|
|
98
|
-
# re-map document_column_names to be in the prompt_column_names position
|
|
99
|
-
# This is a shortcut to leverage the same schema for model and corpus datasets
|
|
100
|
-
if self.document_column_names is not None:
|
|
101
|
-
object.__setattr__(self, "prompt_column_names", self.document_column_names)
|
|
102
|
-
object.__setattr__(self, "document_column_names", None)
|
|
103
|
-
|
|
104
|
-
if self.id_column_name is not None:
|
|
105
|
-
object.__setattr__(self, "prediction_id_column_name", self.id_column_name)
|
|
106
|
-
object.__setattr__(self, "id_column_name", None)
|
|
107
|
-
|
|
108
|
-
def replace(self, **changes: Any) -> "Schema":
|
|
109
|
-
return replace(self, **changes)
|
|
110
|
-
|
|
111
|
-
def asdict(self) -> Dict[str, str]:
|
|
112
|
-
return asdict(self)
|
|
4
|
+
from phoenix.inferences.schema import (
|
|
5
|
+
RetrievalEmbeddingColumnNames as _RetrievalEmbeddingColumnNames,
|
|
6
|
+
)
|
|
7
|
+
from phoenix.inferences.schema import (
|
|
8
|
+
Schema as _Schema,
|
|
9
|
+
)
|
|
10
|
+
from phoenix.utilities.deprecation import deprecated_class
|
|
113
11
|
|
|
114
|
-
def to_json(self) -> str:
|
|
115
|
-
"Converts the schema to a dict for JSON serialization"
|
|
116
|
-
return json.dumps(asdict(self))
|
|
117
12
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
13
|
+
@deprecated_class(
|
|
14
|
+
"The phoenix.datasets.fixtures module is deprecated, use phoenix.inferences.fixtures instead."
|
|
15
|
+
)
|
|
16
|
+
class EmbeddingColumnNames(_EmbeddingColumnNames):
|
|
17
|
+
pass
|
|
121
18
|
|
|
122
|
-
# parse embedding_feature_column_names
|
|
123
|
-
if json_data.get("embedding_feature_column_names") is not None:
|
|
124
|
-
embedding_feature_column_names = {}
|
|
125
|
-
for feature_name, column_names in json_data["embedding_feature_column_names"].items():
|
|
126
|
-
embedding_feature_column_names[feature_name] = EmbeddingColumnNames(
|
|
127
|
-
vector_column_name=column_names["vector_column_name"],
|
|
128
|
-
raw_data_column_name=column_names["raw_data_column_name"],
|
|
129
|
-
link_to_data_column_name=column_names["link_to_data_column_name"],
|
|
130
|
-
)
|
|
131
|
-
json_data["embedding_feature_column_names"] = embedding_feature_column_names
|
|
132
19
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
context_retrieval_ids_column_name=prompt.get("context_retrieval_ids_column_name"),
|
|
139
|
-
context_retrieval_scores_column_name=prompt.get(
|
|
140
|
-
"context_retrieval_scores_column_name"
|
|
141
|
-
),
|
|
142
|
-
)
|
|
20
|
+
@deprecated_class(
|
|
21
|
+
"The phoenix.datasets.fixtures module is deprecated, use phoenix.inferences.fixtures instead."
|
|
22
|
+
)
|
|
23
|
+
class RetrievalEmbeddingColumnNames(_RetrievalEmbeddingColumnNames):
|
|
24
|
+
pass
|
|
143
25
|
|
|
144
|
-
# parse response_column_names
|
|
145
|
-
if isinstance(json_data.get("response_column_names"), Mapping):
|
|
146
|
-
response_column_names = EmbeddingColumnNames(
|
|
147
|
-
vector_column_name=json_data["response_column_names"]["vector_column_name"],
|
|
148
|
-
raw_data_column_name=json_data["response_column_names"]["raw_data_column_name"],
|
|
149
|
-
)
|
|
150
|
-
json_data["response_column_names"] = response_column_names
|
|
151
26
|
|
|
152
|
-
|
|
27
|
+
@deprecated_class(
|
|
28
|
+
"The phoenix.datasets.fixtures module is deprecated, use phoenix.inferences.fixtures instead."
|
|
29
|
+
)
|
|
30
|
+
class Schema(_Schema):
|
|
31
|
+
pass
|
|
File without changes
|