arize-phoenix 3.19.4__py3-none-any.whl → 3.20.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of arize-phoenix might be problematic. Click here for more details.

@@ -1,152 +1,31 @@
1
- import json
2
- from dataclasses import asdict, dataclass, replace
3
- from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
4
-
5
- EmbeddingFeatures = Dict[str, "EmbeddingColumnNames"]
6
- SchemaFieldName = str
7
- SchemaFieldValue = Union[Optional[str], Optional[List[str]], Optional[EmbeddingFeatures]]
8
-
9
- MULTI_COLUMN_SCHEMA_FIELD_NAMES: Tuple[str, ...] = ("feature_column_names", "tag_column_names")
10
- SINGLE_COLUMN_SCHEMA_FIELD_NAMES: Tuple[str, ...] = (
11
- "prediction_id_column_name",
12
- "timestamp_column_name",
13
- "prediction_label_column_name",
14
- "prediction_score_column_name",
15
- "actual_label_column_name",
16
- "actual_score_column_name",
1
+ from phoenix.inferences.schema import (
2
+ EmbeddingColumnNames as _EmbeddingColumnNames,
17
3
  )
18
- LLM_SCHEMA_FIELD_NAMES = ["prompt_column_names", "response_column_names"]
19
-
20
-
21
- @dataclass(frozen=True)
22
- class EmbeddingColumnNames(Dict[str, Any]):
23
- """
24
- A dataclass to hold the column names for the embedding features.
25
- An embedding feature is a feature that is represented by a vector.
26
- The vector is a representation of unstructured data, such as text or an image
27
- """
28
-
29
- vector_column_name: str
30
- raw_data_column_name: Optional[str] = None
31
- link_to_data_column_name: Optional[str] = None
32
-
33
-
34
- @dataclass(frozen=True)
35
- class RetrievalEmbeddingColumnNames(EmbeddingColumnNames):
36
- """
37
- *** Experimental ***
38
- A relationship is a column that maps a prediction to another record.
39
-
40
- Example
41
- -------
42
- For example, in context retrieval from a vector store, a query is
43
- embedded and used to search for relevant records in a vector store.
44
- In this case you would add a column to the dataset that maps the query
45
- to the vector store records. E.g. [document_1, document_5, document_3]
46
-
47
- A table view of the primary dataset could look like this:
48
-
49
- | query | retrieved_document_ids | document_relevance_scores |
50
- |-------|------------------------|---------------------------|
51
- | ... | [doc_1, doc_5, doc_3] | [0.4567, 0.3456, 0.2345] |
52
- | ... | [doc_1, doc_6, doc_2] | [0.7890, 0.6789, 0.5678] |
53
- | ... | [doc_1, doc_6, doc_9] | [0.9012, 0.8901, 0.0123] |
54
-
55
-
56
- The corresponding vector store dataset would look like this:
57
-
58
- | id | embedding_vector | document_text |
59
- |----------|------------------|---------------|
60
- | doc_1 | ... | lorem ipsum |
61
- | doc_2 | ... | lorem ipsum |
62
- | doc_3 | ... | lorem ipsum |
63
-
64
-
65
- To declare this relationship in the schema, you would configure the schema as follows:
66
-
67
- >>> schema = Schema(
68
- ... prompt_column_names=RetrievalEmbeddingColumnNames(
69
- ... context_retrieval_ids_column_name="retrieved_document_ids",
70
- ... context_retrieval_scores_column_name="document_relevance_scores",
71
- ... )
72
- ...)
73
- """
74
-
75
- context_retrieval_ids_column_name: Optional[str] = None
76
- context_retrieval_scores_column_name: Optional[str] = None
77
-
78
-
79
- @dataclass(frozen=True)
80
- class Schema:
81
- prediction_id_column_name: Optional[str] = None
82
- id_column_name: Optional[str] = None # Syntax sugar for prediction_id_column_name
83
- timestamp_column_name: Optional[str] = None
84
- feature_column_names: Optional[List[str]] = None
85
- tag_column_names: Optional[List[str]] = None
86
- prediction_label_column_name: Optional[str] = None
87
- prediction_score_column_name: Optional[str] = None
88
- actual_label_column_name: Optional[str] = None
89
- actual_score_column_name: Optional[str] = None
90
- prompt_column_names: Optional[Union[EmbeddingColumnNames, RetrievalEmbeddingColumnNames]] = None
91
- response_column_names: Optional[Union[str, EmbeddingColumnNames]] = None
92
- # document_column_names is used explicitly when the schema is used to capture a corpus
93
- document_column_names: Optional[EmbeddingColumnNames] = None
94
- embedding_feature_column_names: Optional[EmbeddingFeatures] = None
95
- excluded_column_names: Optional[List[str]] = None
96
-
97
- def __post_init__(self) -> None:
98
- # re-map document_column_names to be in the prompt_column_names position
99
- # This is a shortcut to leverage the same schema for model and corpus datasets
100
- if self.document_column_names is not None:
101
- object.__setattr__(self, "prompt_column_names", self.document_column_names)
102
- object.__setattr__(self, "document_column_names", None)
103
-
104
- if self.id_column_name is not None:
105
- object.__setattr__(self, "prediction_id_column_name", self.id_column_name)
106
- object.__setattr__(self, "id_column_name", None)
107
-
108
- def replace(self, **changes: Any) -> "Schema":
109
- return replace(self, **changes)
110
-
111
- def asdict(self) -> Dict[str, str]:
112
- return asdict(self)
4
+ from phoenix.inferences.schema import (
5
+ RetrievalEmbeddingColumnNames as _RetrievalEmbeddingColumnNames,
6
+ )
7
+ from phoenix.inferences.schema import (
8
+ Schema as _Schema,
9
+ )
10
+ from phoenix.utilities.deprecation import deprecated_class
113
11
 
114
- def to_json(self) -> str:
115
- "Converts the schema to a JSON string"
116
- return json.dumps(asdict(self))
117
12
 
118
- @classmethod
119
- def from_json(cls, json_string: str) -> "Schema":
120
- json_data = json.loads(json_string)
13
+ @deprecated_class(
14
+ "The phoenix.datasets.fixtures module is deprecated, use phoenix.inferences.fixtures instead."
15
+ )
16
+ class EmbeddingColumnNames(_EmbeddingColumnNames):
17
+ pass
121
18
 
122
- # parse embedding_feature_column_names
123
- if json_data.get("embedding_feature_column_names") is not None:
124
- embedding_feature_column_names = {}
125
- for feature_name, column_names in json_data["embedding_feature_column_names"].items():
126
- embedding_feature_column_names[feature_name] = EmbeddingColumnNames(
127
- vector_column_name=column_names["vector_column_name"],
128
- raw_data_column_name=column_names["raw_data_column_name"],
129
- link_to_data_column_name=column_names["link_to_data_column_name"],
130
- )
131
- json_data["embedding_feature_column_names"] = embedding_feature_column_names
132
19
 
133
- # parse prompt_column_names
134
- if (prompt := json_data.get("prompt_column_names")) is not None:
135
- json_data["prompt_column_names"] = RetrievalEmbeddingColumnNames(
136
- vector_column_name=prompt.get("vector_column_name"),
137
- raw_data_column_name=prompt.get("raw_data_column_name"),
138
- context_retrieval_ids_column_name=prompt.get("context_retrieval_ids_column_name"),
139
- context_retrieval_scores_column_name=prompt.get(
140
- "context_retrieval_scores_column_name"
141
- ),
142
- )
20
+ @deprecated_class(
21
+ "The phoenix.datasets.fixtures module is deprecated, use phoenix.inferences.fixtures instead."
22
+ )
23
+ class RetrievalEmbeddingColumnNames(_RetrievalEmbeddingColumnNames):
24
+ pass
143
25
 
144
- # parse response_column_names
145
- if isinstance(json_data.get("response_column_names"), Mapping):
146
- response_column_names = EmbeddingColumnNames(
147
- vector_column_name=json_data["response_column_names"]["vector_column_name"],
148
- raw_data_column_name=json_data["response_column_names"]["raw_data_column_name"],
149
- )
150
- json_data["response_column_names"] = response_column_names
151
26
 
152
- return cls(**json_data)
27
+ @deprecated_class(
28
+ "The phoenix.datasets.fixtures module is deprecated, use phoenix.inferences.fixtures instead."
29
+ )
30
+ class Schema(_Schema):
31
+ pass
File without changes