arize-phoenix 3.19.4__py3-none-any.whl → 3.21.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only and reflects the changes between those package versions.


@@ -0,0 +1,151 @@
+ import json
+ from dataclasses import asdict, dataclass, replace
+ from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
+
+ EmbeddingFeatures = Dict[str, "EmbeddingColumnNames"]
+ SchemaFieldName = str
+ SchemaFieldValue = Union[Optional[str], Optional[List[str]], Optional[EmbeddingFeatures]]
+
+ MULTI_COLUMN_SCHEMA_FIELD_NAMES: Tuple[str, ...] = ("feature_column_names", "tag_column_names")
+ SINGLE_COLUMN_SCHEMA_FIELD_NAMES: Tuple[str, ...] = (
+     "prediction_id_column_name",
+     "timestamp_column_name",
+     "prediction_label_column_name",
+     "prediction_score_column_name",
+     "actual_label_column_name",
+     "actual_score_column_name",
+ )
+ LLM_SCHEMA_FIELD_NAMES = ["prompt_column_names", "response_column_names"]
+
+
+ @dataclass(frozen=True)
+ class EmbeddingColumnNames(Dict[str, Any]):
+     """
+     A dataclass to hold the column names for the embedding features.
+     An embedding feature is a feature that is represented by a vector.
+     The vector is a representation of unstructured data, such as text or an image
+     """
+
+     vector_column_name: str
+     raw_data_column_name: Optional[str] = None
+     link_to_data_column_name: Optional[str] = None
+
+
+ @dataclass(frozen=True)
+ class RetrievalEmbeddingColumnNames(EmbeddingColumnNames):
+     """
+     A relationship is a column that maps a prediction to another record.
+
+     Example
+     -------
+     For example, in context retrieval from a vector store, a query is
+     embedded and used to search for relevant records in a vector store.
+     In this case you would add a column to the dataset that maps the query
+     to the vector store records. E.x. [document_1, document_5, document_3]
+
+     A table view of the primary dataset could look like this:
+
+     | query | retrieved_document_ids | document_relevance_scores |
+     |-------|------------------------|---------------------------|
+     | ...   | [doc_1, doc_5, doc_3]  | [0.4567, 0.3456, 0.2345]  |
+     | ...   | [doc_1, doc_6, doc_2]  | [0.7890, 0.6789, 0.5678]  |
+     | ...   | [doc_1, doc_6, doc_9]  | [0.9012, 0.8901, 0.0123]  |
+
+
+     The corresponding vector store dataset would look like this:
+
+     | id    | embedding_vector | document_text |
+     |-------|------------------|---------------|
+     | doc_1 | ...              | lorem ipsum   |
+     | doc_2 | ...              | lorem ipsum   |
+     | doc_3 | ...              | lorem ipsum   |
+
+
+     To declare this relationship in the schema, you would configure the schema as follows:
+
+     >>> schema = Schema(
+     ...     prompt_column_names=RetrievalEmbeddingColumnNames(
+     ...         context_retrieval_ids_column_name="retrieved_document_ids",
+     ...         context_retrieval_scores_column_name="document_relevance_scores",
+     ...     )
+     ...)
+     """
+
+     context_retrieval_ids_column_name: Optional[str] = None
+     context_retrieval_scores_column_name: Optional[str] = None
+
+
+ @dataclass(frozen=True)
+ class Schema:
+     prediction_id_column_name: Optional[str] = None
+     id_column_name: Optional[str] = None  # Syntax sugar for prediction_id_column_name
+     timestamp_column_name: Optional[str] = None
+     feature_column_names: Optional[List[str]] = None
+     tag_column_names: Optional[List[str]] = None
+     prediction_label_column_name: Optional[str] = None
+     prediction_score_column_name: Optional[str] = None
+     actual_label_column_name: Optional[str] = None
+     actual_score_column_name: Optional[str] = None
+     prompt_column_names: Optional[Union[EmbeddingColumnNames, RetrievalEmbeddingColumnNames]] = None
+     response_column_names: Optional[Union[str, EmbeddingColumnNames]] = None
+     # document_column_names is used explicitly when the schema is used to capture a corpus
+     document_column_names: Optional[EmbeddingColumnNames] = None
+     embedding_feature_column_names: Optional[EmbeddingFeatures] = None
+     excluded_column_names: Optional[List[str]] = None
+
+     def __post_init__(self) -> None:
+         # re-map document_column_names to be in the prompt_column_names position
+         # This is a shortcut to leverage the same schema for model and corpus datasets
+         if self.document_column_names is not None:
+             object.__setattr__(self, "prompt_column_names", self.document_column_names)
+             object.__setattr__(self, "document_column_names", None)
+
+         if self.id_column_name is not None:
+             object.__setattr__(self, "prediction_id_column_name", self.id_column_name)
+             object.__setattr__(self, "id_column_name", None)
+
+     def replace(self, **changes: Any) -> "Schema":
+         return replace(self, **changes)
+
+     def asdict(self) -> Dict[str, str]:
+         return asdict(self)
+
+     def to_json(self) -> str:
+         "Converts the schema to a dict for JSON serialization"
+         return json.dumps(asdict(self))
+
+     @classmethod
+     def from_json(cls, json_string: str) -> "Schema":
+         json_data = json.loads(json_string)
+
+         # parse embedding_feature_column_names
+         if json_data.get("embedding_feature_column_names") is not None:
+             embedding_feature_column_names = {}
+             for feature_name, column_names in json_data["embedding_feature_column_names"].items():
+                 embedding_feature_column_names[feature_name] = EmbeddingColumnNames(
+                     vector_column_name=column_names["vector_column_name"],
+                     raw_data_column_name=column_names["raw_data_column_name"],
+                     link_to_data_column_name=column_names["link_to_data_column_name"],
+                 )
+             json_data["embedding_feature_column_names"] = embedding_feature_column_names
+
+         # parse prompt_column_names
+         if (prompt := json_data.get("prompt_column_names")) is not None:
+             json_data["prompt_column_names"] = RetrievalEmbeddingColumnNames(
+                 vector_column_name=prompt.get("vector_column_name"),
+                 raw_data_column_name=prompt.get("raw_data_column_name"),
+                 context_retrieval_ids_column_name=prompt.get("context_retrieval_ids_column_name"),
+                 context_retrieval_scores_column_name=prompt.get(
+                     "context_retrieval_scores_column_name"
+                 ),
+             )
+
+         # parse response_column_names
+         if isinstance(json_data.get("response_column_names"), Mapping):
+             response_column_names = EmbeddingColumnNames(
+                 vector_column_name=json_data["response_column_names"]["vector_column_name"],
+                 raw_data_column_name=json_data["response_column_names"]["raw_data_column_name"],
+             )
+             json_data["response_column_names"] = response_column_names
+
+         return cls(**json_data)
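The added Schema dataclass round-trips through JSON via to_json and from_json. A minimal sketch of that round trip, assuming the definitions above are in scope; the column names are illustrative, not part of the diff:

# Illustrative values only; uses Schema and EmbeddingColumnNames from the new module above.
schema = Schema(
    prediction_id_column_name="prediction_id",
    feature_column_names=["age", "income"],
    embedding_feature_column_names={
        "description": EmbeddingColumnNames(
            vector_column_name="description_vector",
            raw_data_column_name="description_text",
        )
    },
)
restored = Schema.from_json(schema.to_json())
print(restored == schema)  # expected True: every field survives the JSON round trip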
phoenix/server/app.py CHANGED
@@ -142,6 +142,10 @@ async def version(_: Request) -> PlainTextResponse:
      return PlainTextResponse(f"{phoenix.__version__}")


+ async def check_healthz(_: Request) -> PlainTextResponse:
+     return PlainTextResponse("OK")
+
+
  def create_app(
      export_path: Path,
      model: Model,
@@ -193,6 +197,7 @@ def create_app(
      )
      + [
          Route("/arize_phoenix_version", version),
+         Route("/healthz", check_healthz),
          Route(
              "/exports",
              type(
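The new /healthz route returns a plain "OK", giving deployments a simple liveness probe. A rough sketch of polling it from Python, assuming a server on the default local address http://localhost:6006 (the URL is an assumption, adjust for your deployment):

# Hypothetical liveness check against a running Phoenix server.
from urllib.request import urlopen

def phoenix_is_healthy(base_url: str = "http://localhost:6006") -> bool:
    try:
        with urlopen(f"{base_url}/healthz", timeout=5) as response:
            return response.status == 200 and response.read().decode() == "OK"
    except OSError:  # covers URLError, HTTPError, and timeouts
        return False

print(phoenix_is_healthy())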
phoenix/server/main.py CHANGED
@@ -19,8 +19,8 @@ from phoenix.config import (
  )
  from phoenix.core.model_schema_adapter import create_model_from_datasets
  from phoenix.core.traces import Traces
- from phoenix.datasets.dataset import EMPTY_DATASET, Dataset
- from phoenix.datasets.fixtures import FIXTURES, get_datasets
+ from phoenix.inferences.fixtures import FIXTURES, get_datasets
+ from phoenix.inferences.inferences import EMPTY_INFERENCES, Inferences
  from phoenix.pointcloud.umap_parameters import (
      DEFAULT_MIN_DIST,
      DEFAULT_N_NEIGHBORS,
@@ -114,9 +114,9 @@ if __name__ == "__main__":
      trace_dataset_name: Optional[str] = None
      simulate_streaming: Optional[bool] = None

-     primary_dataset: Dataset = EMPTY_DATASET
-     reference_dataset: Optional[Dataset] = None
-     corpus_dataset: Optional[Dataset] = None
+     primary_dataset: Inferences = EMPTY_INFERENCES
+     reference_dataset: Optional[Inferences] = None
+     corpus_dataset: Optional[Inferences] = None

      # automatically remove the pid file when the process is being gracefully terminated
      atexit.register(_remove_pid_file)
@@ -158,14 +158,14 @@ if __name__ == "__main__":
          primary_dataset_name = args.primary
          reference_dataset_name = args.reference
          corpus_dataset_name = args.corpus
-         primary_dataset = Dataset.from_name(primary_dataset_name)
+         primary_dataset = Inferences.from_name(primary_dataset_name)
          reference_dataset = (
-             Dataset.from_name(reference_dataset_name)
+             Inferences.from_name(reference_dataset_name)
              if reference_dataset_name is not None
              else None
          )
          corpus_dataset = (
-             None if corpus_dataset_name is None else Dataset.from_name(corpus_dataset_name)
+             None if corpus_dataset_name is None else Inferences.from_name(corpus_dataset_name)
          )
      elif args.command == "fixture":
          fixture_name = args.fixture
@@ -1,5 +1,5 @@
  """
- A set of **highly experimental** helper functions to
+ A set of helper functions to
  - extract spans from Phoenix for evaluation
  - explode retrieved documents from (horizontal) lists to a (vertical) series
    indexed by `context.span_id` and `document_position`
@@ -73,7 +73,6 @@ def _extract_subject_id_from_index(
      value: Union[str, Sequence[Any]],
  ) -> pb.Evaluation.SubjectId:
      """
-     (**Highly Experimental**)
      Returns `SubjectId` given the format of `index_names`. Allowed formats are:
      - DocumentRetrievalId
        - index_names=["context.span_id", "document_position"]
@@ -36,7 +36,7 @@ from phoenix.config (
  )
  from phoenix.core.model_schema_adapter import create_model_from_datasets
  from phoenix.core.traces import Traces
- from phoenix.datasets.dataset import EMPTY_DATASET, Dataset
+ from phoenix.inferences.inferences import EMPTY_INFERENCES, Inferences
  from phoenix.pointcloud.umap_parameters import get_umap_parameters
  from phoenix.server.app import create_app
  from phoenix.server.thread_server import ThreadServer
@@ -104,9 +104,9 @@ class Session(TraceDataExtractor, ABC):

      def __init__(
          self,
-         primary_dataset: Dataset,
-         reference_dataset: Optional[Dataset] = None,
-         corpus_dataset: Optional[Dataset] = None,
+         primary_dataset: Inferences,
+         reference_dataset: Optional[Inferences] = None,
+         corpus_dataset: Optional[Inferences] = None,
          trace_dataset: Optional[TraceDataset] = None,
          default_umap_parameters: Optional[Mapping[str, Any]] = None,
          host: Optional[str] = None,
@@ -194,9 +194,9 @@ _session: Optional[Session] = None
  class ProcessSession(Session):
      def __init__(
          self,
-         primary_dataset: Dataset,
-         reference_dataset: Optional[Dataset] = None,
-         corpus_dataset: Optional[Dataset] = None,
+         primary_dataset: Inferences,
+         reference_dataset: Optional[Inferences] = None,
+         corpus_dataset: Optional[Inferences] = None,
          trace_dataset: Optional[TraceDataset] = None,
          default_umap_parameters: Optional[Mapping[str, Any]] = None,
          host: Optional[str] = None,
@@ -215,9 +215,9 @@ class ProcessSession(Session):
              notebook_env=notebook_env,
          )
          primary_dataset.to_disc()
-         if isinstance(reference_dataset, Dataset):
+         if isinstance(reference_dataset, Inferences):
              reference_dataset.to_disc()
-         if isinstance(corpus_dataset, Dataset):
+         if isinstance(corpus_dataset, Inferences):
              corpus_dataset.to_disc()
          if isinstance(trace_dataset, TraceDataset):
              trace_dataset.to_disc()
@@ -284,9 +284,9 @@ class ProcessSession(Session):
  class ThreadSession(Session):
      def __init__(
          self,
-         primary_dataset: Dataset,
-         reference_dataset: Optional[Dataset] = None,
-         corpus_dataset: Optional[Dataset] = None,
+         primary_dataset: Inferences,
+         reference_dataset: Optional[Inferences] = None,
+         corpus_dataset: Optional[Inferences] = None,
          trace_dataset: Optional[TraceDataset] = None,
          default_umap_parameters: Optional[Mapping[str, Any]] = None,
          host: Optional[str] = None,
@@ -424,9 +424,9 @@ class ThreadSession(Session):


  def launch_app(
-     primary: Optional[Dataset] = None,
-     reference: Optional[Dataset] = None,
-     corpus: Optional[Dataset] = None,
+     primary: Optional[Inferences] = None,
+     reference: Optional[Inferences] = None,
+     corpus: Optional[Inferences] = None,
      trace: Optional[TraceDataset] = None,
      default_umap_parameters: Optional[Mapping[str, Any]] = None,
      host: Optional[str] = None,
@@ -447,7 +447,7 @@ def launch_app(
      corpus : Dataset, optional
          The dataset containing corpus for LLM context retrieval.
      trace: TraceDataset, optional
-         **Experimental** The trace dataset containing the trace data.
+         The trace dataset containing the trace data.
      host: str, optional
          The host on which the server runs. It can also be set using environment
          variable `PHOENIX_HOST`, otherwise it defaults to `127.0.0.1`.
@@ -473,17 +473,17 @@ def launch_app(
      Examples
      --------
      >>> import phoenix as px
-     >>> # construct a dataset to analyze
-     >>> dataset = px.Dataset(...)
-     >>> session = px.launch_app(dataset)
+     >>> # construct an inference set to analyze
+     >>> inferences = px.Inferences(...)
+     >>> session = px.launch_app(inferences)
      """
      global _session

-     # Stopgap solution to allow the app to run without a primary dataset
+     # Stopgap solution to allow the app to run without a primary inferences
      if primary is None:
-         # Dummy dataset
-         # TODO: pass through the lack of a primary dataset to the app
-         primary = EMPTY_DATASET
+         # Dummy inferences
+         # TODO: pass through the lack of a primary inferences to the app
+         primary = EMPTY_INFERENCES

      if _session is not None and _session.active:
          logger.warning(
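With this rename, sessions are launched from Inferences rather than Dataset. A minimal sketch under the assumption that px.Inferences keeps the old px.Dataset constructor signature (dataframe, schema, name); the dataframe and column names are illustrative:

# Assumes px.Inferences(dataframe=..., schema=..., name=...) mirrors the old px.Dataset signature.
import pandas as pd
import phoenix as px

df = pd.DataFrame(
    {
        "prediction_id": ["a", "b", "c"],
        "predicted_label": ["cat", "dog", "cat"],
        "actual_label": ["cat", "cat", "dog"],
    }
)
schema = px.Schema(
    prediction_id_column_name="prediction_id",
    prediction_label_column_name="predicted_label",
    actual_label_column_name="actual_label",
)
session = px.launch_app(primary=px.Inferences(dataframe=df, schema=schema, name="primary"))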
@@ -0,0 +1,30 @@
+ import functools
+ import warnings
+ from typing import Any, Callable, Type, TypeVar
+
+ GenericClass = TypeVar("GenericClass", bound=Type[Any])
+ CallableType = TypeVar("CallableType", bound=Callable[..., Any])
+
+
+ def deprecated_class(message: str) -> Callable[[GenericClass], GenericClass]:
+     def decorator(original_class: GenericClass) -> GenericClass:
+         @functools.wraps(original_class)
+         def new_class(*args: Any, **kwargs: Any) -> Any:
+             warnings.warn(message, DeprecationWarning, stacklevel=2)
+             return original_class(*args, **kwargs)
+
+         return new_class  # type: ignore
+
+     return decorator
+
+
+ def deprecated(message: str) -> Callable[[CallableType], CallableType]:
+     def decorator(original_func: CallableType) -> CallableType:
+         @functools.wraps(original_func)
+         def new_func(*args: Any, **kwargs: Any) -> Any:
+             warnings.warn(message, DeprecationWarning, stacklevel=2)
+             return original_func(*args, **kwargs)
+
+         return new_func  # type: ignore
+
+     return decorator
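These new helpers wrap a class or function so that constructing or calling it emits a DeprecationWarning before delegating to the original. A rough usage sketch; the names below are hypothetical and only demonstrate the decorators, they are not part of the diff:

import warnings

@deprecated_class("OldClient is deprecated; use NewClient instead.")
class OldClient:
    def __init__(self, host: str) -> None:
        self.host = host

@deprecated("fetch is deprecated; use fetch_v2 instead.")
def fetch(url: str) -> str:
    return url

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    OldClient("localhost")       # warns, then constructs the original class
    fetch("http://example.com")  # warns, then calls the original function

print([str(w.message) for w in caught])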
phoenix/version.py CHANGED
@@ -1 +1 @@
- __version__ = "3.19.4"
+ __version__ = "3.21.0"