PyPI - arize - Versions diffs - 8.0.0a2__tar.gz → 8.0.0a4__tar.gz - Mend

arize 8.0.0a2tar.gz → 8.0.0a4tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (121) hide show

{arize-8.0.0a2 → arize-8.0.0a4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: arize
-Version: 8.0.0a2
+Version: 8.0.0a4
 Summary: A helper library to interact with Arize AI APIs
 Project-URL: Homepage, https://arize.com
 Project-URL: Documentation, https://docs.arize.com/arize
@@ -26,11 +26,19 @@ Classifier: Topic :: System :: Logging
 Classifier: Topic :: System :: Monitoring
 Requires-Python: >=3.10
 Requires-Dist: lazy-imports
+Requires-Dist: numpy>=2.0.0
 Provides-Extra: dev
 Requires-Dist: pytest==8.4.2; extra == 'dev'
 Requires-Dist: ruff==0.13.2; extra == 'dev'
+Provides-Extra: ml-batch
+Requires-Dist: pandas<3,>=1.0.0; extra == 'ml-batch'
+Requires-Dist: protobuf<6,>=4.21.0; extra == 'ml-batch'
+Requires-Dist: pyarrow>=0.15.0; extra == 'ml-batch'
+Requires-Dist: tqdm; extra == 'ml-batch'
+Provides-Extra: ml-stream
+Requires-Dist: protobuf<6,>=4.21.0; extra == 'ml-stream'
+Requires-Dist: requests-futures<2,>=1.0.0; extra == 'ml-stream'
 Provides-Extra: spans
-Requires-Dist: numpy>=2.0.0; extra == 'spans'
 Requires-Dist: openinference-semantic-conventions<1,>=0.1.21; extra == 'spans'
 Requires-Dist: opentelemetry-semantic-conventions<1,>=0.43b0; extra == 'spans'
 Requires-Dist: pandas<3,>=1.0.0; extra == 'spans'
@@ -112,13 +120,14 @@ tracer_provider = register(
 OpenAIInstrumentor().instrument(tracer_provider=tracer_provider)
 ```
-### Logging Spans, Evaluations, and Annotations
+### Operations on Spans
 Use `arize.spans` to interact with spans: log spans into Arize, update the span's evaluations, annotations and metadata in bulk.
 > **WARNING**: This is currently under an alpha release. Install with `pip install arize==8.0.0ax` where the `x` denotes the specific alpha version. Check the [pre-releases](https://pypi.org/project/arize/#history) page in PyPI.
+#### Logging spans
 ```python
 from arize import ArizeClient
@@ -130,22 +139,169 @@ client.spans.log(
     space_id=SPACE_ID,
     project_name=PROJECT_NAME,
     dataframe=spans_df,
-    evals_df=evals_df, # Optionally pass the evaluations together with the spans
+    # evals_df=evals_df, # Optionally pass the evaluations together with the spans
 )
+```
+#### Update spans Evaluations, Annotations, and Metadata
+```python
+from arize import ArizeClient
+client = ArizeClient(api_key=API_KEY)
+SPACE_ID = "<your-space-id>"
+PROJECT_NAME = "<your-project-name>"
 client.spans.update_evaluations(
+    space_id=SPACE_ID,
+    project_name=PROJECT_NAME,
     dataframe=evals_df,
-    project_name="your-llm-project",
+    # force_http=... # Optionally pass force_http to update evaluations via HTTP instead of gRPC, defaults to False
 )
 client.spans.update_annotations(
+    space_id=SPACE_ID,
+    project_name=PROJECT_NAME,
     dataframe=annotations_df,
-    project_name="your-llm-project",
 )
 client.spans.update_metadata(
-    dataframe=annotations_df,
-    project_name="your-llm-project",
+    space_id=SPACE_ID,
+    project_name=PROJECT_NAME,
+    dataframe=metadata_df,
+)
+```
+#### Exporting spans
+Use the `export_to_df` or `export_to_parquet` methods to export large amounts of spans from Arize.
+```python
+from arize import ArizeClient
+from datetime import datetime
+FMT  = "%Y-%m-%d"
+start_time = datetime.strptime("2024-01-01",FMT)
+end_time = datetime.strptime("2026-01-01",FMT)
+client = ArizeClient(api_key=API_KEY)
+SPACE_ID = "<your-space-id>"
+PROJECT_NAME = "<your-project-name>"
+df = client.spans.export_to_df(
+    space_id=SPACE_ID,
+    project_name=PROJECT_NAME,
+    start_time=start_time,
+    end_time=end_time,
+)
+```
+### Operations on ML Models
+Use `arize.models` to interact with ML models: log ML data (training, validation, production) into Arize, either streaming or in batches.
+> **WARNING**: This is currently under an alpha release. Install with `pip install arize==8.0.0ax` where the `x` denotes the specific alpha version. Check the [pre-releases](https://pypi.org/project/arize/#history) page in PyPI.
+#### Stream log ML Data for a Classification use-case
+```python
+from arize import ArizeClient
+from arize.types import ModelTypes, Environments
+client = ArizeClient(api_key=API_KEY)
+SPACE_ID = "<your-space-id>"
+MODEL_NAME = "<your-model-name>"
+features=...
+embedding_features=...
+response = client.models.log_stream(
+    space_id=SPACE_ID,
+    model_name=MODEL_NAME,
+    model_type=ModelTypes.SCORE_CATEGORICAL,
+    environment=Environments.PRODUCTION,
+    prediction_label=("not fraud",0.3),
+    actual_label=("fraud",1.0),
+    features=features,
+    embedding_features=embedding_features,
+)
+```
+#### Log a batch of ML Data for an Object Detection use-case
+```python
+from arize import ArizeClient
+from arize.types import ModelTypes, Environments
+client = ArizeClient(api_key=API_KEY)
+SPACE_ID = "<your-space-id>"
+MODEL_NAME = "<your-model-name>"
+MODEL_VERSION = "1.0"
+from arize.types import Schema, EmbeddingColumnNames, ObjectDetectionColumnNames, ModelTypes, Environments
+tags = ["drift_type"]
+embedding_feature_column_names = {
+    "image_embedding": EmbeddingColumnNames(
+        vector_column_name="image_vector", link_to_data_column_name="url"
+    )
+}
+object_detection_prediction_column_names = ObjectDetectionColumnNames(
+    bounding_boxes_coordinates_column_name="prediction_bboxes",
+    categories_column_name="prediction_categories",
+    scores_column_name="prediction_scores",
+)
+object_detection_actual_column_names = ObjectDetectionColumnNames(
+    bounding_boxes_coordinates_column_name="actual_bboxes",
+    categories_column_name="actual_categories",
+)
+# Define a Schema() object for Arize to pick up data from the correct columns for logging
+schema = Schema(
+    prediction_id_column_name="prediction_id",
+    timestamp_column_name="prediction_ts",
+    tag_column_names=tags,
+    embedding_feature_column_names=embedding_feature_column_names,
+    object_detection_prediction_column_names=object_detection_prediction_column_names,
+    object_detection_actual_column_names=object_detection_actual_column_names,
+)
+# Logging Production DataFrame
+response = client.models.log_batch(
+    space_id=SPACE_ID,
+    model_name=MODEL_NAME,
+    model_type=ModelTypes.OBJECT_DETECTION,
+    dataframe=prod_df,
+    schema=schema,
+    environment=Environments.PRODUCTION,
+    model_version = MODEL_VERSION, # Optionally pass a model version
+)
+```
+#### Exporting ML Data
+Use the `export_to_df` or `export_to_parquet` methods to export large amounts of ML data from Arize.
+```python
+from arize import ArizeClient
+from datetime import datetime
+FMT  = "%Y-%m-%d"
+start_time = datetime.strptime("2024-01-01",FMT)
+end_time = datetime.strptime("2026-01-01",FMT)
+client = ArizeClient(api_key=API_KEY)
+SPACE_ID = "<your-space-id>"
+MODEL_NAME = "<your-model-name>"
+MODEL_VERSION = "1.0"
+df = client.models.export_to_df(
+    space_id=SPACE_ID,
+    model_name=MODEL_NAME,
+    environment=Environments.TRAINING,
+    model_version=MODEL_VERSION,
+    start_time=start_time,
+    end_time=end_time,
 )
 ```

{arize-8.0.0a2 → arize-8.0.0a4}/README.md RENAMED Viewed

@@ -71,13 +71,14 @@ tracer_provider = register(
 OpenAIInstrumentor().instrument(tracer_provider=tracer_provider)
 ```
-### Logging Spans, Evaluations, and Annotations
+### Operations on Spans
 Use `arize.spans` to interact with spans: log spans into Arize, update the span's evaluations, annotations and metadata in bulk.
 > **WARNING**: This is currently under an alpha release. Install with `pip install arize==8.0.0ax` where the `x` denotes the specific alpha version. Check the [pre-releases](https://pypi.org/project/arize/#history) page in PyPI.
+#### Logging spans
 ```python
 from arize import ArizeClient
@@ -89,22 +90,169 @@ client.spans.log(
     space_id=SPACE_ID,
     project_name=PROJECT_NAME,
     dataframe=spans_df,
-    evals_df=evals_df, # Optionally pass the evaluations together with the spans
+    # evals_df=evals_df, # Optionally pass the evaluations together with the spans
 )
+```
+#### Update spans Evaluations, Annotations, and Metadata
+```python
+from arize import ArizeClient
+client = ArizeClient(api_key=API_KEY)
+SPACE_ID = "<your-space-id>"
+PROJECT_NAME = "<your-project-name>"
 client.spans.update_evaluations(
+    space_id=SPACE_ID,
+    project_name=PROJECT_NAME,
     dataframe=evals_df,
-    project_name="your-llm-project",
+    # force_http=... # Optionally pass force_http to update evaluations via HTTP instead of gRPC, defaults to False
 )
 client.spans.update_annotations(
+    space_id=SPACE_ID,
+    project_name=PROJECT_NAME,
     dataframe=annotations_df,
-    project_name="your-llm-project",
 )
 client.spans.update_metadata(
-    dataframe=annotations_df,
-    project_name="your-llm-project",
+    space_id=SPACE_ID,
+    project_name=PROJECT_NAME,
+    dataframe=metadata_df,
+)
+```
+#### Exporting spans
+Use the `export_to_df` or `export_to_parquet` methods to export large amounts of spans from Arize.
+```python
+from arize import ArizeClient
+from datetime import datetime
+FMT  = "%Y-%m-%d"
+start_time = datetime.strptime("2024-01-01",FMT)
+end_time = datetime.strptime("2026-01-01",FMT)
+client = ArizeClient(api_key=API_KEY)
+SPACE_ID = "<your-space-id>"
+PROJECT_NAME = "<your-project-name>"
+df = client.spans.export_to_df(
+    space_id=SPACE_ID,
+    project_name=PROJECT_NAME,
+    start_time=start_time,
+    end_time=end_time,
+)
+```
+### Operations on ML Models
+Use `arize.models` to interact with ML models: log ML data (training, validation, production) into Arize, either streaming or in batches.
+> **WARNING**: This is currently under an alpha release. Install with `pip install arize==8.0.0ax` where the `x` denotes the specific alpha version. Check the [pre-releases](https://pypi.org/project/arize/#history) page in PyPI.
+#### Stream log ML Data for a Classification use-case
+```python
+from arize import ArizeClient
+from arize.types import ModelTypes, Environments
+client = ArizeClient(api_key=API_KEY)
+SPACE_ID = "<your-space-id>"
+MODEL_NAME = "<your-model-name>"
+features=...
+embedding_features=...
+response = client.models.log_stream(
+    space_id=SPACE_ID,
+    model_name=MODEL_NAME,
+    model_type=ModelTypes.SCORE_CATEGORICAL,
+    environment=Environments.PRODUCTION,
+    prediction_label=("not fraud",0.3),
+    actual_label=("fraud",1.0),
+    features=features,
+    embedding_features=embedding_features,
+)
+```
+#### Log a batch of ML Data for an Object Detection use-case
+```python
+from arize import ArizeClient
+from arize.types import ModelTypes, Environments
+client = ArizeClient(api_key=API_KEY)
+SPACE_ID = "<your-space-id>"
+MODEL_NAME = "<your-model-name>"
+MODEL_VERSION = "1.0"
+from arize.types import Schema, EmbeddingColumnNames, ObjectDetectionColumnNames, ModelTypes, Environments
+tags = ["drift_type"]
+embedding_feature_column_names = {
+    "image_embedding": EmbeddingColumnNames(
+        vector_column_name="image_vector", link_to_data_column_name="url"
+    )
+}
+object_detection_prediction_column_names = ObjectDetectionColumnNames(
+    bounding_boxes_coordinates_column_name="prediction_bboxes",
+    categories_column_name="prediction_categories",
+    scores_column_name="prediction_scores",
+)
+object_detection_actual_column_names = ObjectDetectionColumnNames(
+    bounding_boxes_coordinates_column_name="actual_bboxes",
+    categories_column_name="actual_categories",
+)
+# Define a Schema() object for Arize to pick up data from the correct columns for logging
+schema = Schema(
+    prediction_id_column_name="prediction_id",
+    timestamp_column_name="prediction_ts",
+    tag_column_names=tags,
+    embedding_feature_column_names=embedding_feature_column_names,
+    object_detection_prediction_column_names=object_detection_prediction_column_names,
+    object_detection_actual_column_names=object_detection_actual_column_names,
+)
+# Logging Production DataFrame
+response = client.models.log_batch(
+    space_id=SPACE_ID,
+    model_name=MODEL_NAME,
+    model_type=ModelTypes.OBJECT_DETECTION,
+    dataframe=prod_df,
+    schema=schema,
+    environment=Environments.PRODUCTION,
+    model_version = MODEL_VERSION, # Optionally pass a model version
+)
+```
+#### Exporting ML Data
+Use the `export_to_df` or `export_to_parquet` methods to export large amounts of ML data from Arize.
+```python
+from arize import ArizeClient
+from datetime import datetime
+FMT  = "%Y-%m-%d"
+start_time = datetime.strptime("2024-01-01",FMT)
+end_time = datetime.strptime("2026-01-01",FMT)
+client = ArizeClient(api_key=API_KEY)
+SPACE_ID = "<your-space-id>"
+MODEL_NAME = "<your-model-name>"
+MODEL_VERSION = "1.0"
+df = client.models.export_to_df(
+    space_id=SPACE_ID,
+    model_name=MODEL_NAME,
+    environment=Environments.TRAINING,
+    model_version=MODEL_VERSION,
+    start_time=start_time,
+    end_time=end_time,
 )
 ```

{arize-8.0.0a2 → arize-8.0.0a4}/pyproject.toml RENAMED Viewed

@@ -35,6 +35,7 @@ classifiers = [
 ]
 dependencies = [
     "lazy-imports",
+    "numpy>=2.0.0",
     # "requests_futures==1.0.0",
     # "googleapis_common_protos>=1.51.0,<2",
     # "protobuf>=4.21.0,<6",
@@ -51,7 +52,6 @@ dev = [
     "ruff==0.13.2",
 ]
 spans = [
-    "numpy>=2.0.0",
     "openinference-semantic-conventions>=0.1.21, <1",
     "opentelemetry-semantic-conventions>=0.43b0, <1",
     "pandas>=1.0.0,<3",
@@ -59,6 +59,16 @@ spans = [
     "pyarrow>=0.15.0",
     "tqdm",
 ]
+ml-stream = [
+    "requests_futures>=1.0.0, <2",
+    "protobuf>=4.21.0,<6",
+]
+ml-batch = [
+    "pandas>=1.0.0,<3",
+    "pyarrow>=0.15.0",
+    "protobuf>=4.21.0,<6",
+    "tqdm",
+]
 # datasets-experiments = [
 #     "pydantic",
 # ]

{arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_exporter/client.py RENAMED Viewed

@@ -1,6 +1,4 @@
 # type: ignore[pb2]
-from __future__ import annotations
 import logging
 from dataclasses import dataclass
 from datetime import datetime
@@ -14,17 +12,14 @@ from google.protobuf.wrappers_pb2 import Int64Value
 from pyarrow import flight
 from tqdm import tqdm
-from arize._exporter.parsers.tracing_data_parser import (
-    OtelTracingDataTransformer,
-)
 from arize._exporter.validation import (
     validate_input_type,
-    validate_input_value,
     validate_start_end_time,
 )
 from arize._generated.protocol.flight import export_pb2
+from arize.logging import CtxAdapter
 from arize.types import Environments, SimilaritySearchParams
-from arize.utils.pandas import reset_dataframe_index
+from arize.utils.dataframe import reset_dataframe_index
 from arize.utils.proto import get_pb_flight_doput_request
 logger = logging.getLogger(__name__)
@@ -124,6 +119,10 @@ class ArizeExportClient:
         df.drop(null_columns, axis=1, inplace=True)
         if environment == Environments.TRACING:
+            from arize._exporter.parsers.tracing_data_parser import (
+                OtelTracingDataTransformer,
+            )
             # by default, transform the exported tracing data so that it's
             # easier to work with in Python
             df = OtelTracingDataTransformer().transform(df)
@@ -238,6 +237,27 @@ class ArizeExportClient:
         columns: List | None = None,
         stream_chunk_size: int | None = None,
     ) -> Tuple[flight.FlightStreamReader, int]:
+        # Bind common context for this operation
+        log = CtxAdapter(
+            logger,
+            {
+                "component": "exporter",
+                "operation": "export_to_df",
+                "space_id": space_id,
+                "model_id": model_id,
+                "environment": environment.name,
+                "model_version": model_version,
+                "batch_id": batch_id,
+                "include_actuals": include_actuals,
+                "where": where,
+                "columns": columns,
+                "similarity_search_params": similarity_search_params,
+                "stream_chunk_size": stream_chunk_size,
+                "start_time": start_time,
+                "end_time": end_time,
+            },
+        )
+        log.debug("Getting stream reader...")
         validate_input_type(space_id, "space_id", str)
         validate_input_type(model_id, "model_id", str)
         validate_input_type(environment, "environment", Environments)
@@ -281,7 +301,7 @@ class ArizeExportClient:
                     json_format.MessageToJson(query_descriptor)  # type: ignore
                 ),
             )
-            logger.debug("Fetching data...")
+            logger.info("Fetching data...")
             if flight_info.total_records == 0:
                 logger.warning("Query returns no data")

{arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_exporter/parsers/tracing_data_parser.py RENAMED Viewed

@@ -1,9 +1,8 @@
+from __future__ import annotations
 import json
 import logging
-from typing import List
-import numpy as np
-import pandas as pd
+from typing import TYPE_CHECKING, List
 from arize.spans.columns import (
     SPAN_ATTRIBUTES_EMBEDDING_EMBEDDINGS_COL,
@@ -19,6 +18,10 @@ from arize.spans.columns import (
     SPAN_START_TIME_COL,
 )
+if TYPE_CHECKING:
+    import numpy as np
+    import pandas as pd
 logger = logging.getLogger(__name__)

{arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_exporter/validation.py RENAMED Viewed

@@ -1,5 +1,9 @@
-from datetime import datetime
-from typing import Any
+from __future__ import annotations
+from typing import TYPE_CHECKING, Any
+if TYPE_CHECKING:
+    from datetime import datetime
 def validate_input_type(
@@ -35,6 +39,6 @@ def validate_input_value(
     )
-def validate_start_end_time(start_time, end_time: datetime) -> None:
+def validate_start_end_time(start_time: datetime, end_time: datetime) -> None:
     if start_time >= end_time:
         raise ValueError("start_time must be before end_time")

{arize-8.0.0a2 → arize-8.0.0a4}/src/arize/_flight/client.py RENAMED Viewed

@@ -2,29 +2,28 @@
 from __future__ import annotations
 import base64
+import logging
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Tuple
+from typing import TYPE_CHECKING, Any, Dict, List, Tuple
-import pyarrow as pa
 from google.protobuf import json_format
 from pyarrow import flight
-from arize._flight.types import FlightRequestType
 from arize._generated.protocol.flight.ingest_pb2 import (
     WriteSpanAnnotationResponse,
     WriteSpanAttributesMetadataResponse,
     WriteSpanEvaluationResponse,
 )
 from arize.config import get_python_version
-from arize.constants.config import (
-    DEFAULT_FLIGHT_HOST,
-    DEFAULT_FLIGHT_PORT,
-    DEFAULT_FLIGHT_TRANSPORT_SCHEME,
-)
 from arize.logging import log_a_list
 from arize.utils.proto import get_pb_flight_doput_request, get_pb_schema_tracing
 from arize.version import __version__
+if TYPE_CHECKING:
+    import pyarrow as pa
+    from arize._flight.client import FlightRequestType
 BytesPair = Tuple[bytes, bytes]
 Headers = List[BytesPair]
 WriteSpanResponse = (
@@ -32,7 +31,6 @@ WriteSpanResponse = (
     | WriteSpanAnnotationResponse
     | WriteSpanAttributesMetadataResponse
 )
-import logging
 logger = logging.getLogger(__name__)
@@ -40,10 +38,10 @@ logger = logging.getLogger(__name__)
 @dataclass(frozen=True)
 class ArizeFlightClient:
     api_key: str = field(repr=False)
-    host: str = DEFAULT_FLIGHT_HOST
-    port: int = DEFAULT_FLIGHT_PORT
-    scheme: str = DEFAULT_FLIGHT_TRANSPORT_SCHEME
-    request_verify: bool = True
+    host: str
+    port: int
+    scheme: str
+    request_verify: bool
     # internal cache for the underlying FlightClient
     _client: flight.FlightClient | None = field(
@@ -54,7 +52,6 @@ class ArizeFlightClient:
     @property
     def headers(self) -> Headers:
-        # Keep the typing simple: (bytes, bytes)
         return [
             (b"origin", b"arize-logging-client"),
             (b"auth-token-bin", str(self.api_key).encode("utf-8")),

arize 8.0.0a2__tar.gz → 8.0.0a4__tar.gz

arize 8.0.0a2tar.gz → 8.0.0a4tar.gz