arize 8.0.0a16__tar.gz → 8.0.0a17__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arize-8.0.0a16 → arize-8.0.0a17}/PKG-INFO +217 -14
- {arize-8.0.0a16 → arize-8.0.0a17}/README.md +216 -13
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/__init__.py +1 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_flight/client.py +32 -1
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/client.py +8 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/config.py +14 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/constants/config.py +4 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/datasets/client.py +77 -56
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/experiments/client.py +118 -17
- arize-8.0.0a17/src/arize/utils/cache.py +68 -0
- arize-8.0.0a17/src/arize/version.py +1 -0
- arize-8.0.0a16/src/arize/version.py +0 -1
- {arize-8.0.0a16 → arize-8.0.0a17}/.gitignore +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/LICENSE.md +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/pyproject.toml +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_exporter/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_exporter/client.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_exporter/parsers/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_exporter/parsers/tracing_data_parser.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_exporter/validation.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_flight/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_flight/types.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/api/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/api/datasets_api.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/api/experiments_api.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/api_client.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/api_response.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/configuration.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/exceptions.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/models/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/models/dataset.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/models/dataset_version.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/models/datasets_create_request.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/models/datasets_list200_response.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/models/datasets_list_examples200_response.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/models/error.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/models/experiment.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/models/experiments_create_request.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/models/experiments_list200_response.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/models/experiments_runs_list200_response.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/rest.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/test/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/test/test_dataset.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/test/test_dataset_version.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/test/test_datasets_api.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/test/test_datasets_create_request.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/test/test_datasets_list200_response.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/test/test_datasets_list_examples200_response.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/test/test_error.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/test/test_experiment.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/test/test_experiments_api.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/test/test_experiments_create_request.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/test/test_experiments_list200_response.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client/test/test_experiments_runs_list200_response.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/api_client_README.md +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/protocol/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/protocol/flight/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/protocol/flight/export_pb2.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/protocol/flight/ingest_pb2.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/protocol/rec/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_generated/protocol/rec/public_pb2.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_lazy.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/constants/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/constants/ml.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/constants/model_mapping.json +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/constants/openinference.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/constants/pyarrow.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/constants/spans.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/datasets/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/datasets/errors.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/datasets/validation.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/embeddings/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/embeddings/auto_generator.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/embeddings/base_generators.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/embeddings/constants.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/embeddings/cv_generators.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/embeddings/errors.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/embeddings/nlp_generators.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/embeddings/tabular_generators.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/embeddings/usecases.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/exceptions/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/exceptions/auth.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/exceptions/base.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/exceptions/models.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/exceptions/parameters.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/exceptions/spaces.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/exceptions/types.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/exceptions/values.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/experiments/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/experiments/evaluators/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/experiments/evaluators/base.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/experiments/evaluators/exceptions.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/experiments/evaluators/executors.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/experiments/evaluators/rate_limiters.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/experiments/evaluators/types.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/experiments/evaluators/utils.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/experiments/functions.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/experiments/tracing.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/experiments/types.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/logging.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/models/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/models/batch_validation/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/models/batch_validation/errors.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/models/batch_validation/validator.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/models/bounded_executor.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/models/casting.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/models/client.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/models/proto.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/models/stream_validation.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/models/surrogate_explainer/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/models/surrogate_explainer/mimic.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/client.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/columns.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/conversion.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/validation/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/validation/annotations/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/validation/annotations/annotations_validation.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/validation/annotations/dataframe_form_validation.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/validation/annotations/value_validation.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/validation/common/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/validation/common/argument_validation.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/validation/common/dataframe_form_validation.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/validation/common/errors.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/validation/common/value_validation.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/validation/evals/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/validation/evals/dataframe_form_validation.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/validation/evals/evals_validation.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/validation/evals/value_validation.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/validation/metadata/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/validation/metadata/argument_validation.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/validation/metadata/dataframe_form_validation.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/validation/metadata/value_validation.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/validation/spans/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/validation/spans/dataframe_form_validation.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/validation/spans/spans_validation.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/spans/validation/spans/value_validation.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/types.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/utils/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/utils/arrow.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/utils/dataframe.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/utils/online_tasks/__init__.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/utils/online_tasks/dataframe_preprocessor.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/utils/openinference_conversion.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/utils/proto.py +0 -0
- {arize-8.0.0a16 → arize-8.0.0a17}/src/arize/utils/size.py +0 -0
{arize-8.0.0a16 → arize-8.0.0a17}/PKG-INFO +217 -14

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: arize
-Version: 8.0.0a16
+Version: 8.0.0a17
 Summary: A helper library to interact with Arize AI APIs
 Project-URL: Homepage, https://arize.com
 Project-URL: Documentation, https://docs.arize.com/arize
@@ -99,11 +99,24 @@ Description-Content-Type: text/markdown
 - [Operations on Datasets](#operations-on-datasets)
 - [List Datasets](#list-datasets)
 - [Create a Dataset](#create-a-dataset)
-- [Get Dataset by ID](#get-dataset-by-id)
+- [Get Dataset](#get-dataset)
 - [Delete a Dataset](#delete-a-dataset)
-- [
-- [
-
+- [List Dataset Examples](#list-dataset-examples)
+- [Operations on Experiments](#operations-on-experiments)
+- [List Experiments](#list-experiments)
+- [Run an Experiment](#run-an-experiment)
+- [Create an Experiment](#create-an-experiment)
+- [Get an Experiment](#get-an-experiment)
+- [Delete an Experiment](#delete-an-experiment)
+- [List Experiment runs](#list-experiment-runs)
+- [SDK Configuration](#sdk-configuration)
+- [Logging](#logging)
+- [In Code](#in-code)
+- [Via Environment Variables](#via-environment-variables)
+- [Caching](#caching)
+- [In Code](#in-code-1)
+- [Via Environment Variables](#via-environment-variables-1)
+- [Clean the cache](#clean-the-cache)
 - [Community](#community)

 # Overview
@@ -398,9 +411,9 @@ dataset_list = resp.datasets
 # Get the response as a dictionary
 resp_dict = resp.to_dict()
 # Get the response in JSON format
-
+resp_json = resp.to_json()
 # Get the response as a pandas dataframe
-
+resp_df = resp.to_df()
 ```

 ### Create a Dataset
@@ -430,9 +443,10 @@ If the number of examples (rows in dataframe, items in list) is too large, the c

 ```python
 created_dataset = client.datasets.create(
-
+    space_id="<target-space-id>",
     name="<your-dataset-name>", # Name must be unique within a space
     examples=..., # List of dictionaries or pandas dataframe
+    # force_http=... # Optionally pass force_http to create datasets via HTTP instead of gRPC, defaults to False
 )
 ```

@@ -445,8 +459,7 @@ dataset_dict = create_dataset.to_dict()
 dataset_dict = create_dataset.to_json()
 ```

-
-### Get Dataset by ID
+### Get Dataset

 To get a dataset by its ID use `client.datasets.get()`, you can optionally also pass the version ID of a particular version of interest of the dataset. The returned type is `Dataset`.

@@ -467,9 +480,167 @@ client.datasets.delete(
 )
 ```

-
+### List Dataset Examples
+
+You can list the examples of a given dataset using `client.datasets.list_examples()` and passing the dataset ID and, optionally, the dataset version ID. You can specify the number of examples desired using the `limit` parameter. If you want a large number of examples, consider using the `all=True` parameter, which will make it so the SDK exports the data using Arrow Flight via gRPC, for increased performance.
+
+```python
+resp = client.datasets.list_examples(
+    dataset_id="your-dataset-id>",
+    dataset_version_id="your-dataset-version-id>", # Optional, defaults to latest version
+    limit=... # number of desired examples. Defaults to 100
+    all=... # Whether or not to export all of the examples. Defaults to False
+)
+```
+
+The response is an object of type `DatasetsExamplesList200Response`, and you can access the list of examples via its `examples` attribute. In addition, you can transform the response object to a dictionary, to JSON format, or a pandas dataframe.
+
+```python
+# Get the list of datasets from the response
+examples_list = resp.examples
+# Get the response as a dictionary
+resp_dict = resp.to_dict()
+# Get the response in JSON format
+resp_json = resp.to_json()
+# Get the response as a pandas dataframe
+resp_df = resp.to_df()
+```
+
+## Operations on Experiments
+
+### List Experiments
+
+You can list all experiments that the user has access to using `client.experiments.list()`. You can use the `limit` parameter to specify the maximum number of datasets desired in the response and you can specify the `dataset_id` to target the list operation to a particular dataset.
+
+```python
+resp = client.experiments.list(
+    limit=... # Optional
+    dataset_id=... # Optional
+)
+```
+
+The response is an object of type `ExperimentsList200Response`, and you can access the list of experiments via its `experiments` attribute. In addition, you can transform the response object to a dictionary, to JSON format, or a pandas dataframe.
+
+```python
+# Get the list of datasets from the response
+experiment_list = resp.experiments
+# Get the response as a dictionary
+resp_dict = resp.to_dict()
+# Get the response in JSON format
+resp_json = resp.to_json()
+# Get the response as a pandas dataframe
+resp_df = resp.to_df()
+```
+
+### Run an Experiment
+
+You can run an experiment on a dataset using `client.experiments.run()` by defining a task, evaluators (optional), and passing the dataset id of the dataset you want to use, together with a name for the experiment. The function will download the entire dataset from Arize (unless cached, see caching section under "SDK Configuration"), execute the task to obtain an output, and perform evaluations (if evaluators were passed). The experiments will also be traced, and these traces will be visible in Arize. The experiment will be created and the data logged into Arize automatically. You can avoid logging to Arize by making `dry_run=True`. The function will return the `Experiment` object (or `None` if `dry_run=True`) together with the dataframe with the experiment data.
+
+```python
+experiment, experiment_df = client.run_experiment(
+    name="<name-your-experiment>",
+    dataset_id="<id-of-dataset-to-use>",
+    task=... # The task to be performed in the experiment.
+    evaluators=... # Optional: The evaluators to use in the experiment.
+    dry_run=..., # If True, the experiment result will not be uploaded to Arize. Defaults to False
+    dry_run_count=..., # Number of examples of the dataset to use in the dry run. Defaults to 10
+    concurrency=..., # The number of concurrent tasks to run. Defaults to 3.
+    set_global_tracer_provider=..., # If True, sets the global tracer provider for the experiment. Defaults to False
+    exit_on_error=..., # If True, the experiment will stop running on first occurrence of an error. Defaults to False
+)
+```
+
+The `Experiment` object also counts with convenience method similar to `List***` objects:
+
+```python
+# Get the response as a dictionary
+experiment_dict = create_experiment.to_dict()
+# Get the response in JSON format
+experiment_dict = create_experiment.to_json()
+```
+
+### Create an Experiment
+
+It is possible that you have run the experiment yourself without the above function, and hence you already have experiment data that you want to send to Arize. In this case, use the `client.experiments.create()` method by passing the runs data, we currently don't support creating an empty experiment, for instance, these are 2 rows of runs, as a list of dictionaries. You can also pass a pandas dataframe for the runs data.
+
+> NOTE: If you don't have experiment data and want to run experiment, see the `client.experiments.run()` section above.
+
+```python
+# TODO
+runs = [
+]
+```
+
+In addition, you must specify which columns are the `example_id` and the `result`, you can do so by using the `ExperimentTaskResultFieldNames`. Moreover, if you choose to pass evaluation data, you can indicate the evaluation columns using `EvaluationResultFieldNames`:
+
+```python
+# TODO
+```
+
+If the number of runs (rows in dataframe, items in list) is too large, the client SDK will try to send the data via Arrow Flight via gRPC for better performance. If you want to force the data transfer to HTTP you can use the `force_http` flag. The response is an `Experiment` object.
+
+```python
+created_experiment = client.experiments.create(
+    name="<your-experiment-name>", # Name must be unique within a dataset
+    dataset_id="<desired-dataset-id>",
+    experiment_runs=..., # List of dictionaries or pandas dataframe
+    task_fields=ExperimentTaskResultFieldNames(...),
+    evaluator_columns=... # Optional
+    # force_http=... # Optionally pass force_http to create experiments via HTTP instead of gRPC, defaults to False
+)
+```
+
+### Get an Experiment
+
+To get a dataset by its ID use `client.datasets.get()`, you can optionally also pass the version ID of a particular version of interest of the dataset. The returned type is `Dataset`.
+
+```python
+dataset = client.datasets.get(
+    dataset_id=... # The unique identifier of the dataset
+    dataset_version_id=... # The unique identifier of the dataset version
+)
+```
+
+### Delete an Experiment
+
+To delete an experiment by its ID use `client.experiments.delete()`. The call returns `None` if successful deletion took place, error otherwise.
+
+```python
+client.experiments.delete(
+    experiment_id=... # The unique identifier of the experiment
+)
+```
+
+### List Experiment runs

-
+You can list the runs of a given experiment using `client.experiments.list_runs()` and passing the experiment ID. You can specify the number of runs desired using the `limit` parameter. If you want a large number of runs, consider using the `all=True` parameter, which will make it so the SDK exports the data using Arrow Flight via gRPC, for increased performance.
+
+```python
+resp = client.experiments.list_runs(
+    experiment_id="your-experiment-id>",
+    limit=... # number of desired runs. Defaults to 100
+    all=... # Whether or not to export all of the runs. Defaults to False
+)
+```
+
+The response is an object of type `ExperimentsRunsList200Response`, and you can access the list of runs via its `experiment_runs` attribute. In addition, you can transform the response object to a dictionary, to JSON format, or a pandas dataframe.
+
+```python
+# Get the list of datasets from the response
+run_list = resp.experiments_runs
+# Get the response as a dictionary
+resp_dict = resp.to_dict()
+# Get the response in JSON format
+resp_json = resp.to_json()
+# Get the response as a pandas dataframe
+resp_df = resp.to_df()
+```
+
+# SDK Configuration
+
+## Logging
+
+### In Code

 You can use `configure_logging` to set up the logging behavior of the Arize package to your needs.

@@ -482,14 +653,14 @@ configure_logging(
 )
 ```

-
+### Via Environment Variables

 Configure the same options as the section above, via:

 ```python
 import os

-#
+# Whether or not you want to disable logging altogether
 os.environ["ARIZE_LOG_ENABLE"] = "true"
 # Set up the logging level
 os.environ["ARIZE_LOG_LEVEL"] = "debug"
@@ -499,6 +670,38 @@ os.environ["ARIZE_LOG_STRUCTURED"] = "false"

 The default behavior of Arize's logs is: enabled, `INFO` level, and not structured.

+## Caching
+
+When downloading big segments of data from Arize, such as a `Dataset` with all of its examples, the SDK will cache the file in `parquet` format under `~/.arize/datasets/dataset_<updated_at_timestamp>.parquet`.
+
+### In Code
+
+You can disable caching via the `enable_caching` parameter when instantiating the client, and also edit the "arize directory":
+
+```python
+client = ArizeClient(
+    enable_caching=False, # Optional parameter, defaults to True
+    arize_directory="my-desired-directory", # Optional parameter, defaults to ~/.arize
+)
+```
+
+### Via Environment Variables
+
+You can also configure the above via:
+
+```python
+import os
+
+# Whether or not you want to disable caching
+os.environ["ARIZE_ENABLE_CACHING"] = "true"
+# Where you want the SDK to store the files
+os.environ["ARIZE_DIRECTORY"] = "~/.arize"
+```
+
+### Clean the cache
+
+To clean the cache you can directly `rm` the files or directory.
+
 # Community

 Join our community to connect with thousands of AI builders.
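The only metadata change in PKG-INFO is the version bump; the rest of the delta is the README body embedded in the long description. A quick local check that the installed build matches the new release (a small sketch using only the standard library; the version string is taken from the diff above):

```python
from importlib.metadata import version

# Confirm the locally installed package matches the release shown in this diff.
installed = version("arize")
print(installed)
assert installed == "8.0.0a17", f"expected 8.0.0a17, found {installed}"
```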
{arize-8.0.0a16 → arize-8.0.0a17}/README.md +216 -13

@@ -35,11 +35,24 @@
 - [Operations on Datasets](#operations-on-datasets)
 - [List Datasets](#list-datasets)
 - [Create a Dataset](#create-a-dataset)
-- [Get Dataset by ID](#get-dataset-by-id)
+- [Get Dataset](#get-dataset)
 - [Delete a Dataset](#delete-a-dataset)
-- [
-- [
-
+- [List Dataset Examples](#list-dataset-examples)
+- [Operations on Experiments](#operations-on-experiments)
+- [List Experiments](#list-experiments)
+- [Run an Experiment](#run-an-experiment)
+- [Create an Experiment](#create-an-experiment)
+- [Get an Experiment](#get-an-experiment)
+- [Delete an Experiment](#delete-an-experiment)
+- [List Experiment runs](#list-experiment-runs)
+- [SDK Configuration](#sdk-configuration)
+- [Logging](#logging)
+- [In Code](#in-code)
+- [Via Environment Variables](#via-environment-variables)
+- [Caching](#caching)
+- [In Code](#in-code-1)
+- [Via Environment Variables](#via-environment-variables-1)
+- [Clean the cache](#clean-the-cache)
 - [Community](#community)

 # Overview
@@ -334,9 +347,9 @@ dataset_list = resp.datasets
 # Get the response as a dictionary
 resp_dict = resp.to_dict()
 # Get the response in JSON format
-
+resp_json = resp.to_json()
 # Get the response as a pandas dataframe
-
+resp_df = resp.to_df()
 ```

 ### Create a Dataset
@@ -366,9 +379,10 @@ If the number of examples (rows in dataframe, items in list) is too large, the c

 ```python
 created_dataset = client.datasets.create(
-
+    space_id="<target-space-id>",
     name="<your-dataset-name>", # Name must be unique within a space
     examples=..., # List of dictionaries or pandas dataframe
+    # force_http=... # Optionally pass force_http to create datasets via HTTP instead of gRPC, defaults to False
 )
 ```

@@ -381,8 +395,7 @@ dataset_dict = create_dataset.to_dict()
 dataset_dict = create_dataset.to_json()
 ```

-
-### Get Dataset by ID
+### Get Dataset

 To get a dataset by its ID use `client.datasets.get()`, you can optionally also pass the version ID of a particular version of interest of the dataset. The returned type is `Dataset`.

@@ -403,9 +416,167 @@ client.datasets.delete(
 )
 ```

-
+### List Dataset Examples
+
+You can list the examples of a given dataset using `client.datasets.list_examples()` and passing the dataset ID and, optionally, the dataset version ID. You can specify the number of examples desired using the `limit` parameter. If you want a large number of examples, consider using the `all=True` parameter, which will make it so the SDK exports the data using Arrow Flight via gRPC, for increased performance.
+
+```python
+resp = client.datasets.list_examples(
+    dataset_id="your-dataset-id>",
+    dataset_version_id="your-dataset-version-id>", # Optional, defaults to latest version
+    limit=... # number of desired examples. Defaults to 100
+    all=... # Whether or not to export all of the examples. Defaults to False
+)
+```
+
+The response is an object of type `DatasetsExamplesList200Response`, and you can access the list of examples via its `examples` attribute. In addition, you can transform the response object to a dictionary, to JSON format, or a pandas dataframe.
+
+```python
+# Get the list of datasets from the response
+examples_list = resp.examples
+# Get the response as a dictionary
+resp_dict = resp.to_dict()
+# Get the response in JSON format
+resp_json = resp.to_json()
+# Get the response as a pandas dataframe
+resp_df = resp.to_df()
+```
+
+## Operations on Experiments
+
+### List Experiments
+
+You can list all experiments that the user has access to using `client.experiments.list()`. You can use the `limit` parameter to specify the maximum number of datasets desired in the response and you can specify the `dataset_id` to target the list operation to a particular dataset.
+
+```python
+resp = client.experiments.list(
+    limit=... # Optional
+    dataset_id=... # Optional
+)
+```
+
+The response is an object of type `ExperimentsList200Response`, and you can access the list of experiments via its `experiments` attribute. In addition, you can transform the response object to a dictionary, to JSON format, or a pandas dataframe.
+
+```python
+# Get the list of datasets from the response
+experiment_list = resp.experiments
+# Get the response as a dictionary
+resp_dict = resp.to_dict()
+# Get the response in JSON format
+resp_json = resp.to_json()
+# Get the response as a pandas dataframe
+resp_df = resp.to_df()
+```
+
+### Run an Experiment
+
+You can run an experiment on a dataset using `client.experiments.run()` by defining a task, evaluators (optional), and passing the dataset id of the dataset you want to use, together with a name for the experiment. The function will download the entire dataset from Arize (unless cached, see caching section under "SDK Configuration"), execute the task to obtain an output, and perform evaluations (if evaluators were passed). The experiments will also be traced, and these traces will be visible in Arize. The experiment will be created and the data logged into Arize automatically. You can avoid logging to Arize by making `dry_run=True`. The function will return the `Experiment` object (or `None` if `dry_run=True`) together with the dataframe with the experiment data.
+
+```python
+experiment, experiment_df = client.run_experiment(
+    name="<name-your-experiment>",
+    dataset_id="<id-of-dataset-to-use>",
+    task=... # The task to be performed in the experiment.
+    evaluators=... # Optional: The evaluators to use in the experiment.
+    dry_run=..., # If True, the experiment result will not be uploaded to Arize. Defaults to False
+    dry_run_count=..., # Number of examples of the dataset to use in the dry run. Defaults to 10
+    concurrency=..., # The number of concurrent tasks to run. Defaults to 3.
+    set_global_tracer_provider=..., # If True, sets the global tracer provider for the experiment. Defaults to False
+    exit_on_error=..., # If True, the experiment will stop running on first occurrence of an error. Defaults to False
+)
+```
+
+The `Experiment` object also counts with convenience method similar to `List***` objects:
+
+```python
+# Get the response as a dictionary
+experiment_dict = create_experiment.to_dict()
+# Get the response in JSON format
+experiment_dict = create_experiment.to_json()
+```
+
+### Create an Experiment
+
+It is possible that you have run the experiment yourself without the above function, and hence you already have experiment data that you want to send to Arize. In this case, use the `client.experiments.create()` method by passing the runs data, we currently don't support creating an empty experiment, for instance, these are 2 rows of runs, as a list of dictionaries. You can also pass a pandas dataframe for the runs data.
+
+> NOTE: If you don't have experiment data and want to run experiment, see the `client.experiments.run()` section above.
+
+```python
+# TODO
+runs = [
+]
+```
+
+In addition, you must specify which columns are the `example_id` and the `result`, you can do so by using the `ExperimentTaskResultFieldNames`. Moreover, if you choose to pass evaluation data, you can indicate the evaluation columns using `EvaluationResultFieldNames`:
+
+```python
+# TODO
+```
+
+If the number of runs (rows in dataframe, items in list) is too large, the client SDK will try to send the data via Arrow Flight via gRPC for better performance. If you want to force the data transfer to HTTP you can use the `force_http` flag. The response is an `Experiment` object.
+
+```python
+created_experiment = client.experiments.create(
+    name="<your-experiment-name>", # Name must be unique within a dataset
+    dataset_id="<desired-dataset-id>",
+    experiment_runs=..., # List of dictionaries or pandas dataframe
+    task_fields=ExperimentTaskResultFieldNames(...),
+    evaluator_columns=... # Optional
+    # force_http=... # Optionally pass force_http to create experiments via HTTP instead of gRPC, defaults to False
+)
+```
+
+### Get an Experiment
+
+To get a dataset by its ID use `client.datasets.get()`, you can optionally also pass the version ID of a particular version of interest of the dataset. The returned type is `Dataset`.
+
+```python
+dataset = client.datasets.get(
+    dataset_id=... # The unique identifier of the dataset
+    dataset_version_id=... # The unique identifier of the dataset version
+)
+```
+
+### Delete an Experiment
+
+To delete an experiment by its ID use `client.experiments.delete()`. The call returns `None` if successful deletion took place, error otherwise.
+
+```python
+client.experiments.delete(
+    experiment_id=... # The unique identifier of the experiment
+)
+```
+
+### List Experiment runs

-
+You can list the runs of a given experiment using `client.experiments.list_runs()` and passing the experiment ID. You can specify the number of runs desired using the `limit` parameter. If you want a large number of runs, consider using the `all=True` parameter, which will make it so the SDK exports the data using Arrow Flight via gRPC, for increased performance.
+
+```python
+resp = client.experiments.list_runs(
+    experiment_id="your-experiment-id>",
+    limit=... # number of desired runs. Defaults to 100
+    all=... # Whether or not to export all of the runs. Defaults to False
+)
+```
+
+The response is an object of type `ExperimentsRunsList200Response`, and you can access the list of runs via its `experiment_runs` attribute. In addition, you can transform the response object to a dictionary, to JSON format, or a pandas dataframe.
+
+```python
+# Get the list of datasets from the response
+run_list = resp.experiments_runs
+# Get the response as a dictionary
+resp_dict = resp.to_dict()
+# Get the response in JSON format
+resp_json = resp.to_json()
+# Get the response as a pandas dataframe
+resp_df = resp.to_df()
+```
+
+# SDK Configuration
+
+## Logging
+
+### In Code

 You can use `configure_logging` to set up the logging behavior of the Arize package to your needs.

@@ -418,14 +589,14 @@ configure_logging(
 )
 ```

-
+### Via Environment Variables

 Configure the same options as the section above, via:

 ```python
 import os

-#
+# Whether or not you want to disable logging altogether
 os.environ["ARIZE_LOG_ENABLE"] = "true"
 # Set up the logging level
 os.environ["ARIZE_LOG_LEVEL"] = "debug"
@@ -435,6 +606,38 @@ os.environ["ARIZE_LOG_STRUCTURED"] = "false"

 The default behavior of Arize's logs is: enabled, `INFO` level, and not structured.

+## Caching
+
+When downloading big segments of data from Arize, such as a `Dataset` with all of its examples, the SDK will cache the file in `parquet` format under `~/.arize/datasets/dataset_<updated_at_timestamp>.parquet`.
+
+### In Code
+
+You can disable caching via the `enable_caching` parameter when instantiating the client, and also edit the "arize directory":
+
+```python
+client = ArizeClient(
+    enable_caching=False, # Optional parameter, defaults to True
+    arize_directory="my-desired-directory", # Optional parameter, defaults to ~/.arize
+)
+```
+
+### Via Environment Variables
+
+You can also configure the above via:
+
+```python
+import os
+
+# Whether or not you want to disable caching
+os.environ["ARIZE_ENABLE_CACHING"] = "true"
+# Where you want the SDK to store the files
+os.environ["ARIZE_DIRECTORY"] = "~/.arize"
+```
+
+### Clean the cache
+
+To clean the cache you can directly `rm` the files or directory.
+
 # Community

 Join our community to connect with thousands of AI builders.
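The new Caching section above documents where downloaded data lands (`~/.arize/datasets/dataset_<updated_at_timestamp>.parquet`) and says the cache is cleaned by removing the files or directory. A minimal sketch of doing that from Python, assuming the default `~/.arize` location (adjust the path if `ARIZE_DIRECTORY` or `arize_directory` was changed):

```python
import shutil
from pathlib import Path

# Default cache location per the README's Caching section; this path is an
# assumption if you have overridden the arize directory.
cache_dir = Path.home() / ".arize" / "datasets"

# Inspect what is currently cached.
for f in sorted(cache_dir.glob("dataset_*.parquet")):
    print(f.name, f.stat().st_size, "bytes")

# Remove the whole cache directory (equivalent to `rm -r ~/.arize/datasets`).
shutil.rmtree(cache_dir, ignore_errors=True)
```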
{arize-8.0.0a16 → arize-8.0.0a17}/src/arize/__init__.py +1 -0

@@ -87,3 +87,4 @@ def make_to_df(field_name: str):
 models.DatasetsList200Response.to_df = make_to_df("datasets") # type: ignore[attr-defined]
 models.DatasetsListExamples200Response.to_df = make_to_df("examples") # type: ignore[attr-defined]
 models.ExperimentsList200Response.to_df = make_to_df("experiments") # type: ignore[attr-defined]
+models.ExperimentsRunsList200Response.to_df = make_to_df("experiment_runs") # type: ignore[attr-defined]
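The hunk header shows these assignments sit next to a `make_to_df(field_name)` factory that turns the named list field of a generated response model into a pandas dataframe. The factory's body is not part of this diff; the following is only a sketch of how such a helper could look, assuming the generated models expose `to_dict()`:

```python
import pandas as pd

def make_to_df(field_name: str):
    # Hypothetical sketch, not the actual implementation in src/arize/__init__.py.
    def to_df(self) -> pd.DataFrame:
        items = getattr(self, field_name) or []
        # Generated OpenAPI models are assumed to expose to_dict(); fall back to vars().
        rows = [i.to_dict() if hasattr(i, "to_dict") else vars(i) for i in items]
        return pd.DataFrame(rows)
    return to_df
```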
{arize-8.0.0a16 → arize-8.0.0a17}/src/arize/_flight/client.py +32 -1

@@ -25,6 +25,7 @@ from arize.utils.proto import get_pb_schema_tracing
 from arize.version import __version__

 if TYPE_CHECKING:
+    import pandas as pd
     import pyarrow as pa


@@ -260,7 +261,7 @@ class ArizeFlightClient:
         space_id: str,
         dataset_id: str,
         dataset_version_id: str | None = None,
-    ):
+    ) -> pd.DataFrame:
         # TODO(Kiko): Space ID should not be needed,
         # should work on server tech debt to remove this
         doget_request = flight_ing_pb2.DoGetRequest(
@@ -283,6 +284,36 @@ class ArizeFlightClient:
             logger.exception(f"Failed to get dataset id={dataset_id}")
             raise RuntimeError(f"Failed to get dataset id={dataset_id}") from e

+    # ---------- experiment methods ----------
+
+    def get_experiment_runs(
+        self,
+        space_id: str,
+        experiment_id: str,
+    ) -> pd.DataFrame:
+        # TODO(Kiko): Space ID should not be needed,
+        # should work on server tech debt to remove this
+        doget_request = flight_ing_pb2.DoGetRequest(
+            get_experiment=flight_ing_pb2.GetExperimentRequest(
+                space_id=space_id,
+                experiment_id=experiment_id,
+            )
+        )
+        descriptor = flight.Ticket(
+            json_format.MessageToJson(doget_request).encode("utf-8")
+        )
+        try:
+            reader = self.do_get(descriptor, options=self.call_options)
+            # read all data into pandas dataframe
+            df = reader.read_all().to_pandas()
+            df = convert_json_str_to_dict(df)
+            return df
+        except Exception as e:
+            logger.exception(f"Failed to get experiment id={experiment_id}")
+            raise RuntimeError(
+                f"Failed to get experiment id={experiment_id}"
+            ) from e
+
     def init_experiment(
         self,
         space_id: str,
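The new `get_experiment_runs` method follows the same Arrow Flight pattern as the existing dataset download: serialize a `DoGetRequest` into a `flight.Ticket`, call `do_get`, and read the stream into pandas. A stripped-down illustration of that pattern with plain `pyarrow.flight` (the endpoint, headers, and ticket payload below are placeholders, not Arize's real values):

```python
import json

import pyarrow.flight as flight

# Placeholder endpoint and credentials, for illustration only.
client = flight.FlightClient("grpc+tls://flight.example.com:443")
options = flight.FlightCallOptions(headers=[(b"authorization", b"Bearer <api-key>")])

# The ticket carries the JSON-encoded request, mirroring MessageToJson(doget_request) above.
ticket = flight.Ticket(
    json.dumps(
        {"getExperiment": {"spaceId": "<space-id>", "experimentId": "<experiment-id>"}}
    ).encode("utf-8")
)

reader = client.do_get(ticket, options=options)
df = reader.read_all().to_pandas()  # stream all record batches into one dataframe
print(df.head())
```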
{arize-8.0.0a16 → arize-8.0.0a17}/src/arize/client.py +8 -0

@@ -12,6 +12,14 @@ if TYPE_CHECKING:
     from arize.spans.client import SpansClient


+# TODO(Kiko): models need to follow resource first pattern
+# - models.DatasetsList200Response
+# - models.DatasetsListExamples200Response
+# - models.ExperimentsList200Response
+# - models.ExperimentsRunsList200Response
+# TODO(Kiko): Root client should have option to clear caches
+# TODO(Kiko): Document caching behavior
+# TODO(Kiko): Force keyword arguments
 # TODO(Kiko): Protobuf versioning is too old
 # TODO(Kiko): Make sure the client has same options as SDKConfiguration
 # TODO(Kiko): It does not make any sense to require space ID in run_experiment, dataset ID should suffice