arize 8.0.0a15__tar.gz → 8.0.0a17__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {arize-8.0.0a15 → arize-8.0.0a17}/PKG-INFO +219 -14
- {arize-8.0.0a15 → arize-8.0.0a17}/README.md +216 -13
- {arize-8.0.0a15 → arize-8.0.0a17}/pyproject.toml +10 -1
- {arize-8.0.0a15/src/arize/datasets → arize-8.0.0a17/src/arize}/__init__.py +20 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_flight/client.py +188 -41
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_flight/types.py +1 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/__init__.py +5 -1
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/api/datasets_api.py +6 -6
- arize-8.0.0a17/src/arize/_generated/api_client/api/experiments_api.py +1468 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/api_client.py +1 -1
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/configuration.py +1 -1
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/exceptions.py +1 -1
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/models/__init__.py +3 -1
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/models/dataset.py +2 -2
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/models/dataset_version.py +1 -1
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/models/datasets_create_request.py +3 -3
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/models/datasets_list200_response.py +1 -1
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/models/datasets_list_examples200_response.py +1 -1
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/models/error.py +1 -1
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/models/experiment.py +6 -6
- arize-8.0.0a17/src/arize/_generated/api_client/models/experiments_create_request.py +98 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/models/experiments_list200_response.py +1 -1
- arize-8.0.0a17/src/arize/_generated/api_client/models/experiments_runs_list200_response.py +92 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/rest.py +1 -1
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/test/test_dataset.py +2 -1
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/test/test_dataset_version.py +1 -1
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/test/test_datasets_api.py +1 -1
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/test/test_datasets_create_request.py +2 -1
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/test/test_datasets_list200_response.py +1 -1
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/test/test_datasets_list_examples200_response.py +1 -1
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/test/test_error.py +1 -1
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/test/test_experiment.py +6 -1
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/test/test_experiments_api.py +23 -2
- arize-8.0.0a17/src/arize/_generated/api_client/test/test_experiments_create_request.py +61 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/test/test_experiments_list200_response.py +1 -1
- arize-8.0.0a17/src/arize/_generated/api_client/test/test_experiments_runs_list200_response.py +56 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client_README.md +13 -8
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/client.py +27 -2
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/config.py +64 -3
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/constants/config.py +12 -2
- arize-8.0.0a17/src/arize/constants/openinference.py +14 -0
- arize-8.0.0a17/src/arize/constants/pyarrow.py +1 -0
- arize-8.0.0a17/src/arize/datasets/client.py +250 -0
- arize-8.0.0a17/src/arize/datasets/errors.py +61 -0
- arize-8.0.0a17/src/arize/datasets/validation.py +46 -0
- arize-8.0.0a17/src/arize/experiments/client.py +566 -0
- arize-8.0.0a17/src/arize/experiments/evaluators/base.py +255 -0
- arize-8.0.0a17/src/arize/experiments/evaluators/exceptions.py +10 -0
- arize-8.0.0a17/src/arize/experiments/evaluators/executors.py +502 -0
- arize-8.0.0a17/src/arize/experiments/evaluators/rate_limiters.py +277 -0
- arize-8.0.0a17/src/arize/experiments/evaluators/types.py +122 -0
- arize-8.0.0a17/src/arize/experiments/evaluators/utils.py +198 -0
- arize-8.0.0a17/src/arize/experiments/functions.py +920 -0
- arize-8.0.0a17/src/arize/experiments/tracing.py +276 -0
- arize-8.0.0a17/src/arize/experiments/types.py +394 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/models/client.py +4 -1
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/spans/client.py +16 -20
- arize-8.0.0a17/src/arize/spans/validation/spans/__init__.py +0 -0
- arize-8.0.0a17/src/arize/utils/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/utils/arrow.py +4 -3
- arize-8.0.0a17/src/arize/utils/cache.py +68 -0
- arize-8.0.0a17/src/arize/utils/openinference_conversion.py +56 -0
- arize-8.0.0a17/src/arize/utils/proto.py +13 -0
- arize-8.0.0a17/src/arize/utils/size.py +22 -0
- arize-8.0.0a17/src/arize/version.py +1 -0
- arize-8.0.0a15/src/arize/__init__.py +0 -20
- arize-8.0.0a15/src/arize/_generated/api_client/api/experiments_api.py +0 -605
- arize-8.0.0a15/src/arize/datasets/client.py +0 -142
- arize-8.0.0a15/src/arize/experiments/client.py +0 -10
- arize-8.0.0a15/src/arize/version.py +0 -1
- {arize-8.0.0a15 → arize-8.0.0a17}/.gitignore +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/LICENSE.md +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_exporter/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_exporter/client.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_exporter/parsers/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_exporter/parsers/tracing_data_parser.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_exporter/validation.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_flight/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/api/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/api_response.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/api_client/test/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/protocol/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/protocol/flight/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/protocol/flight/export_pb2.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/protocol/flight/ingest_pb2.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/protocol/rec/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_generated/protocol/rec/public_pb2.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/_lazy.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/constants/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/constants/ml.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/constants/model_mapping.json +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/constants/spans.py +0 -0
- {arize-8.0.0a15/src/arize/exceptions → arize-8.0.0a17/src/arize/datasets}/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/embeddings/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/embeddings/auto_generator.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/embeddings/base_generators.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/embeddings/constants.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/embeddings/cv_generators.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/embeddings/errors.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/embeddings/nlp_generators.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/embeddings/tabular_generators.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/embeddings/usecases.py +0 -0
- {arize-8.0.0a15/src/arize/experiments → arize-8.0.0a17/src/arize/exceptions}/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/exceptions/auth.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/exceptions/base.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/exceptions/models.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/exceptions/parameters.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/exceptions/spaces.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/exceptions/types.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/exceptions/values.py +0 -0
- {arize-8.0.0a15/src/arize/models → arize-8.0.0a17/src/arize/experiments}/__init__.py +0 -0
- {arize-8.0.0a15/src/arize/models/batch_validation → arize-8.0.0a17/src/arize/experiments/evaluators}/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/logging.py +0 -0
- {arize-8.0.0a15/src/arize/models/surrogate_explainer → arize-8.0.0a17/src/arize/models}/__init__.py +0 -0
- {arize-8.0.0a15/src/arize/spans → arize-8.0.0a17/src/arize/models/batch_validation}/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/models/batch_validation/errors.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/models/batch_validation/validator.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/models/bounded_executor.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/models/casting.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/models/proto.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/models/stream_validation.py +0 -0
- {arize-8.0.0a15/src/arize/spans/validation → arize-8.0.0a17/src/arize/models/surrogate_explainer}/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/models/surrogate_explainer/mimic.py +0 -0
- {arize-8.0.0a15/src/arize/spans/validation/annotations → arize-8.0.0a17/src/arize/spans}/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/spans/columns.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/spans/conversion.py +0 -0
- {arize-8.0.0a15/src/arize/spans/validation/common → arize-8.0.0a17/src/arize/spans/validation}/__init__.py +0 -0
- {arize-8.0.0a15/src/arize/spans/validation/evals → arize-8.0.0a17/src/arize/spans/validation/annotations}/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/spans/validation/annotations/annotations_validation.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/spans/validation/annotations/dataframe_form_validation.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/spans/validation/annotations/value_validation.py +0 -0
- {arize-8.0.0a15/src/arize/spans/validation/spans → arize-8.0.0a17/src/arize/spans/validation/common}/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/spans/validation/common/argument_validation.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/spans/validation/common/dataframe_form_validation.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/spans/validation/common/errors.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/spans/validation/common/value_validation.py +0 -0
- {arize-8.0.0a15/src/arize/utils → arize-8.0.0a17/src/arize/spans/validation/evals}/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/spans/validation/evals/dataframe_form_validation.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/spans/validation/evals/evals_validation.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/spans/validation/evals/value_validation.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/spans/validation/metadata/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/spans/validation/metadata/argument_validation.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/spans/validation/metadata/dataframe_form_validation.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/spans/validation/metadata/value_validation.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/spans/validation/spans/dataframe_form_validation.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/spans/validation/spans/spans_validation.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/spans/validation/spans/value_validation.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/types.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/utils/dataframe.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/utils/online_tasks/__init__.py +0 -0
- {arize-8.0.0a15 → arize-8.0.0a17}/src/arize/utils/online_tasks/dataframe_preprocessor.py +0 -0
{arize-8.0.0a15 → arize-8.0.0a17}/PKG-INFO

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: arize
-Version: 8.0.0a15
+Version: 8.0.0a17
 Summary: A helper library to interact with Arize AI APIs
 Project-URL: Homepage, https://arize.com
 Project-URL: Documentation, https://docs.arize.com/arize
@@ -35,7 +35,9 @@ Requires-Dist: tokenizers<1,>=0.13; extra == 'auto-embeddings'
 Requires-Dist: torch<3,>=1.13; extra == 'auto-embeddings'
 Requires-Dist: transformers<5,>=4.25; extra == 'auto-embeddings'
 Provides-Extra: datasets-experiments
+Requires-Dist: numpy>=2.0.0; extra == 'datasets-experiments'
 Requires-Dist: pydantic; extra == 'datasets-experiments'
+Requires-Dist: wrapt<2.0.0,>=1.0.0; extra == 'datasets-experiments'
 Provides-Extra: dev
 Requires-Dist: pytest==8.4.2; extra == 'dev'
 Requires-Dist: ruff==0.13.2; extra == 'dev'
@@ -97,11 +99,24 @@ Description-Content-Type: text/markdown
 - [Operations on Datasets](#operations-on-datasets)
 - [List Datasets](#list-datasets)
 - [Create a Dataset](#create-a-dataset)
-- [Get Dataset
+- [Get Dataset](#get-dataset)
 - [Delete a Dataset](#delete-a-dataset)
-- [
-- [
-
+- [List Dataset Examples](#list-dataset-examples)
+- [Operations on Experiments](#operations-on-experiments)
+- [List Experiments](#list-experiments)
+- [Run an Experiment](#run-an-experiment)
+- [Create an Experiment](#create-an-experiment)
+- [Get an Experiment](#get-an-experiment)
+- [Delete an Experiment](#delete-an-experiment)
+- [List Experiment runs](#list-experiment-runs)
+- [SDK Configuration](#sdk-configuration)
+- [Logging](#logging)
+- [In Code](#in-code)
+- [Via Environment Variables](#via-environment-variables)
+- [Caching](#caching)
+- [In Code](#in-code-1)
+- [Via Environment Variables](#via-environment-variables-1)
+- [Clean the cache](#clean-the-cache)
 - [Community](#community)

 # Overview
@@ -396,9 +411,9 @@ dataset_list = resp.datasets
 # Get the response as a dictionary
 resp_dict = resp.to_dict()
 # Get the response in JSON format
-
+resp_json = resp.to_json()
 # Get the response as a pandas dataframe
-
+resp_df = resp.to_df()
 ```

 ### Create a Dataset
@@ -428,9 +443,10 @@ If the number of examples (rows in dataframe, items in list) is too large, the c

 ```python
 created_dataset = client.datasets.create(
-
+    space_id="<target-space-id>",
     name="<your-dataset-name>", # Name must be unique within a space
     examples=..., # List of dictionaries or pandas dataframe
+    # force_http=... # Optionally pass force_http to create datasets via HTTP instead of gRPC, defaults to False
 )
 ```

````
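The hunk above documents `client.datasets.create()` with a `space_id`, a `name` that must be unique within the space, `examples` given as a list of dictionaries or a pandas dataframe, and an optional `force_http` flag. A minimal sketch of a call follows; the column names, the bare `ArizeClient()` construction, and the assumption that credentials are resolved from the environment are illustrative and not taken from the package:

```python
import pandas as pd

from arize import ArizeClient

client = ArizeClient()  # assumption: credentials/space are picked up from the environment

# Hypothetical example rows; the diff does not define a required schema,
# so these column names are illustrative only.
examples = [
    {"input": "What is Arize?", "expected_output": "An AI observability platform."},
    {"input": "What does the SDK upload?", "expected_output": "Models, spans, datasets, and experiments."},
]

created_dataset = client.datasets.create(
    space_id="<target-space-id>",
    name="qa-examples",               # must be unique within the space
    examples=pd.DataFrame(examples),  # a list of dictionaries is also accepted per the README
    # force_http=True,                # optional: send over HTTP instead of Arrow Flight/gRPC
)
print(created_dataset.to_dict())
```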
{arize-8.0.0a15 → arize-8.0.0a17}/PKG-INFO

````diff
@@ -443,8 +459,7 @@ dataset_dict = create_dataset.to_dict()
 dataset_dict = create_dataset.to_json()
 ```

-
-### Get Dataset by ID
+### Get Dataset

 To get a dataset by its ID use `client.datasets.get()`, you can optionally also pass the version ID of a particular version of interest of the dataset. The returned type is `Dataset`.

@@ -465,9 +480,167 @@ client.datasets.delete(
 )
 ```

-
+### List Dataset Examples
+
+You can list the examples of a given dataset using `client.datasets.list_examples()` and passing the dataset ID and, optionally, the dataset version ID. You can specify the number of examples desired using the `limit` parameter. If you want a large number of examples, consider using the `all=True` parameter, which will make it so the SDK exports the data using Arrow Flight via gRPC, for increased performance.
+
+```python
+resp = client.datasets.list_examples(
+    dataset_id="your-dataset-id>",
+    dataset_version_id="your-dataset-version-id>", # Optional, defaults to latest version
+    limit=... # number of desired examples. Defaults to 100
+    all=... # Whether or not to export all of the examples. Defaults to False
+)
+```
+
+The response is an object of type `DatasetsExamplesList200Response`, and you can access the list of examples via its `examples` attribute. In addition, you can transform the response object to a dictionary, to JSON format, or a pandas dataframe.
+
+```python
+# Get the list of datasets from the response
+examples_list = resp.examples
+# Get the response as a dictionary
+resp_dict = resp.to_dict()
+# Get the response in JSON format
+resp_json = resp.to_json()
+# Get the response as a pandas dataframe
+resp_df = resp.to_df()
+```
+
+## Operations on Experiments
+
+### List Experiments
+
+You can list all experiments that the user has access to using `client.experiments.list()`. You can use the `limit` parameter to specify the maximum number of datasets desired in the response and you can specify the `dataset_id` to target the list operation to a particular dataset.
+
+```python
+resp = client.experiments.list(
+    limit=... # Optional
+    dataset_id=... # Optional
+)
+```
+
+The response is an object of type `ExperimentsList200Response`, and you can access the list of experiments via its `experiments` attribute. In addition, you can transform the response object to a dictionary, to JSON format, or a pandas dataframe.
+
+```python
+# Get the list of datasets from the response
+experiment_list = resp.experiments
+# Get the response as a dictionary
+resp_dict = resp.to_dict()
+# Get the response in JSON format
+resp_json = resp.to_json()
+# Get the response as a pandas dataframe
+resp_df = resp.to_df()
+```
+
+### Run an Experiment
+
+You can run an experiment on a dataset using `client.experiments.run()` by defining a task, evaluators (optional), and passing the dataset id of the dataset you want to use, together with a name for the experiment. The function will download the entire dataset from Arize (unless cached, see caching section under "SDK Configuration"), execute the task to obtain an output, and perform evaluations (if evaluators were passed). The experiments will also be traced, and these traces will be visible in Arize. The experiment will be created and the data logged into Arize automatically. You can avoid logging to Arize by making `dry_run=True`. The function will return the `Experiment` object (or `None` if `dry_run=True`) together with the dataframe with the experiment data.
+
+```python
+experiment, experiment_df = client.run_experiment(
+    name="<name-your-experiment>",
+    dataset_id="<id-of-dataset-to-use>",
+    task=... # The task to be performed in the experiment.
+    evaluators=... # Optional: The evaluators to use in the experiment.
+    dry_run=..., # If True, the experiment result will not be uploaded to Arize. Defaults to False
+    dry_run_count=..., # Number of examples of the dataset to use in the dry run. Defaults to 10
+    concurrency=..., # The number of concurrent tasks to run. Defaults to 3.
+    set_global_tracer_provider=..., # If True, sets the global tracer provider for the experiment. Defaults to False
+    exit_on_error=..., # If True, the experiment will stop running on first occurrence of an error. Defaults to False
+)
+```
+
+The `Experiment` object also counts with convenience method similar to `List***` objects:
+
+```python
+# Get the response as a dictionary
+experiment_dict = create_experiment.to_dict()
+# Get the response in JSON format
+experiment_dict = create_experiment.to_json()
+```
+
+### Create an Experiment
+
+It is possible that you have run the experiment yourself without the above function, and hence you already have experiment data that you want to send to Arize. In this case, use the `client.experiments.create()` method by passing the runs data, we currently don't support creating an empty experiment, for instance, these are 2 rows of runs, as a list of dictionaries. You can also pass a pandas dataframe for the runs data.
+
+> NOTE: If you don't have experiment data and want to run experiment, see the `client.experiments.run()` section above.
+
+```python
+# TODO
+runs = [
+]
+```
+
+In addition, you must specify which columns are the `example_id` and the `result`, you can do so by using the `ExperimentTaskResultFieldNames`. Moreover, if you choose to pass evaluation data, you can indicate the evaluation columns using `EvaluationResultFieldNames`:
+
+```python
+# TODO
+```
+
+If the number of runs (rows in dataframe, items in list) is too large, the client SDK will try to send the data via Arrow Flight via gRPC for better performance. If you want to force the data transfer to HTTP you can use the `force_http` flag. The response is an `Experiment` object.
+
+```python
+created_experiment = client.experiments.create(
+    name="<your-experiment-name>", # Name must be unique within a dataset
+    dataset_id="<desired-dataset-id>",
+    experiment_runs=..., # List of dictionaries or pandas dataframe
+    task_fields=ExperimentTaskResultFieldNames(...),
+    evaluator_columns=... # Optional
+    # force_http=... # Optionally pass force_http to create experiments via HTTP instead of gRPC, defaults to False
+)
+```
+
+### Get an Experiment
+
+To get a dataset by its ID use `client.datasets.get()`, you can optionally also pass the version ID of a particular version of interest of the dataset. The returned type is `Dataset`.
+
+```python
+dataset = client.datasets.get(
+    dataset_id=... # The unique identifier of the dataset
+    dataset_version_id=... # The unique identifier of the dataset version
+)
+```
+
+### Delete an Experiment
+
+To delete an experiment by its ID use `client.experiments.delete()`. The call returns `None` if successful deletion took place, error otherwise.
+
+```python
+client.experiments.delete(
+    experiment_id=... # The unique identifier of the experiment
+)
+```
+
+### List Experiment runs

-
+You can list the runs of a given experiment using `client.experiments.list_runs()` and passing the experiment ID. You can specify the number of runs desired using the `limit` parameter. If you want a large number of runs, consider using the `all=True` parameter, which will make it so the SDK exports the data using Arrow Flight via gRPC, for increased performance.
+
+```python
+resp = client.experiments.list_runs(
+    experiment_id="your-experiment-id>",
+    limit=... # number of desired runs. Defaults to 100
+    all=... # Whether or not to export all of the runs. Defaults to False
+)
+```
+
+The response is an object of type `ExperimentsRunsList200Response`, and you can access the list of runs via its `experiment_runs` attribute. In addition, you can transform the response object to a dictionary, to JSON format, or a pandas dataframe.
+
+```python
+# Get the list of datasets from the response
+run_list = resp.experiments_runs
+# Get the response as a dictionary
+resp_dict = resp.to_dict()
+# Get the response in JSON format
+resp_json = resp.to_json()
+# Get the response as a pandas dataframe
+resp_df = resp.to_df()
+```
+
+# SDK Configuration
+
+## Logging
+
+### In Code

 You can use `configure_logging` to set up the logging behavior of the Arize package to your needs.

````
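The README section added above explains `client.experiments.run()`: it downloads the dataset, executes a task per example, optionally runs evaluators, traces the run, and logs the result to Arize unless `dry_run=True`. The shipped snippet calls `client.run_experiment(...)` and leaves `task` and `evaluators` as `...`. The sketch below follows the prose form `client.experiments.run()` and assumes, since the diff does not show it, that a task receives one example and returns an output, and that an evaluator receives the output plus the example and returns a score:

```python
from arize import ArizeClient

client = ArizeClient()  # assumption: credentials/space are configured via the environment


def answer_question(example) -> str:
    """Hypothetical task: produce an output for a single dataset example."""
    question = example.get("input", "") if isinstance(example, dict) else str(example)
    return f"Echo: {question}"


def exact_match(output, example) -> float:
    """Hypothetical evaluator: 1.0 when the output equals the expected answer, else 0.0."""
    expected = example.get("expected_output", "") if isinstance(example, dict) else ""
    return 1.0 if output == expected else 0.0


# Parameter names follow the README snippet; only the callables are invented here.
experiment, experiment_df = client.experiments.run(
    name="echo-baseline",
    dataset_id="<id-of-dataset-to-use>",
    task=answer_question,
    evaluators=[exact_match],
    dry_run=True,       # keep results local while iterating; experiment is None in this mode
    dry_run_count=10,
    concurrency=3,
)
print(experiment_df.head())
```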
{arize-8.0.0a15 → arize-8.0.0a17}/PKG-INFO

````diff
@@ -480,14 +653,14 @@ configure_logging(
 )
 ```

-
+### Via Environment Variables

 Configure the same options as the section above, via:

 ```python
 import os

-#
+# Whether or not you want to disable logging altogether
 os.environ["ARIZE_LOG_ENABLE"] = "true"
 # Set up the logging level
 os.environ["ARIZE_LOG_LEVEL"] = "debug"
````
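The hunk above documents the `ARIZE_LOG_ENABLE`, `ARIZE_LOG_LEVEL`, and `ARIZE_LOG_STRUCTURED` variables (the in-code `configure_logging(...)` arguments are collapsed out of the context lines). The new `src/arize/__init__.py` further down in this diff only attaches a `logging.NullHandler()` to the `arize` logger and calls `auto_configure_from_env()` at import time, so a sketch of combining the environment variables with an application-level handler looks roughly like this (the set-the-variables-before-import ordering is my assumption):

```python
import logging
import os

# Set the documented variables before the first import of arize so that
# auto_configure_from_env() (called in arize/__init__.py) can see them.
os.environ["ARIZE_LOG_ENABLE"] = "true"
os.environ["ARIZE_LOG_LEVEL"] = "debug"
os.environ["ARIZE_LOG_STRUCTURED"] = "false"

import arize  # noqa: E402,F401  (deliberately imported after the env vars are set)

# The package only installs a NullHandler by default, so an application that
# wants console output can attach its own handler to the "arize" logger.
arize_logger = logging.getLogger("arize")
arize_logger.addHandler(logging.StreamHandler())
arize_logger.setLevel(logging.DEBUG)
```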
{arize-8.0.0a15 → arize-8.0.0a17}/PKG-INFO

````diff
@@ -497,6 +670,38 @@ os.environ["ARIZE_LOG_STRUCTURED"] = "false"

 The default behavior of Arize's logs is: enabled, `INFO` level, and not structured.

+## Caching
+
+When downloading big segments of data from Arize, such as a `Dataset` with all of its examples, the SDK will cache the file in `parquet` format under `~/.arize/datasets/dataset_<updated_at_timestamp>.parquet`.
+
+### In Code
+
+You can disable caching via the `enable_caching` parameter when instantiating the client, and also edit the "arize directory":
+
+```python
+client = ArizeClient(
+    enable_caching=False, # Optional parameter, defaults to True
+    arize_directory="my-desired-directory", # Optional parameter, defaults to ~/.arize
+)
+```
+
+### Via Environment Variables
+
+You can also configure the above via:
+
+```python
+import os
+
+# Whether or not you want to disable caching
+os.environ["ARIZE_ENABLE_CACHING"] = "true"
+# Where you want the SDK to store the files
+os.environ["ARIZE_DIRECTORY"] = "~/.arize"
+```
+
+### Clean the cache
+
+To clean the cache you can directly `rm` the files or directory.
+
 # Community

 Join our community to connect with thousands of AI builders.
````
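The new Caching section says large downloads are stored as parquet under the Arize directory and can be controlled with `enable_caching`/`arize_directory` or the `ARIZE_ENABLE_CACHING`/`ARIZE_DIRECTORY` variables. A sketch tying those knobs to the `list_runs` call documented earlier; the no-argument `ArizeClient()` again assumes credentials are resolved from the environment:

```python
import os

from arize import ArizeClient

# Keep cached parquet files next to the project instead of under ~/.arize.
os.environ["ARIZE_ENABLE_CACHING"] = "true"
os.environ["ARIZE_DIRECTORY"] = "./.arize-cache"

client = ArizeClient()  # assumption: credentials/space come from the environment

resp = client.experiments.list_runs(
    experiment_id="<your-experiment-id>",
    all=True,  # export every run; large exports go over Arrow Flight/gRPC per the README
)

# to_df() is attached to the generated response models by src/arize/__init__.py (see below).
runs_df = resp.to_df()
print(runs_df.shape)
```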
{arize-8.0.0a15 → arize-8.0.0a17}/README.md

````diff
@@ -35,11 +35,24 @@
 - [Operations on Datasets](#operations-on-datasets)
 - [List Datasets](#list-datasets)
 - [Create a Dataset](#create-a-dataset)
-- [Get Dataset
+- [Get Dataset](#get-dataset)
 - [Delete a Dataset](#delete-a-dataset)
-- [
-- [
-
+- [List Dataset Examples](#list-dataset-examples)
+- [Operations on Experiments](#operations-on-experiments)
+- [List Experiments](#list-experiments)
+- [Run an Experiment](#run-an-experiment)
+- [Create an Experiment](#create-an-experiment)
+- [Get an Experiment](#get-an-experiment)
+- [Delete an Experiment](#delete-an-experiment)
+- [List Experiment runs](#list-experiment-runs)
+- [SDK Configuration](#sdk-configuration)
+- [Logging](#logging)
+- [In Code](#in-code)
+- [Via Environment Variables](#via-environment-variables)
+- [Caching](#caching)
+- [In Code](#in-code-1)
+- [Via Environment Variables](#via-environment-variables-1)
+- [Clean the cache](#clean-the-cache)
 - [Community](#community)

 # Overview
@@ -334,9 +347,9 @@ dataset_list = resp.datasets
 # Get the response as a dictionary
 resp_dict = resp.to_dict()
 # Get the response in JSON format
-
+resp_json = resp.to_json()
 # Get the response as a pandas dataframe
-
+resp_df = resp.to_df()
 ```

 ### Create a Dataset
@@ -366,9 +379,10 @@ If the number of examples (rows in dataframe, items in list) is too large, the c

 ```python
 created_dataset = client.datasets.create(
-
+    space_id="<target-space-id>",
     name="<your-dataset-name>", # Name must be unique within a space
     examples=..., # List of dictionaries or pandas dataframe
+    # force_http=... # Optionally pass force_http to create datasets via HTTP instead of gRPC, defaults to False
 )
 ```

@@ -381,8 +395,7 @@ dataset_dict = create_dataset.to_dict()
 dataset_dict = create_dataset.to_json()
 ```

-
-### Get Dataset by ID
+### Get Dataset

 To get a dataset by its ID use `client.datasets.get()`, you can optionally also pass the version ID of a particular version of interest of the dataset. The returned type is `Dataset`.

@@ -403,9 +416,167 @@ client.datasets.delete(
 )
 ```

-
+### List Dataset Examples
+
+You can list the examples of a given dataset using `client.datasets.list_examples()` and passing the dataset ID and, optionally, the dataset version ID. You can specify the number of examples desired using the `limit` parameter. If you want a large number of examples, consider using the `all=True` parameter, which will make it so the SDK exports the data using Arrow Flight via gRPC, for increased performance.
+
+```python
+resp = client.datasets.list_examples(
+    dataset_id="your-dataset-id>",
+    dataset_version_id="your-dataset-version-id>", # Optional, defaults to latest version
+    limit=... # number of desired examples. Defaults to 100
+    all=... # Whether or not to export all of the examples. Defaults to False
+)
+```
+
+The response is an object of type `DatasetsExamplesList200Response`, and you can access the list of examples via its `examples` attribute. In addition, you can transform the response object to a dictionary, to JSON format, or a pandas dataframe.
+
+```python
+# Get the list of datasets from the response
+examples_list = resp.examples
+# Get the response as a dictionary
+resp_dict = resp.to_dict()
+# Get the response in JSON format
+resp_json = resp.to_json()
+# Get the response as a pandas dataframe
+resp_df = resp.to_df()
+```
+
+## Operations on Experiments
+
+### List Experiments
+
+You can list all experiments that the user has access to using `client.experiments.list()`. You can use the `limit` parameter to specify the maximum number of datasets desired in the response and you can specify the `dataset_id` to target the list operation to a particular dataset.
+
+```python
+resp = client.experiments.list(
+    limit=... # Optional
+    dataset_id=... # Optional
+)
+```
+
+The response is an object of type `ExperimentsList200Response`, and you can access the list of experiments via its `experiments` attribute. In addition, you can transform the response object to a dictionary, to JSON format, or a pandas dataframe.
+
+```python
+# Get the list of datasets from the response
+experiment_list = resp.experiments
+# Get the response as a dictionary
+resp_dict = resp.to_dict()
+# Get the response in JSON format
+resp_json = resp.to_json()
+# Get the response as a pandas dataframe
+resp_df = resp.to_df()
+```
+
+### Run an Experiment
+
+You can run an experiment on a dataset using `client.experiments.run()` by defining a task, evaluators (optional), and passing the dataset id of the dataset you want to use, together with a name for the experiment. The function will download the entire dataset from Arize (unless cached, see caching section under "SDK Configuration"), execute the task to obtain an output, and perform evaluations (if evaluators were passed). The experiments will also be traced, and these traces will be visible in Arize. The experiment will be created and the data logged into Arize automatically. You can avoid logging to Arize by making `dry_run=True`. The function will return the `Experiment` object (or `None` if `dry_run=True`) together with the dataframe with the experiment data.
+
+```python
+experiment, experiment_df = client.run_experiment(
+    name="<name-your-experiment>",
+    dataset_id="<id-of-dataset-to-use>",
+    task=... # The task to be performed in the experiment.
+    evaluators=... # Optional: The evaluators to use in the experiment.
+    dry_run=..., # If True, the experiment result will not be uploaded to Arize. Defaults to False
+    dry_run_count=..., # Number of examples of the dataset to use in the dry run. Defaults to 10
+    concurrency=..., # The number of concurrent tasks to run. Defaults to 3.
+    set_global_tracer_provider=..., # If True, sets the global tracer provider for the experiment. Defaults to False
+    exit_on_error=..., # If True, the experiment will stop running on first occurrence of an error. Defaults to False
+)
+```
+
+The `Experiment` object also counts with convenience method similar to `List***` objects:
+
+```python
+# Get the response as a dictionary
+experiment_dict = create_experiment.to_dict()
+# Get the response in JSON format
+experiment_dict = create_experiment.to_json()
+```
+
+### Create an Experiment
+
+It is possible that you have run the experiment yourself without the above function, and hence you already have experiment data that you want to send to Arize. In this case, use the `client.experiments.create()` method by passing the runs data, we currently don't support creating an empty experiment, for instance, these are 2 rows of runs, as a list of dictionaries. You can also pass a pandas dataframe for the runs data.
+
+> NOTE: If you don't have experiment data and want to run experiment, see the `client.experiments.run()` section above.
+
+```python
+# TODO
+runs = [
+]
+```
+
+In addition, you must specify which columns are the `example_id` and the `result`, you can do so by using the `ExperimentTaskResultFieldNames`. Moreover, if you choose to pass evaluation data, you can indicate the evaluation columns using `EvaluationResultFieldNames`:
+
+```python
+# TODO
+```
+
+If the number of runs (rows in dataframe, items in list) is too large, the client SDK will try to send the data via Arrow Flight via gRPC for better performance. If you want to force the data transfer to HTTP you can use the `force_http` flag. The response is an `Experiment` object.
+
+```python
+created_experiment = client.experiments.create(
+    name="<your-experiment-name>", # Name must be unique within a dataset
+    dataset_id="<desired-dataset-id>",
+    experiment_runs=..., # List of dictionaries or pandas dataframe
+    task_fields=ExperimentTaskResultFieldNames(...),
+    evaluator_columns=... # Optional
+    # force_http=... # Optionally pass force_http to create experiments via HTTP instead of gRPC, defaults to False
+)
+```
+
+### Get an Experiment
+
+To get a dataset by its ID use `client.datasets.get()`, you can optionally also pass the version ID of a particular version of interest of the dataset. The returned type is `Dataset`.
+
+```python
+dataset = client.datasets.get(
+    dataset_id=... # The unique identifier of the dataset
+    dataset_version_id=... # The unique identifier of the dataset version
+)
+```
+
+### Delete an Experiment
+
+To delete an experiment by its ID use `client.experiments.delete()`. The call returns `None` if successful deletion took place, error otherwise.
+
+```python
+client.experiments.delete(
+    experiment_id=... # The unique identifier of the experiment
+)
+```
+
+### List Experiment runs

-
+You can list the runs of a given experiment using `client.experiments.list_runs()` and passing the experiment ID. You can specify the number of runs desired using the `limit` parameter. If you want a large number of runs, consider using the `all=True` parameter, which will make it so the SDK exports the data using Arrow Flight via gRPC, for increased performance.
+
+```python
+resp = client.experiments.list_runs(
+    experiment_id="your-experiment-id>",
+    limit=... # number of desired runs. Defaults to 100
+    all=... # Whether or not to export all of the runs. Defaults to False
+)
+```
+
+The response is an object of type `ExperimentsRunsList200Response`, and you can access the list of runs via its `experiment_runs` attribute. In addition, you can transform the response object to a dictionary, to JSON format, or a pandas dataframe.
+
+```python
+# Get the list of datasets from the response
+run_list = resp.experiments_runs
+# Get the response as a dictionary
+resp_dict = resp.to_dict()
+# Get the response in JSON format
+resp_json = resp.to_json()
+# Get the response as a pandas dataframe
+resp_df = resp.to_df()
+```
+
+# SDK Configuration
+
+## Logging
+
+### In Code

 You can use `configure_logging` to set up the logging behavior of the Arize package to your needs.

@@ -418,14 +589,14 @@ configure_logging(
 )
 ```

-
+### Via Environment Variables

 Configure the same options as the section above, via:

 ```python
 import os

-#
+# Whether or not you want to disable logging altogether
 os.environ["ARIZE_LOG_ENABLE"] = "true"
 # Set up the logging level
 os.environ["ARIZE_LOG_LEVEL"] = "debug"
@@ -435,6 +606,38 @@ os.environ["ARIZE_LOG_STRUCTURED"] = "false"

 The default behavior of Arize's logs is: enabled, `INFO` level, and not structured.

+## Caching
+
+When downloading big segments of data from Arize, such as a `Dataset` with all of its examples, the SDK will cache the file in `parquet` format under `~/.arize/datasets/dataset_<updated_at_timestamp>.parquet`.
+
+### In Code
+
+You can disable caching via the `enable_caching` parameter when instantiating the client, and also edit the "arize directory":
+
+```python
+client = ArizeClient(
+    enable_caching=False, # Optional parameter, defaults to True
+    arize_directory="my-desired-directory", # Optional parameter, defaults to ~/.arize
+)
+```
+
+### Via Environment Variables
+
+You can also configure the above via:
+
+```python
+import os
+
+# Whether or not you want to disable caching
+os.environ["ARIZE_ENABLE_CACHING"] = "true"
+# Where you want the SDK to store the files
+os.environ["ARIZE_DIRECTORY"] = "~/.arize"
+```
+
+### Clean the cache
+
+To clean the cache you can directly `rm` the files or directory.
+
 # Community

 Join our community to connect with thousands of AI builders.
````
{arize-8.0.0a15 → arize-8.0.0a17}/pyproject.toml

````diff
@@ -38,7 +38,7 @@ dependencies = [
     "lazy-imports",
     # "requests_futures==1.0.0",
     # "googleapis_common_protos>=1.51.0,<2",
-    # "protobuf>=4.21.0,<
+    # "protobuf>=4.21.0,<7",
     # "pyarrow>=0.15.0",
     # "tqdm>=4.60.0,<5",
     # "pydantic>=2.0.0,<3",
@@ -72,6 +72,15 @@ ml-batch = [
 ]
 datasets-experiments = [
     "pydantic",
+    "numpy>=2.0.0",
+    "wrapt>=1.0.0,<2.0.0",
+    # "openinference-semantic-conventions>=0.1.21, <1",
+    # "opentelemetry-exporter-otlp-proto-common>=1.38.0",
+    # "opentelemetry-exporter-otlp-proto-grpc>=1.38.0",
+    # "opentelemetry-sdk>=1.38.0",
+    #
+    # "opentelemetry-api>=1.38.0",
+    # "opentelemetry-proto>=1.38.0",
 ]
 mimic-explainer = [
     "interpret-community[mimic]>=0.22.0,<1",
````
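The `datasets-experiments` extra now declares `numpy>=2.0.0` and `wrapt>=1.0.0,<2.0.0` alongside `pydantic`, while the OpenTelemetry/OpenInference pins stay commented out. As an illustrative guard (not part of the package), an application could verify the extra is installed before using `client.datasets` or `client.experiments`:

```python
import importlib.util

# Runtime dependencies declared by the datasets-experiments extra in this release.
_REQUIRED = ("pydantic", "numpy", "wrapt")

missing = [name for name in _REQUIRED if importlib.util.find_spec(name) is None]
if missing:
    raise ImportError(
        f"Missing optional dependencies {missing}; "
        "install them with: pip install 'arize[datasets-experiments]'"
    )
```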
{arize-8.0.0a15/src/arize/datasets → arize-8.0.0a17/src/arize}/__init__.py

````diff
@@ -1,6 +1,24 @@
+import logging
 from collections.abc import Mapping

 from arize._generated.api_client import models
+from arize.client import ArizeClient
+from arize.config import SDKConfiguration
+
+# Attach a NullHandler by default in the top-level package
+# so that if no configuration is installed, nothing explodes.
+logging.getLogger("arize").addHandler(logging.NullHandler())
+
+# Opt-in env-based logging
+try:
+    from .logging import auto_configure_from_env
+
+    auto_configure_from_env()
+except Exception:
+    # Never let logging config crash imports
+    pass
+
+__all__ = ["ArizeClient", "SDKConfiguration"]


 def make_to_df(field_name: str):
@@ -68,3 +86,5 @@ def make_to_df(field_name: str):

 models.DatasetsList200Response.to_df = make_to_df("datasets") # type: ignore[attr-defined]
 models.DatasetsListExamples200Response.to_df = make_to_df("examples") # type: ignore[attr-defined]
+models.ExperimentsList200Response.to_df = make_to_df("experiments") # type: ignore[attr-defined]
+models.ExperimentsRunsList200Response.to_df = make_to_df("experiment_runs") # type: ignore[attr-defined]
````
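The final hunk wires `to_df` onto the two new generated response models via `make_to_df("experiments")` and `make_to_df("experiment_runs")`, which is what makes the `resp.to_df()` calls documented in the README work. The body of `make_to_df` is outside this diff; the sketch below only illustrates the monkey-patching pattern and assumes each response model exposes the named field as a list of objects with a `to_dict()` method:

```python
import pandas as pd


def make_to_df(field_name: str):
    """Illustrative stand-in for the helper defined in src/arize/__init__.py."""

    def to_df(self) -> pd.DataFrame:
        items = getattr(self, field_name) or []
        # Generated models are assumed to expose to_dict(); fall back to dict(item).
        rows = [item.to_dict() if hasattr(item, "to_dict") else dict(item) for item in items]
        return pd.DataFrame(rows)

    return to_df


# Usage mirrors the assignments in the hunk above, e.g.:
# models.ExperimentsRunsList200Response.to_df = make_to_df("experiment_runs")
```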