arize 8.0.0a16__tar.gz → 8.0.0a18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. {arize-8.0.0a16 → arize-8.0.0a18}/PKG-INFO +217 -14
  2. {arize-8.0.0a16 → arize-8.0.0a18}/README.md +216 -13
  3. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/__init__.py +1 -0
  4. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_flight/client.py +32 -1
  5. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/client.py +8 -0
  6. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/config.py +14 -0
  7. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/constants/config.py +4 -0
  8. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/datasets/client.py +77 -56
  9. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/experiments/client.py +118 -17
  10. arize-8.0.0a18/src/arize/utils/cache.py +68 -0
  11. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/utils/size.py +2 -3
  12. arize-8.0.0a18/src/arize/version.py +1 -0
  13. arize-8.0.0a16/src/arize/version.py +0 -1
  14. {arize-8.0.0a16 → arize-8.0.0a18}/.gitignore +0 -0
  15. {arize-8.0.0a16 → arize-8.0.0a18}/LICENSE.md +0 -0
  16. {arize-8.0.0a16 → arize-8.0.0a18}/pyproject.toml +0 -0
  17. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_exporter/__init__.py +0 -0
  18. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_exporter/client.py +0 -0
  19. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_exporter/parsers/__init__.py +0 -0
  20. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_exporter/parsers/tracing_data_parser.py +0 -0
  21. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_exporter/validation.py +0 -0
  22. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_flight/__init__.py +0 -0
  23. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_flight/types.py +0 -0
  24. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/__init__.py +0 -0
  25. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/__init__.py +0 -0
  26. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/api/__init__.py +0 -0
  27. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/api/datasets_api.py +0 -0
  28. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/api/experiments_api.py +0 -0
  29. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/api_client.py +0 -0
  30. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/api_response.py +0 -0
  31. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/configuration.py +0 -0
  32. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/exceptions.py +0 -0
  33. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/models/__init__.py +0 -0
  34. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/models/dataset.py +0 -0
  35. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/models/dataset_version.py +0 -0
  36. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/models/datasets_create_request.py +0 -0
  37. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/models/datasets_list200_response.py +0 -0
  38. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/models/datasets_list_examples200_response.py +0 -0
  39. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/models/error.py +0 -0
  40. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/models/experiment.py +0 -0
  41. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/models/experiments_create_request.py +0 -0
  42. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/models/experiments_list200_response.py +0 -0
  43. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/models/experiments_runs_list200_response.py +0 -0
  44. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/rest.py +0 -0
  45. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/test/__init__.py +0 -0
  46. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/test/test_dataset.py +0 -0
  47. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/test/test_dataset_version.py +0 -0
  48. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/test/test_datasets_api.py +0 -0
  49. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/test/test_datasets_create_request.py +0 -0
  50. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/test/test_datasets_list200_response.py +0 -0
  51. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/test/test_datasets_list_examples200_response.py +0 -0
  52. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/test/test_error.py +0 -0
  53. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/test/test_experiment.py +0 -0
  54. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/test/test_experiments_api.py +0 -0
  55. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/test/test_experiments_create_request.py +0 -0
  56. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/test/test_experiments_list200_response.py +0 -0
  57. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client/test/test_experiments_runs_list200_response.py +0 -0
  58. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/api_client_README.md +0 -0
  59. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/protocol/__init__.py +0 -0
  60. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/protocol/flight/__init__.py +0 -0
  61. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/protocol/flight/export_pb2.py +0 -0
  62. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/protocol/flight/ingest_pb2.py +0 -0
  63. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/protocol/rec/__init__.py +0 -0
  64. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_generated/protocol/rec/public_pb2.py +0 -0
  65. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/_lazy.py +0 -0
  66. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/constants/__init__.py +0 -0
  67. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/constants/ml.py +0 -0
  68. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/constants/model_mapping.json +0 -0
  69. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/constants/openinference.py +0 -0
  70. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/constants/pyarrow.py +0 -0
  71. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/constants/spans.py +0 -0
  72. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/datasets/__init__.py +0 -0
  73. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/datasets/errors.py +0 -0
  74. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/datasets/validation.py +0 -0
  75. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/embeddings/__init__.py +0 -0
  76. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/embeddings/auto_generator.py +0 -0
  77. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/embeddings/base_generators.py +0 -0
  78. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/embeddings/constants.py +0 -0
  79. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/embeddings/cv_generators.py +0 -0
  80. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/embeddings/errors.py +0 -0
  81. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/embeddings/nlp_generators.py +0 -0
  82. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/embeddings/tabular_generators.py +0 -0
  83. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/embeddings/usecases.py +0 -0
  84. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/exceptions/__init__.py +0 -0
  85. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/exceptions/auth.py +0 -0
  86. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/exceptions/base.py +0 -0
  87. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/exceptions/models.py +0 -0
  88. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/exceptions/parameters.py +0 -0
  89. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/exceptions/spaces.py +0 -0
  90. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/exceptions/types.py +0 -0
  91. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/exceptions/values.py +0 -0
  92. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/experiments/__init__.py +0 -0
  93. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/experiments/evaluators/__init__.py +0 -0
  94. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/experiments/evaluators/base.py +0 -0
  95. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/experiments/evaluators/exceptions.py +0 -0
  96. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/experiments/evaluators/executors.py +0 -0
  97. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/experiments/evaluators/rate_limiters.py +0 -0
  98. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/experiments/evaluators/types.py +0 -0
  99. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/experiments/evaluators/utils.py +0 -0
  100. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/experiments/functions.py +0 -0
  101. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/experiments/tracing.py +0 -0
  102. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/experiments/types.py +0 -0
  103. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/logging.py +0 -0
  104. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/models/__init__.py +0 -0
  105. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/models/batch_validation/__init__.py +0 -0
  106. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/models/batch_validation/errors.py +0 -0
  107. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/models/batch_validation/validator.py +0 -0
  108. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/models/bounded_executor.py +0 -0
  109. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/models/casting.py +0 -0
  110. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/models/client.py +0 -0
  111. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/models/proto.py +0 -0
  112. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/models/stream_validation.py +0 -0
  113. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/models/surrogate_explainer/__init__.py +0 -0
  114. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/models/surrogate_explainer/mimic.py +0 -0
  115. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/__init__.py +0 -0
  116. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/client.py +0 -0
  117. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/columns.py +0 -0
  118. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/conversion.py +0 -0
  119. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/validation/__init__.py +0 -0
  120. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/validation/annotations/__init__.py +0 -0
  121. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/validation/annotations/annotations_validation.py +0 -0
  122. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/validation/annotations/dataframe_form_validation.py +0 -0
  123. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/validation/annotations/value_validation.py +0 -0
  124. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/validation/common/__init__.py +0 -0
  125. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/validation/common/argument_validation.py +0 -0
  126. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/validation/common/dataframe_form_validation.py +0 -0
  127. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/validation/common/errors.py +0 -0
  128. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/validation/common/value_validation.py +0 -0
  129. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/validation/evals/__init__.py +0 -0
  130. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/validation/evals/dataframe_form_validation.py +0 -0
  131. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/validation/evals/evals_validation.py +0 -0
  132. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/validation/evals/value_validation.py +0 -0
  133. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/validation/metadata/__init__.py +0 -0
  134. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/validation/metadata/argument_validation.py +0 -0
  135. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/validation/metadata/dataframe_form_validation.py +0 -0
  136. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/validation/metadata/value_validation.py +0 -0
  137. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/validation/spans/__init__.py +0 -0
  138. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/validation/spans/dataframe_form_validation.py +0 -0
  139. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/validation/spans/spans_validation.py +0 -0
  140. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/spans/validation/spans/value_validation.py +0 -0
  141. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/types.py +0 -0
  142. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/utils/__init__.py +0 -0
  143. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/utils/arrow.py +0 -0
  144. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/utils/dataframe.py +0 -0
  145. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/utils/online_tasks/__init__.py +0 -0
  146. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/utils/online_tasks/dataframe_preprocessor.py +0 -0
  147. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/utils/openinference_conversion.py +0 -0
  148. {arize-8.0.0a16 → arize-8.0.0a18}/src/arize/utils/proto.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: arize
- Version: 8.0.0a16
+ Version: 8.0.0a18
  Summary: A helper library to interact with Arize AI APIs
  Project-URL: Homepage, https://arize.com
  Project-URL: Documentation, https://docs.arize.com/arize
@@ -99,11 +99,24 @@ Description-Content-Type: text/markdown
  - [Operations on Datasets](#operations-on-datasets)
  - [List Datasets](#list-datasets)
  - [Create a Dataset](#create-a-dataset)
- - [Get Dataset by ID](#get-dataset-by-id)
+ - [Get Dataset](#get-dataset)
  - [Delete a Dataset](#delete-a-dataset)
- - [Configure Logging](#configure-logging)
- - [In Code](#in-code)
- - [Via Environment Variables](#via-environment-variables)
+ - [List Dataset Examples](#list-dataset-examples)
+ - [Operations on Experiments](#operations-on-experiments)
+ - [List Experiments](#list-experiments)
+ - [Run an Experiment](#run-an-experiment)
+ - [Create an Experiment](#create-an-experiment)
+ - [Get an Experiment](#get-an-experiment)
+ - [Delete an Experiment](#delete-an-experiment)
+ - [List Experiment runs](#list-experiment-runs)
+ - [SDK Configuration](#sdk-configuration)
+ - [Logging](#logging)
+ - [In Code](#in-code)
+ - [Via Environment Variables](#via-environment-variables)
+ - [Caching](#caching)
+ - [In Code](#in-code-1)
+ - [Via Environment Variables](#via-environment-variables-1)
+ - [Clean the cache](#clean-the-cache)
  - [Community](#community)

  # Overview
@@ -398,9 +411,9 @@ dataset_list = resp.datasets
  # Get the response as a dictionary
  resp_dict = resp.to_dict()
  # Get the response in JSON format
- resp_dict = resp.to_json()
+ resp_json = resp.to_json()
  # Get the response as a pandas dataframe
- resp_dict = resp.to_df()
+ resp_df = resp.to_df()
  ```

  ### Create a Dataset
@@ -430,9 +443,10 @@ If the number of examples (rows in dataframe, items in list) is too large, the c

  ```python
  created_dataset = client.datasets.create(
- space_i="<target-space-id>",
+ space_id="<target-space-id>",
  name="<your-dataset-name>", # Name must be unique within a space
  examples=..., # List of dictionaries or pandas dataframe
+ # force_http=... # Optionally pass force_http to create datasets via HTTP instead of gRPC, defaults to False
  )
  ```

@@ -445,8 +459,7 @@ dataset_dict = create_dataset.to_dict()
  dataset_dict = create_dataset.to_json()
  ```

-
- ### Get Dataset by ID
+ ### Get Dataset

  To get a dataset by its ID use `client.datasets.get()`, you can optionally also pass the version ID of a particular version of interest of the dataset. The returned type is `Dataset`.

@@ -467,9 +480,167 @@ client.datasets.delete(
  )
  ```

- # Configure Logging
+ ### List Dataset Examples
+
+ You can list the examples of a given dataset using `client.datasets.list_examples()` by passing the dataset ID and, optionally, the dataset version ID. You can specify the number of examples desired using the `limit` parameter. If you want a large number of examples, consider using the `all=True` parameter, which makes the SDK export the data using Arrow Flight over gRPC for increased performance.
+
+ ```python
+ resp = client.datasets.list_examples(
+ dataset_id="<your-dataset-id>",
+ dataset_version_id="<your-dataset-version-id>", # Optional, defaults to latest version
+ limit=..., # Number of desired examples. Defaults to 100
+ all=..., # Whether or not to export all of the examples. Defaults to False
+ )
+ ```
+
+ The response is an object of type `DatasetsListExamples200Response`, and you can access the list of examples via its `examples` attribute. In addition, you can transform the response object to a dictionary, to JSON format, or a pandas dataframe.
+
+ ```python
+ # Get the list of examples from the response
+ examples_list = resp.examples
+ # Get the response as a dictionary
+ resp_dict = resp.to_dict()
+ # Get the response in JSON format
+ resp_json = resp.to_json()
+ # Get the response as a pandas dataframe
+ resp_df = resp.to_df()
+ ```
+
+ ## Operations on Experiments
+
+ ### List Experiments
+
+ You can list all experiments that the user has access to using `client.experiments.list()`. You can use the `limit` parameter to specify the maximum number of experiments desired in the response, and you can specify the `dataset_id` to target the list operation to a particular dataset.
+
+ ```python
+ resp = client.experiments.list(
+ limit=..., # Optional
+ dataset_id=..., # Optional
+ )
+ ```
+
+ The response is an object of type `ExperimentsList200Response`, and you can access the list of experiments via its `experiments` attribute. In addition, you can transform the response object to a dictionary, to JSON format, or a pandas dataframe.
+
+ ```python
+ # Get the list of experiments from the response
+ experiment_list = resp.experiments
+ # Get the response as a dictionary
+ resp_dict = resp.to_dict()
+ # Get the response in JSON format
+ resp_json = resp.to_json()
+ # Get the response as a pandas dataframe
+ resp_df = resp.to_df()
+ ```
+
+ ### Run an Experiment
+
+ You can run an experiment on a dataset using `client.experiments.run()` by defining a task, optionally defining evaluators, and passing the ID of the dataset you want to use together with a name for the experiment. The function will download the entire dataset from Arize (unless cached, see the caching section under "SDK Configuration"), execute the task to obtain an output, and perform evaluations (if evaluators were passed). The experiment will also be traced, and these traces will be visible in Arize. The experiment will be created and the data logged into Arize automatically. You can avoid logging to Arize by setting `dry_run=True`. The function returns the `Experiment` object (or `None` if `dry_run=True`) together with a dataframe containing the experiment data.
+
+ ```python
+ experiment, experiment_df = client.experiments.run(
+ name="<name-your-experiment>",
+ dataset_id="<id-of-dataset-to-use>",
+ task=..., # The task to be performed in the experiment.
+ evaluators=..., # Optional: The evaluators to use in the experiment.
+ dry_run=..., # If True, the experiment result will not be uploaded to Arize. Defaults to False
+ dry_run_count=..., # Number of examples of the dataset to use in the dry run. Defaults to 10
+ concurrency=..., # The number of concurrent tasks to run. Defaults to 3.
+ set_global_tracer_provider=..., # If True, sets the global tracer provider for the experiment. Defaults to False
+ exit_on_error=..., # If True, the experiment will stop running on first occurrence of an error. Defaults to False
+ )
+ ```
+
+ The `Experiment` object also provides convenience methods similar to the `List***` response objects:
+
+ ```python
+ # Get the response as a dictionary
+ experiment_dict = experiment.to_dict()
+ # Get the response in JSON format
+ experiment_json = experiment.to_json()
+ ```
+
+ ### Create an Experiment
563
+
564
+ It is possible that you have run the experiment yourself without the above function, and hence you already have experiment data that you want to send to Arize. In this case, use the `client.experiments.create()` method by passing the runs data, we currently don't support creating an empty experiment, for instance, these are 2 rows of runs, as a list of dictionaries. You can also pass a pandas dataframe for the runs data.
565
+
566
+ > NOTE: If you don't have experiment data and want to run experiment, see the `client.experiments.run()` section above.
567
+
568
+ ```python
569
+ # TODO
570
+ runs = [
571
+ ]
572
+ ```
573
+
574
+ In addition, you must specify which columns are the `example_id` and the `result`, you can do so by using the `ExperimentTaskResultFieldNames`. Moreover, if you choose to pass evaluation data, you can indicate the evaluation columns using `EvaluationResultFieldNames`:
575
+
576
+ ```python
577
+ # TODO
578
+ ```
579
+
580
+ If the number of runs (rows in dataframe, items in list) is too large, the client SDK will try to send the data via Arrow Flight via gRPC for better performance. If you want to force the data transfer to HTTP you can use the `force_http` flag. The response is an `Experiment` object.
581
+
582
+ ```python
583
+ created_experiment = client.experiments.create(
584
+ name="<your-experiment-name>", # Name must be unique within a dataset
585
+ dataset_id="<desired-dataset-id>",
586
+ experiment_runs=..., # List of dictionaries or pandas dataframe
587
+ task_fields=ExperimentTaskResultFieldNames(...),
588
+ evaluator_columns=... # Optional
589
+ # force_http=... # Optionally pass force_http to create experiments via HTTP instead of gRPC, defaults to False
590
+ )
591
+ ```
592
+
593
+ ### Get an Experiment
+
+ To get a dataset by its ID use `client.datasets.get()`; you can optionally also pass the version ID of a particular version of interest of the dataset. The returned type is `Dataset`.
+
+ ```python
+ dataset = client.datasets.get(
+ dataset_id=..., # The unique identifier of the dataset
+ dataset_version_id=..., # The unique identifier of the dataset version
+ )
+ ```
+
+ ### Delete an Experiment
+
+ To delete an experiment by its ID use `client.experiments.delete()`. The call returns `None` if the deletion was successful, and raises an error otherwise.
+
+ ```python
+ client.experiments.delete(
+ experiment_id=..., # The unique identifier of the experiment
+ )
+ ```
+
+ ### List Experiment runs

- ## In Code
+ You can list the runs of a given experiment using `client.experiments.list_runs()` by passing the experiment ID. You can specify the number of runs desired using the `limit` parameter. If you want a large number of runs, consider using the `all=True` parameter, which makes the SDK export the data using Arrow Flight over gRPC for increased performance.
+
+ ```python
+ resp = client.experiments.list_runs(
+ experiment_id="<your-experiment-id>",
+ limit=..., # Number of desired runs. Defaults to 100
+ all=..., # Whether or not to export all of the runs. Defaults to False
+ )
+ ```
+
+ The response is an object of type `ExperimentsRunsList200Response`, and you can access the list of runs via its `experiment_runs` attribute. In addition, you can transform the response object to a dictionary, to JSON format, or a pandas dataframe.
+
+ ```python
+ # Get the list of experiment runs from the response
+ run_list = resp.experiment_runs
+ # Get the response as a dictionary
+ resp_dict = resp.to_dict()
+ # Get the response in JSON format
+ resp_json = resp.to_json()
+ # Get the response as a pandas dataframe
+ resp_df = resp.to_df()
+ ```
+
+ # SDK Configuration
+
+ ## Logging
+
+ ### In Code

  You can use `configure_logging` to set up the logging behavior of the Arize package to your needs.

@@ -482,14 +653,14 @@ configure_logging(
  )
  ```

- ## Via Environment Variables
+ ### Via Environment Variables

  Configure the same options as the section above, via:

  ```python
  import os

- # You can disable logging altogether
+ # Whether or not you want to enable logging
  os.environ["ARIZE_LOG_ENABLE"] = "true"
  # Set up the logging level
  os.environ["ARIZE_LOG_LEVEL"] = "debug"
@@ -499,6 +670,38 @@ os.environ["ARIZE_LOG_STRUCTURED"] = "false"

  The default behavior of Arize's logs is: enabled, `INFO` level, and not structured.

+ ## Caching
+
+ When downloading big segments of data from Arize, such as a `Dataset` with all of its examples, the SDK will cache the file in `parquet` format under `~/.arize/datasets/dataset_<updated_at_timestamp>.parquet`.
+
+ ### In Code
+
+ You can disable caching via the `enable_caching` parameter when instantiating the client, and you can also change the Arize directory:
+
+ ```python
+ client = ArizeClient(
+ enable_caching=False, # Optional parameter, defaults to True
+ arize_directory="my-desired-directory", # Optional parameter, defaults to ~/.arize
+ )
+ ```
+
+ ### Via Environment Variables
+
+ You can also configure the above via:
+
+ ```python
+ import os
+
+ # Whether or not you want to enable caching
+ os.environ["ARIZE_ENABLE_CACHING"] = "true"
+ # Where you want the SDK to store the files
+ os.environ["ARIZE_DIRECTORY"] = "~/.arize"
+ ```
+
+ ### Clean the cache
+
+ To clean the cache, you can directly `rm` the files or the directory.
+
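As an alternative to `rm`, a small Python sketch that removes the cached dataset files, assuming the default `~/.arize/datasets` location described above:

```python
import shutil
from pathlib import Path

# Remove the cached dataset parquet files under the default Arize directory.
cache_dir = Path.home() / ".arize" / "datasets"
if cache_dir.exists():
    shutil.rmtree(cache_dir)
```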
  # Community

  Join our community to connect with thousands of AI builders.
@@ -35,11 +35,24 @@
  - [Operations on Datasets](#operations-on-datasets)
  - [List Datasets](#list-datasets)
  - [Create a Dataset](#create-a-dataset)
- - [Get Dataset by ID](#get-dataset-by-id)
+ - [Get Dataset](#get-dataset)
  - [Delete a Dataset](#delete-a-dataset)
- - [Configure Logging](#configure-logging)
- - [In Code](#in-code)
- - [Via Environment Variables](#via-environment-variables)
+ - [List Dataset Examples](#list-dataset-examples)
+ - [Operations on Experiments](#operations-on-experiments)
+ - [List Experiments](#list-experiments)
+ - [Run an Experiment](#run-an-experiment)
+ - [Create an Experiment](#create-an-experiment)
+ - [Get an Experiment](#get-an-experiment)
+ - [Delete an Experiment](#delete-an-experiment)
+ - [List Experiment runs](#list-experiment-runs)
+ - [SDK Configuration](#sdk-configuration)
+ - [Logging](#logging)
+ - [In Code](#in-code)
+ - [Via Environment Variables](#via-environment-variables)
+ - [Caching](#caching)
+ - [In Code](#in-code-1)
+ - [Via Environment Variables](#via-environment-variables-1)
+ - [Clean the cache](#clean-the-cache)
  - [Community](#community)

  # Overview
@@ -334,9 +347,9 @@ dataset_list = resp.datasets
  # Get the response as a dictionary
  resp_dict = resp.to_dict()
  # Get the response in JSON format
- resp_dict = resp.to_json()
+ resp_json = resp.to_json()
  # Get the response as a pandas dataframe
- resp_dict = resp.to_df()
+ resp_df = resp.to_df()
  ```

  ### Create a Dataset
@@ -366,9 +379,10 @@ If the number of examples (rows in dataframe, items in list) is too large, the c

  ```python
  created_dataset = client.datasets.create(
- space_i="<target-space-id>",
+ space_id="<target-space-id>",
  name="<your-dataset-name>", # Name must be unique within a space
  examples=..., # List of dictionaries or pandas dataframe
+ # force_http=... # Optionally pass force_http to create datasets via HTTP instead of gRPC, defaults to False
  )
  ```

@@ -381,8 +395,7 @@ dataset_dict = create_dataset.to_dict()
  dataset_dict = create_dataset.to_json()
  ```

-
- ### Get Dataset by ID
+ ### Get Dataset

  To get a dataset by its ID use `client.datasets.get()`, you can optionally also pass the version ID of a particular version of interest of the dataset. The returned type is `Dataset`.

@@ -403,9 +416,167 @@ client.datasets.delete(
  )
  ```

- # Configure Logging
+ ### List Dataset Examples
+
+ You can list the examples of a given dataset using `client.datasets.list_examples()` by passing the dataset ID and, optionally, the dataset version ID. You can specify the number of examples desired using the `limit` parameter. If you want a large number of examples, consider using the `all=True` parameter, which makes the SDK export the data using Arrow Flight over gRPC for increased performance.
+
+ ```python
+ resp = client.datasets.list_examples(
+ dataset_id="<your-dataset-id>",
+ dataset_version_id="<your-dataset-version-id>", # Optional, defaults to latest version
+ limit=..., # Number of desired examples. Defaults to 100
+ all=..., # Whether or not to export all of the examples. Defaults to False
+ )
+ ```
+
+ The response is an object of type `DatasetsListExamples200Response`, and you can access the list of examples via its `examples` attribute. In addition, you can transform the response object to a dictionary, to JSON format, or a pandas dataframe.
+
+ ```python
+ # Get the list of examples from the response
+ examples_list = resp.examples
+ # Get the response as a dictionary
+ resp_dict = resp.to_dict()
+ # Get the response in JSON format
+ resp_json = resp.to_json()
+ # Get the response as a pandas dataframe
+ resp_df = resp.to_df()
+ ```
+
+ ## Operations on Experiments
+
+ ### List Experiments
+
+ You can list all experiments that the user has access to using `client.experiments.list()`. You can use the `limit` parameter to specify the maximum number of experiments desired in the response, and you can specify the `dataset_id` to target the list operation to a particular dataset.
+
+ ```python
+ resp = client.experiments.list(
+ limit=..., # Optional
+ dataset_id=..., # Optional
+ )
+ ```
+
+ The response is an object of type `ExperimentsList200Response`, and you can access the list of experiments via its `experiments` attribute. In addition, you can transform the response object to a dictionary, to JSON format, or a pandas dataframe.
+
+ ```python
+ # Get the list of experiments from the response
+ experiment_list = resp.experiments
+ # Get the response as a dictionary
+ resp_dict = resp.to_dict()
+ # Get the response in JSON format
+ resp_json = resp.to_json()
+ # Get the response as a pandas dataframe
+ resp_df = resp.to_df()
+ ```
+
+ ### Run an Experiment
+
+ You can run an experiment on a dataset using `client.experiments.run()` by defining a task, optionally defining evaluators, and passing the ID of the dataset you want to use together with a name for the experiment. The function will download the entire dataset from Arize (unless cached, see the caching section under "SDK Configuration"), execute the task to obtain an output, and perform evaluations (if evaluators were passed). The experiment will also be traced, and these traces will be visible in Arize. The experiment will be created and the data logged into Arize automatically. You can avoid logging to Arize by setting `dry_run=True`. The function returns the `Experiment` object (or `None` if `dry_run=True`) together with a dataframe containing the experiment data.
+
+ ```python
+ experiment, experiment_df = client.experiments.run(
+ name="<name-your-experiment>",
+ dataset_id="<id-of-dataset-to-use>",
+ task=..., # The task to be performed in the experiment.
+ evaluators=..., # Optional: The evaluators to use in the experiment.
+ dry_run=..., # If True, the experiment result will not be uploaded to Arize. Defaults to False
+ dry_run_count=..., # Number of examples of the dataset to use in the dry run. Defaults to 10
+ concurrency=..., # The number of concurrent tasks to run. Defaults to 3.
+ set_global_tracer_provider=..., # If True, sets the global tracer provider for the experiment. Defaults to False
+ exit_on_error=..., # If True, the experiment will stop running on first occurrence of an error. Defaults to False
+ )
+ ```
+
+ The `Experiment` object also provides convenience methods similar to the `List***` response objects:
+
+ ```python
+ # Get the response as a dictionary
+ experiment_dict = experiment.to_dict()
+ # Get the response in JSON format
+ experiment_json = experiment.to_json()
+ ```
+
+ ### Create an Experiment
+
+ It is possible that you have run the experiment yourself without the above function, and hence you already have experiment data that you want to send to Arize. In this case, use the `client.experiments.create()` method and pass the runs data (we currently don't support creating an empty experiment). For instance, these are 2 rows of runs as a list of dictionaries; you can also pass a pandas dataframe for the runs data.
+
+ > NOTE: If you don't have experiment data and want to run an experiment, see the `client.experiments.run()` section above.
+
+ ```python
+ # TODO
+ runs = [
+ ]
+ ```
+
+ In addition, you must specify which columns are the `example_id` and the `result`; you can do so using `ExperimentTaskResultFieldNames`. Moreover, if you choose to pass evaluation data, you can indicate the evaluation columns using `EvaluationResultFieldNames`:
+
+ ```python
+ # TODO
+ ```
+
+ If the number of runs (rows in dataframe, items in list) is too large, the client SDK will try to send the data via Arrow Flight over gRPC for better performance. If you want to force the data transfer over HTTP, you can use the `force_http` flag. The response is an `Experiment` object.
+
+ ```python
+ created_experiment = client.experiments.create(
+ name="<your-experiment-name>", # Name must be unique within a dataset
+ dataset_id="<desired-dataset-id>",
+ experiment_runs=..., # List of dictionaries or pandas dataframe
+ task_fields=ExperimentTaskResultFieldNames(...),
+ evaluator_columns=..., # Optional
+ # force_http=... # Optionally pass force_http to create experiments via HTTP instead of gRPC, defaults to False
+ )
+ ```
+
+ ### Get an Experiment
+
+ To get a dataset by its ID use `client.datasets.get()`; you can optionally also pass the version ID of a particular version of interest of the dataset. The returned type is `Dataset`.
+
+ ```python
+ dataset = client.datasets.get(
+ dataset_id=..., # The unique identifier of the dataset
+ dataset_version_id=..., # The unique identifier of the dataset version
+ )
+ ```
+
+ ### Delete an Experiment
+
+ To delete an experiment by its ID use `client.experiments.delete()`. The call returns `None` if the deletion was successful, and raises an error otherwise.
+
+ ```python
+ client.experiments.delete(
+ experiment_id=..., # The unique identifier of the experiment
+ )
+ ```
+
+ ### List Experiment runs

- ## In Code
+ You can list the runs of a given experiment using `client.experiments.list_runs()` by passing the experiment ID. You can specify the number of runs desired using the `limit` parameter. If you want a large number of runs, consider using the `all=True` parameter, which makes the SDK export the data using Arrow Flight over gRPC for increased performance.
+
+ ```python
+ resp = client.experiments.list_runs(
+ experiment_id="<your-experiment-id>",
+ limit=..., # Number of desired runs. Defaults to 100
+ all=..., # Whether or not to export all of the runs. Defaults to False
+ )
+ ```
+
+ The response is an object of type `ExperimentsRunsList200Response`, and you can access the list of runs via its `experiment_runs` attribute. In addition, you can transform the response object to a dictionary, to JSON format, or a pandas dataframe.
+
+ ```python
+ # Get the list of experiment runs from the response
+ run_list = resp.experiment_runs
+ # Get the response as a dictionary
+ resp_dict = resp.to_dict()
+ # Get the response in JSON format
+ resp_json = resp.to_json()
+ # Get the response as a pandas dataframe
+ resp_df = resp.to_df()
+ ```
+
+ # SDK Configuration
+
+ ## Logging
+
+ ### In Code

  You can use `configure_logging` to set up the logging behavior of the Arize package to your needs.

@@ -418,14 +589,14 @@ configure_logging(
  )
  ```

- ## Via Environment Variables
+ ### Via Environment Variables

  Configure the same options as the section above, via:

  ```python
  import os

- # You can disable logging altogether
+ # Whether or not you want to enable logging
  os.environ["ARIZE_LOG_ENABLE"] = "true"
  # Set up the logging level
  os.environ["ARIZE_LOG_LEVEL"] = "debug"
@@ -435,6 +606,38 @@ os.environ["ARIZE_LOG_STRUCTURED"] = "false"

  The default behavior of Arize's logs is: enabled, `INFO` level, and not structured.

+ ## Caching
+
+ When downloading big segments of data from Arize, such as a `Dataset` with all of its examples, the SDK will cache the file in `parquet` format under `~/.arize/datasets/dataset_<updated_at_timestamp>.parquet`.
+
+ ### In Code
+
+ You can disable caching via the `enable_caching` parameter when instantiating the client, and you can also change the Arize directory:
+
+ ```python
+ client = ArizeClient(
+ enable_caching=False, # Optional parameter, defaults to True
+ arize_directory="my-desired-directory", # Optional parameter, defaults to ~/.arize
+ )
+ ```
+
+ ### Via Environment Variables
+
+ You can also configure the above via:
+
+ ```python
+ import os
+
+ # Whether or not you want to enable caching
+ os.environ["ARIZE_ENABLE_CACHING"] = "true"
+ # Where you want the SDK to store the files
+ os.environ["ARIZE_DIRECTORY"] = "~/.arize"
+ ```
+
+ ### Clean the cache
+
+ To clean the cache, you can directly `rm` the files or the directory.
+
  # Community

  Join our community to connect with thousands of AI builders.
@@ -87,3 +87,4 @@ def make_to_df(field_name: str):
  models.DatasetsList200Response.to_df = make_to_df("datasets") # type: ignore[attr-defined]
  models.DatasetsListExamples200Response.to_df = make_to_df("examples") # type: ignore[attr-defined]
  models.ExperimentsList200Response.to_df = make_to_df("experiments") # type: ignore[attr-defined]
+ models.ExperimentsRunsList200Response.to_df = make_to_df("experiment_runs") # type: ignore[attr-defined]
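The body of `make_to_df` sits outside this hunk. A minimal sketch of how such a helper could attach a `to_df` method to the generated response models (an assumption about the shape of the code, not the package's actual implementation):

```python
import pandas as pd


def make_to_df(field_name: str):
    # Return a to_df method that converts the named list attribute of a
    # generated response model into a pandas DataFrame.
    def to_df(self) -> pd.DataFrame:
        items = getattr(self, field_name) or []
        return pd.DataFrame([item.to_dict() for item in items])

    return to_df
```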
@@ -25,6 +25,7 @@ from arize.utils.proto import get_pb_schema_tracing
  from arize.version import __version__

  if TYPE_CHECKING:
+ import pandas as pd
  import pyarrow as pa


@@ -260,7 +261,7 @@ class ArizeFlightClient:
  space_id: str,
  dataset_id: str,
  dataset_version_id: str | None = None,
- ):
+ ) -> pd.DataFrame:
  # TODO(Kiko): Space ID should not be needed,
  # should work on server tech debt to remove this
  doget_request = flight_ing_pb2.DoGetRequest(
@@ -283,6 +284,36 @@
  logger.exception(f"Failed to get dataset id={dataset_id}")
  raise RuntimeError(f"Failed to get dataset id={dataset_id}") from e

+ # ---------- experiment methods ----------
+
+ def get_experiment_runs(
+ self,
+ space_id: str,
+ experiment_id: str,
+ ) -> pd.DataFrame:
+ # TODO(Kiko): Space ID should not be needed,
+ # should work on server tech debt to remove this
+ doget_request = flight_ing_pb2.DoGetRequest(
+ get_experiment=flight_ing_pb2.GetExperimentRequest(
+ space_id=space_id,
+ experiment_id=experiment_id,
+ )
+ )
+ descriptor = flight.Ticket(
+ json_format.MessageToJson(doget_request).encode("utf-8")
+ )
+ try:
+ reader = self.do_get(descriptor, options=self.call_options)
+ # read all data into pandas dataframe
+ df = reader.read_all().to_pandas()
+ df = convert_json_str_to_dict(df)
+ return df
+ except Exception as e:
+ logger.exception(f"Failed to get experiment id={experiment_id}")
+ raise RuntimeError(
+ f"Failed to get experiment id={experiment_id}"
+ ) from e
+
  def init_experiment(
  self,
  space_id:
@@ -12,6 +12,14 @@ if TYPE_CHECKING:
  from arize.spans.client import SpansClient


+ # TODO(Kiko): models need to follow resource first pattern
+ # - models.DatasetsList200Response
+ # - models.DatasetsListExamples200Response
+ # - models.ExperimentsList200Response
+ # - models.ExperimentsRunsList200Response
+ # TODO(Kiko): Root client should have option to clear caches
+ # TODO(Kiko): Document caching behavior
+ # TODO(Kiko): Force keyword arguments
  # TODO(Kiko): Protobuf versioning is too old
  # TODO(Kiko): Make sure the client has same options as SDKConfiguration
  # TODO(Kiko): It does not make any sense to require space ID in run_experiment, dataset ID should suffice