arize 8.0.0a12__tar.gz → 8.0.0a14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. {arize-8.0.0a12 → arize-8.0.0a14}/PKG-INFO +98 -1
  2. {arize-8.0.0a12 → arize-8.0.0a14}/README.md +95 -0
  3. {arize-8.0.0a12 → arize-8.0.0a14}/pyproject.toml +3 -3
  4. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_exporter/client.py +18 -3
  5. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_flight/client.py +79 -3
  6. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/__init__.py +1 -43
  7. arize-8.0.0a14/src/arize/_generated/api_client/api/__init__.py +6 -0
  8. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/api/datasets_api.py +10 -11
  9. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/api/experiments_api.py +276 -0
  10. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/api_client.py +4 -4
  11. arize-8.0.0a14/src/arize/_generated/api_client/models/__init__.py +24 -0
  12. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/models/dataset.py +5 -13
  13. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/models/dataset_version.py +5 -13
  14. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/models/datasets_create_request.py +5 -13
  15. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/models/datasets_list200_response.py +5 -0
  16. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/models/datasets_list_examples200_response.py +8 -3
  17. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/models/error.py +5 -13
  18. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/models/experiment.py +5 -13
  19. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/models/experiments_list200_response.py +14 -9
  20. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/test/test_datasets_list_examples200_response.py +2 -2
  21. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/test/test_experiments_api.py +7 -0
  22. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/test/test_experiments_list200_response.py +2 -2
  23. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client_README.md +2 -2
  24. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/protocol/flight/export_pb2.py +8 -8
  25. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_lazy.py +4 -0
  26. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/client.py +17 -4
  27. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/config.py +13 -3
  28. arize-8.0.0a14/src/arize/datasets/__init__.py +70 -0
  29. arize-8.0.0a14/src/arize/datasets/client.py +142 -0
  30. {arize-8.0.0a12/src/arize/utils → arize-8.0.0a14/src/arize/models}/casting.py +12 -12
  31. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/models/client.py +330 -5
  32. arize-8.0.0a14/src/arize/models/proto.py +461 -0
  33. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/spans/client.py +30 -6
  34. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/utils/arrow.py +4 -4
  35. arize-8.0.0a14/src/arize/version.py +1 -0
  36. arize-8.0.0a12/src/arize/_generated/api_client/api/__init__.py +0 -22
  37. arize-8.0.0a12/src/arize/_generated/api_client/models/__init__.py +0 -49
  38. arize-8.0.0a12/src/arize/_generated/api_client/models/datasets_create201_response.py +0 -87
  39. arize-8.0.0a12/src/arize/_generated/api_client/test/test_datasets_create201_response.py +0 -52
  40. arize-8.0.0a12/src/arize/datasets/client.py +0 -19
  41. arize-8.0.0a12/src/arize/utils/__init__.py +0 -0
  42. arize-8.0.0a12/src/arize/utils/proto.py +0 -865
  43. arize-8.0.0a12/src/arize/version.py +0 -1
  44. {arize-8.0.0a12 → arize-8.0.0a14}/.gitignore +0 -0
  45. {arize-8.0.0a12 → arize-8.0.0a14}/LICENSE.md +0 -0
  46. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/__init__.py +0 -0
  47. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_exporter/__init__.py +0 -0
  48. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_exporter/parsers/__init__.py +0 -0
  49. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_exporter/parsers/tracing_data_parser.py +0 -0
  50. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_exporter/validation.py +0 -0
  51. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_flight/__init__.py +0 -0
  52. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_flight/types.py +0 -0
  53. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/__init__.py +0 -0
  54. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/api_response.py +0 -0
  55. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/configuration.py +0 -0
  56. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/exceptions.py +0 -0
  57. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/rest.py +0 -0
  58. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/test/__init__.py +0 -0
  59. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/test/test_dataset.py +0 -0
  60. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/test/test_dataset_version.py +0 -0
  61. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/test/test_datasets_api.py +0 -0
  62. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/test/test_datasets_create_request.py +0 -0
  63. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/test/test_datasets_list200_response.py +0 -0
  64. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/test/test_error.py +0 -0
  65. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/test/test_experiment.py +0 -0
  66. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/protocol/__init__.py +0 -0
  67. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/protocol/flight/__init__.py +0 -0
  68. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/protocol/flight/ingest_pb2.py +0 -0
  69. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/protocol/rec/__init__.py +0 -0
  70. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/protocol/rec/public_pb2.py +0 -0
  71. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/constants/__init__.py +0 -0
  72. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/constants/config.py +0 -0
  73. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/constants/ml.py +0 -0
  74. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/constants/model_mapping.json +0 -0
  75. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/constants/spans.py +0 -0
  76. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/embeddings/__init__.py +0 -0
  77. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/embeddings/auto_generator.py +0 -0
  78. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/embeddings/base_generators.py +0 -0
  79. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/embeddings/constants.py +0 -0
  80. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/embeddings/cv_generators.py +0 -0
  81. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/embeddings/errors.py +0 -0
  82. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/embeddings/nlp_generators.py +0 -0
  83. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/embeddings/tabular_generators.py +0 -0
  84. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/embeddings/usecases.py +0 -0
  85. {arize-8.0.0a12/src/arize/datasets → arize-8.0.0a14/src/arize/exceptions}/__init__.py +0 -0
  86. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/exceptions/auth.py +0 -0
  87. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/exceptions/base.py +0 -0
  88. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/exceptions/models.py +0 -0
  89. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/exceptions/parameters.py +0 -0
  90. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/exceptions/spaces.py +0 -0
  91. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/exceptions/types.py +0 -0
  92. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/exceptions/values.py +0 -0
  93. {arize-8.0.0a12/src/arize/exceptions → arize-8.0.0a14/src/arize/experiments}/__init__.py +0 -0
  94. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/experiments/client.py +0 -0
  95. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/logging.py +0 -0
  96. {arize-8.0.0a12/src/arize/experiments → arize-8.0.0a14/src/arize/models}/__init__.py +0 -0
  97. {arize-8.0.0a12/src/arize/models → arize-8.0.0a14/src/arize/models/batch_validation}/__init__.py +0 -0
  98. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/models/batch_validation/errors.py +0 -0
  99. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/models/batch_validation/validator.py +0 -0
  100. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/models/bounded_executor.py +0 -0
  101. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/models/stream_validation.py +0 -0
  102. {arize-8.0.0a12/src/arize/models/batch_validation → arize-8.0.0a14/src/arize/models/surrogate_explainer}/__init__.py +0 -0
  103. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/models/surrogate_explainer/mimic.py +0 -0
  104. {arize-8.0.0a12/src/arize/models/surrogate_explainer → arize-8.0.0a14/src/arize/spans}/__init__.py +0 -0
  105. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/spans/columns.py +0 -0
  106. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/spans/conversion.py +0 -0
  107. {arize-8.0.0a12/src/arize/spans → arize-8.0.0a14/src/arize/spans/validation}/__init__.py +0 -0
  108. {arize-8.0.0a12/src/arize/spans/validation → arize-8.0.0a14/src/arize/spans/validation/annotations}/__init__.py +0 -0
  109. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/spans/validation/annotations/annotations_validation.py +0 -0
  110. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/spans/validation/annotations/dataframe_form_validation.py +0 -0
  111. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/spans/validation/annotations/value_validation.py +0 -0
  112. {arize-8.0.0a12/src/arize/spans/validation/annotations → arize-8.0.0a14/src/arize/spans/validation/common}/__init__.py +0 -0
  113. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/spans/validation/common/argument_validation.py +0 -0
  114. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/spans/validation/common/dataframe_form_validation.py +0 -0
  115. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/spans/validation/common/errors.py +0 -0
  116. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/spans/validation/common/value_validation.py +0 -0
  117. {arize-8.0.0a12/src/arize/spans/validation/common → arize-8.0.0a14/src/arize/spans/validation/evals}/__init__.py +0 -0
  118. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/spans/validation/evals/dataframe_form_validation.py +0 -0
  119. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/spans/validation/evals/evals_validation.py +0 -0
  120. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/spans/validation/evals/value_validation.py +0 -0
  121. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/spans/validation/metadata/__init__.py +0 -0
  122. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/spans/validation/metadata/argument_validation.py +0 -0
  123. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/spans/validation/metadata/dataframe_form_validation.py +0 -0
  124. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/spans/validation/metadata/value_validation.py +0 -0
  125. {arize-8.0.0a12/src/arize/spans/validation/evals → arize-8.0.0a14/src/arize/spans/validation/spans}/__init__.py +0 -0
  126. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/spans/validation/spans/dataframe_form_validation.py +0 -0
  127. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/spans/validation/spans/spans_validation.py +0 -0
  128. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/spans/validation/spans/value_validation.py +0 -0
  129. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/types.py +0 -0
  130. {arize-8.0.0a12/src/arize/spans/validation/spans → arize-8.0.0a14/src/arize/utils}/__init__.py +0 -0
  131. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/utils/dataframe.py +0 -0
  132. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/utils/online_tasks/__init__.py +0 -0
  133. {arize-8.0.0a12 → arize-8.0.0a14}/src/arize/utils/online_tasks/dataframe_preprocessor.py +0 -0
{arize-8.0.0a12 → arize-8.0.0a14}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: arize
- Version: 8.0.0a12
+ Version: 8.0.0a14
  Summary: A helper library to interact with Arize AI APIs
  Project-URL: Homepage, https://arize.com
  Project-URL: Documentation, https://docs.arize.com/arize
@@ -34,6 +34,8 @@ Requires-Dist: pillow<11,>=8.4.0; extra == 'auto-embeddings'
  Requires-Dist: tokenizers<1,>=0.13; extra == 'auto-embeddings'
  Requires-Dist: torch<3,>=1.13; extra == 'auto-embeddings'
  Requires-Dist: transformers<5,>=4.25; extra == 'auto-embeddings'
+ Provides-Extra: datasets-experiments
+ Requires-Dist: pydantic; extra == 'datasets-experiments'
  Provides-Extra: dev
  Requires-Dist: pytest==8.4.2; extra == 'dev'
  Requires-Dist: ruff==0.13.2; extra == 'dev'
@@ -92,6 +94,11 @@ Description-Content-Type: text/markdown
  - [Log a batch of ML Data for a Object Detection use-case](#log-a-batch-of-ml-data-for-a-object-detection-use-case)
  - [Exporting ML Data](#exporting-ml-data)
  - [Generate embeddings for your data](#generate-embeddings-for-your-data)
+ - [Operations on Datasets](#operations-on-datasets)
+ - [List Datasets](#list-datasets)
+ - [Create a Dataset](#create-a-dataset)
+ - [Get Dataset by ID](#get-dataset-by-id)
+ - [Delete a Dataset](#delete-a-dataset)
  - [Configure Logging](#configure-logging)
  - [In Code](#in-code)
  - [Via Environment Variables](#via-environment-variables)
@@ -368,6 +375,96 @@ generator = EmbeddingGenerator.from_use_case(
  df["text_vector"] = generator.generate_embeddings(text_col=df["text"])
  ```

+ ## Operations on Datasets
+
+ ### List Datasets
+
+ You can list all datasets the user has access to using `client.datasets.list()`. Use the `limit` parameter to cap the number of datasets returned, and pass `space_id` to scope the listing to a particular space.
+
+ ```python
+ resp = client.datasets.list(
+     limit=...,     # Optional
+     space_id=...,  # Optional
+ )
+ ```
+
+ The response is a `DatasetsList200Response` object, and the list of datasets is available via its `datasets` attribute. You can also convert the response to a dictionary, to JSON, or to a pandas dataframe.
+
+ ```python
+ # Get the list of datasets from the response
+ dataset_list = resp.datasets
+ # Get the response as a dictionary
+ resp_dict = resp.to_dict()
+ # Get the response in JSON format
+ resp_json = resp.to_json()
+ # Get the response as a pandas dataframe
+ resp_df = resp.to_df()
+ ```
+
+ ### Create a Dataset
+
+ You can create a dataset using `client.datasets.create()`. You must pass examples; creating an empty dataset is not currently supported. For instance, the following is a list of dictionaries containing two example rows. You can also pass the examples as a pandas dataframe.
+
+ ```python
+ examples = [
+     {
+         "eval.Correctness Basic.explanation": "The query indicates that the user is having trouble accessing their account on their laptop, while access on their phone is still working. This suggests a potential issue with the login process on the laptop, which aligns with the 'Login Issues' queue. The mention of a possible change in the account could relate to login credentials or settings affecting the laptop specifically, but it still falls under the broader category of login issues.",
+         "eval.Correctness Basic.label": "correct",
+         "eval.Correctness Basic.score": 1,
+         "llm output": "Login Issues",
+         "query": "I can't get in on my laptop anymore, but my phone still works fine — could this be because I changed something in my account?"
+     },
+     {
+         "eval.Correctness Basic.explanation": "The query is about a user who signed up but is unable to log in because the system says no account is found. This issue is related to the login process, as the user is trying to access their account and is facing a problem with the login system recognizing their account. Therefore, assigning this query to the 'Login Issues' queue is appropriate.",
+         "eval.Correctness Basic.label": "correct",
+         "eval.Correctness Basic.score": 1,
+         "llm output": "Login Issues",
+         "query": "Signed up ages ago but never got around to logging in — now it says no account found. Do I start over?"
+     }
+ ]
+ ```
+
+ If the number of examples (rows in a dataframe, items in a list) is large, the client SDK will try to send the data via Arrow Flight over gRPC for better performance. If you want to force the transfer over HTTP, use the `force_http` flag. The response is a `Dataset` object.
+
+ ```python
+ created_dataset = client.datasets.create(
+     space_id="<target-space-id>",
+     name="<your-dataset-name>",  # Name must be unique within a space
+     examples=...,  # List of dictionaries or pandas dataframe
+ )
+ ```
+
+ The `Dataset` object also provides convenience methods similar to the `List***` response objects:
+
+ ```python
+ # Get the response as a dictionary
+ dataset_dict = created_dataset.to_dict()
+ # Get the response in JSON format
+ dataset_json = created_dataset.to_json()
+ ```
+
+
+ ### Get Dataset by ID
+
+ To get a dataset by its ID, use `client.datasets.get()`. You can optionally pass the version ID of a specific dataset version. The returned type is `Dataset`.
+
+ ```python
+ dataset = client.datasets.get(
+     dataset_id=...,          # The unique identifier of the dataset
+     dataset_version_id=...,  # The unique identifier of the dataset version
+ )
+ ```
+
+ ### Delete a Dataset
+
+ To delete a dataset by its ID, use `client.datasets.delete()`. The call returns `None` on successful deletion and raises an error otherwise.
+
+ ```python
+ client.datasets.delete(
+     dataset_id=...,  # The unique identifier of the dataset
+ )
+ ```
+
  # Configure Logging

  ## In Code
{arize-8.0.0a12 → arize-8.0.0a14}/README.md
@@ -32,6 +32,11 @@
  - [Log a batch of ML Data for a Object Detection use-case](#log-a-batch-of-ml-data-for-a-object-detection-use-case)
  - [Exporting ML Data](#exporting-ml-data)
  - [Generate embeddings for your data](#generate-embeddings-for-your-data)
+ - [Operations on Datasets](#operations-on-datasets)
+ - [List Datasets](#list-datasets)
+ - [Create a Dataset](#create-a-dataset)
+ - [Get Dataset by ID](#get-dataset-by-id)
+ - [Delete a Dataset](#delete-a-dataset)
  - [Configure Logging](#configure-logging)
  - [In Code](#in-code)
  - [Via Environment Variables](#via-environment-variables)
@@ -308,6 +313,96 @@ generator = EmbeddingGenerator.from_use_case(
  df["text_vector"] = generator.generate_embeddings(text_col=df["text"])
  ```

+ ## Operations on Datasets
+
+ ### List Datasets
+
+ You can list all datasets the user has access to using `client.datasets.list()`. Use the `limit` parameter to cap the number of datasets returned, and pass `space_id` to scope the listing to a particular space.
+
+ ```python
+ resp = client.datasets.list(
+     limit=...,     # Optional
+     space_id=...,  # Optional
+ )
+ ```
+
+ The response is a `DatasetsList200Response` object, and the list of datasets is available via its `datasets` attribute. You can also convert the response to a dictionary, to JSON, or to a pandas dataframe.
+
+ ```python
+ # Get the list of datasets from the response
+ dataset_list = resp.datasets
+ # Get the response as a dictionary
+ resp_dict = resp.to_dict()
+ # Get the response in JSON format
+ resp_json = resp.to_json()
+ # Get the response as a pandas dataframe
+ resp_df = resp.to_df()
+ ```
+
+ ### Create a Dataset
+
+ You can create a dataset using `client.datasets.create()`. You must pass examples; creating an empty dataset is not currently supported. For instance, the following is a list of dictionaries containing two example rows. You can also pass the examples as a pandas dataframe.
+
+ ```python
+ examples = [
+     {
+         "eval.Correctness Basic.explanation": "The query indicates that the user is having trouble accessing their account on their laptop, while access on their phone is still working. This suggests a potential issue with the login process on the laptop, which aligns with the 'Login Issues' queue. The mention of a possible change in the account could relate to login credentials or settings affecting the laptop specifically, but it still falls under the broader category of login issues.",
+         "eval.Correctness Basic.label": "correct",
+         "eval.Correctness Basic.score": 1,
+         "llm output": "Login Issues",
+         "query": "I can't get in on my laptop anymore, but my phone still works fine — could this be because I changed something in my account?"
+     },
+     {
+         "eval.Correctness Basic.explanation": "The query is about a user who signed up but is unable to log in because the system says no account is found. This issue is related to the login process, as the user is trying to access their account and is facing a problem with the login system recognizing their account. Therefore, assigning this query to the 'Login Issues' queue is appropriate.",
+         "eval.Correctness Basic.label": "correct",
+         "eval.Correctness Basic.score": 1,
+         "llm output": "Login Issues",
+         "query": "Signed up ages ago but never got around to logging in — now it says no account found. Do I start over?"
+     }
+ ]
+ ```
+
+ If the number of examples (rows in a dataframe, items in a list) is large, the client SDK will try to send the data via Arrow Flight over gRPC for better performance. If you want to force the transfer over HTTP, use the `force_http` flag. The response is a `Dataset` object.
+
+ ```python
+ created_dataset = client.datasets.create(
+     space_id="<target-space-id>",
+     name="<your-dataset-name>",  # Name must be unique within a space
+     examples=...,  # List of dictionaries or pandas dataframe
+ )
+ ```
+
+ The `Dataset` object also provides convenience methods similar to the `List***` response objects:
+
+ ```python
+ # Get the response as a dictionary
+ dataset_dict = created_dataset.to_dict()
+ # Get the response in JSON format
+ dataset_json = created_dataset.to_json()
+ ```
+
+
+ ### Get Dataset by ID
+
+ To get a dataset by its ID, use `client.datasets.get()`. You can optionally pass the version ID of a specific dataset version. The returned type is `Dataset`.
+
+ ```python
+ dataset = client.datasets.get(
+     dataset_id=...,          # The unique identifier of the dataset
+     dataset_version_id=...,  # The unique identifier of the dataset version
+ )
+ ```
+
+ ### Delete a Dataset
+
+ To delete a dataset by its ID, use `client.datasets.delete()`. The call returns `None` on successful deletion and raises an error otherwise.
+
+ ```python
+ client.datasets.delete(
+     dataset_id=...,  # The unique identifier of the dataset
+ )
+ ```
+
  # Configure Logging

  ## In Code
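The README section above names a `force_http` flag for `client.datasets.create()` but never shows it in a call. The sketch below strings the documented operations together; it is a hedged illustration, not SDK reference: the `from arize import Client` import path, the no-argument constructor, the exact placement of `force_http`, and the `id` attribute on the returned `Dataset` are assumptions not confirmed by this diff.

```python
# Hedged sketch of the dataset workflow described in the README excerpt above.
import pandas as pd

from arize import Client  # assumed entry point; adjust to the real SDK surface

client = Client()  # credentials assumed to come from env vars / config

examples = pd.DataFrame(
    [
        {"query": "I can't get in on my laptop anymore", "llm output": "Login Issues"},
        {"query": "Signed up ages ago but it says no account found", "llm output": "Login Issues"},
    ]
)

created = client.datasets.create(
    space_id="<target-space-id>",
    name="support-queue-examples",  # must be unique within the space
    examples=examples,              # list of dicts or a pandas dataframe
    force_http=True,                # assumed keyword: skip the Arrow Flight path
)

# Round-trip the documented read and delete operations.
fetched = client.datasets.get(dataset_id=created.id)  # `id` attribute assumed
client.datasets.delete(dataset_id=created.id)         # returns None on success
```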
{arize-8.0.0a12 → arize-8.0.0a14}/pyproject.toml
@@ -70,9 +70,9 @@ ml-batch = [
      "requests>=2.0.0, <3", # For posting pyarrow files
      "tqdm", # For export progress bars
  ]
- # datasets-experiments = [
- #     "pydantic",
- # ]
+ datasets-experiments = [
+     "pydantic",
+ ]
  mimic-explainer = [
      "interpret-community[mimic]>=0.22.0,<1",
  ]
{arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_exporter/client.py
@@ -20,7 +20,6 @@ from arize._generated.protocol.flight import export_pb2
  from arize.logging import CtxAdapter
  from arize.types import Environments, SimilaritySearchParams
  from arize.utils.dataframe import reset_dataframe_index
- from arize.utils.proto import get_pb_flight_doput_request

  logger = logging.getLogger(__name__)

@@ -131,7 +130,7 @@ class ArizeExportClient:
          reset_dataframe_index(df)
          return df

-     def export_model_to_parquet(
+     def export_to_parquet(
          self,
          path: str,
          space_id: str,
@@ -285,7 +284,7 @@ class ArizeExportClient:
              end_time=Timestamp(seconds=int(end_time.timestamp())),
              filter_expression=where,
              similarity_search_params=(
-                 get_pb_flight_doput_request(similarity_search_params)
+                 _get_pb_similarity_search_params(similarity_search_params)
                  if similarity_search_params
                  else None
              ),
@@ -326,3 +325,19 @@ class ArizeExportClient:
              colour="#008000",
              unit=" row",
          )
+
+
+ def _get_pb_similarity_search_params(
+     similarity_params: SimilaritySearchParams,
+ ) -> export_pb2.SimilaritySearchParams:
+     proto_params = export_pb2.SimilaritySearchParams()
+     proto_params.search_column_name = similarity_params.search_column_name
+     proto_params.threshold = similarity_params.threshold
+     for ref in similarity_params.references:
+         new_ref = proto_params.references.add()
+         new_ref.prediction_id = ref.prediction_id
+         new_ref.reference_column_name = ref.reference_column_name
+         if ref.prediction_timestamp:
+             new_ref.prediction_timestamp.FromDatetime(ref.prediction_timestamp)
+
+     return proto_params
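For orientation, here is a hedged sketch of the input shape that the new `_get_pb_similarity_search_params()` helper expects, inferred only from the attributes it reads above. The real `SimilaritySearchParams` lives in `arize.types`; the dataclasses below are illustrative stand-ins, not the actual definitions.

```python
# Illustrative stand-ins (assumptions) mirroring the attributes the helper reads.
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import List, Optional


@dataclass
class SimilarityReference:  # hypothetical; the real type is defined in arize.types
    prediction_id: str
    reference_column_name: str
    prediction_timestamp: Optional[datetime] = None


@dataclass
class SimilaritySearchParams:  # hypothetical stand-in for arize.types.SimilaritySearchParams
    search_column_name: str
    threshold: float
    references: List[SimilarityReference] = field(default_factory=list)


params = SimilaritySearchParams(
    search_column_name="prompt_embedding",
    threshold=0.85,
    references=[
        SimilarityReference(
            prediction_id="pred-123",
            reference_column_name="reference_embedding",
            prediction_timestamp=datetime(2024, 1, 1, tzinfo=timezone.utc),
        )
    ],
)

# The exporter converts this to protobuf roughly as:
#   proto_params = _get_pb_similarity_search_params(params)
```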
{arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_flight/client.py
@@ -10,6 +10,7 @@ from google.protobuf import json_format
  from pyarrow import flight

  from arize._flight.types import FlightRequestType
+ from arize._generated.protocol.flight import ingest_pb2 as flight_ing_pb2
  from arize._generated.protocol.flight.ingest_pb2 import (
      WriteSpanAnnotationResponse,
      WriteSpanAttributesMetadataResponse,
@@ -17,7 +18,6 @@ from arize._generated.protocol.flight.ingest_pb2 import (
  )
  from arize.config import get_python_version
  from arize.logging import log_a_list
- from arize.utils.proto import get_pb_flight_doput_request, get_pb_schema_tracing
  from arize.version import __version__

  if TYPE_CHECKING:
@@ -137,7 +137,7 @@ class ArizeFlightClient:
              pa_table.schema, {"arize-schema": base64_schema}
          )

-         doput_request = get_pb_flight_doput_request(
+         doput_request = _get_pb_flight_doput_request(
              space_id=space_id,
              model_id=project_name,
              request_type=request_type,
@@ -179,7 +179,49 @@ class ArizeFlightClient:
                  return res
          except Exception as e:
              logger.exception(f"Error logging arrow table to Arize: {e}")
-             raise
+             raise RuntimeError(f"Error logging arrow table to Arize: {e}") from e
+
+     # ---------- dataset methods ----------
+
+     def create_dataset(
+         self,
+         space_id: str,
+         dataset_name: str,
+         pa_table: pa.Table,
+     ) -> str:
+         doput_request = flight_ing_pb2.DoPutRequest(
+             create_dataset=flight_ing_pb2.CreateDatasetRequest(
+                 space_id=space_id,
+                 dataset_name=dataset_name,
+                 dataset_type=flight_ing_pb2.GENERATIVE,
+             )
+         )
+         descriptor = flight.FlightDescriptor.for_command(
+             json_format.MessageToJson(doput_request).encode("utf-8")
+         )
+         try:
+             flight_writer, flight_metadata_reader = self.do_put(
+                 descriptor, pa_table.schema, options=self.call_options
+             )
+             with flight_writer:
+                 # write table as stream to flight server
+                 flight_writer.write_table(pa_table)
+                 # indicate that client has flushed all contents to stream
+                 flight_writer.done_writing()
+                 # read response from flight server
+                 flight_response = flight_metadata_reader.read()
+                 if flight_response is None:
+                     return None
+
+                 res = None
+                 res = flight_ing_pb2.CreateDatasetResponse()
+                 res.ParseFromString(flight_response.to_pybytes())
+                 if res:
+                     return str(res.dataset_id)
+                 return res
+         except Exception as e:
+             logger.exception(f"Error logging arrow table to Arize: {e}")
+             raise RuntimeError(f"Error logging arrow table to Arize: {e}") from e


  def append_to_pyarrow_metadata(
@@ -201,3 +243,37 @@ def append_to_pyarrow_metadata(
      updated_metadata = metadata.copy()
      updated_metadata.update(new_metadata)
      return pa_schema.with_metadata(updated_metadata)
+
+
+ def _get_pb_flight_doput_request(
+     space_id,
+     model_id: str,
+     request_type: FlightRequestType,
+ ) -> flight_ing_pb2.DoPutRequest:
+     """Return a DoPutRequest for the given request_type."""
+     common_args: dict[str, str] = {
+         "space_id": space_id,
+         "external_model_id": model_id,
+     }
+
+     match request_type:
+         case FlightRequestType.EVALUATION:
+             return flight_ing_pb2.DoPutRequest(
+                 write_span_evaluation_request=flight_ing_pb2.WriteSpanEvaluationRequest(
+                     **common_args
+                 )
+             )
+         case FlightRequestType.ANNOTATION:
+             return flight_ing_pb2.DoPutRequest(
+                 write_span_annotation_request=flight_ing_pb2.WriteSpanAnnotationRequest(
+                     **common_args
+                 )
+             )
+         case FlightRequestType.METADATA:
+             return flight_ing_pb2.DoPutRequest(
+                 write_span_attributes_metadata_request=flight_ing_pb2.WriteSpanAttributesMetadataRequest(
+                     **common_args
+                 )
+             )
+         case _:
+             raise ValueError(f"Unsupported request_type: {request_type}")
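The new `ArizeFlightClient.create_dataset()` takes an already-built `pa.Table`. A hedged sketch of preparing that table from a list of example dictionaries follows; only standard pyarrow calls are used, and how the higher-level datasets client actually constructs the table (column order, extra metadata) is an assumption rather than something shown in this diff.

```python
# Hedged sketch: turning example dicts into the pa.Table that create_dataset() consumes.
import pyarrow as pa

examples = [
    {"query": "I can't get in on my laptop anymore", "llm output": "Login Issues"},
    {"query": "Signed up ages ago but it says no account found", "llm output": "Login Issues"},
]

# Infer a schema from the dictionaries and build an Arrow table.
pa_table = pa.Table.from_pylist(examples)

# The flight client would then be invoked roughly like this, where `flight_client`
# is assumed to be an already-configured ArizeFlightClient instance:
#   dataset_id = flight_client.create_dataset(
#       space_id="<target-space-id>",
#       dataset_name="support-queue-examples",
#       pa_table=pa_table,
#   )
```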
{arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/__init__.py
@@ -31,7 +31,6 @@ __all__ = [
      "ApiException",
      "Dataset",
      "DatasetVersion",
-     "DatasetsCreate201Response",
      "DatasetsCreateRequest",
      "DatasetsList200Response",
      "DatasetsListExamples200Response",
@@ -40,42 +39,7 @@ __all__ = [
      "ExperimentsList200Response",
  ]

- if __import__("typing").TYPE_CHECKING:
-     # import apis into sdk package
-     from arize._generated.api_client.api.datasets_api import DatasetsApi as DatasetsApi
-     from arize._generated.api_client.api.experiments_api import ExperimentsApi as ExperimentsApi
-
-     # import ApiClient
-     from arize._generated.api_client.api_response import ApiResponse as ApiResponse
-     from arize._generated.api_client.api_client import ApiClient as ApiClient
-     from arize._generated.api_client.configuration import Configuration as Configuration
-     from arize._generated.api_client.exceptions import OpenApiException as OpenApiException
-     from arize._generated.api_client.exceptions import ApiTypeError as ApiTypeError
-     from arize._generated.api_client.exceptions import ApiValueError as ApiValueError
-     from arize._generated.api_client.exceptions import ApiKeyError as ApiKeyError
-     from arize._generated.api_client.exceptions import ApiAttributeError as ApiAttributeError
-     from arize._generated.api_client.exceptions import ApiException as ApiException
-
-     # import models into sdk package
-     from arize._generated.api_client.models.dataset import Dataset as Dataset
-     from arize._generated.api_client.models.dataset_version import DatasetVersion as DatasetVersion
-     from arize._generated.api_client.models.datasets_create201_response import DatasetsCreate201Response as DatasetsCreate201Response
-     from arize._generated.api_client.models.datasets_create_request import DatasetsCreateRequest as DatasetsCreateRequest
-     from arize._generated.api_client.models.datasets_list200_response import DatasetsList200Response as DatasetsList200Response
-     from arize._generated.api_client.models.datasets_list_examples200_response import DatasetsListExamples200Response as DatasetsListExamples200Response
-     from arize._generated.api_client.models.error import Error as Error
-     from arize._generated.api_client.models.experiment import Experiment as Experiment
-     from arize._generated.api_client.models.experiments_list200_response import ExperimentsList200Response as ExperimentsList200Response
-
- else:
-     from lazy_imports import LazyModule, as_package, load
-
-     load(
-         LazyModule(
-             *as_package(__file__),
-             ("__version__", __version__),
-             ("__all__", __all__),
-             """# import apis into sdk package
+ # import apis into sdk package
  from arize._generated.api_client.api.datasets_api import DatasetsApi as DatasetsApi
  from arize._generated.api_client.api.experiments_api import ExperimentsApi as ExperimentsApi

@@ -93,7 +57,6 @@ from arize._generated.api_client.exceptions import ApiException as ApiException
  # import models into sdk package
  from arize._generated.api_client.models.dataset import Dataset as Dataset
  from arize._generated.api_client.models.dataset_version import DatasetVersion as DatasetVersion
- from arize._generated.api_client.models.datasets_create201_response import DatasetsCreate201Response as DatasetsCreate201Response
  from arize._generated.api_client.models.datasets_create_request import DatasetsCreateRequest as DatasetsCreateRequest
  from arize._generated.api_client.models.datasets_list200_response import DatasetsList200Response as DatasetsList200Response
  from arize._generated.api_client.models.datasets_list_examples200_response import DatasetsListExamples200Response as DatasetsListExamples200Response
@@ -101,8 +64,3 @@ from arize._generated.api_client.models.error import Error as Error
  from arize._generated.api_client.models.experiment import Experiment as Experiment
  from arize._generated.api_client.models.experiments_list200_response import ExperimentsList200Response as ExperimentsList200Response

- """,
-             name=__name__,
-             doc=__doc__,
-         )
- )
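With the `lazy_imports` machinery removed, the generated client is imported eagerly and can be used directly. The sketch below follows the usual openapi-generator Python pattern (`Configuration(host=...)`, `ApiClient`, `DatasetsApi`); the host URL and the absence of auth settings are assumptions, not values taken from this diff.

```python
# Hedged sketch of instantiating the eagerly-imported generated client.
from arize._generated.api_client import ApiClient, Configuration, DatasetsApi

configuration = Configuration(host="https://api.arize.com/v2")  # assumed host URL

with ApiClient(configuration) as api_client:
    datasets_api = DatasetsApi(api_client)
    # datasets_api exposes the generated dataset operations whose response types
    # change in this release (create now returns Dataset, delete returns 204/None).
```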
arize-8.0.0a14/src/arize/_generated/api_client/api/__init__.py (new file)
@@ -0,0 +1,6 @@
+ # flake8: noqa
+
+ # import apis into api package
+ from arize._generated.api_client.api.datasets_api import DatasetsApi
+ from arize._generated.api_client.api.experiments_api import ExperimentsApi
+
{arize-8.0.0a12 → arize-8.0.0a14}/src/arize/_generated/api_client/api/datasets_api.py
@@ -20,7 +20,6 @@ from pydantic import Field, StrictInt, StrictStr
  from typing import Optional
  from typing_extensions import Annotated
  from arize._generated.api_client.models.dataset import Dataset
- from arize._generated.api_client.models.datasets_create201_response import DatasetsCreate201Response
  from arize._generated.api_client.models.datasets_create_request import DatasetsCreateRequest
  from arize._generated.api_client.models.datasets_list200_response import DatasetsList200Response
  from arize._generated.api_client.models.datasets_list_examples200_response import DatasetsListExamples200Response
@@ -59,7 +58,7 @@ class DatasetsApi:
          _content_type: Optional[StrictStr] = None,
          _headers: Optional[Dict[StrictStr, Any]] = None,
          _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
-     ) -> DatasetsCreate201Response:
+     ) -> Dataset:
          """Create a new dataset with JSON examples


@@ -96,7 +95,7 @@ class DatasetsApi:
          )

          _response_types_map: Dict[str, Optional[str]] = {
-             '201': "DatasetsCreate201Response",
+             '201': "Dataset",
              '400': "Error",
              '401': "Error",
              '403': "Error",
@@ -129,7 +128,7 @@ class DatasetsApi:
          _content_type: Optional[StrictStr] = None,
          _headers: Optional[Dict[StrictStr, Any]] = None,
          _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
-     ) -> ApiResponse[DatasetsCreate201Response]:
+     ) -> ApiResponse[Dataset]:
          """Create a new dataset with JSON examples


@@ -166,7 +165,7 @@ class DatasetsApi:
          )

          _response_types_map: Dict[str, Optional[str]] = {
-             '201': "DatasetsCreate201Response",
+             '201': "Dataset",
              '400': "Error",
              '401': "Error",
              '403': "Error",
@@ -236,7 +235,7 @@ class DatasetsApi:
          )

          _response_types_map: Dict[str, Optional[str]] = {
-             '201': "DatasetsCreate201Response",
+             '201': "Dataset",
              '400': "Error",
              '401': "Error",
              '403': "Error",
@@ -342,7 +341,7 @@ class DatasetsApi:
          _content_type: Optional[StrictStr] = None,
          _headers: Optional[Dict[StrictStr, Any]] = None,
          _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
-     ) -> DatasetsCreate201Response:
+     ) -> None:
          """Delete a dataset by ID


@@ -379,7 +378,7 @@ class DatasetsApi:
          )

          _response_types_map: Dict[str, Optional[str]] = {
-             '200': "DatasetsCreate201Response",
+             '204': None,
              '400': "Error",
              '401': "Error",
              '403': "Error",
@@ -414,7 +413,7 @@ class DatasetsApi:
          _content_type: Optional[StrictStr] = None,
          _headers: Optional[Dict[StrictStr, Any]] = None,
          _host_index: Annotated[StrictInt, Field(ge=0, le=0)] = 0,
-     ) -> ApiResponse[DatasetsCreate201Response]:
+     ) -> ApiResponse[None]:
          """Delete a dataset by ID


@@ -451,7 +450,7 @@ class DatasetsApi:
          )

          _response_types_map: Dict[str, Optional[str]] = {
-             '200': "DatasetsCreate201Response",
+             '204': None,
              '400': "Error",
              '401': "Error",
              '403': "Error",
@@ -523,7 +522,7 @@ class DatasetsApi:
          )

          _response_types_map: Dict[str, Optional[str]] = {
-             '200': "DatasetsCreate201Response",
+             '204': None,
              '400': "Error",
              '401': "Error",
              '403': "Error",
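The delete operation now maps HTTP 204 to `None`, so success carries no response body and failures surface as exceptions. A hedged sketch of handling that contract through the documented `client.datasets.delete()` wrapper is shown below; it assumes the wrapper lets the generated `ApiException` propagate, which this diff does not show.

```python
# Hedged sketch: treating "None on success, exception on failure" as the delete contract.
from arize._generated.api_client import ApiException


def delete_dataset_if_possible(client, dataset_id: str) -> bool:
    """Return True if the dataset was deleted, False if the API rejected the call."""
    try:
        client.datasets.delete(dataset_id=dataset_id)  # returns None on success (204)
        return True
    except ApiException as exc:  # 400/401/403/404 responses are raised as errors
        print(f"Delete failed with status {exc.status}: {exc.reason}")
        return False
```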