arize 8.0.0b0__py3-none-any.whl → 8.0.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. arize/__init__.py +1 -1
  2. arize/_client_factory.py +50 -0
  3. arize/_flight/client.py +4 -4
  4. arize/_generated/api_client/__init__.py +0 -2
  5. arize/_generated/api_client/api/datasets_api.py +6 -6
  6. arize/_generated/api_client/api/experiments_api.py +6 -6
  7. arize/_generated/api_client/api/projects_api.py +3 -3
  8. arize/_generated/api_client/models/__init__.py +0 -1
  9. arize/_generated/api_client/models/datasets_create_request.py +2 -10
  10. arize/_generated/api_client/models/datasets_examples_insert_request.py +2 -10
  11. arize/_generated/api_client/test/test_datasets_create_request.py +2 -6
  12. arize/_generated/api_client/test/test_datasets_examples_insert_request.py +2 -6
  13. arize/_generated/api_client/test/test_datasets_examples_list200_response.py +2 -6
  14. arize/_generated/api_client/test/test_datasets_examples_update_request.py +2 -6
  15. arize/_generated/api_client/test/test_experiments_create_request.py +2 -6
  16. arize/_generated/api_client/test/test_experiments_runs_list200_response.py +2 -6
  17. arize/_generated/api_client_README.md +0 -1
  18. arize/_lazy.py +25 -9
  19. arize/client.py +16 -52
  20. arize/config.py +9 -36
  21. arize/constants/ml.py +9 -16
  22. arize/constants/spans.py +5 -10
  23. arize/datasets/client.py +13 -9
  24. arize/datasets/errors.py +1 -1
  25. arize/datasets/validation.py +2 -2
  26. arize/embeddings/auto_generator.py +2 -2
  27. arize/embeddings/errors.py +2 -2
  28. arize/embeddings/tabular_generators.py +1 -1
  29. arize/exceptions/base.py +0 -52
  30. arize/exceptions/parameters.py +0 -329
  31. arize/experiments/__init__.py +2 -2
  32. arize/experiments/client.py +16 -10
  33. arize/experiments/evaluators/base.py +6 -6
  34. arize/experiments/evaluators/executors.py +10 -3
  35. arize/experiments/evaluators/types.py +2 -2
  36. arize/experiments/functions.py +24 -17
  37. arize/experiments/types.py +6 -8
  38. arize/logging.py +1 -1
  39. arize/ml/batch_validation/errors.py +10 -1004
  40. arize/ml/batch_validation/validator.py +273 -225
  41. arize/ml/casting.py +7 -7
  42. arize/ml/client.py +12 -11
  43. arize/ml/proto.py +6 -6
  44. arize/ml/stream_validation.py +2 -3
  45. arize/ml/surrogate_explainer/mimic.py +3 -3
  46. arize/ml/types.py +1 -55
  47. arize/pre_releases.py +6 -3
  48. arize/projects/client.py +9 -4
  49. arize/regions.py +2 -2
  50. arize/spans/client.py +14 -12
  51. arize/spans/columns.py +32 -36
  52. arize/spans/conversion.py +5 -6
  53. arize/spans/validation/common/argument_validation.py +3 -3
  54. arize/spans/validation/common/dataframe_form_validation.py +6 -6
  55. arize/spans/validation/common/value_validation.py +1 -1
  56. arize/spans/validation/evals/dataframe_form_validation.py +4 -4
  57. arize/spans/validation/evals/evals_validation.py +6 -6
  58. arize/spans/validation/metadata/dataframe_form_validation.py +1 -1
  59. arize/spans/validation/spans/dataframe_form_validation.py +2 -2
  60. arize/spans/validation/spans/spans_validation.py +6 -6
  61. arize/utils/arrow.py +2 -2
  62. arize/utils/cache.py +2 -2
  63. arize/utils/dataframe.py +4 -4
  64. arize/utils/online_tasks/dataframe_preprocessor.py +7 -7
  65. arize/utils/openinference_conversion.py +10 -10
  66. arize/utils/proto.py +1 -1
  67. arize/version.py +1 -1
  68. {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/METADATA +71 -63
  69. {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/RECORD +72 -73
  70. arize/_generated/api_client/models/primitive_value.py +0 -172
  71. arize/_generated/api_client/test/test_primitive_value.py +0 -50
  72. {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/WHEEL +0 -0
  73. {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/licenses/LICENSE +0 -0
  74. {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/licenses/NOTICE +0 -0
arize/experiments/functions.py CHANGED
@@ -56,7 +56,7 @@ from arize.experiments.types import (
  ExperimentEvaluationRun,
  ExperimentRun,
  ExperimentTask,
- ExperimentTaskResultFieldNames,
+ ExperimentTaskFieldNames,
  _TaskSummary,
  )

@@ -76,23 +76,25 @@ def run_experiment(
  evaluators: Evaluators | None = None,
  concurrency: int = 3,
  exit_on_error: bool = False,
+ timeout: int = 120,
  ) -> pd.DataFrame:
  """Run an experiment on a dataset.

  Args:
  experiment_name (str): The name for the experiment.
  experiment_id (str): The ID for the experiment.
- dataset (pd.DataFrame): The dataset to run the experiment on.
+ dataset (:class:`pandas.DataFrame`): The dataset to run the experiment on.
  task (ExperimentTask): The task to be executed on the dataset.
  tracer (Tracer): Tracer for tracing the experiment.
  resource (Resource): The resource for tracing the experiment.
- rate_limit_errors (Optional[RateLimitErrors]): Optional rate limit errors.
- evaluators (Optional[Evaluators]): Optional evaluators to assess the task.
+ rate_limit_errors (RateLimitErrors | :obj:`None`): Optional rate limit errors.
+ evaluators (Evaluators | :obj:`None`): Optional evaluators to assess the task.
  concurrency (int): The number of concurrent tasks to run. Default is 3.
  exit_on_error (bool): Whether to exit on error. Default is False.
+ timeout (int): The timeout for each task execution in seconds. Default is 120.

  Returns:
- pd.DataFrame: The results of the experiment.
+ :class:`pandas.DataFrame`: The results of the experiment.
  """
  task_signature = inspect.signature(task)
  _validate_task_signature(task_signature)
@@ -289,6 +291,7 @@ def run_experiment(
  fallback_return_value=None,
  tqdm_bar_format=get_tqdm_progress_bar_formatter("running tasks"),
  concurrency=concurrency,
+ timeout=timeout,
  )

  runs, _ = executor.run(examples)
@@ -321,6 +324,7 @@ def run_experiment(
  tracer=tracer,
  resource=resource,
  exit_on_error=exit_on_error,
+ timeout=timeout,
  )

  if exit_on_error and (None in eval_results):
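The new timeout keyword caps each task execution at the given number of seconds (default 120). A minimal usage sketch follows; only the parameter names and defaults come from the docstring above, while the import path, the task signature, and the omission of the other documented arguments (experiment_id, tracer, resource, rate_limit_errors) are assumptions for illustration.

    import pandas as pd
    from arize.experiments.functions import run_experiment  # assumed import path

    def my_task(example) -> str:
        # Hypothetical task; the exact signature accepted by the library's
        # _validate_task_signature check is not shown in this diff.
        return f"processed {example}"

    results: pd.DataFrame = run_experiment(
        experiment_name="timeout-demo",
        dataset=pd.DataFrame({"input": ["a", "b"]}),
        task=my_task,
        concurrency=3,       # documented default
        exit_on_error=False,
        timeout=60,          # new in 8.0.0b2: per-task timeout in seconds (default 120)
    )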
@@ -371,6 +375,7 @@ def evaluate_experiment(
  tracer: Tracer | None = None,
  resource: Resource | None = None,
  exit_on_error: bool = False,
+ timeout: int = 120,
  ) -> list[ExperimentEvaluationRun]:
  """Evaluate the results of an experiment using the provided evaluators.

@@ -379,11 +384,12 @@ def evaluate_experiment(
  examples (Sequence[Example]): The examples to evaluate.
  experiment_results (Sequence[ExperimentRun]): The results of the experiment.
  evaluators (Evaluators): The evaluators to use for assessment.
- rate_limit_errors (Optional[RateLimitErrors]): Optional rate limit errors.
+ rate_limit_errors (RateLimitErrors | :obj:`None`): Optional rate limit errors.
  concurrency (int): The number of concurrent tasks to run. Default is 3.
- tracer (Optional[Tracer]): Optional tracer for tracing the evaluation.
- resource (Optional[Resource]): Optional resource for the evaluation.
+ tracer (Tracer | :obj:`None`): Optional tracer for tracing the evaluation.
+ resource (Resource | :obj:`None`): Optional resource for the evaluation.
  exit_on_error (bool): Whether to exit on error. Default is False.
+ timeout (int): The timeout for each evaluation in seconds. Default is 120.

  Returns:
  List[ExperimentEvaluationRun]: The evaluation results.
@@ -556,6 +562,7 @@ def evaluate_experiment(
  "running experiment evaluations"
  ),
  concurrency=concurrency,
+ timeout=timeout,
  )
  eval_runs, _ = executor.run(evaluation_input)
  return eval_runs
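In both run_experiment and evaluate_experiment the new timeout is passed alongside concurrency into the executor that runs the individual tasks and evaluations (arize/experiments/evaluators/executors.py also changed in this release), so each call is bounded separately rather than the batch as a whole. As a rough standard-library analogue of that behaviour (this is not the arize executor's code):

    import time
    from concurrent.futures import ThreadPoolExecutor
    from concurrent.futures import TimeoutError as FutureTimeout

    def slow_eval(x: int) -> int:
        time.sleep(x)
        return x * 2

    with ThreadPoolExecutor(max_workers=3) as pool:    # concurrency=3
        future = pool.submit(slow_eval, 1)
        try:
            print(future.result(timeout=120))          # per-call timeout in seconds
        except FutureTimeout:
            print("call timed out")                    # handle the timed-out call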
@@ -768,19 +775,19 @@ def get_result_attr(r: object, attr: str, default: object = None) -> object:

  def transform_to_experiment_format(
  experiment_runs: list[dict[str, object]] | pd.DataFrame,
- task_fields: ExperimentTaskResultFieldNames,
+ task_fields: ExperimentTaskFieldNames,
  evaluator_fields: dict[str, EvaluationResultFieldNames] | None = None,
  ) -> pd.DataFrame:
- """Transform a DataFrame to match the format returned by run_experiment().
+ """Transform a :class:`pandas.DataFrame` to match the format returned by run_experiment().

  Args:
- experiment_runs: Input list of dictionaries or DataFrame containing experiment results
+ experiment_runs: Input list of dictionaries or :class:`pandas.DataFrame` containing experiment results
  task_fields: Field name mapping for task results
  evaluator_fields: Dictionary mapping evaluator names (str)
  to their field name mappings (EvaluationResultFieldNames)

  Returns:
- DataFrame in the format matching run_experiment() output
+ :class:`pandas.DataFrame` in the format matching run_experiment() output
  """
  data = (
  experiment_runs
@@ -788,7 +795,7 @@ def transform_to_experiment_format(
  else pd.DataFrame(experiment_runs)
  )
  # Validate required columns
- required_cols = {task_fields.example_id, task_fields.result}
+ required_cols = {task_fields.example_id, task_fields.output}
  missing_cols = required_cols - set(data.columns)
  if missing_cols:
  raise ValueError(f"Missing required columns: {missing_cols}")
@@ -799,11 +806,11 @@ def transform_to_experiment_format(
  out_df["example_id"] = data[task_fields.example_id]
  if task_fields.example_id != "example_id":
  out_df.drop(task_fields.example_id, axis=1, inplace=True)
- out_df["result"] = data[task_fields.result].apply(
+ out_df["output"] = data[task_fields.output].apply(
  lambda x: json.dumps(x) if isinstance(x, dict) else x
  )
- if task_fields.result != "result":
- out_df.drop(task_fields.result, axis=1, inplace=True)
+ if task_fields.output != "output":
+ out_df.drop(task_fields.output, axis=1, inplace=True)

  # Process evaluator results
  if evaluator_fields:
@@ -822,7 +829,7 @@ def _add_evaluator_columns(
  evaluator_name: str,
  column_names: EvaluationResultFieldNames,
  ) -> None:
- """Helper function to add evaluator columns to output DataFrame."""
+ """Helper function to add evaluator columns to output :class:`pandas.DataFrame`."""
  # Add score if specified
  if column_names.score and column_names.score in input_df.columns:
  output_df[f"eval.{evaluator_name}.score"] = input_df[column_names.score]
arize/experiments/types.py CHANGED
@@ -23,8 +23,6 @@ from arize.experiments.evaluators.types import (
  )

  ExperimentId = str
- # DatasetId= str
- # DatasetVersionId= str
  ExampleId = str
  RepetitionNumber = int
  ExperimentRunId = str
@@ -263,9 +261,9 @@ class ExperimentEvaluationRun:
  name: The name of the evaluation run.
  annotator_kind: The kind of annotator used in the evaluation run.
  error: The error message if the evaluation run failed.
- result (Optional[EvaluationResult]): The result of the evaluation run.
+ result (EvaluationResult | :obj:`None`): The result of the evaluation run.
  id (str): The unique identifier for the evaluation run.
- trace_id (Optional[TraceId]): The trace identifier for the evaluation run.
+ trace_id (TraceId | :obj:`None`): The trace identifier for the evaluation run.
  """

  experiment_run_id: ExperimentRunId
@@ -397,17 +395,17 @@ def _top_string(s: pd.Series, length: int = 100) -> str | None:


  @dataclass
- class ExperimentTaskResultFieldNames:
- """Column names for mapping experiment task results in a DataFrame.
+ class ExperimentTaskFieldNames:
+ """Column names for mapping experiment task results in a :class:`pandas.DataFrame`.

  Args:
  example_id: Name of column containing example IDs.
  The ID values must match the id of the dataset rows.
- result: Name of column containing task results
+ output: Name of column containing task results
  """

  example_id: str
- result: str
+ output: str


  TaskOutput = JSONSerializable
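For callers migrating from 8.0.0b0, the rename affects both the dataclass and its field; a minimal before/after sketch, assuming the module path shown above:

    # 8.0.0b0
    # from arize.experiments.types import ExperimentTaskResultFieldNames
    # fields = ExperimentTaskResultFieldNames(example_id="row_id", result="answer")

    # 8.0.0b2
    from arize.experiments.types import ExperimentTaskFieldNames
    fields = ExperimentTaskFieldNames(example_id="row_id", output="answer")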
arize/logging.py CHANGED
@@ -259,7 +259,7 @@ def log_a_list(values: Iterable[Any] | None, join_word: str) -> str:
  """Format a list of values into a human-readable string with a joining word.

  Args:
- values: An iterable of values to format, or None.
+ values: An iterable of values to format, or :obj:`None`.
  join_word: The word to use before the last item (e.g., "and", "or").

  Returns:
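The signature and docstring above describe the helper's contract; an illustrative re-implementation of that documented behaviour (not the library's actual code, whose body is not shown in this diff):

    from typing import Any, Iterable

    def log_a_list_sketch(values: Iterable[Any] | None, join_word: str) -> str:
        # Joins items with commas and puts `join_word` before the last one,
        # e.g. (["a", "b", "c"], "and") -> "a, b and c".
        items = [str(v) for v in (values or [])]
        if not items:
            return ""
        if len(items) == 1:
            return items[0]
        return f"{', '.join(items[:-1])} {join_word} {items[-1]}"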