arize 8.0.0b0__py3-none-any.whl → 8.0.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arize/__init__.py +1 -1
- arize/_client_factory.py +50 -0
- arize/_flight/client.py +4 -4
- arize/_generated/api_client/__init__.py +0 -2
- arize/_generated/api_client/api/datasets_api.py +6 -6
- arize/_generated/api_client/api/experiments_api.py +6 -6
- arize/_generated/api_client/api/projects_api.py +3 -3
- arize/_generated/api_client/models/__init__.py +0 -1
- arize/_generated/api_client/models/datasets_create_request.py +2 -10
- arize/_generated/api_client/models/datasets_examples_insert_request.py +2 -10
- arize/_generated/api_client/test/test_datasets_create_request.py +2 -6
- arize/_generated/api_client/test/test_datasets_examples_insert_request.py +2 -6
- arize/_generated/api_client/test/test_datasets_examples_list200_response.py +2 -6
- arize/_generated/api_client/test/test_datasets_examples_update_request.py +2 -6
- arize/_generated/api_client/test/test_experiments_create_request.py +2 -6
- arize/_generated/api_client/test/test_experiments_runs_list200_response.py +2 -6
- arize/_generated/api_client_README.md +0 -1
- arize/_lazy.py +25 -9
- arize/client.py +16 -52
- arize/config.py +9 -36
- arize/constants/ml.py +9 -16
- arize/constants/spans.py +5 -10
- arize/datasets/client.py +13 -9
- arize/datasets/errors.py +1 -1
- arize/datasets/validation.py +2 -2
- arize/embeddings/auto_generator.py +2 -2
- arize/embeddings/errors.py +2 -2
- arize/embeddings/tabular_generators.py +1 -1
- arize/exceptions/base.py +0 -52
- arize/exceptions/parameters.py +0 -329
- arize/experiments/__init__.py +2 -2
- arize/experiments/client.py +16 -10
- arize/experiments/evaluators/base.py +6 -6
- arize/experiments/evaluators/executors.py +10 -3
- arize/experiments/evaluators/types.py +2 -2
- arize/experiments/functions.py +24 -17
- arize/experiments/types.py +6 -8
- arize/logging.py +1 -1
- arize/ml/batch_validation/errors.py +10 -1004
- arize/ml/batch_validation/validator.py +273 -225
- arize/ml/casting.py +7 -7
- arize/ml/client.py +12 -11
- arize/ml/proto.py +6 -6
- arize/ml/stream_validation.py +2 -3
- arize/ml/surrogate_explainer/mimic.py +3 -3
- arize/ml/types.py +1 -55
- arize/pre_releases.py +6 -3
- arize/projects/client.py +9 -4
- arize/regions.py +2 -2
- arize/spans/client.py +14 -12
- arize/spans/columns.py +32 -36
- arize/spans/conversion.py +5 -6
- arize/spans/validation/common/argument_validation.py +3 -3
- arize/spans/validation/common/dataframe_form_validation.py +6 -6
- arize/spans/validation/common/value_validation.py +1 -1
- arize/spans/validation/evals/dataframe_form_validation.py +4 -4
- arize/spans/validation/evals/evals_validation.py +6 -6
- arize/spans/validation/metadata/dataframe_form_validation.py +1 -1
- arize/spans/validation/spans/dataframe_form_validation.py +2 -2
- arize/spans/validation/spans/spans_validation.py +6 -6
- arize/utils/arrow.py +2 -2
- arize/utils/cache.py +2 -2
- arize/utils/dataframe.py +4 -4
- arize/utils/online_tasks/dataframe_preprocessor.py +7 -7
- arize/utils/openinference_conversion.py +10 -10
- arize/utils/proto.py +1 -1
- arize/version.py +1 -1
- {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/METADATA +71 -63
- {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/RECORD +72 -73
- arize/_generated/api_client/models/primitive_value.py +0 -172
- arize/_generated/api_client/test/test_primitive_value.py +0 -50
- {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/WHEEL +0 -0
- {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/licenses/LICENSE +0 -0
- {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/licenses/NOTICE +0 -0
arize/experiments/functions.py
CHANGED
@@ -56,7 +56,7 @@ from arize.experiments.types import (
     ExperimentEvaluationRun,
     ExperimentRun,
     ExperimentTask,
-
+    ExperimentTaskFieldNames,
     _TaskSummary,
 )

@@ -76,23 +76,25 @@ def run_experiment(
     evaluators: Evaluators | None = None,
     concurrency: int = 3,
     exit_on_error: bool = False,
+    timeout: int = 120,
 ) -> pd.DataFrame:
     """Run an experiment on a dataset.

     Args:
         experiment_name (str): The name for the experiment.
         experiment_id (str): The ID for the experiment.
-        dataset (
+        dataset (:class:`pandas.DataFrame`): The dataset to run the experiment on.
         task (ExperimentTask): The task to be executed on the dataset.
         tracer (Tracer): Tracer for tracing the experiment.
         resource (Resource): The resource for tracing the experiment.
-        rate_limit_errors (
-        evaluators (
+        rate_limit_errors (RateLimitErrors | :obj:`None`): Optional rate limit errors.
+        evaluators (Evaluators | :obj:`None`): Optional evaluators to assess the task.
         concurrency (int): The number of concurrent tasks to run. Default is 3.
         exit_on_error (bool): Whether to exit on error. Default is False.
+        timeout (int): The timeout for each task execution in seconds. Default is 120.

     Returns:
-
+        :class:`pandas.DataFrame`: The results of the experiment.
     """
     task_signature = inspect.signature(task)
     _validate_task_signature(task_signature)
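The main user-facing change here is the new `timeout` keyword on `run_experiment`. Below is a minimal sketch of how it might be called; the import path, the task signature, the dataset columns, and the tracer/resource setup are all assumptions for illustration, not confirmed by this diff:

```python
import pandas as pd
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider

from arize.experiments.functions import run_experiment  # import path assumed

# Illustrative dataset; the real column requirements are not shown in this diff.
dataset = pd.DataFrame({"id": ["ex-1", "ex-2"], "input": ["2+2", "3+5"]})

def task(example) -> str:
    # Placeholder task standing in for a real model or pipeline call;
    # the parameter name accepted by the task is an assumption.
    return "stub-answer"

results = run_experiment(
    experiment_name="demo-experiment",
    experiment_id="exp-123",  # hypothetical ID
    dataset=dataset,
    task=task,
    tracer=TracerProvider().get_tracer(__name__),     # setup assumed
    resource=Resource.create({"service.name": "demo"}),  # setup assumed
    concurrency=3,
    exit_on_error=False,
    timeout=60,  # new in 8.0.0b2: timeout per task execution, in seconds (default 120)
)
```

Judging by the `timeout=timeout` hunks below, the value is threaded into the task and evaluation executors, so it presumably bounds each individual execution rather than the run as a whole.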
@@ -289,6 +291,7 @@ def run_experiment(
         fallback_return_value=None,
         tqdm_bar_format=get_tqdm_progress_bar_formatter("running tasks"),
         concurrency=concurrency,
+        timeout=timeout,
     )

     runs, _ = executor.run(examples)
@@ -321,6 +324,7 @@ def run_experiment(
         tracer=tracer,
         resource=resource,
         exit_on_error=exit_on_error,
+        timeout=timeout,
     )

     if exit_on_error and (None in eval_results):
@@ -371,6 +375,7 @@ def evaluate_experiment(
     tracer: Tracer | None = None,
     resource: Resource | None = None,
     exit_on_error: bool = False,
+    timeout: int = 120,
 ) -> list[ExperimentEvaluationRun]:
     """Evaluate the results of an experiment using the provided evaluators.

@@ -379,11 +384,12 @@ def evaluate_experiment(
         examples (Sequence[Example]): The examples to evaluate.
         experiment_results (Sequence[ExperimentRun]): The results of the experiment.
         evaluators (Evaluators): The evaluators to use for assessment.
-        rate_limit_errors (
+        rate_limit_errors (RateLimitErrors | :obj:`None`): Optional rate limit errors.
         concurrency (int): The number of concurrent tasks to run. Default is 3.
-        tracer (
-        resource (
+        tracer (Tracer | :obj:`None`): Optional tracer for tracing the evaluation.
+        resource (Resource | :obj:`None`): Optional resource for the evaluation.
         exit_on_error (bool): Whether to exit on error. Default is False.
+        timeout (int): The timeout for each evaluation in seconds. Default is 120.

     Returns:
         List[ExperimentEvaluationRun]: The evaluation results.
@@ -556,6 +562,7 @@ def evaluate_experiment(
             "running experiment evaluations"
         ),
         concurrency=concurrency,
+        timeout=timeout,
     )
     eval_runs, _ = executor.run(evaluation_input)
     return eval_runs
@@ -768,19 +775,19 @@ def get_result_attr(r: object, attr: str, default: object = None) -> object:

 def transform_to_experiment_format(
     experiment_runs: list[dict[str, object]] | pd.DataFrame,
-    task_fields:
+    task_fields: ExperimentTaskFieldNames,
     evaluator_fields: dict[str, EvaluationResultFieldNames] | None = None,
 ) -> pd.DataFrame:
-    """Transform a DataFrame to match the format returned by run_experiment().
+    """Transform a :class:`pandas.DataFrame` to match the format returned by run_experiment().

     Args:
-        experiment_runs: Input list of dictionaries or DataFrame containing experiment results
+        experiment_runs: Input list of dictionaries or :class:`pandas.DataFrame` containing experiment results
         task_fields: Field name mapping for task results
         evaluator_fields: Dictionary mapping evaluator names (str)
             to their field name mappings (EvaluationResultFieldNames)

     Returns:
-        DataFrame in the format matching run_experiment() output
+        :class:`pandas.DataFrame` in the format matching run_experiment() output
     """
     data = (
         experiment_runs
@@ -788,7 +795,7 @@ def transform_to_experiment_format(
         else pd.DataFrame(experiment_runs)
     )
     # Validate required columns
-    required_cols = {task_fields.example_id, task_fields.
+    required_cols = {task_fields.example_id, task_fields.output}
     missing_cols = required_cols - set(data.columns)
     if missing_cols:
         raise ValueError(f"Missing required columns: {missing_cols}")
@@ -799,11 +806,11 @@ def transform_to_experiment_format(
     out_df["example_id"] = data[task_fields.example_id]
     if task_fields.example_id != "example_id":
         out_df.drop(task_fields.example_id, axis=1, inplace=True)
-    out_df["
+    out_df["output"] = data[task_fields.output].apply(
         lambda x: json.dumps(x) if isinstance(x, dict) else x
     )
-    if task_fields.
-    out_df.drop(task_fields.
+    if task_fields.output != "output":
+        out_df.drop(task_fields.output, axis=1, inplace=True)

     # Process evaluator results
     if evaluator_fields:
@@ -822,7 +829,7 @@ def _add_evaluator_columns(
     evaluator_name: str,
     column_names: EvaluationResultFieldNames,
 ) -> None:
-    """Helper function to add evaluator columns to output DataFrame
+    """Helper function to add evaluator columns to output :class:`pandas.DataFrame`."""
     # Add score if specified
     if column_names.score and column_names.score in input_df.columns:
         output_df[f"eval.{evaluator_name}.score"] = input_df[column_names.score]
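The remaining hunks rename the task field mapping passed to `transform_to_experiment_format` (now `ExperimentTaskFieldNames`, with an `output` field). A hedged sketch of mapping externally produced results through it, assuming the import paths:

```python
import pandas as pd

from arize.experiments.functions import transform_to_experiment_format  # path assumed
from arize.experiments.types import ExperimentTaskFieldNames  # path assumed

# Externally produced results with non-standard column names (illustrative).
runs = pd.DataFrame(
    {
        "row_id": ["ex-1", "ex-2"],
        "answer": ["4", {"value": 8}],  # dict outputs get JSON-serialized
    }
)

df = transform_to_experiment_format(
    experiment_runs=runs,
    # Renamed in 8.0.0b2; the second field is now `output`.
    task_fields=ExperimentTaskFieldNames(example_id="row_id", output="answer"),
)
# df now carries canonical "example_id" and "output" columns.
```

Per the validation hunk above, the required columns are exactly the two named in the mapping, and dict values in the output column go through the `json.dumps` branch.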
arize/experiments/types.py
CHANGED
@@ -23,8 +23,6 @@ from arize.experiments.evaluators.types import (
 )

 ExperimentId = str
-# DatasetId= str
-# DatasetVersionId= str
 ExampleId = str
 RepetitionNumber = int
 ExperimentRunId = str
@@ -263,9 +261,9 @@ class ExperimentEvaluationRun:
         name: The name of the evaluation run.
         annotator_kind: The kind of annotator used in the evaluation run.
         error: The error message if the evaluation run failed.
-        result (
+        result (EvaluationResult | :obj:`None`): The result of the evaluation run.
         id (str): The unique identifier for the evaluation run.
-        trace_id (
+        trace_id (TraceId | :obj:`None`): The trace identifier for the evaluation run.
     """

     experiment_run_id: ExperimentRunId
@@ -397,17 +395,17 @@ def _top_string(s: pd.Series, length: int = 100) -> str | None:


 @dataclass
-class
-    """Column names for mapping experiment task results in a DataFrame
+class ExperimentTaskFieldNames:
+    """Column names for mapping experiment task results in a :class:`pandas.DataFrame`.

     Args:
         example_id: Name of column containing example IDs.
             The ID values must match the id of the dataset rows.
-
+        output: Name of column containing task results
     """

     example_id: str
-
+    output: str


 TaskOutput = JSONSerializable
arize/logging.py
CHANGED
@@ -259,7 +259,7 @@ def log_a_list(values: Iterable[Any] | None, join_word: str) -> str:
     """Format a list of values into a human-readable string with a joining word.

     Args:
-        values: An iterable of values to format, or None
+        values: An iterable of values to format, or :obj:`None`.
         join_word: The word to use before the last item (e.g., "and", "or").

     Returns: