arize 8.0.0b1__py3-none-any.whl → 8.0.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. arize/__init__.py +9 -2
  2. arize/_client_factory.py +50 -0
  3. arize/_exporter/client.py +18 -17
  4. arize/_exporter/parsers/tracing_data_parser.py +9 -4
  5. arize/_exporter/validation.py +1 -1
  6. arize/_flight/client.py +37 -17
  7. arize/_generated/api_client/api/datasets_api.py +6 -6
  8. arize/_generated/api_client/api/experiments_api.py +6 -6
  9. arize/_generated/api_client/api/projects_api.py +3 -3
  10. arize/_lazy.py +61 -10
  11. arize/client.py +66 -50
  12. arize/config.py +175 -48
  13. arize/constants/config.py +1 -0
  14. arize/constants/ml.py +9 -16
  15. arize/constants/spans.py +5 -10
  16. arize/datasets/client.py +45 -28
  17. arize/datasets/errors.py +1 -1
  18. arize/datasets/validation.py +2 -2
  19. arize/embeddings/auto_generator.py +16 -9
  20. arize/embeddings/base_generators.py +15 -9
  21. arize/embeddings/cv_generators.py +2 -2
  22. arize/embeddings/errors.py +2 -2
  23. arize/embeddings/nlp_generators.py +8 -8
  24. arize/embeddings/tabular_generators.py +6 -6
  25. arize/exceptions/base.py +0 -52
  26. arize/exceptions/config.py +22 -0
  27. arize/exceptions/parameters.py +1 -330
  28. arize/exceptions/values.py +8 -5
  29. arize/experiments/__init__.py +4 -0
  30. arize/experiments/client.py +31 -18
  31. arize/experiments/evaluators/base.py +12 -9
  32. arize/experiments/evaluators/executors.py +16 -7
  33. arize/experiments/evaluators/rate_limiters.py +3 -1
  34. arize/experiments/evaluators/types.py +9 -7
  35. arize/experiments/evaluators/utils.py +7 -5
  36. arize/experiments/functions.py +128 -58
  37. arize/experiments/tracing.py +4 -1
  38. arize/experiments/types.py +34 -31
  39. arize/logging.py +54 -33
  40. arize/ml/batch_validation/errors.py +10 -1004
  41. arize/ml/batch_validation/validator.py +351 -291
  42. arize/ml/bounded_executor.py +25 -6
  43. arize/ml/casting.py +51 -33
  44. arize/ml/client.py +43 -35
  45. arize/ml/proto.py +21 -22
  46. arize/ml/stream_validation.py +64 -27
  47. arize/ml/surrogate_explainer/mimic.py +18 -10
  48. arize/ml/types.py +27 -67
  49. arize/pre_releases.py +10 -6
  50. arize/projects/client.py +9 -4
  51. arize/py.typed +0 -0
  52. arize/regions.py +11 -11
  53. arize/spans/client.py +125 -31
  54. arize/spans/columns.py +32 -36
  55. arize/spans/conversion.py +12 -11
  56. arize/spans/validation/annotations/dataframe_form_validation.py +1 -1
  57. arize/spans/validation/annotations/value_validation.py +11 -14
  58. arize/spans/validation/common/argument_validation.py +3 -3
  59. arize/spans/validation/common/dataframe_form_validation.py +7 -7
  60. arize/spans/validation/common/value_validation.py +11 -14
  61. arize/spans/validation/evals/dataframe_form_validation.py +4 -4
  62. arize/spans/validation/evals/evals_validation.py +6 -6
  63. arize/spans/validation/evals/value_validation.py +1 -1
  64. arize/spans/validation/metadata/argument_validation.py +1 -1
  65. arize/spans/validation/metadata/dataframe_form_validation.py +2 -2
  66. arize/spans/validation/metadata/value_validation.py +23 -1
  67. arize/spans/validation/spans/dataframe_form_validation.py +2 -2
  68. arize/spans/validation/spans/spans_validation.py +6 -6
  69. arize/utils/arrow.py +38 -2
  70. arize/utils/cache.py +2 -2
  71. arize/utils/dataframe.py +4 -4
  72. arize/utils/online_tasks/dataframe_preprocessor.py +15 -11
  73. arize/utils/openinference_conversion.py +10 -10
  74. arize/utils/proto.py +0 -1
  75. arize/utils/types.py +6 -6
  76. arize/version.py +1 -1
  77. {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/METADATA +32 -7
  78. {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/RECORD +81 -78
  79. {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/WHEEL +0 -0
  80. {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/licenses/LICENSE +0 -0
  81. {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/licenses/NOTICE +0 -0
arize/__init__.py CHANGED
@@ -2,6 +2,7 @@
 
 import logging
 from collections.abc import Mapping
+from typing import Literal, cast
 
 from arize._generated.api_client import models
 from arize.client import ArizeClient
@@ -37,7 +38,7 @@ def make_to_df(field_name: str) -> object:
         json_normalize: bool = False,
         convert_dtypes: bool = True,
     ) -> object:
-        """Convert a list of objects to a pandas DataFrame.
+        """Convert a list of objects to a :class:`pandas.DataFrame`.
 
         Behavior:
           - If an item is a Pydantic v2 model, use `.model_dump(by_alias=...)`.
@@ -82,7 +83,11 @@ def make_to_df(field_name: str) -> object:
 
         # Drop None/NaN columns if requested
         if exclude_none in ("any", "all", True):
-            drop_how = "all" if exclude_none is True else exclude_none
+            drop_how: Literal["any", "all"] = (
+                "all"
+                if exclude_none is True
+                else cast("Literal['any', 'all']", exclude_none)
+            )
            df.dropna(axis=1, how=drop_how, inplace=True)
 
        if convert_dtypes:
@@ -92,6 +97,8 @@ def make_to_df(field_name: str) -> object:
    return to_df
 
 
+# Monkey-patch convenience methods onto generated response models
+# Type ignore comments needed: mypy can't verify runtime attribute additions
 models.DatasetsList200Response.to_df = make_to_df("datasets")  # type: ignore[attr-defined]
 models.DatasetsExamplesList200Response.to_df = make_to_df("examples")  # type: ignore[attr-defined]
 models.ExperimentsList200Response.to_df = make_to_df("experiments")  # type: ignore[attr-defined]
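
Since `to_df` is attached to the generated list-response models at import time, any list response gains a DataFrame view. A hedged usage sketch: the accessor chain (`client.datasets.list()`) is an illustrative assumption, not confirmed SDK surface; only `to_df` and its `exclude_none` semantics come from the diff above.

```python
from arize import ArizeClient

client = ArizeClient(api_key="...")  # placeholder credentials

# Hypothetical call path; any DatasetsList200Response works the same way.
response = client.datasets.list()

# to_df() was monkey-patched onto the response model above; "all" drops
# only columns whose values are entirely None/NaN.
df = response.to_df(exclude_none="all")
print(df.head())
```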
arize/_client_factory.py ADDED
@@ -0,0 +1,50 @@
+"""Factory for creating and caching the generated OpenAPI client."""
+
+from __future__ import annotations
+
+import threading
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from arize._generated.api_client.api_client import ApiClient
+    from arize.config import SDKConfiguration
+
+
+class GeneratedClientFactory:
+    """Factory for creating and caching generated OpenAPI clients.
+
+    This factory is owned by ArizeClient and provides thread-safe lazy
+    initialization of the OpenAPI client used by various subclients.
+    """
+
+    def __init__(self, sdk_config: SDKConfiguration) -> None:
+        """Initialize the factory.
+
+        Args:
+            sdk_config: SDK configuration containing API settings.
+        """
+        self._sdk_config = sdk_config
+        self._client: ApiClient | None = None
+        self._lock = threading.Lock()
+
+    def get_client(self) -> ApiClient:
+        """Get or create the generated OpenAPI client instance.
+
+        Returns:
+            The shared generated API client instance.
+        """
+        if self._client is not None:
+            return self._client
+
+        with self._lock:
+            if self._client is not None:
+                return self._client
+
+            # Import lazily to avoid extra dependencies at config time
+            from arize._generated import api_client as gen
+
+            cfg = gen.Configuration(host=self._sdk_config.api_url)
+            if self._sdk_config.api_key:
+                cfg.access_token = self._sdk_config.api_key
+            self._client = gen.ApiClient(cfg)
+            return self._client
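
`get_client` is a double-checked lock: a lock-free fast path, then a second check under the lock so concurrent first callers construct the `ApiClient` at most once. A minimal generic sketch of the same pattern (the `LazySingleton` class below is illustrative, not part of the SDK):

```python
import threading


class LazySingleton:
    """Double-checked lazy initialization, mirroring GeneratedClientFactory."""

    def __init__(self, factory):
        self._factory = factory
        self._value = None
        self._lock = threading.Lock()

    def get(self):
        if self._value is not None:  # fast path: no lock once initialized
            return self._value
        with self._lock:
            if self._value is None:  # re-check: another thread may have won
                self._value = self._factory()
            return self._value


shared = LazySingleton(lambda: object())  # stand-in for gen.ApiClient(cfg)
assert shared.get() is shared.get()
```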
arize/_exporter/client.py CHANGED
@@ -1,4 +1,3 @@
-# type: ignore[pb2]
 import logging
 from dataclasses import dataclass
 from datetime import datetime
@@ -41,7 +40,7 @@ class ArizeExportClient:
         batch_id: str = "",
         include_actuals: bool = False,
         stream_chunk_size: int | None = None,
-    ) -> object:
+    ) -> pd.DataFrame:
         """Exports data of a specific model in the Arize platform to a pandas dataframe.
 
         The export covers a defined time interval and model environment, and can
@@ -237,6 +236,22 @@ class ArizeExportClient:
         columns: list | None = None,
         stream_chunk_size: int | None = None,
     ) -> tuple[flight.FlightStreamReader | None, int]:
+        # Validate inputs first before creating logging context
+        validate_input_type(space_id, "space_id", str)
+        validate_input_type(model_id, "model_id", str)
+        validate_input_type(environment, "environment", Environments)
+        validate_input_type(include_actuals, "include_actuals", bool)
+        validate_input_type(start_time, "start_time", datetime)
+        validate_input_type(end_time, "end_time", datetime)
+        validate_input_type(model_version, "model_version", str)
+        validate_input_type(batch_id, "batch_id", str)
+        validate_input_type(where, "where", str)
+        validate_input_type(columns, "columns", list, allow_none=True)
+        validate_input_type(
+            stream_chunk_size, "stream_chunk_size", int, allow_none=True
+        )
+        validate_start_end_time(start_time, end_time)
+
         # Bind common context for this operation
         log = CtxAdapter(
             logger,
@@ -258,20 +273,6 @@ class ArizeExportClient:
             },
         )
         log.debug("Getting stream reader...")
-        validate_input_type(space_id, "space_id", str)
-        validate_input_type(model_id, "model_id", str)
-        validate_input_type(environment, "environment", Environments)
-        validate_input_type(include_actuals, "include_actuals", bool)
-        validate_input_type(start_time, "start_time", datetime)
-        validate_input_type(end_time, "end_time", datetime)
-        validate_input_type(model_version, "model_version", str)
-        validate_input_type(batch_id, "batch_id", str)
-        validate_input_type(where, "where", str)
-        validate_input_type(columns, "columns", list, allow_none=True)
-        validate_input_type(
-            stream_chunk_size, "stream_chunk_size", int, allow_none=True
-        )
-        validate_start_end_time(start_time, end_time)
 
         # Create query descriptor
         query_descriptor = flight_pb2.RecordQueryDescriptor(
@@ -300,7 +301,7 @@ class ArizeExportClient:
         try:
             flight_info = self.flight_client.get_flight_info(
                 flight.FlightDescriptor.for_command(
-                    json_format.MessageToJson(query_descriptor)  # type: ignore
+                    json_format.MessageToJson(query_descriptor)
                 ),
             )
             logger.info("Fetching data...")
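
Moving validation ahead of the `CtxAdapter` setup makes bad arguments fail before any logging side effects occur. For reference, a hedged sketch of the calling convention these checks rely on; the real `validate_input_type` in `arize._exporter.validation` may differ in details:

```python
def validate_input_type(value, name, expected_type, allow_none=False):
    """Raise TypeError unless value is an instance of expected_type."""
    if value is None and allow_none:
        return
    if not isinstance(value, expected_type):
        raise TypeError(
            f"{name} must be of type {expected_type.__name__}, "
            f"got {type(value).__name__}"
        )


validate_input_type("my-model", "model_id", str)             # passes
validate_input_type(None, "columns", list, allow_none=True)  # passes
# validate_input_type(5, "where", str)  # would raise TypeError
```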
arize/_exporter/parsers/tracing_data_parser.py CHANGED
@@ -1,5 +1,7 @@
 import json
 import logging
+from collections.abc import Callable
+from typing import Any, TypeGuard
 
 import numpy as np
 import pandas as pd
@@ -28,7 +30,10 @@ logger = logging.getLogger(__name__)
 # of the error is on the data; It should not prevent a user from continuing to use the data
 class OtelTracingDataTransformer:
     def _apply_column_transformation(
-        self, df: pd.DataFrame, col_name: str, transform_func: object
+        self,
+        df: pd.DataFrame,
+        col_name: str,
+        transform_func: Callable[[Any], Any],
     ) -> str | None:
         """Apply a transformation to a column and return error message if it fails."""
         try:
@@ -89,7 +94,7 @@ class OtelTracingDataTransformer:
             if col.name in df.columns
         ]
         for col_name in dirty_string_column_names:
-            df[col_name] = df[col_name].apply(self._clean_json_string)
+            df[col_name] = df[col_name].apply(self._clean_json_string)  # type: ignore[arg-type]
 
         # Convert timestamp columns to datetime objects
         timestamp_column_names: list[str] = [
@@ -102,7 +107,7 @@ class OtelTracingDataTransformer:
         ]
         for col_name in timestamp_column_names:
             df[col_name] = df[col_name].apply(
-                self._convert_timestamp_to_datetime
+                self._convert_timestamp_to_datetime  # type: ignore[arg-type]
             )
 
         for err in errors:
@@ -138,7 +143,7 @@ class OtelTracingDataTransformer:
             return None
         return None
 
-    def _is_non_empty_string(self, value: object) -> bool:
+    def _is_non_empty_string(self, value: object) -> TypeGuard[str]:
         return isinstance(value, str) and value != ""
 
     def _deserialize_json_string_to_dict(self, value: str) -> object:
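
The `TypeGuard[str]` annotation is more than documentation: after a call that returns True, type checkers narrow the argument from `object` to `str`. A minimal standalone illustration of the same idiom:

```python
from typing import TypeGuard


def is_non_empty_string(value: object) -> TypeGuard[str]:
    return isinstance(value, str) and value != ""


def shout(value: object) -> str:
    if is_non_empty_string(value):
        # mypy now treats `value` as str here, so .upper() type-checks.
        return value.upper()
    return ""


print(shout("hello"))  # HELLO
print(shout(42))       # ""
```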
arize/_exporter/validation.py CHANGED
@@ -35,7 +35,7 @@ def validate_input_value(
    if input in choices:
        return
    raise ValueError(
-        f"{input_name} is {input}, but must be one of {', '.join(choices)}"
+        f"{input_name} is {input}, but must be one of {', '.join(str(c) for c in choices)}"
    )
 
 
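This one-line fix matters because `str.join` requires every item to already be a string; with non-string choices (an enum member, an int), the old code raised a `TypeError` while formatting the intended `ValueError`:

```python
choices = (1, 2, 3)

try:
    ", ".join(choices)  # old behavior with non-str choices
except TypeError as e:
    print(e)  # sequence item 0: expected str instance, int found

print(", ".join(str(c) for c in choices))  # new behavior: "1, 2, 3"
```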
arize/_flight/client.py CHANGED
@@ -1,11 +1,10 @@
-# type: ignore[pb2]
 from __future__ import annotations
 
 import base64
 import logging
 from dataclasses import dataclass, field
 from enum import Enum
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, TypeAlias
 
 from google.protobuf import json_format
 from pyarrow import flight
@@ -20,15 +19,15 @@ from arize.version import __version__
 if TYPE_CHECKING:
     import types
-    from collections.abc import Iterable
+    from collections.abc import Iterator
 
     import pandas as pd
     import pyarrow as pa
 
 
-    BytesPair = tuple[bytes, bytes]
-    Headers = list[BytesPair]
-    FlightPostArrowFileResponse = (
+    BytesPair: TypeAlias = tuple[bytes, bytes]
+    Headers: TypeAlias = list[BytesPair]
+    FlightPostArrowFileResponse: TypeAlias = (
         flight_pb2.WriteSpanEvaluationResponse
         | flight_pb2.WriteSpanAnnotationResponse
         | flight_pb2.WriteSpanAttributesMetadataResponse
@@ -90,7 +89,11 @@ class ArizeFlightClient:
     # ---------- Connection management ----------
 
     def _ensure_client(self) -> flight.FlightClient:
-        """Lazily initialize and return the underlying Flight client connection."""
+        """Lazily initialize and return the underlying Flight client connection.
+
+        Returns:
+            flight.FlightClient: The initialized Apache Arrow Flight client.
+        """
         client = object.__getattribute__(self, "_client")
         if client is not None:
             return client
@@ -135,7 +138,11 @@ class ArizeFlightClient:
     # ---------- methods simple passthrough wrappers ----------
 
     def get_flight_info(self, *args: object, **kwargs: object) -> object:
-        """Get flight information. Passthrough to underlying Flight client with auth options."""
+        """Get flight information. Passthrough to underlying Flight client with auth options.
+
+        Returns:
+            object: FlightInfo object containing metadata about the requested data stream.
+        """
         client = self._ensure_client()
         kwargs.setdefault("options", self.call_options)
         return client.get_flight_info(*args, **kwargs)
@@ -146,6 +153,9 @@ class ArizeFlightClient:
         """Retrieve data stream via Flight DoGet.
 
         Passthrough to underlying Flight client with auth options.
+
+        Returns:
+            flight.FlightStreamReader: A stream reader for retrieving Arrow record batches.
         """
         client = self._ensure_client()
         kwargs.setdefault("options", self.call_options)
@@ -153,10 +163,15 @@ class ArizeFlightClient:
 
     def do_put(
         self, *args: object, **kwargs: object
-    ) -> [flight.FlightStreamWriter, flight.FlightMetadataReader]:
+    ) -> tuple[flight.FlightStreamWriter, flight.FlightMetadataReader]:
         """Upload data stream via Flight DoPut.
 
         Passthrough to underlying Flight client with auth options.
+
+        Returns:
+            tuple[flight.FlightStreamWriter, flight.FlightMetadataReader]: A tuple containing
+                a stream writer for uploading Arrow record batches and a metadata reader for
+                receiving server responses.
         """
         client = self._ensure_client()
         kwargs.setdefault("options", self.call_options)
@@ -164,10 +179,13 @@ class ArizeFlightClient:
 
     def do_action(
         self, *args: object, **kwargs: object
-    ) -> Iterable[flight.Result]:
+    ) -> Iterator[flight.Result]:
         """Execute an action via Flight DoAction.
 
         Passthrough to underlying Flight client with auth options.
+
+        Returns:
+            Iterable[flight.Result]: An iterable of Result objects from the action execution.
         """
         client = self._ensure_client()
         kwargs.setdefault("options", self.call_options)
@@ -210,6 +228,10 @@ class ArizeFlightClient:
            FlightRequestType.ANNOTATION,
            FlightRequestType.METADATA,
        ):
+            if project_name is None:
+                raise ValueError(
+                    f"project_name is required for {request_type.name} request type"
+                )
            proto_schema = get_pb_schema_tracing(project_name=project_name)
            base64_schema = base64.b64encode(proto_schema.SerializeToString())
            pa_schema = append_to_pyarrow_metadata(
@@ -260,8 +282,6 @@ class ArizeFlightClient:
            case FlightRequestType.LOG_EXPERIMENT_DATA:
                res = flight_pb2.PostExperimentDataResponse()
                res.ParseFromString(flight_response.to_pybytes())
-            case _:
-                raise ValueError(f"Unsupported request_type: {request_type}")
        return res
 
    # ---------- dataset methods ----------
@@ -271,7 +291,7 @@ class ArizeFlightClient:
        space_id: str,
        dataset_name: str,
        pa_table: pa.Table,
-    ) -> str:
+    ) -> str | None:
        """Create a new dataset via Flight DoPut.
 
        Args:
@@ -333,8 +353,8 @@ class ArizeFlightClient:
            latest version.
 
        Returns:
-            A pandas DataFrame containing the dataset examples with JSON string columns
-            converted to dict objects.
+            :class:`pandas.DataFrame`: A pandas DataFrame containing the dataset examples
+                with JSON string columns converted to dict objects.
 
        Raises:
            RuntimeError: If the Flight request fails.
@@ -374,8 +394,8 @@ class ArizeFlightClient:
            experiment_id: Experiment ID to retrieve runs from.
 
        Returns:
-            A pandas DataFrame containing the experiment runs with JSON string columns
-            converted to dict objects.
+            :class:`pandas.DataFrame`: A pandas DataFrame containing the experiment runs
+                with JSON string columns converted to dict objects.
 
        Raises:
            RuntimeError: If the Flight request fails.
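
The explicit `TypeAlias` annotations tell type checkers that these assignments define types rather than ordinary variables, which matters when the aliases live under `TYPE_CHECKING` and appear only in annotations. A self-contained sketch of the idiom (runs on Python 3.10+; `encode_headers` is illustrative, not part of the SDK):

```python
from __future__ import annotations

from typing import TYPE_CHECKING, TypeAlias

if TYPE_CHECKING:
    # Only seen by type checkers; at runtime these names never exist.
    BytesPair: TypeAlias = tuple[bytes, bytes]
    Headers: TypeAlias = list[BytesPair]


def encode_headers(pairs: list[tuple[str, str]]) -> Headers:
    # Encode str header pairs into the bytes form a Flight client expects.
    return [(k.encode(), v.encode()) for k, v in pairs]


print(encode_headers([("authorization", "Bearer <token>")]))
```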
arize/_generated/api_client/api/datasets_api.py CHANGED
@@ -940,7 +940,7 @@ class DatasetsApi:
        self,
        dataset_id: Annotated[StrictStr, Field(description="The unique identifier of the dataset")],
        dataset_version_id: Annotated[Optional[StrictStr], Field(description="The unique identifier of the dataset version")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=10000, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
@@ -1021,7 +1021,7 @@ class DatasetsApi:
        self,
        dataset_id: Annotated[StrictStr, Field(description="The unique identifier of the dataset")],
        dataset_version_id: Annotated[Optional[StrictStr], Field(description="The unique identifier of the dataset version")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=10000, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
@@ -1102,7 +1102,7 @@ class DatasetsApi:
        self,
        dataset_id: Annotated[StrictStr, Field(description="The unique identifier of the dataset")],
        dataset_version_id: Annotated[Optional[StrictStr], Field(description="The unique identifier of the dataset version")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=10000, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
@@ -1861,7 +1861,7 @@ class DatasetsApi:
    def datasets_list(
        self,
        space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
        cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
        _request_timeout: Union[
            None,
@@ -1941,7 +1941,7 @@ class DatasetsApi:
    def datasets_list_with_http_info(
        self,
        space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
        cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
        _request_timeout: Union[
            None,
@@ -2021,7 +2021,7 @@ class DatasetsApi:
    def datasets_list_without_preload_content(
        self,
        space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
        cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
        _request_timeout: Union[
            None,
arize/_generated/api_client/api/experiments_api.py CHANGED
@@ -898,7 +898,7 @@ class ExperimentsApi:
    def experiments_list(
        self,
        dataset_id: Annotated[Optional[StrictStr], Field(description="Filter experiments to a particular dataset ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
        cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
        _request_timeout: Union[
            None,
@@ -978,7 +978,7 @@ class ExperimentsApi:
    def experiments_list_with_http_info(
        self,
        dataset_id: Annotated[Optional[StrictStr], Field(description="Filter experiments to a particular dataset ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
        cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
        _request_timeout: Union[
            None,
@@ -1058,7 +1058,7 @@ class ExperimentsApi:
    def experiments_list_without_preload_content(
        self,
        dataset_id: Annotated[Optional[StrictStr], Field(description="Filter experiments to a particular dataset ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
        cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
        _request_timeout: Union[
            None,
@@ -1211,7 +1211,7 @@ class ExperimentsApi:
    def experiments_runs_list(
        self,
        experiment_id: Annotated[StrictStr, Field(description="The unique identifier of the experiment")],
-        limit: Annotated[Optional[Annotated[int, Field(le=10000, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
@@ -1288,7 +1288,7 @@ class ExperimentsApi:
    def experiments_runs_list_with_http_info(
        self,
        experiment_id: Annotated[StrictStr, Field(description="The unique identifier of the experiment")],
-        limit: Annotated[Optional[Annotated[int, Field(le=10000, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
@@ -1365,7 +1365,7 @@ class ExperimentsApi:
    def experiments_runs_list_without_preload_content(
        self,
        experiment_id: Annotated[StrictStr, Field(description="The unique identifier of the experiment")],
-        limit: Annotated[Optional[Annotated[int, Field(le=10000, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
        _request_timeout: Union[
            None,
            Annotated[StrictFloat, Field(gt=0)],
arize/_generated/api_client/api/projects_api.py CHANGED
@@ -891,7 +891,7 @@ class ProjectsApi:
    def projects_list(
        self,
        space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
        cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
        _request_timeout: Union[
            None,
@@ -970,7 +970,7 @@ class ProjectsApi:
    def projects_list_with_http_info(
        self,
        space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
        cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
        _request_timeout: Union[
            None,
@@ -1049,7 +1049,7 @@ class ProjectsApi:
    def projects_list_without_preload_content(
        self,
        space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
        cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
        _request_timeout: Union[
            None,
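
Across all three generated APIs the `limit` caps were lowered (top-level list endpoints from 500 to 100; dataset-examples and experiment-runs endpoints from 10000 to 500), so larger result sets must be paged with the opaque cursor. A hedged sketch, assuming the generated method and field names shown above (`datasets_list`, a `datasets` list field, `pagination.next_cursor`):

```python
def iter_all_datasets(api, space_id):
    """Yield every dataset by following the pagination cursor."""
    cursor = None
    while True:
        page = api.datasets_list(space_id=space_id, limit=100, cursor=cursor)
        yield from page.datasets
        # The cursor is an opaque token: pass it back verbatim, never parse it.
        cursor = page.pagination.next_cursor
        if not cursor:
            break
```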
arize/_lazy.py CHANGED
@@ -1,6 +1,7 @@
 # src/arize/_lazy.py
 from __future__ import annotations
 
+import inspect
 import logging
 import sys
 import threading
@@ -8,6 +9,8 @@ from importlib import import_module
 from typing import TYPE_CHECKING, ClassVar
 
 if TYPE_CHECKING:
+    import types
+
     from arize.config import SDKConfiguration
 
 logger = logging.getLogger(__name__)
@@ -22,6 +25,11 @@ class LazySubclientsMixin:
        self._lazy_cache: dict[str, object] = {}
        self._lazy_lock = threading.Lock()
 
+        # Add generated client factory
+        from arize._client_factory import GeneratedClientFactory
+
+        self._gen_client_factory = GeneratedClientFactory(sdk_config)
+
    def __getattr__(self, name: str) -> object:
        subs = self._SUBCLIENTS
        if name not in subs:
@@ -41,12 +49,18 @@ class LazySubclientsMixin:
        module = _dynamic_import(module_path)
        klass = getattr(module, class_name)
 
-        # Pass sdk_config if the child accepts it; otherwise construct bare.
-        try:
-            instance = klass(sdk_config=self.sdk_config)
-        except TypeError:
-            instance = klass()
-
+        # Determine which parameters this subclient needs
+        # and build kwargs accordingly
+        sig = inspect.signature(klass.__init__)
+        kwargs: dict[str, object] = {}
+        if "sdk_config" in sig.parameters:
+            kwargs["sdk_config"] = self.sdk_config
+        if "generated_client" in sig.parameters:
+            kwargs["generated_client"] = (
+                self._gen_client_factory.get_client()
+            )
+
+        instance = klass(**kwargs)
        self._lazy_cache[name] = instance
        return instance
 
@@ -58,7 +72,14 @@ class OptionalDependencyError(ImportError): ...
 
 
 def _can_import(module_name: str) -> bool:
-    """Check if a module can be imported without raising an exception."""
+    """Check if a module can be imported without raising an exception.
+
+    Args:
+        module_name: The fully qualified module name to check (e.g., 'numpy', 'sklearn.preprocessing').
+
+    Returns:
+        bool: True if the module can be imported successfully, False otherwise.
+    """
    try:
        import_module(module_name)
    except Exception:
@@ -72,6 +93,18 @@ def require(
    required: tuple[str, ...],
    pkgname: str = "arize",
 ) -> None:
+    """Ensure required optional dependencies are installed, raising an error if missing.
+
+    Args:
+        extra_key: The extras group key for pip install (e.g., 'mimic', 'embeddings').
+            Used in the error message to guide users.
+        required: Tuple of required module names to check for availability.
+        pkgname: The package name for installation instructions. Defaults to 'arize'.
+
+    Raises:
+        OptionalDependencyError: If any of the required modules cannot be imported.
+            The error message includes pip install instructions with the extras group.
+    """
    if not required:
        return
    missing = [p for p in required if not _can_import(p)]
@@ -82,8 +115,24 @@ def require(
        )
 
 
-def _dynamic_import(modname: str, retries: int = 2) -> object:
-    def _attempt_import(remaining_attempts: int) -> object:
+def _dynamic_import(modname: str, retries: int = 2) -> types.ModuleType:
+    """Dynamically import a module with retry logic and sys.modules cleanup on failure.
+
+    Args:
+        modname: The fully qualified module name to import.
+        retries: Number of import attempts to make. Must be > 0. Defaults to 2.
+
+    Returns:
+        types.ModuleType: The successfully imported module.
+
+    Raises:
+        ValueError: If retries is <= 0.
+        ModuleNotFoundError: If the module cannot be found after all retry attempts.
+        ImportError: If the module import fails after all retry attempts.
+        KeyError: If a key error occurs during import after all retry attempts.
+    """
+
+    def _attempt_import(remaining_attempts: int) -> types.ModuleType:
        try:
            return import_module(modname)
        except (ModuleNotFoundError, ImportError, KeyError):
@@ -92,4 +141,6 @@ def _dynamic_import(modname: str, retries: int = 2) -> object:
            raise
        return _attempt_import(remaining_attempts - 1)
 
-    return _attempt_import(retries) if retries > 0 else None
+    if retries <= 0:
+        raise ValueError(f"retries must be > 0, got {retries}")
+    return _attempt_import(retries)
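
Replacing the old try/except construction with `inspect.signature` means a `TypeError` raised inside a subclient's `__init__` is no longer silently misread as "does not accept sdk_config". A self-contained sketch of the dispatch pattern (the classes below are illustrative, not SDK types):

```python
import inspect


class WithConfig:
    def __init__(self, sdk_config):
        self.sdk_config = sdk_config


class Bare:
    def __init__(self):
        pass


def construct(klass, sdk_config):
    # Build kwargs from the parameters the class actually declares,
    # instead of attempting a call and catching TypeError.
    sig = inspect.signature(klass.__init__)
    kwargs = {}
    if "sdk_config" in sig.parameters:
        kwargs["sdk_config"] = sdk_config
    return klass(**kwargs)


print(type(construct(WithConfig, {"api_key": "..."})).__name__)  # WithConfig
print(type(construct(Bare, {"api_key": "..."})).__name__)        # Bare
```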