arize-8.0.0a16-py3-none-any.whl → arize-8.0.0a17-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
arize/__init__.py CHANGED
@@ -87,3 +87,4 @@ def make_to_df(field_name: str):
 models.DatasetsList200Response.to_df = make_to_df("datasets") # type: ignore[attr-defined]
 models.DatasetsListExamples200Response.to_df = make_to_df("examples") # type: ignore[attr-defined]
 models.ExperimentsList200Response.to_df = make_to_df("experiments") # type: ignore[attr-defined]
+models.ExperimentsRunsList200Response.to_df = make_to_df("experiment_runs") # type: ignore[attr-defined]
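The new accessor mirrors the three existing `to_df` hooks. A minimal usage sketch (assuming an already-configured `ArizeClient` instance named `client`):

```python
# Hypothetical usage of the accessor added above; `client` is assumed
# to be a configured ArizeClient.
resp = client.experiments.list_runs(experiment_id="<your-experiment-id>")

# to_df() flattens the response's "experiment_runs" field into a DataFrame.
runs_df = resp.to_df()
```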
arize/_flight/client.py CHANGED
@@ -25,6 +25,7 @@ from arize.utils.proto import get_pb_schema_tracing
 from arize.version import __version__
 
 if TYPE_CHECKING:
+    import pandas as pd
     import pyarrow as pa
 
 
@@ -260,7 +261,7 @@ class ArizeFlightClient:
         space_id: str,
         dataset_id: str,
         dataset_version_id: str | None = None,
-    ):
+    ) -> pd.DataFrame:
         # TODO(Kiko): Space ID should not be needed,
         # should work on server tech debt to remove this
         doget_request = flight_ing_pb2.DoGetRequest(
@@ -283,6 +284,36 @@ class ArizeFlightClient:
            logger.exception(f"Failed to get dataset id={dataset_id}")
            raise RuntimeError(f"Failed to get dataset id={dataset_id}") from e
 
+    # ---------- experiment methods ----------
+
+    def get_experiment_runs(
+        self,
+        space_id: str,
+        experiment_id: str,
+    ) -> pd.DataFrame:
+        # TODO(Kiko): Space ID should not be needed,
+        # should work on server tech debt to remove this
+        doget_request = flight_ing_pb2.DoGetRequest(
+            get_experiment=flight_ing_pb2.GetExperimentRequest(
+                space_id=space_id,
+                experiment_id=experiment_id,
+            )
+        )
+        descriptor = flight.Ticket(
+            json_format.MessageToJson(doget_request).encode("utf-8")
+        )
+        try:
+            reader = self.do_get(descriptor, options=self.call_options)
+            # read all data into pandas dataframe
+            df = reader.read_all().to_pandas()
+            df = convert_json_str_to_dict(df)
+            return df
+        except Exception as e:
+            logger.exception(f"Failed to get experiment id={experiment_id}")
+            raise RuntimeError(
+                f"Failed to get experiment id={experiment_id}"
+            ) from e
+
     def init_experiment(
         self,
         space_id: str,
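`get_experiment_runs` follows the same DoGet pattern as `get_dataset_examples`: wrap a JSON-serialized `DoGetRequest` in a Flight `Ticket`, stream the result, and convert it to pandas. A sketch of a direct call; the constructor arguments mirror how the SDK's own clients build `ArizeFlightClient` elsewhere in this diff, and the literal host/port/scheme values are placeholders, not documented defaults:

```python
from arize._flight.client import ArizeFlightClient

# Placeholder connection settings; the SDK normally sources these from
# SDKConfiguration (flight_server_host, flight_server_port, and so on).
with ArizeFlightClient(
    api_key="<api-key>",
    host="<flight-host>",   # placeholder
    port=443,               # placeholder
    scheme="grpc+tls",      # placeholder
    request_verify=True,
    max_chunksize=10_000,
) as flight_client:
    runs_df = flight_client.get_experiment_runs(
        space_id="<space-id>",
        experiment_id="<experiment-id>",
    )
```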
arize/client.py CHANGED
@@ -12,6 +12,14 @@ if TYPE_CHECKING:
     from arize.spans.client import SpansClient
 
 
+# TODO(Kiko): models need to follow resource first pattern
+# - models.DatasetsList200Response
+# - models.DatasetsListExamples200Response
+# - models.ExperimentsList200Response
+# - models.ExperimentsRunsList200Response
+# TODO(Kiko): Root client should have option to clear caches
+# TODO(Kiko): Document caching behavior
+# TODO(Kiko): Force keyword arguments
 # TODO(Kiko): Protobuf versioning is too old
 # TODO(Kiko): Make sure the client has same options as SDKConfiguration
 # TODO(Kiko): It does not make any sense to require space ID in run_experiment, dataset ID should suffice
arize/config.py CHANGED
@@ -7,6 +7,8 @@ from typing import Any, Dict
 
 from arize.constants.config import (
     DEFAULT_API_HOST,
+    DEFAULT_ARIZE_DIRECTORY,
+    DEFAULT_ENABLE_CACHING,
     DEFAULT_FLIGHT_HOST,
     DEFAULT_FLIGHT_PORT,
     DEFAULT_FLIGHT_TRANSPORT_SCHEME,
@@ -19,6 +21,8 @@ from arize.constants.config import (
     DEFAULT_STREAM_MAX_WORKERS,
     ENV_API_HOST,
     ENV_API_KEY,
+    ENV_ARIZE_DIRECTORY,
+    ENV_ENABLE_CACHING,
     ENV_FLIGHT_HOST,
     ENV_FLIGHT_PORT,
     ENV_FLIGHT_TRANSPORT_SCHEME,
@@ -116,6 +120,14 @@ def _max_http_payload_size_mb_factory() -> float:
     )
 
 
+def _arize_dir_factory() -> str:
+    return os.getenv(ENV_ARIZE_DIRECTORY, DEFAULT_ARIZE_DIRECTORY)
+
+
+def _enable_cache_factory() -> bool:
+    return _parse_bool(os.getenv(ENV_ENABLE_CACHING, DEFAULT_ENABLE_CACHING))
+
+
 def _mask_secret(secret: str, N: int = 4) -> str:
     """Show first N chars then '***'; empty string if empty."""
     return f"{secret[:N]}***"
@@ -147,6 +159,8 @@ class SDKConfiguration:
     max_http_payload_size_mb: float = field(
         default_factory=_max_http_payload_size_mb_factory
     )
+    arize_direcory: str = field(default_factory=_arize_dir_factory)
+    enable_caching: bool = field(default_factory=_enable_cache_factory)
 
     # Private, excluded from comparisons & repr
     _headers: Dict[str, str] = field(init=False, repr=False, compare=False)
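Both new fields resolve through default factories at construction time, so they can be driven purely by environment variables (names defined in `arize/constants/config.py` below). A sketch, assuming the rest of the configuration (e.g. the API key) is likewise available from the environment; note the shipped field name is spelled `arize_direcory`:

```python
import os

# Environment variables read by the new default factories.
os.environ["ARIZE_DIRECTORY"] = "/tmp/arize-cache"
os.environ["ARIZE_ENABLE_CACHING"] = "false"

from arize.config import SDKConfiguration

# Assumes any other required settings come from their own env vars/defaults.
config = SDKConfiguration()
print(config.arize_direcory)   # expected: "/tmp/arize-cache"
print(config.enable_caching)   # expected: False
```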
arize/constants/config.py CHANGED
@@ -11,6 +11,8 @@ ENV_PYARROW_MAX_CHUNKSIZE = "ARIZE_MAX_CHUNKSIZE"
 ENV_REQUEST_VERIFY = "ARIZE_REQUEST_VERIFY"
 ENV_INSECURE = "ARIZE_INSECURE"
 ENV_MAX_HTTP_PAYLOAD_SIZE_MB = "ARIZE_MAX_HTTP_PAYLOAD_SIZE_MB"
+ENV_ARIZE_DIRECTORY = "ARIZE_DIRECTORY"
+ENV_ENABLE_CACHING = "ARIZE_ENABLE_CACHING"
 
 # Server configuration default values
 DEFAULT_API_HOST = "api.arize.com" # NOTE: Must not prefix with https://
@@ -22,6 +24,8 @@ DEFAULT_PYARROW_MAX_CHUNKSIZE = 10_000
 DEFAULT_REQUEST_VERIFY = True
 DEFAULT_INSECURE = False
 DEFAULT_MAX_HTTP_PAYLOAD_SIZE_MB = 100
+DEFAULT_ARIZE_DIRECTORY = "~/.arize"
+DEFAULT_ENABLE_CACHING = True
 
 # ML Streaming configuration
 ENV_STREAM_MAX_WORKERS = "ARIZE_STREAM_MAX_WORKERS"
arize/datasets/client.py CHANGED
@@ -13,6 +13,7 @@ from arize._generated.api_client import models
 from arize.config import SDKConfiguration
 from arize.datasets.validation import validate_dataset_df
 from arize.exceptions.base import INVALID_ARROW_CONVERSION_MSG
+from arize.utils.cache import cache_resource, load_cached_resource
 from arize.utils.openinference_conversion import (
     convert_boolean_columns_to_str,
     convert_datetime_columns_to_int,
@@ -22,9 +23,6 @@ from arize.utils.size import get_payload_size_mb
 
 logger = logging.getLogger(__name__)
 
-# TODO(Kiko): Decide based on size of payload instead
-REST_LIMIT_DATASET_EXAMPLES = 0
-
 
 class DatasetsClient:
     def __init__(self, sdk_config: SDKConfiguration):
@@ -42,57 +40,8 @@ class DatasetsClient:
         self.delete = self._api.datasets_delete
 
         # Custom methods
-        self.list_examples = self._list_examples
         self.create = self._create_dataset
-
-    def _list_examples(
-        self,
-        dataset_id: str,
-        dataset_version_id: str = "",
-        limit: int = 100,
-        all: bool = False,
-    ):
-        if not all:
-            return self._api.datasets_list_examples(
-                dataset_id=dataset_id,
-                dataset_version_id=dataset_version_id,
-                limit=limit,
-            )
-
-        # TODO(Kiko): Space ID should not be needed,
-        # should work on server tech debt to remove this
-        dataset = self.get(dataset_id=dataset_id)
-        space_id = dataset.space_id
-
-        with ArizeFlightClient(
-            api_key=self._sdk_config.api_key,
-            host=self._sdk_config.flight_server_host,
-            port=self._sdk_config.flight_server_port,
-            scheme=self._sdk_config.flight_scheme,
-            request_verify=self._sdk_config.request_verify,
-            max_chunksize=self._sdk_config.pyarrow_max_chunksize,
-        ) as flight_client:
-            try:
-                response = flight_client.get_dataset_examples(
-                    space_id=space_id,
-                    dataset_id=dataset_id,
-                    dataset_version_id=dataset_version_id,
-                )
-            except Exception as e:
-                msg = f"Error during request: {str(e)}"
-                logger.error(msg)
-                raise RuntimeError(msg) from e
-        if response is None:
-            # This should not happen with proper Flight client implementation,
-            # but we handle it defensively
-            msg = "No response received from flight server during request"
-            logger.error(msg)
-            raise RuntimeError(msg)
-        # The response from flightserver is the dataset ID. To return the dataset
-        # object we make a GET query
-        return models.DatasetsListExamples200Response(
-            examples=response.to_dict(orient="records")
-        )
+        self.list_examples = self._list_examples
 
     def _create_dataset(
         self,
@@ -203,23 +152,95 @@
         dataset = self.get(dataset_id=response)
         return dataset
 
+    def _list_examples(
+        self,
+        dataset_id: str,
+        dataset_version_id: str = "",
+        limit: int = 100,
+        all: bool = False,
+    ):
+        if not all:
+            return self._api.datasets_list_examples(
+                dataset_id=dataset_id,
+                dataset_version_id=dataset_version_id,
+                limit=limit,
+            )
+
+        dataset = self.get(dataset_id=dataset_id)
+        dataset_updated_at = getattr(dataset, "updated_at", None)
+        # TODO(Kiko): Space ID should not be needed,
+        # should work on server tech debt to remove this
+        space_id = dataset.space_id
+
+        dataset_df = None
+        # try to load dataset from cache
+        if self._sdk_config.enable_caching:
+            dataset_df = load_cached_resource(
+                cache_dir=self._sdk_config.arize_direcory,
+                resource="dataset",
+                resource_id=dataset_id,
+                resource_updated_at=dataset_updated_at,
+            )
+        if dataset_df is not None:
+            return models.DatasetsListExamples200Response(
+                examples=dataset_df.to_dict(orient="records")
+            )
+
+        with ArizeFlightClient(
+            api_key=self._sdk_config.api_key,
+            host=self._sdk_config.flight_server_host,
+            port=self._sdk_config.flight_server_port,
+            scheme=self._sdk_config.flight_scheme,
+            request_verify=self._sdk_config.request_verify,
+            max_chunksize=self._sdk_config.pyarrow_max_chunksize,
+        ) as flight_client:
+            try:
+                dataset_df = flight_client.get_dataset_examples(
+                    space_id=space_id,
+                    dataset_id=dataset_id,
+                    dataset_version_id=dataset_version_id,
+                )
+            except Exception as e:
+                msg = f"Error during request: {str(e)}"
+                logger.error(msg)
+                raise RuntimeError(msg) from e
+        if dataset_df is None:
+            # This should not happen with proper Flight client implementation,
+            # but we handle it defensively
+            msg = "No response received from flight server during request"
+            logger.error(msg)
+            raise RuntimeError(msg)
+
+        # cache dataset for future use
+        cache_resource(
+            cache_dir=self._sdk_config.arize_direcory,
+            resource="dataset",
+            resource_id=dataset_id,
+            resource_updated_at=dataset_updated_at,
+            resource_data=dataset_df,
+        )
+
+        return models.DatasetsListExamples200Response(
+            examples=dataset_df.to_dict(orient="records")
+        )
+
 
 def _set_default_columns_for_dataset(df: pd.DataFrame) -> pd.DataFrame:
     current_time = int(time.time() * 1000)
     if "created_at" in df.columns:
-        if df["created_at"].isnull().values.any():
+        if df["created_at"].isnull().values.any(): # type: ignore
             df["created_at"].fillna(current_time, inplace=True)
     else:
         df["created_at"] = current_time
 
     if "updated_at" in df.columns:
-        if df["updated_at"].isnull().values.any():
+        if df["updated_at"].isnull().values.any(): # type: ignore
             df["updated_at"].fillna(current_time, inplace=True)
     else:
         df["updated_at"] = current_time
 
     if "id" in df.columns:
-        if df["id"].isnull().values.any():
+        if df["id"].isnull().values.any(): # type: ignore
             df["id"] = df["id"].apply(
                 lambda x: str(uuid.uuid4()) if pd.isnull(x) else x
             )
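The reordered `_list_examples` now checks the parquet cache before opening a Flight connection, and writes the downloaded dataframe back keyed on the dataset's `updated_at`. A usage sketch of the cached path (placeholder IDs, `client` assumed configured):

```python
# First full export goes over Arrow Flight and is cached on disk.
resp = client.datasets.list_examples(
    dataset_id="<your-dataset-id>",
    all=True,
)
examples_df = resp.to_df()

# With enable_caching on and an unchanged updated_at, a repeat call
# is expected to be served from the local parquet file instead.
resp_again = client.datasets.list_examples(dataset_id="<your-dataset-id>", all=True)
```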
arize/experiments/client.py CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import hashlib
 import logging
 from typing import TYPE_CHECKING, Any, Dict, List, Tuple
 
@@ -19,6 +20,7 @@ from opentelemetry.trace import Tracer
 
 from arize._flight.client import ArizeFlightClient
 from arize._flight.types import FlightRequestType
+from arize._generated.api_client import models
 from arize.config import SDKConfiguration
 from arize.exceptions.base import INVALID_ARROW_CONVERSION_MSG
 from arize.experiments.evaluators.base import Evaluators
@@ -31,6 +33,7 @@ from arize.experiments.types import (
     ExperimentTask,
     ExperimentTaskResultFieldNames,
 )
+from arize.utils.cache import cache_resource, load_cached_resource
 from arize.utils.openinference_conversion import (
     convert_boolean_columns_to_str,
     convert_default_columns_to_json_str,
@@ -57,27 +60,28 @@ class ExperimentsClient:
         self._datasets_api = gen.DatasetsApi(
             self._sdk_config.get_generated_client()
         )
+
         self.list = self._api.experiments_list
         self.get = self._api.experiments_get
         self.delete = self._api.experiments_delete
-        self.list_runs = self._api.experiments_runs_list # REST ?
 
         # Custom methods
-        self.create = self._create_experiment
         self.run = self._run_experiment
+        self.create = self._create_experiment
+        self.list_runs = self._api.experiments_runs_list
 
     def _run_experiment(
         self,
         name: str,
         dataset_id: str,
         task: ExperimentTask,
-        dataset_df: pd.DataFrame | None = None,
         evaluators: Evaluators | None = None,
         dry_run: bool = False,
+        dry_run_count: int = 10,
         concurrency: int = 3,
         set_global_tracer_provider: bool = False,
         exit_on_error: bool = False,
-    ) -> Tuple[str, pd.DataFrame] | None:
+    ) -> Tuple[Experiment | None, pd.DataFrame] | None:
         """
         Run an experiment on a dataset and upload the results.
 
@@ -87,9 +91,6 @@ class ExperimentsClient:
         Args:
             experiment_name (str): The name of the experiment.
             task (ExperimentTask): The task to be performed in the experiment.
-            dataset_df (Optional[pd.DataFrame], optional): The dataset as a pandas DataFrame.
-                If not provided, the dataset will be downloaded using dataset_id or dataset_name.
-                Defaults to None.
             dataset_id (Optional[str], optional): The ID of the dataset to use.
                 Required if dataset_df and dataset_name are not provided. Defaults to None.
             dataset_name (Optional[str], optional): The name of the dataset to use.
@@ -116,6 +117,7 @@ class ExperimentsClient:
         # should work on server tech debt to remove this
         dataset = self._datasets_api.datasets_get(dataset_id=dataset_id)
         space_id = dataset.space_id
+        dataset_updated_at = getattr(dataset, "updated_at", None)
 
         with ArizeFlightClient(
             api_key=self._sdk_config.api_key,
@@ -152,10 +154,20 @@ class ExperimentsClient:
                 raise RuntimeError(msg)
             experiment_id, trace_model_name = response
 
-            # download dataset if not provided
+            dataset_df = None
+            # try to load dataset from cache
+            if self._sdk_config.enable_caching:
+                dataset_df = load_cached_resource(
+                    cache_dir=self._sdk_config.arize_direcory,
+                    resource="dataset",
+                    resource_id=dataset_id,
+                    resource_updated_at=dataset_updated_at,
+                )
+
             if dataset_df is None:
+                # download dataset
                 try:
-                    response = flight_client.get_dataset_examples(
+                    dataset_df = flight_client.get_dataset_examples(
                         space_id=space_id,
                         dataset_id=dataset_id,
                     )
@@ -163,7 +175,7 @@ class ExperimentsClient:
                     msg = f"Error during request: {str(e)}"
                     logger.error(msg)
                     raise RuntimeError(msg) from e
-                if response is None:
+                if dataset_df is None:
                     # This should not happen with proper Flight client implementation,
                     # but we handle it defensively
                     msg = (
@@ -172,13 +184,21 @@ class ExperimentsClient:
                     logger.error(msg)
                     raise RuntimeError(msg)
 
-            if dataset_df is None or dataset_df.empty:
+            if dataset_df.empty:
                 raise ValueError(f"Dataset {dataset_id} is empty")
 
-            input_df = dataset_df.copy()
+            # cache dataset for future use
+            cache_resource(
+                cache_dir=self._sdk_config.arize_direcory,
+                resource="dataset",
+                resource_id=dataset_id,
+                resource_updated_at=dataset_updated_at,
+                resource_data=dataset_df,
+            )
+
             if dry_run:
-                # only dry_run experiment on a subset (first 10 rows) of the dataset
-                input_df = input_df.head(10)
+                # only dry_run experiment on a subset (first N rows) of the dataset
+                dataset_df = dataset_df.head(dry_run_count)
 
             # trace model and resource for the experiment
             tracer, resource = _get_tracer_resource(
@@ -193,7 +213,7 @@ class ExperimentsClient:
             output_df = run_experiment(
                 experiment_name=name,
                 experiment_id=experiment_id,
-                dataset=input_df,
+                dataset=dataset_df,
                 task=task,
                 tracer=tracer,
                 resource=resource,
@@ -204,7 +224,7 @@ class ExperimentsClient:
             output_df = convert_default_columns_to_json_str(output_df)
             output_df = convert_boolean_columns_to_str(output_df)
             if dry_run:
-                return "", output_df
+                return None, output_df
 
             # Convert to Arrow table
             try:
@@ -241,7 +261,10 @@ class ExperimentsClient:
                 logger.error(msg)
                 raise RuntimeError(msg)
 
-            return str(post_resp.experiment_id), output_df # type: ignore
+            experiment = self.get(
+                experiment_id=str(post_resp.experiment_id) # type: ignore
+            )
+            return experiment, output_df
 
     def _create_experiment(
         self,
@@ -352,6 +375,78 @@ class ExperimentsClient:
             experiment_df=experiment_df,
         )
 
+    def _list_runs(
+        self,
+        experiment_id: str,
+        limit: int = 100,
+        all: bool = False,
+    ):
+        if not all:
+            return self._api.experiments_runs_list(
+                experiment_id=experiment_id,
+                limit=limit,
+            )
+
+        experiment = self.get(experiment_id=experiment_id)
+        experiment_updated_at = getattr(experiment, "updated_at", None)
+        # TODO(Kiko): Space ID should not be needed,
+        # should work on server tech debt to remove this
+        dataset = self._datasets_api.datasets_get(
+            dataset_id=experiment.dataset_id
+        )
+        space_id = dataset.space_id
+
+        experiment_df = None
+        # try to load dataset from cache
+        if self._sdk_config.enable_caching:
+            experiment_df = load_cached_resource(
+                cache_dir=self._sdk_config.arize_direcory,
+                resource="experiment",
+                resource_id=experiment_id,
+                resource_updated_at=experiment_updated_at,
+            )
+        if experiment_df is not None:
+            return models.ExperimentsRunsList200Response(
+                experimentRuns=experiment_df.to_dict(orient="records")
+            )
+
+        with ArizeFlightClient(
+            api_key=self._sdk_config.api_key,
+            host=self._sdk_config.flight_server_host,
+            port=self._sdk_config.flight_server_port,
+            scheme=self._sdk_config.flight_scheme,
+            request_verify=self._sdk_config.request_verify,
+            max_chunksize=self._sdk_config.pyarrow_max_chunksize,
+        ) as flight_client:
+            try:
+                experiment_df = flight_client.get_experiment_runs(
+                    space_id=space_id,
+                    experiment_id=experiment_id,
+                )
+            except Exception as e:
+                msg = f"Error during request: {str(e)}"
+                logger.error(msg)
+                raise RuntimeError(msg) from e
+        if experiment_df is None:
+            # This should not happen with proper Flight client implementation,
+            # but we handle it defensively
+            msg = "No response received from flight server during request"
+            logger.error(msg)
+            raise RuntimeError(msg)
+
+        # cache dataset for future use
+        cache_resource(
+            cache_dir=self._sdk_config.arize_direcory,
+            resource="dataset",
+            resource_id=experiment_id,
+            resource_updated_at=experiment_updated_at,
+            resource_data=experiment_df,
+        )
+
+        return models.ExperimentsRunsList200Response(
+            experimentRuns=experiment_df.to_dict(orient="records")
+        )
+
     def _create_experiment_via_flight(
         self,
         name: str,
@@ -463,3 +558,9 @@ def _get_tracer_resource(
     trace.set_tracer_provider(tracer_provider)
 
     return tracer_provider.get_tracer(__name__), resource
+
+
+def _dataset_cache_key(dataset_id: str, dataset_updated_at: str | None) -> str:
+    # include updated_at if present to produce a new key when dataset changes
+    key_src = f"{dataset_id}:{dataset_updated_at or ''}"
+    return hashlib.sha256(key_src.encode("utf-8")).hexdigest()
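The signature change means `run` no longer accepts a caller-supplied `dataset_df` and no longer returns an experiment ID string: it returns the fetched `Experiment` object, or `None` for dry runs. A sketch of consuming the new return shape (the task object is a hypothetical placeholder defined elsewhere):

```python
# `my_task` is a placeholder ExperimentTask defined elsewhere.
experiment, output_df = client.experiments.run(
    name="<experiment-name>",
    dataset_id="<dataset-id>",
    task=my_task,
    dry_run=True,
    dry_run_count=5,   # new in this release: rows used for a dry run
)
assert experiment is None   # dry runs now return None instead of ""
```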
arize/utils/cache.py ADDED
@@ -0,0 +1,68 @@
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+
+import pandas as pd
+
+logger = logging.getLogger(__name__)
+
+
+def load_cached_resource(
+    cache_dir: str,
+    resource: str,
+    resource_id: str,
+    resource_updated_at: str | None,
+    format: str = "parquet",
+) -> pd.DataFrame | None:
+    key = _get_cache_key(resource, resource_id, resource_updated_at)
+    filepath = _get_abs_file_path(cache_dir, f"{key}.{format}", resource)
+    if not filepath.exists():
+        return None
+    try:
+        return pd.read_parquet(filepath)
+    except Exception as e:
+        logger.warning(f"Failed to load cached resource from {filepath}: {e}")
+        return None
+
+
+def cache_resource(
+    cache_dir: str,
+    resource: str,
+    resource_id: str,
+    resource_updated_at: str | None,
+    resource_data: pd.DataFrame,
+    format: str = "parquet",
+) -> None:
+    key = _get_cache_key(resource, resource_id, resource_updated_at)
+    filepath = _get_abs_file_path(cache_dir, f"{key}.{format}", resource)
+    filepath.parent.mkdir(parents=True, exist_ok=True)
+    resource_data.to_parquet(filepath, index=False)
+    logger.debug(f"Cached resource to {filepath}")
+
+
+def _get_cache_key(
+    resource: str,
+    resource_id: str,
+    resource_updated_at: str | None,
+) -> str:
+    # include updated_at if present to produce a new key when dataset changes
+    key = f"{resource}_{resource_id}"
+    if resource_updated_at:
+        key += f"_{resource_updated_at}"
+    return key
+
+
+def _get_abs_file_path(
+    directory: str,
+    filename: str,
+    subdirectory: str | None = None,
+) -> Path:
+    """
+    Return an absolute path to a file located under `directory[/subdirectory]/filename`.
+    Expands '~' and resolves relative components.
+    """
+    base = Path(directory).expanduser()
+    if subdirectory:
+        base = base / subdirectory
+    return (base / filename).resolve()
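The helpers are private, but tracing through them shows the on-disk layout. For `resource="dataset"`, the key and path compose as follows (the home directory in the comment is illustrative):

```python
from arize.utils.cache import _get_cache_key, _get_abs_file_path

key = _get_cache_key("dataset", "abc123", "1718000000000")
# -> "dataset_abc123_1718000000000"

path = _get_abs_file_path("~/.arize", f"{key}.parquet", subdirectory="dataset")
# -> e.g. /home/<user>/.arize/dataset/dataset_abc123_1718000000000.parquet
```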
arize/version.py CHANGED
@@ -1 +1 @@
-__version__ = "8.0.0a16"
+__version__ = "8.0.0a17"
arize-8.0.0a16.dist-info/METADATA → arize-8.0.0a17.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: arize
-Version: 8.0.0a16
+Version: 8.0.0a17
 Summary: A helper library to interact with Arize AI APIs
 Project-URL: Homepage, https://arize.com
 Project-URL: Documentation, https://docs.arize.com/arize
@@ -99,11 +99,24 @@ Description-Content-Type: text/markdown
   - [Operations on Datasets](#operations-on-datasets)
     - [List Datasets](#list-datasets)
     - [Create a Dataset](#create-a-dataset)
-    - [Get Dataset by ID](#get-dataset-by-id)
+    - [Get Dataset](#get-dataset)
     - [Delete a Dataset](#delete-a-dataset)
-  - [Configure Logging](#configure-logging)
-    - [In Code](#in-code)
-    - [Via Environment Variables](#via-environment-variables)
+    - [List Dataset Examples](#list-dataset-examples)
+  - [Operations on Experiments](#operations-on-experiments)
+    - [List Experiments](#list-experiments)
+    - [Run an Experiment](#run-an-experiment)
+    - [Create an Experiment](#create-an-experiment)
+    - [Get an Experiment](#get-an-experiment)
+    - [Delete an Experiment](#delete-an-experiment)
+    - [List Experiment Runs](#list-experiment-runs)
+  - [SDK Configuration](#sdk-configuration)
+    - [Logging](#logging)
+      - [In Code](#in-code)
+      - [Via Environment Variables](#via-environment-variables)
+    - [Caching](#caching)
+      - [In Code](#in-code-1)
+      - [Via Environment Variables](#via-environment-variables-1)
+      - [Clean the Cache](#clean-the-cache)
 - [Community](#community)
 
 # Overview
@@ -398,9 +411,9 @@ dataset_list = resp.datasets
 # Get the response as a dictionary
 resp_dict = resp.to_dict()
 # Get the response in JSON format
-resp_dict = resp.to_json()
+resp_json = resp.to_json()
 # Get the response as a pandas dataframe
-resp_dict = resp.to_df()
+resp_df = resp.to_df()
 ```
 
 ### Create a Dataset
@@ -430,9 +443,10 @@ If the number of examples (rows in dataframe, items in list) is too large, the c
 
 ```python
 created_dataset = client.datasets.create(
-    space_i="<target-space-id>",
+    space_id="<target-space-id>",
     name="<your-dataset-name>", # Name must be unique within a space
     examples=..., # List of dictionaries or pandas dataframe
+    # force_http=... # Optionally pass force_http to create datasets via HTTP instead of gRPC, defaults to False
 )
 ```
@@ -445,8 +459,7 @@
 dataset_dict = create_dataset.to_dict()
 dataset_dict = create_dataset.to_json()
 ```
 
-
-### Get Dataset by ID
+### Get Dataset
 
 To get a dataset by its ID use `client.datasets.get()`, you can optionally also pass the version ID of a particular version of interest of the dataset. The returned type is `Dataset`.
 
@@ -467,9 +480,167 @@ client.datasets.delete(
 )
 ```
 
-# Configure Logging
+### List Dataset Examples
+
+You can list the examples of a given dataset using `client.datasets.list_examples()` and passing the dataset ID and, optionally, the dataset version ID. You can specify the number of examples desired using the `limit` parameter. If you want a large number of examples, consider using the `all=True` parameter, which makes the SDK export the data using Arrow Flight over gRPC for increased performance.
+
+```python
+resp = client.datasets.list_examples(
+    dataset_id="<your-dataset-id>",
+    dataset_version_id="<your-dataset-version-id>",  # Optional, defaults to latest version
+    limit=...,  # Number of desired examples. Defaults to 100
+    all=...,  # Whether or not to export all of the examples. Defaults to False
+)
+```
+
+The response is an object of type `DatasetsListExamples200Response`, and you can access the list of examples via its `examples` attribute. In addition, you can transform the response object to a dictionary, to JSON format, or to a pandas dataframe.
+
+```python
+# Get the list of examples from the response
+examples_list = resp.examples
+# Get the response as a dictionary
+resp_dict = resp.to_dict()
+# Get the response in JSON format
+resp_json = resp.to_json()
+# Get the response as a pandas dataframe
+resp_df = resp.to_df()
+```
+
+## Operations on Experiments
+
+### List Experiments
+
+You can list all experiments that the user has access to using `client.experiments.list()`. You can use the `limit` parameter to specify the maximum number of experiments desired in the response, and you can specify the `dataset_id` to target the list operation to a particular dataset.
+
+```python
+resp = client.experiments.list(
+    limit=...,  # Optional
+    dataset_id=...,  # Optional
+)
+```
+
+The response is an object of type `ExperimentsList200Response`, and you can access the list of experiments via its `experiments` attribute. In addition, you can transform the response object to a dictionary, to JSON format, or to a pandas dataframe.
+
+```python
+# Get the list of experiments from the response
+experiment_list = resp.experiments
+# Get the response as a dictionary
+resp_dict = resp.to_dict()
+# Get the response in JSON format
+resp_json = resp.to_json()
+# Get the response as a pandas dataframe
+resp_df = resp.to_df()
+```
+
+### Run an Experiment
+
+You can run an experiment on a dataset using `client.experiments.run()` by defining a task, optionally defining evaluators, and passing the ID of the dataset you want to use together with a name for the experiment. The function downloads the entire dataset from Arize (unless cached, see the caching section under "SDK Configuration"), executes the task to obtain an output, and performs evaluations if evaluators were passed. The experiment runs are also traced, and these traces are visible in Arize. The experiment is created and the data logged into Arize automatically; you can avoid logging to Arize by setting `dry_run=True`. The function returns the `Experiment` object (or `None` if `dry_run=True`) together with the dataframe containing the experiment data.
+
+```python
+experiment, experiment_df = client.experiments.run(
+    name="<name-your-experiment>",
+    dataset_id="<id-of-dataset-to-use>",
+    task=...,  # The task to be performed in the experiment
+    evaluators=...,  # Optional: The evaluators to use in the experiment
+    dry_run=...,  # If True, the experiment result will not be uploaded to Arize. Defaults to False
+    dry_run_count=...,  # Number of examples of the dataset to use in the dry run. Defaults to 10
+    concurrency=...,  # The number of concurrent tasks to run. Defaults to 3
+    set_global_tracer_provider=...,  # If True, sets the global tracer provider for the experiment. Defaults to False
+    exit_on_error=...,  # If True, the experiment will stop running on the first occurrence of an error. Defaults to False
+)
+```
+
+The `Experiment` object also comes with convenience methods similar to the `List***` response objects:
+
+```python
+# Get the experiment as a dictionary
+experiment_dict = experiment.to_dict()
+# Get the experiment in JSON format
+experiment_json = experiment.to_json()
+```
+
+### Create an Experiment
+
+It is possible that you have run the experiment yourself without the above function, and hence you already have experiment data that you want to send to Arize. In this case, use the `client.experiments.create()` method and pass the runs data (we currently don't support creating an empty experiment). For instance, these are 2 rows of runs, as a list of dictionaries; you can also pass a pandas dataframe for the runs data.
+
+> NOTE: If you don't have experiment data and want to run an experiment, see the `client.experiments.run()` section above.
+
+```python
+# TODO
+runs = [
+]
+```
+
+In addition, you must specify which columns are the `example_id` and the `result`; you can do so by using the `ExperimentTaskResultFieldNames`. Moreover, if you choose to pass evaluation data, you can indicate the evaluation columns using `EvaluationResultFieldNames`:
+
+```python
+# TODO
+```
+
+If the number of runs (rows in dataframe, items in list) is too large, the client SDK will try to send the data over Arrow Flight via gRPC for better performance. If you want to force the data transfer over HTTP you can use the `force_http` flag. The response is an `Experiment` object.
+
+```python
+created_experiment = client.experiments.create(
+    name="<your-experiment-name>",  # Name must be unique within a dataset
+    dataset_id="<desired-dataset-id>",
+    experiment_runs=...,  # List of dictionaries or pandas dataframe
+    task_fields=ExperimentTaskResultFieldNames(...),
+    evaluator_columns=...,  # Optional
+    # force_http=...  # Optionally pass force_http to create experiments via HTTP instead of gRPC, defaults to False
+)
+```
+
+### Get an Experiment
+
+To get an experiment by its ID use `client.experiments.get()`. The returned type is `Experiment`.
+
+```python
+experiment = client.experiments.get(
+    experiment_id=...,  # The unique identifier of the experiment
+)
+```
+
+### Delete an Experiment
+
+To delete an experiment by its ID use `client.experiments.delete()`. The call returns `None` if deletion succeeded and raises an error otherwise.
+
+```python
+client.experiments.delete(
+    experiment_id=...,  # The unique identifier of the experiment
+)
+```
+
+### List Experiment Runs
 
-## In Code
+You can list the runs of a given experiment using `client.experiments.list_runs()` and passing the experiment ID. You can specify the number of runs desired using the `limit` parameter. If you want a large number of runs, consider using the `all=True` parameter, which makes the SDK export the data using Arrow Flight over gRPC for increased performance.
+
+```python
+resp = client.experiments.list_runs(
+    experiment_id="<your-experiment-id>",
+    limit=...,  # Number of desired runs. Defaults to 100
+    all=...,  # Whether or not to export all of the runs. Defaults to False
+)
+```
+
+The response is an object of type `ExperimentsRunsList200Response`, and you can access the list of runs via its `experiment_runs` attribute. In addition, you can transform the response object to a dictionary, to JSON format, or to a pandas dataframe.
+
+```python
+# Get the list of runs from the response
+run_list = resp.experiment_runs
+# Get the response as a dictionary
+resp_dict = resp.to_dict()
+# Get the response in JSON format
+resp_json = resp.to_json()
+# Get the response as a pandas dataframe
+resp_df = resp.to_df()
+```
+
+# SDK Configuration
+
+## Logging
+
+### In Code
 
 You can use `configure_logging` to set up the logging behavior of the Arize package to your needs.
 
@@ -482,14 +653,14 @@ configure_logging(
 )
 ```
 
-## Via Environment Variables
+### Via Environment Variables
 
 Configure the same options as the section above, via:
 
 ```python
 import os
 
-# You can disable logging altogether
+# Whether or not you want to enable logging altogether
 os.environ["ARIZE_LOG_ENABLE"] = "true"
 # Set up the logging level
 os.environ["ARIZE_LOG_LEVEL"] = "debug"
@@ -499,6 +670,38 @@ os.environ["ARIZE_LOG_STRUCTURED"] = "false"
 
 The default behavior of Arize's logs is: enabled, `INFO` level, and not structured.
 
+## Caching
+
+When downloading big segments of data from Arize, such as a `Dataset` with all of its examples, the SDK caches the file in `parquet` format under the Arize directory, e.g. `~/.arize/dataset/dataset_<dataset_id>_<updated_at_timestamp>.parquet`.
+
+### In Code
+
+You can disable caching via the `enable_caching` parameter when instantiating the client, and you can also change the "arize directory":
+
+```python
+client = ArizeClient(
+    enable_caching=False,  # Optional parameter, defaults to True
+    arize_directory="my-desired-directory",  # Optional parameter, defaults to ~/.arize
+)
+```
+
+### Via Environment Variables
+
+You can also configure the above via:
+
+```python
+import os
+
+# Whether or not you want to enable caching
+os.environ["ARIZE_ENABLE_CACHING"] = "true"
+# Where you want the SDK to store the files
+os.environ["ARIZE_DIRECTORY"] = "~/.arize"
+```
+
+### Clean the Cache
+
+To clean the cache you can directly `rm` the cached files or the whole directory.
+
 # Community
 
 Join our community to connect with thousands of AI builders.
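The README's cache-cleaning advice amounts to deleting the cache directory; a Python equivalent (a sketch, assuming the default `~/.arize` location is in use):

```python
import shutil
from pathlib import Path

# Remove the whole SDK cache; adjust if ARIZE_DIRECTORY was customized.
shutil.rmtree(Path("~/.arize").expanduser(), ignore_errors=True)
```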
arize-8.0.0a16.dist-info/RECORD → arize-8.0.0a17.dist-info/RECORD CHANGED
@@ -1,17 +1,17 @@
-arize/__init__.py,sha256=_MTUL3z00_ytJ2YcT5lvgjRJjRCCNgOAMYReM0VlNA0,2896
+arize/__init__.py,sha256=G9wbTaZsccUIwntIriIIW74lS1-tHeG58Vt4XV1ZV9s,3002
 arize/_lazy.py,sha256=1Lnm4l42t7W-m2JYCYD-S7ASBOIl0XJkBuli3Ei1VXA,2474
-arize/client.py,sha256=frYYTpE7px7Peg7bwXjdSDBinHBxLkgEWef6qGotlAE,6876
-arize/config.py,sha256=dHofOMN5PPmgBydaz6K3EZvyd77u3gH8d9MDAWA_RF8,7444
+arize/client.py,sha256=-SeZloT7qqWRtr1WXS5d2yn7gvpNYYyGE2yjGPvYi74,7236
+arize/config.py,sha256=PDKUkJfGvTxX2NZ5FLxXz1YaXBOuAkyL5eW7kdbZc5A,7909
 arize/logging.py,sha256=OahBaJRG-z5DPqWrj2_rbe2n0r4fMGOrXpxN_4M_i_w,7244
 arize/types.py,sha256=z1yg5-brmTD4kVHDmmTVkYke53JpusXXeOOpdQw7rYg,69508
-arize/version.py,sha256=3zKIu3lsiVs70w9ALInVHDEz29CoKjmoBoLAVkpcNco,25
+arize/version.py,sha256=dVbZUbQ1PraD-0qvMFzVVGSr1QRGrJYBgb-CUfl0LQc,25
 arize/_exporter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arize/_exporter/client.py,sha256=k3xS-2wx_UlB5toI5RKBoy1bi3ONIxh4KQy4A4a2Omc,15822
 arize/_exporter/validation.py,sha256=6ROu5p7uaolxQ93lO_Eiwv9NVw_uyi3E5T--C5Klo5Q,1021
 arize/_exporter/parsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arize/_exporter/parsers/tracing_data_parser.py,sha256=zVS-w8t1HJkz-AIC_JCdjPJ7gJXgFpfELfqNM_vK42E,5395
 arize/_flight/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arize/_flight/client.py,sha256=gAyU5Jsu-zEyq6mS8OtRp3xI85M2JWR_fg7UpYIYxLA,14373
+arize/_flight/client.py,sha256=14dYkHM0Pi-GP1AeNPQX-RQ3uMmtwRwxoSmR7--1eW0,15499
 arize/_flight/types.py,sha256=GB_4dQu2ElIrcDGAcqhG7oI4g-b0ZdSlbrQkf0TFzVE,194
 arize/_generated/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arize/_generated/api_client_README.md,sha256=OSAc24mxj4fZB7k0i8DIZ8uoXfn6hGjptO5om6ferRE,5632
@@ -55,14 +55,14 @@ arize/_generated/protocol/flight/ingest_pb2.py,sha256=-wC5rbLK4yjROQuXOU9c_gPwA4
 arize/_generated/protocol/rec/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arize/_generated/protocol/rec/public_pb2.py,sha256=vgP-yTSZLeomVwfIzcOo6t3i1mPCCNJGgd41ZkfLNng,79898
 arize/constants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arize/constants/config.py,sha256=hD13trH2Ih1dG8vq18ItHCyBo1nqOp29_YGHgUoa_5w,1367
+arize/constants/config.py,sha256=RvvMZrhbMSv3_Do1jKTVGyWt_Pwal82pIL4S9FH0XS4,1518
 arize/constants/ml.py,sha256=X_vtKpt1AdhLoT2DWEyKDSXAVEuzjwGFacIbgUOpB3M,2358
 arize/constants/model_mapping.json,sha256=OPE54rBATzmwRhx0tycsxnGae1jBhtqEmQqQvzleTSc,5725
 arize/constants/openinference.py,sha256=3tVLyUz6ZvE8ht_ZLnndYXFhDjt_ibJbFeBM1PcxIbY,532
 arize/constants/pyarrow.py,sha256=XUZQXQ-431fQYM2ZJy6xRwW4pfABPg7NZspQ5BXAxRc,24
 arize/constants/spans.py,sha256=EfMgbEIK_2EUcvUY5BGnNAbS7bupBKePlI3j2L5T5CE,2532
 arize/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arize/datasets/client.py,sha256=LtQYUOx69L5F7eYmrH4OaCxvJUGrSS3W6F2xJ-HIR8M,8167
+arize/datasets/client.py,sha256=g4qAWYkteDjcw8EgTdr4XBrtT0JYF7ewD8D-slNxAZ4,8970
 arize/datasets/errors.py,sha256=9hmE7KyBWBSi4FkVQYsI3E-KPgzXaCZc681czNBhS-Q,1685
 arize/datasets/validation.py,sha256=KT_X9bnEMxGbh2o9N3aXwgTMVOQPzz1AW-JyaKxcs48,1336
 arize/embeddings/__init__.py,sha256=6_C8908W_qDixkoBJl1wapgmQCzI8TPLH207kzbYsFA,156
@@ -83,7 +83,7 @@ arize/exceptions/spaces.py,sha256=C1mbtbUx7bVFnGM7iJg03pttnd-jVl2dnFmO102wXrA,31
 arize/exceptions/types.py,sha256=ALzH6S63zbHSno2n6Lp3lRf7Galo-HctrkkDU61fKBo,6050
 arize/exceptions/values.py,sha256=aNAL4P9nN0LOtuHrIARBbty2V0ZtMgBsT1wyz1fB6Kk,18948
 arize/experiments/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arize/experiments/client.py,sha256=9dY8mKMCjJy2-neN18u0ZR99hfBiMgfEFo2ZlG5e4Fo,18628
+arize/experiments/client.py,sha256=VXK2Dl8wOruvX6yLeHyhSMJ-hZIRh9AXdzrYNgxl4pM,22329
 arize/experiments/functions.py,sha256=-6yAumc4ZZxoouEnKXkR8GxFqEFfDBCOOC3j6OAVt40,33833
 arize/experiments/tracing.py,sha256=DGhJrJU2yUchMUVWPr_4PTqmM0VbSiNnRoV08hnN4nU,9660
 arize/experiments/types.py,sha256=EEf0EdjldNX6Hg98bX0E9HtZeu__3Ofy0x9fDqrflAg,12752
@@ -133,13 +133,14 @@ arize/spans/validation/spans/spans_validation.py,sha256=p6IjbQMtOhotGBfw3axj7yMW
 arize/spans/validation/spans/value_validation.py,sha256=H3qV96w6JQNCed_MxhWDas9Jf6vUj6RFabShcwf4jr4,19102
 arize/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arize/utils/arrow.py,sha256=6kbTY3mPL8oAk9C3sL-vE5dLuQ7bNU74qbRHcSbuIBg,5334
+arize/utils/cache.py,sha256=5KP6D-Dru-HjB7hSwFttUf8B4veXNqK7wq82B4bfECU,1892
 arize/utils/dataframe.py,sha256=I0FloPgNiqlKga32tMOvTE70598QA8Hhrgf-6zjYMAM,1120
 arize/utils/openinference_conversion.py,sha256=i3QBngObcc-LrUWFe_pg9egrFs2pqqbFSncUA-wnqNE,1679
 arize/utils/proto.py,sha256=RfdiXtq2cvIG1IV8W0jz2m-vdrA2CD8f542UUi6GLoY,381
 arize/utils/size.py,sha256=uAM-bs7Jk7fIu6vjQ9khZuJZnpAmFvA3lTXiRT0aJS4,788
 arize/utils/online_tasks/__init__.py,sha256=nDuTLUTYnZaWgyJoYR1P7O8ZKA-Nba7X6tJ9OislbWM,144
 arize/utils/online_tasks/dataframe_preprocessor.py,sha256=YyeeeFu_FwCYImbYvBZvQIH_5TK2lHru8KSfqV893ps,8884
-arize-8.0.0a16.dist-info/METADATA,sha256=Rmirc79t04oHmZLhAhsQ5sRbcmQ4x6GevaU4n81fe7E,19286
-arize-8.0.0a16.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-arize-8.0.0a16.dist-info/licenses/LICENSE.md,sha256=8vLN8Gms62NCBorxIv9MUvuK7myueb6_-dhXHPmm4H0,1479
-arize-8.0.0a16.dist-info/RECORD,,
+arize-8.0.0a17.dist-info/METADATA,sha256=FUSvD19Y91lZs32i1d3nDB1oM8Aqv38LTaL2LDlouyE,28471
+arize-8.0.0a17.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+arize-8.0.0a17.dist-info/licenses/LICENSE.md,sha256=8vLN8Gms62NCBorxIv9MUvuK7myueb6_-dhXHPmm4H0,1479
+arize-8.0.0a17.dist-info/RECORD,,