arize 8.0.0a21__py3-none-any.whl → 8.0.0a23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. arize/__init__.py +17 -9
  2. arize/_exporter/client.py +55 -36
  3. arize/_exporter/parsers/tracing_data_parser.py +41 -30
  4. arize/_exporter/validation.py +3 -3
  5. arize/_flight/client.py +208 -77
  6. arize/_generated/api_client/__init__.py +30 -6
  7. arize/_generated/api_client/api/__init__.py +1 -0
  8. arize/_generated/api_client/api/datasets_api.py +864 -190
  9. arize/_generated/api_client/api/experiments_api.py +167 -131
  10. arize/_generated/api_client/api/projects_api.py +1197 -0
  11. arize/_generated/api_client/api_client.py +2 -2
  12. arize/_generated/api_client/configuration.py +42 -34
  13. arize/_generated/api_client/exceptions.py +2 -2
  14. arize/_generated/api_client/models/__init__.py +15 -4
  15. arize/_generated/api_client/models/dataset.py +10 -10
  16. arize/_generated/api_client/models/dataset_example.py +111 -0
  17. arize/_generated/api_client/models/dataset_example_update.py +100 -0
  18. arize/_generated/api_client/models/dataset_version.py +13 -13
  19. arize/_generated/api_client/models/datasets_create_request.py +16 -8
  20. arize/_generated/api_client/models/datasets_examples_insert_request.py +100 -0
  21. arize/_generated/api_client/models/datasets_examples_list200_response.py +106 -0
  22. arize/_generated/api_client/models/datasets_examples_update_request.py +102 -0
  23. arize/_generated/api_client/models/datasets_list200_response.py +10 -4
  24. arize/_generated/api_client/models/experiment.py +14 -16
  25. arize/_generated/api_client/models/experiment_run.py +108 -0
  26. arize/_generated/api_client/models/experiment_run_create.py +102 -0
  27. arize/_generated/api_client/models/experiments_create_request.py +16 -10
  28. arize/_generated/api_client/models/experiments_list200_response.py +10 -4
  29. arize/_generated/api_client/models/experiments_runs_list200_response.py +19 -5
  30. arize/_generated/api_client/models/{error.py → pagination_metadata.py} +13 -11
  31. arize/_generated/api_client/models/primitive_value.py +172 -0
  32. arize/_generated/api_client/models/problem.py +100 -0
  33. arize/_generated/api_client/models/project.py +99 -0
  34. arize/_generated/api_client/models/{datasets_list_examples200_response.py → projects_create_request.py} +13 -11
  35. arize/_generated/api_client/models/projects_list200_response.py +106 -0
  36. arize/_generated/api_client/rest.py +2 -2
  37. arize/_generated/api_client/test/test_dataset.py +4 -2
  38. arize/_generated/api_client/test/test_dataset_example.py +56 -0
  39. arize/_generated/api_client/test/test_dataset_example_update.py +52 -0
  40. arize/_generated/api_client/test/test_dataset_version.py +7 -2
  41. arize/_generated/api_client/test/test_datasets_api.py +27 -13
  42. arize/_generated/api_client/test/test_datasets_create_request.py +8 -4
  43. arize/_generated/api_client/test/{test_datasets_list_examples200_response.py → test_datasets_examples_insert_request.py} +19 -15
  44. arize/_generated/api_client/test/test_datasets_examples_list200_response.py +66 -0
  45. arize/_generated/api_client/test/test_datasets_examples_update_request.py +61 -0
  46. arize/_generated/api_client/test/test_datasets_list200_response.py +9 -3
  47. arize/_generated/api_client/test/test_experiment.py +2 -4
  48. arize/_generated/api_client/test/test_experiment_run.py +56 -0
  49. arize/_generated/api_client/test/test_experiment_run_create.py +54 -0
  50. arize/_generated/api_client/test/test_experiments_api.py +6 -6
  51. arize/_generated/api_client/test/test_experiments_create_request.py +9 -6
  52. arize/_generated/api_client/test/test_experiments_list200_response.py +9 -5
  53. arize/_generated/api_client/test/test_experiments_runs_list200_response.py +15 -5
  54. arize/_generated/api_client/test/test_pagination_metadata.py +53 -0
  55. arize/_generated/api_client/test/{test_error.py → test_primitive_value.py} +13 -14
  56. arize/_generated/api_client/test/test_problem.py +57 -0
  57. arize/_generated/api_client/test/test_project.py +58 -0
  58. arize/_generated/api_client/test/test_projects_api.py +59 -0
  59. arize/_generated/api_client/test/test_projects_create_request.py +54 -0
  60. arize/_generated/api_client/test/test_projects_list200_response.py +70 -0
  61. arize/_generated/api_client_README.md +43 -29
  62. arize/_generated/protocol/flight/flight_pb2.py +400 -0
  63. arize/_lazy.py +27 -19
  64. arize/client.py +269 -55
  65. arize/config.py +365 -116
  66. arize/constants/__init__.py +1 -0
  67. arize/constants/config.py +11 -4
  68. arize/constants/ml.py +6 -4
  69. arize/constants/openinference.py +2 -0
  70. arize/constants/pyarrow.py +2 -0
  71. arize/constants/spans.py +3 -1
  72. arize/datasets/__init__.py +1 -0
  73. arize/datasets/client.py +299 -84
  74. arize/datasets/errors.py +32 -2
  75. arize/datasets/validation.py +18 -8
  76. arize/embeddings/__init__.py +2 -0
  77. arize/embeddings/auto_generator.py +23 -19
  78. arize/embeddings/base_generators.py +89 -36
  79. arize/embeddings/constants.py +2 -0
  80. arize/embeddings/cv_generators.py +26 -4
  81. arize/embeddings/errors.py +27 -5
  82. arize/embeddings/nlp_generators.py +31 -12
  83. arize/embeddings/tabular_generators.py +32 -20
  84. arize/embeddings/usecases.py +12 -2
  85. arize/exceptions/__init__.py +1 -0
  86. arize/exceptions/auth.py +11 -1
  87. arize/exceptions/base.py +29 -4
  88. arize/exceptions/models.py +21 -2
  89. arize/exceptions/parameters.py +31 -0
  90. arize/exceptions/spaces.py +12 -1
  91. arize/exceptions/types.py +86 -7
  92. arize/exceptions/values.py +220 -20
  93. arize/experiments/__init__.py +1 -0
  94. arize/experiments/client.py +390 -286
  95. arize/experiments/evaluators/__init__.py +1 -0
  96. arize/experiments/evaluators/base.py +74 -41
  97. arize/experiments/evaluators/exceptions.py +6 -3
  98. arize/experiments/evaluators/executors.py +121 -73
  99. arize/experiments/evaluators/rate_limiters.py +106 -57
  100. arize/experiments/evaluators/types.py +34 -7
  101. arize/experiments/evaluators/utils.py +65 -27
  102. arize/experiments/functions.py +103 -101
  103. arize/experiments/tracing.py +52 -44
  104. arize/experiments/types.py +56 -31
  105. arize/logging.py +54 -22
  106. arize/models/__init__.py +1 -0
  107. arize/models/batch_validation/__init__.py +1 -0
  108. arize/models/batch_validation/errors.py +543 -65
  109. arize/models/batch_validation/validator.py +339 -300
  110. arize/models/bounded_executor.py +20 -7
  111. arize/models/casting.py +75 -29
  112. arize/models/client.py +326 -107
  113. arize/models/proto.py +95 -40
  114. arize/models/stream_validation.py +42 -14
  115. arize/models/surrogate_explainer/__init__.py +1 -0
  116. arize/models/surrogate_explainer/mimic.py +24 -13
  117. arize/pre_releases.py +43 -0
  118. arize/projects/__init__.py +1 -0
  119. arize/projects/client.py +129 -0
  120. arize/regions.py +40 -0
  121. arize/spans/__init__.py +1 -0
  122. arize/spans/client.py +130 -106
  123. arize/spans/columns.py +13 -0
  124. arize/spans/conversion.py +54 -38
  125. arize/spans/validation/__init__.py +1 -0
  126. arize/spans/validation/annotations/__init__.py +1 -0
  127. arize/spans/validation/annotations/annotations_validation.py +6 -4
  128. arize/spans/validation/annotations/dataframe_form_validation.py +13 -11
  129. arize/spans/validation/annotations/value_validation.py +35 -11
  130. arize/spans/validation/common/__init__.py +1 -0
  131. arize/spans/validation/common/argument_validation.py +33 -8
  132. arize/spans/validation/common/dataframe_form_validation.py +35 -9
  133. arize/spans/validation/common/errors.py +211 -11
  134. arize/spans/validation/common/value_validation.py +80 -13
  135. arize/spans/validation/evals/__init__.py +1 -0
  136. arize/spans/validation/evals/dataframe_form_validation.py +28 -8
  137. arize/spans/validation/evals/evals_validation.py +34 -4
  138. arize/spans/validation/evals/value_validation.py +26 -3
  139. arize/spans/validation/metadata/__init__.py +1 -1
  140. arize/spans/validation/metadata/argument_validation.py +14 -5
  141. arize/spans/validation/metadata/dataframe_form_validation.py +26 -10
  142. arize/spans/validation/metadata/value_validation.py +24 -10
  143. arize/spans/validation/spans/__init__.py +1 -0
  144. arize/spans/validation/spans/dataframe_form_validation.py +34 -13
  145. arize/spans/validation/spans/spans_validation.py +35 -4
  146. arize/spans/validation/spans/value_validation.py +76 -7
  147. arize/types.py +293 -157
  148. arize/utils/__init__.py +1 -0
  149. arize/utils/arrow.py +31 -15
  150. arize/utils/cache.py +34 -6
  151. arize/utils/dataframe.py +19 -2
  152. arize/utils/online_tasks/__init__.py +2 -0
  153. arize/utils/online_tasks/dataframe_preprocessor.py +53 -41
  154. arize/utils/openinference_conversion.py +44 -5
  155. arize/utils/proto.py +10 -0
  156. arize/utils/size.py +5 -3
  157. arize/version.py +3 -1
  158. {arize-8.0.0a21.dist-info → arize-8.0.0a23.dist-info}/METADATA +4 -3
  159. arize-8.0.0a23.dist-info/RECORD +174 -0
  160. {arize-8.0.0a21.dist-info → arize-8.0.0a23.dist-info}/WHEEL +1 -1
  161. arize-8.0.0a23.dist-info/licenses/LICENSE +176 -0
  162. arize-8.0.0a23.dist-info/licenses/NOTICE +13 -0
  163. arize/_generated/protocol/flight/export_pb2.py +0 -61
  164. arize/_generated/protocol/flight/ingest_pb2.py +0 -365
  165. arize-8.0.0a21.dist-info/RECORD +0 -146
  166. arize-8.0.0a21.dist-info/licenses/LICENSE.md +0 -12
arize/__init__.py CHANGED
@@ -1,9 +1,12 @@
+"""Arize SDK for model observability and LLM tracing."""
+
 import logging
 from collections.abc import Mapping
 
 from arize._generated.api_client import models
 from arize.client import ArizeClient
 from arize.config import SDKConfiguration
+from arize.regions import Region
 
 # Attach a NullHandler by default in the top-level package
 # so that if no configuration is installed, nothing explodes.
@@ -14,23 +17,27 @@ try:
     from .logging import auto_configure_from_env
 
     auto_configure_from_env()
-except Exception:
-    # Never let logging config crash imports
+except Exception:  # noqa: S110
+    # Intentionally silent: logging configuration is optional and should never
+    # prevent SDK initialization. Users can configure logging explicitly if needed.
     pass
 
-__all__ = ["ArizeClient", "SDKConfiguration"]
+__all__ = [
+    "ArizeClient",
+    "Region",
+    "SDKConfiguration",
+]
 
 
-def make_to_df(field_name: str):
+def make_to_df(field_name: str) -> object:
     def to_df(
-        self,
+        self: object,
         by_alias: bool = False,
         exclude_none: str | bool = False,
         json_normalize: bool = False,
         convert_dtypes: bool = True,
-    ):
-        """
-        Convert a list of objects to a pandas DataFrame.
+    ) -> object:
+        """Convert a list of objects to a pandas DataFrame.
 
         Behavior:
         - If an item is a Pydantic v2 model, use `.model_dump(by_alias=...)`.
@@ -85,6 +92,7 @@ def make_to_df(field_name: str):
 
 
 models.DatasetsList200Response.to_df = make_to_df("datasets")  # type: ignore[attr-defined]
-models.DatasetsListExamples200Response.to_df = make_to_df("examples")  # type: ignore[attr-defined]
+models.DatasetsExamplesList200Response.to_df = make_to_df("examples")  # type: ignore[attr-defined]
 models.ExperimentsList200Response.to_df = make_to_df("experiments")  # type: ignore[attr-defined]
 models.ExperimentsRunsList200Response.to_df = make_to_df("experiment_runs")  # type: ignore[attr-defined]
+models.ProjectsList200Response.to_df = make_to_df("projects")  # type: ignore[attr-defined]
arize/_exporter/client.py CHANGED
@@ -2,7 +2,6 @@
 import logging
 from dataclasses import dataclass
 from datetime import datetime
-from typing import List, Tuple
 
 import pandas as pd
 import pyarrow.parquet as pq
@@ -16,7 +15,7 @@ from arize._exporter.validation import (
     validate_input_type,
     validate_start_end_time,
 )
-from arize._generated.protocol.flight import export_pb2
+from arize._generated.protocol.flight import flight_pb2
 from arize.logging import CtxAdapter
 from arize.types import Environments, SimilaritySearchParams
 from arize.utils.dataframe import reset_dataframe_index
@@ -33,19 +32,20 @@ class ArizeExportClient:
         space_id: str,
         model_id: str,
         environment: Environments,
-        start_time: str | datetime,
-        end_time: str | datetime,
+        start_time: datetime,
+        end_time: datetime,
         where: str = "",
-        columns: List | None = None,
+        columns: list | None = None,
         similarity_search_params: SimilaritySearchParams | None = None,
         model_version: str = "",
         batch_id: str = "",
         include_actuals: bool = False,
         stream_chunk_size: int | None = None,
-    ):
-        """
-        Exports data of a specific model in the Arize platform to a pandas dataframe for a defined
-        time interval and model environment, optionally by model version and/or batch id.
+    ) -> object:
+        """Exports data of a specific model in the Arize platform to a pandas dataframe.
+
+        The export covers a defined time interval and model environment, and can
+        optionally be filtered by model version and/or batch id.
 
         Args:
             space_id (str): The id for the space where to export models from, can be retrieved from
@@ -104,14 +104,14 @@ class ArizeExportClient:
             return pd.DataFrame()
         progress_bar = self._get_progress_bar(num_recs)
         list_of_df = []
-        while True:
-            try:
+        try:
+            while True:
                 flight_batch = stream_reader.read_chunk()
                 batch_df = flight_batch.data.to_pandas()
                 list_of_df.append(batch_df)
                 progress_bar.update(batch_df.shape[0])
-            except StopIteration:
-                break
+        except StopIteration:
+            pass
         progress_bar.close()
         df = pd.concat(list_of_df)
         null_columns = df.columns[df.isnull().all()]
@@ -139,16 +139,17 @@ class ArizeExportClient:
         start_time: datetime,
         end_time: datetime,
         where: str = "",
-        columns: List | None = None,
+        columns: list | None = None,
         similarity_search_params: SimilaritySearchParams | None = None,
         model_version: str = "",
         batch_id: str = "",
         include_actuals: bool = False,
         stream_chunk_size: int | None = None,
     ) -> None:
-        """
-        Exports data of a specific model in the Arize platform to a parquet file for a defined time
-        interval and model environment, optionally by model version and/or batch id.
+        """Exports data of a specific model in the Arize platform to a parquet file.
+
+        The export covers a defined time interval and model environment, and can
+        optionally be filtered by model version and/or batch id.
 
         Args:
             path (str): path to the file to store exported data. File must be in parquet format and
@@ -208,17 +209,17 @@ class ArizeExportClient:
             stream_chunk_size=stream_chunk_size,
         )
         if stream_reader is None:
-            return None
+            return
         progress_bar = self._get_progress_bar(num_recs)
         with pq.ParquetWriter(path, schema=stream_reader.schema) as writer:
-            while True:
-                try:
+            try:
+                while True:
                     flight_batch = stream_reader.read_chunk()
                     record_batch = flight_batch.data
                     writer.write_batch(record_batch)
                     progress_bar.update(record_batch.num_rows)
-                except StopIteration:
-                    break
+            except StopIteration:
+                pass
         progress_bar.close()
 
     def _get_stream_reader(
@@ -233,9 +234,9 @@ class ArizeExportClient:
         batch_id: str = "",
         where: str = "",
         similarity_search_params: SimilaritySearchParams | None = None,
-        columns: List | None = None,
+        columns: list | None = None,
         stream_chunk_size: int | None = None,
-    ) -> Tuple[flight.FlightStreamReader, int]:
+    ) -> tuple[flight.FlightStreamReader | None, int]:
         # Bind common context for this operation
         log = CtxAdapter(
             logger,
@@ -273,7 +274,7 @@ class ArizeExportClient:
         validate_start_end_time(start_time, end_time)
 
         # Create query descriptor
-        query_descriptor = export_pb2.RecordQueryDescriptor(
+        query_descriptor = flight_pb2.RecordQueryDescriptor(
             space_id=space_id,
             model_id=model_id,
             environment=environment.name,
@@ -289,9 +290,11 @@ class ArizeExportClient:
                 else None
             ),
             projected_columns=columns if columns else [],
-            stream_chunk_size=Int64Value(value=stream_chunk_size)
-            if stream_chunk_size is not None
-            else None,
+            stream_chunk_size=(
+                Int64Value(value=stream_chunk_size)
+                if stream_chunk_size is not None
+                else None
+            ),
         )
 
         try:
@@ -306,17 +309,24 @@ class ArizeExportClient:
                 logger.warning("Query returns no data")
                 return None, 0
             logger.debug("Ticket: %s", flight_info.endpoints[0].ticket)
-
-            # Retrieve the result set as flight stream reader
-            reader = self.flight_client.do_get(flight_info.endpoints[0].ticket)
-            return reader, flight_info.total_records
         except Exception as e:
             msg = f"Error getting flight info or do_get: {e}"
-            logger.error(msg)
+            logger.exception(msg)
            raise RuntimeError(msg) from e
+        # Retrieve the result set as flight stream reader
+        reader = self.flight_client.do_get(flight_info.endpoints[0].ticket)
+        return reader, flight_info.total_records
 
     @staticmethod
-    def _get_progress_bar(num_recs):
+    def _get_progress_bar(num_recs: int) -> tqdm:
+        """Create a progress bar for export operations.
+
+        Args:
+            num_recs: Total number of records to export.
+
+        Returns:
+            A tqdm progress bar configured for data export display.
+        """
         return tqdm(
             total=num_recs,
             desc=f" exporting {num_recs} rows",
@@ -329,8 +339,17 @@ class ArizeExportClient:
 
     def _get_pb_similarity_search_params(
         similarity_params: SimilaritySearchParams,
-    ) -> export_pb2.SimilaritySearchParams:
-        proto_params = export_pb2.SimilaritySearchParams()
+    ) -> flight_pb2.SimilaritySearchParams:
+        """Convert SimilaritySearchParams to protocol buffer format.
+
+        Args:
+            similarity_params: Similarity search parameters containing search column name,
+                threshold, and reference examples.
+
+        Returns:
+            A protocol buffer SimilaritySearchParams object for Flight requests.
+        """
+        proto_params = flight_pb2.SimilaritySearchParams()
         proto_params.search_column_name = similarity_params.search_column_name
         proto_params.threshold = similarity_params.threshold
         for ref in similarity_params.references:
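Note: both streaming loops above were restructured from a `while True:` wrapping a `try/except StopIteration: break` into a single `try:` around the loop with `pass` on exhaustion; behavior is unchanged. A minimal sketch of the resulting consumption pattern, assuming `stream_reader` is a `pyarrow.flight.FlightStreamReader` whose `read_chunk()` raises `StopIteration` at end of stream:

    import pandas as pd

    def read_all_chunks(stream_reader) -> pd.DataFrame:
        frames = []
        try:
            while True:
                chunk = stream_reader.read_chunk()  # raises StopIteration when done
                frames.append(chunk.data.to_pandas())
        except StopIteration:
            pass  # end of stream reached
        return pd.concat(frames) if frames else pd.DataFrame()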
arize/_exporter/parsers/tracing_data_parser.py CHANGED
@@ -1,6 +1,5 @@
 import json
 import logging
-from typing import List
 
 import numpy as np
 import pandas as pd
@@ -28,12 +27,26 @@ logger = logging.getLogger(__name__)
 # but the resulting error messages provide clarity on what the effect
 # of the error is on the data; It should not prevent a user from continuing to use the data
 class OtelTracingDataTransformer:
+    def _apply_column_transformation(
+        self, df: pd.DataFrame, col_name: str, transform_func: object
+    ) -> str | None:
+        """Apply a transformation to a column and return error message if it fails."""
+        try:
+            df[col_name] = df[col_name].apply(transform_func)
+        except Exception as e:
+            return (
+                f"Unable to transform json string data to a Python dict in column '{col_name}'; "
+                f"May encounter issues when importing data back into Arize; Error: {e}"
+            )
+        else:
+            return None
+
     def transform(self, df: pd.DataFrame) -> pd.DataFrame:
-        errors: List[str] = []
+        errors: list[str] = []
 
         # Convert list of json serializable strings columns to list of dictionaries for more
         # conveinent data processing in Python
-        list_of_json_string_column_names: List[str] = [
+        list_of_json_string_column_names: list[str] = [
             col.name
             for col in [
                 SPAN_ATTRIBUTES_LLM_INPUT_MESSAGES_COL,
@@ -45,17 +58,13 @@ class OtelTracingDataTransformer:
             if col.name in df.columns
         ]
         for col_name in list_of_json_string_column_names:
-            try:
-                df[col_name] = df[col_name].apply(
-                    self._transform_value_to_list_of_dict
-                )
-            except Exception as e:
-                errors.append(
-                    f"Unable to transform json string data to a Python dict in column '{col_name}'; "
-                    f"May encounter issues when importing data back into Arize; Error: {e}"
-                )
-
-        json_string_column_names: List[str] = [
+            error = self._apply_column_transformation(
+                df, col_name, self._transform_value_to_list_of_dict
+            )
+            if error:
+                errors.append(error)
+
+        json_string_column_names: list[str] = [
             col.name
             for col in [
                 SPAN_ATTRIBUTES_LLM_PROMPT_TEMPLATE_VARIABLES_COL,
@@ -64,16 +73,14 @@ class OtelTracingDataTransformer:
             if col.name in df.columns
         ]
         for col_name in json_string_column_names:
-            try:
-                df[col_name] = df[col_name].apply(self._transform_json_to_dict)
-            except Exception as e:
-                errors.append(
-                    f"Unable to transform json string data to a Python dict in column '{col_name}'; "
-                    f"May encounter issues when importing data back into Arize; Error: {e}"
-                )
+            error = self._apply_column_transformation(
+                df, col_name, self._transform_json_to_dict
+            )
+            if error:
+                errors.append(error)
 
         # Clean json string columns since empty strings are equivalent here to None but are not valid json
-        dirty_string_column_names: List[str] = [
+        dirty_string_column_names: list[str] = [
             col.name
             for col in [
                 SPAN_ATTRIBUTES_LLM_INVOCATION_PARAMETERS_COL,
@@ -85,7 +92,7 @@ class OtelTracingDataTransformer:
             df[col_name] = df[col_name].apply(self._clean_json_string)
 
         # Convert timestamp columns to datetime objects
-        timestamp_column_names: List[str] = [
+        timestamp_column_names: list[str] = [
             col.name
             for col in [
                 SPAN_START_TIME_COL,
@@ -103,7 +110,9 @@ class OtelTracingDataTransformer:
 
         return df
 
-    def _transform_value_to_list_of_dict(self, value):
+    def _transform_value_to_list_of_dict(
+        self, value: object
+    ) -> list[object] | None:
         if value is None:
             return None
 
@@ -113,10 +122,11 @@ class OtelTracingDataTransformer:
                 for i in value
                 if self._is_non_empty_string(i)
             ]
-        elif self._is_non_empty_string(value):
+        if self._is_non_empty_string(value):
             return [self._deserialize_json_string_to_dict(value)]
+        return None
 
-    def _transform_json_to_dict(self, value):
+    def _transform_json_to_dict(self, value: object) -> object | None:
         if value is None:
             return None
 
@@ -126,20 +136,21 @@ class OtelTracingDataTransformer:
         if isinstance(value, str) and value == "":
             # transform empty string to None
             return None
+        return None
 
-    def _is_non_empty_string(self, value):
+    def _is_non_empty_string(self, value: object) -> bool:
         return isinstance(value, str) and value != ""
 
-    def _deserialize_json_string_to_dict(self, value: str):
+    def _deserialize_json_string_to_dict(self, value: str) -> object:
         try:
             return json.loads(value)
         except json.JSONDecodeError as e:
             raise ValueError(f"Invalid JSON string: {value}") from e
 
-    def _clean_json_string(self, value):
+    def _clean_json_string(self, value: object) -> object | None:
         return value if self._is_non_empty_string(value) else None
 
-    def _convert_timestamp_to_datetime(self, value):
+    def _convert_timestamp_to_datetime(self, value: object) -> object:
         return (
             pd.Timestamp(value, unit="ns")
             if value and isinstance(value, (int, float, np.int64))
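Note: the repeated try/except blocks in `transform` were folded into the new `_apply_column_transformation` helper, which returns an error string instead of raising, so the caller can collect warnings and keep processing the remaining columns. A standalone sketch of the same pattern; the column name and sample values here are illustrative only:

    import json

    import pandas as pd

    def apply_column_transformation(df: pd.DataFrame, col_name: str, fn) -> str | None:
        """Apply fn to one column; return an error message instead of raising."""
        try:
            df[col_name] = df[col_name].apply(fn)
        except Exception as e:
            return f"Unable to transform column '{col_name}'; Error: {e}"
        return None

    df = pd.DataFrame({"invocation_parameters": ['{"temperature": 0.2}', None]})
    error = apply_column_transformation(
        df,
        "invocation_parameters",
        lambda v: json.loads(v) if isinstance(v, str) and v else None,
    )
    if error:
        print(error)  # collected, not raised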
arize/_exporter/validation.py CHANGED
@@ -1,13 +1,13 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
     from datetime import datetime
 
 
 def validate_input_type(
-    input: Any,
+    input: object,
     input_name: str,
     input_type: type,
     allow_none: bool = False,
@@ -28,7 +28,7 @@ def validate_input_type(
 
 
 def validate_input_value(
-    input: Any,
+    input: object,
     input_name: str,
     choices: tuple,
 ) -> None:
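Note: both validators above now take `object` instead of `typing.Any`, dropping the unused import. Their bodies are not shown in this diff; the following is a hypothetical sketch of how a helper with the `validate_input_type` signature typically behaves (the error message wording is assumed):

    from datetime import datetime

    def validate_input_type(
        input: object,
        input_name: str,
        input_type: type,
        allow_none: bool = False,
    ) -> None:
        # Permit None only when the caller explicitly allows it
        if input is None and allow_none:
            return
        if not isinstance(input, input_type):
            raise TypeError(
                f"{input_name} must be of type {input_type.__name__}, "
                f"got {type(input).__name__}"
            )

    validate_input_type(datetime(2024, 1, 1), "start_time", datetime)  # passes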