arize 8.0.0a13__py3-none-any.whl → 8.0.0a15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arize/_exporter/client.py +18 -3
- arize/_flight/client.py +6 -2
- arize/datasets/client.py +88 -83
- arize/{utils → models}/casting.py +12 -12
- arize/models/client.py +330 -5
- arize/{utils → models}/proto.py +1 -369
- arize/spans/client.py +30 -6
- arize/utils/arrow.py +4 -4
- arize/version.py +1 -1
- {arize-8.0.0a13.dist-info → arize-8.0.0a15.dist-info}/METADATA +11 -3
- {arize-8.0.0a13.dist-info → arize-8.0.0a15.dist-info}/RECORD +13 -13
- {arize-8.0.0a13.dist-info → arize-8.0.0a15.dist-info}/WHEEL +0 -0
- {arize-8.0.0a13.dist-info → arize-8.0.0a15.dist-info}/licenses/LICENSE.md +0 -0
arize/{utils → models}/proto.py
RENAMED
|
@@ -1,21 +1,17 @@
|
|
|
1
1
|
# type: ignore[pb2]
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import Tuple
|
|
5
5
|
|
|
6
|
-
from google.protobuf import json_format, message
|
|
7
6
|
from google.protobuf.timestamp_pb2 import Timestamp
|
|
8
7
|
from google.protobuf.wrappers_pb2 import DoubleValue, StringValue
|
|
9
8
|
|
|
10
|
-
from arize._generated.protocol.flight import export_pb2 as flight_exp_pb2
|
|
11
9
|
from arize._generated.protocol.rec import public_pb2 as pb2
|
|
12
10
|
from arize.exceptions.parameters import InvalidValueType
|
|
13
11
|
from arize.types import (
|
|
14
12
|
CATEGORICAL_MODEL_TYPES,
|
|
15
13
|
NUMERIC_MODEL_TYPES,
|
|
16
|
-
CorpusSchema,
|
|
17
14
|
Embedding,
|
|
18
|
-
Environments,
|
|
19
15
|
InstanceSegmentationActualLabel,
|
|
20
16
|
InstanceSegmentationPredictionLabel,
|
|
21
17
|
ModelTypes,
|
|
@@ -25,374 +21,10 @@ from arize.types import (
|
|
|
25
21
|
RankingActualLabel,
|
|
26
22
|
RankingPredictionLabel,
|
|
27
23
|
SemanticSegmentationLabel,
|
|
28
|
-
SimilaritySearchParams,
|
|
29
24
|
convert_element,
|
|
30
25
|
is_list_of,
|
|
31
26
|
)
|
|
32
27
|
|
|
33
|
-
if TYPE_CHECKING:
|
|
34
|
-
from arize.types import (
|
|
35
|
-
EmbeddingColumnNames,
|
|
36
|
-
Schema,
|
|
37
|
-
)
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def get_pb_schema(
|
|
41
|
-
schema: Schema,
|
|
42
|
-
model_id: str,
|
|
43
|
-
model_version: str | None,
|
|
44
|
-
model_type: ModelTypes,
|
|
45
|
-
environment: Environments,
|
|
46
|
-
batch_id: str,
|
|
47
|
-
):
|
|
48
|
-
s = pb2.Schema()
|
|
49
|
-
s.constants.model_id = model_id
|
|
50
|
-
|
|
51
|
-
if model_version is not None:
|
|
52
|
-
s.constants.model_version = model_version
|
|
53
|
-
|
|
54
|
-
if environment == Environments.PRODUCTION:
|
|
55
|
-
s.constants.environment = pb2.Schema.Environment.PRODUCTION
|
|
56
|
-
elif environment == Environments.VALIDATION:
|
|
57
|
-
s.constants.environment = pb2.Schema.Environment.VALIDATION
|
|
58
|
-
elif environment == Environments.TRAINING:
|
|
59
|
-
s.constants.environment = pb2.Schema.Environment.TRAINING
|
|
60
|
-
else:
|
|
61
|
-
raise ValueError(f"unexpected environment: {environment}")
|
|
62
|
-
|
|
63
|
-
# Map user-friendly external model types -> internal model types when sending to Arize
|
|
64
|
-
if model_type in NUMERIC_MODEL_TYPES:
|
|
65
|
-
s.constants.model_type = pb2.Schema.ModelType.NUMERIC
|
|
66
|
-
elif model_type in CATEGORICAL_MODEL_TYPES:
|
|
67
|
-
s.constants.model_type = pb2.Schema.ModelType.SCORE_CATEGORICAL
|
|
68
|
-
elif model_type == ModelTypes.RANKING:
|
|
69
|
-
s.constants.model_type = pb2.Schema.ModelType.RANKING
|
|
70
|
-
elif model_type == ModelTypes.OBJECT_DETECTION:
|
|
71
|
-
s.constants.model_type = pb2.Schema.ModelType.OBJECT_DETECTION
|
|
72
|
-
elif model_type == ModelTypes.GENERATIVE_LLM:
|
|
73
|
-
s.constants.model_type = pb2.Schema.ModelType.GENERATIVE_LLM
|
|
74
|
-
elif model_type == ModelTypes.MULTI_CLASS:
|
|
75
|
-
s.constants.model_type = pb2.Schema.ModelType.MULTI_CLASS
|
|
76
|
-
|
|
77
|
-
if batch_id is not None:
|
|
78
|
-
s.constants.batch_id = batch_id
|
|
79
|
-
|
|
80
|
-
if schema.prediction_id_column_name is not None:
|
|
81
|
-
s.arrow_schema.prediction_id_column_name = (
|
|
82
|
-
schema.prediction_id_column_name
|
|
83
|
-
)
|
|
84
|
-
|
|
85
|
-
if schema.timestamp_column_name is not None:
|
|
86
|
-
s.arrow_schema.timestamp_column_name = schema.timestamp_column_name
|
|
87
|
-
|
|
88
|
-
if schema.prediction_label_column_name is not None:
|
|
89
|
-
s.arrow_schema.prediction_label_column_name = (
|
|
90
|
-
schema.prediction_label_column_name
|
|
91
|
-
)
|
|
92
|
-
|
|
93
|
-
if model_type == ModelTypes.OBJECT_DETECTION:
|
|
94
|
-
if schema.object_detection_prediction_column_names is not None:
|
|
95
|
-
s.arrow_schema.prediction_object_detection_label_column_names.bboxes_coordinates_column_name = (
|
|
96
|
-
schema.object_detection_prediction_column_names.bounding_boxes_coordinates_column_name # noqa: E501
|
|
97
|
-
)
|
|
98
|
-
s.arrow_schema.prediction_object_detection_label_column_names.bboxes_categories_column_name = (
|
|
99
|
-
schema.object_detection_prediction_column_names.categories_column_name # noqa: E501
|
|
100
|
-
)
|
|
101
|
-
if (
|
|
102
|
-
schema.object_detection_prediction_column_names.scores_column_name
|
|
103
|
-
is not None
|
|
104
|
-
):
|
|
105
|
-
s.arrow_schema.prediction_object_detection_label_column_names.bboxes_scores_column_name = (
|
|
106
|
-
schema.object_detection_prediction_column_names.scores_column_name # noqa: E501
|
|
107
|
-
)
|
|
108
|
-
|
|
109
|
-
if schema.semantic_segmentation_prediction_column_names is not None:
|
|
110
|
-
s.arrow_schema.prediction_semantic_segmentation_label_column_names.polygons_coordinates_column_name = ( # noqa: E501
|
|
111
|
-
schema.semantic_segmentation_prediction_column_names.polygon_coordinates_column_name
|
|
112
|
-
)
|
|
113
|
-
s.arrow_schema.prediction_semantic_segmentation_label_column_names.polygons_categories_column_name = ( # noqa: E501
|
|
114
|
-
schema.semantic_segmentation_prediction_column_names.categories_column_name
|
|
115
|
-
)
|
|
116
|
-
|
|
117
|
-
if schema.instance_segmentation_prediction_column_names is not None:
|
|
118
|
-
s.arrow_schema.prediction_instance_segmentation_label_column_names.polygons_coordinates_column_name = ( # noqa: E501
|
|
119
|
-
schema.instance_segmentation_prediction_column_names.polygon_coordinates_column_name
|
|
120
|
-
)
|
|
121
|
-
s.arrow_schema.prediction_instance_segmentation_label_column_names.polygons_categories_column_name = ( # noqa: E501
|
|
122
|
-
schema.instance_segmentation_prediction_column_names.categories_column_name
|
|
123
|
-
)
|
|
124
|
-
if (
|
|
125
|
-
schema.instance_segmentation_prediction_column_names.scores_column_name
|
|
126
|
-
is not None
|
|
127
|
-
):
|
|
128
|
-
s.arrow_schema.prediction_instance_segmentation_label_column_names.polygons_scores_column_name = ( # noqa: E501
|
|
129
|
-
schema.instance_segmentation_prediction_column_names.scores_column_name
|
|
130
|
-
)
|
|
131
|
-
if (
|
|
132
|
-
schema.instance_segmentation_prediction_column_names.bounding_boxes_coordinates_column_name
|
|
133
|
-
is not None
|
|
134
|
-
):
|
|
135
|
-
s.arrow_schema.prediction_instance_segmentation_label_column_names.bboxes_coordinates_column_name = ( # noqa: E501
|
|
136
|
-
schema.instance_segmentation_prediction_column_names.bounding_boxes_coordinates_column_name
|
|
137
|
-
)
|
|
138
|
-
|
|
139
|
-
if schema.prediction_score_column_name is not None:
|
|
140
|
-
if model_type in NUMERIC_MODEL_TYPES:
|
|
141
|
-
# allow numeric prediction to be sent in as either prediction_label (legacy) or
|
|
142
|
-
# prediction_score.
|
|
143
|
-
s.arrow_schema.prediction_label_column_name = (
|
|
144
|
-
schema.prediction_score_column_name
|
|
145
|
-
)
|
|
146
|
-
else:
|
|
147
|
-
s.arrow_schema.prediction_score_column_name = (
|
|
148
|
-
schema.prediction_score_column_name
|
|
149
|
-
)
|
|
150
|
-
|
|
151
|
-
if schema.feature_column_names is not None:
|
|
152
|
-
s.arrow_schema.feature_column_names.extend(schema.feature_column_names)
|
|
153
|
-
|
|
154
|
-
if schema.embedding_feature_column_names is not None:
|
|
155
|
-
for (
|
|
156
|
-
emb_name,
|
|
157
|
-
emb_col_names,
|
|
158
|
-
) in schema.embedding_feature_column_names.items():
|
|
159
|
-
# emb_name is how it will show in the UI
|
|
160
|
-
s.arrow_schema.embedding_feature_column_names_map[
|
|
161
|
-
emb_name
|
|
162
|
-
].vector_column_name = emb_col_names.vector_column_name
|
|
163
|
-
if emb_col_names.data_column_name:
|
|
164
|
-
s.arrow_schema.embedding_feature_column_names_map[
|
|
165
|
-
emb_name
|
|
166
|
-
].data_column_name = emb_col_names.data_column_name
|
|
167
|
-
if emb_col_names.link_to_data_column_name:
|
|
168
|
-
s.arrow_schema.embedding_feature_column_names_map[
|
|
169
|
-
emb_name
|
|
170
|
-
].link_to_data_column_name = (
|
|
171
|
-
emb_col_names.link_to_data_column_name
|
|
172
|
-
)
|
|
173
|
-
|
|
174
|
-
if schema.prompt_column_names is not None:
|
|
175
|
-
if isinstance(schema.prompt_column_names, str):
|
|
176
|
-
s.arrow_schema.embedding_feature_column_names_map[
|
|
177
|
-
"prompt"
|
|
178
|
-
].data_column_name = schema.prompt_column_names
|
|
179
|
-
elif isinstance(schema.prompt_column_names, EmbeddingColumnNames):
|
|
180
|
-
col_names = schema.prompt_column_names
|
|
181
|
-
s.arrow_schema.embedding_feature_column_names_map[
|
|
182
|
-
"prompt"
|
|
183
|
-
].vector_column_name = col_names.vector_column_name
|
|
184
|
-
if col_names.data_column_name:
|
|
185
|
-
s.arrow_schema.embedding_feature_column_names_map[
|
|
186
|
-
"prompt"
|
|
187
|
-
].data_column_name = col_names.data_column_name
|
|
188
|
-
if schema.response_column_names is not None:
|
|
189
|
-
if isinstance(schema.response_column_names, str):
|
|
190
|
-
s.arrow_schema.embedding_feature_column_names_map[
|
|
191
|
-
"response"
|
|
192
|
-
].data_column_name = schema.response_column_names
|
|
193
|
-
elif isinstance(schema.response_column_names, EmbeddingColumnNames):
|
|
194
|
-
col_names = schema.response_column_names
|
|
195
|
-
s.arrow_schema.embedding_feature_column_names_map[
|
|
196
|
-
"response"
|
|
197
|
-
].vector_column_name = col_names.vector_column_name
|
|
198
|
-
if col_names.data_column_name:
|
|
199
|
-
s.arrow_schema.embedding_feature_column_names_map[
|
|
200
|
-
"response"
|
|
201
|
-
].data_column_name = col_names.data_column_name
|
|
202
|
-
|
|
203
|
-
if schema.tag_column_names is not None:
|
|
204
|
-
s.arrow_schema.tag_column_names.extend(schema.tag_column_names)
|
|
205
|
-
|
|
206
|
-
if (
|
|
207
|
-
model_type == ModelTypes.RANKING
|
|
208
|
-
and schema.relevance_labels_column_name is not None
|
|
209
|
-
):
|
|
210
|
-
s.arrow_schema.actual_label_column_name = (
|
|
211
|
-
schema.relevance_labels_column_name
|
|
212
|
-
)
|
|
213
|
-
elif (
|
|
214
|
-
model_type == ModelTypes.RANKING
|
|
215
|
-
and schema.attributions_column_name is not None
|
|
216
|
-
):
|
|
217
|
-
s.arrow_schema.actual_label_column_name = (
|
|
218
|
-
schema.attributions_column_name
|
|
219
|
-
)
|
|
220
|
-
elif schema.actual_label_column_name is not None:
|
|
221
|
-
s.arrow_schema.actual_label_column_name = (
|
|
222
|
-
schema.actual_label_column_name
|
|
223
|
-
)
|
|
224
|
-
|
|
225
|
-
if (
|
|
226
|
-
model_type == ModelTypes.RANKING
|
|
227
|
-
and schema.relevance_score_column_name is not None
|
|
228
|
-
):
|
|
229
|
-
s.arrow_schema.actual_score_column_name = (
|
|
230
|
-
schema.relevance_score_column_name
|
|
231
|
-
)
|
|
232
|
-
elif schema.actual_score_column_name is not None:
|
|
233
|
-
if model_type in NUMERIC_MODEL_TYPES:
|
|
234
|
-
# allow numeric prediction to be sent in as either prediction_label (legacy) or
|
|
235
|
-
# prediction_score.
|
|
236
|
-
s.arrow_schema.actual_label_column_name = (
|
|
237
|
-
schema.actual_score_column_name
|
|
238
|
-
)
|
|
239
|
-
else:
|
|
240
|
-
s.arrow_schema.actual_score_column_name = (
|
|
241
|
-
schema.actual_score_column_name
|
|
242
|
-
)
|
|
243
|
-
|
|
244
|
-
if schema.shap_values_column_names is not None:
|
|
245
|
-
s.arrow_schema.shap_values_column_names.update(
|
|
246
|
-
schema.shap_values_column_names
|
|
247
|
-
)
|
|
248
|
-
|
|
249
|
-
if schema.prediction_group_id_column_name is not None:
|
|
250
|
-
s.arrow_schema.prediction_group_id_column_name = (
|
|
251
|
-
schema.prediction_group_id_column_name
|
|
252
|
-
)
|
|
253
|
-
|
|
254
|
-
if schema.rank_column_name is not None:
|
|
255
|
-
s.arrow_schema.rank_column_name = schema.rank_column_name
|
|
256
|
-
|
|
257
|
-
if model_type == ModelTypes.OBJECT_DETECTION:
|
|
258
|
-
if schema.object_detection_actual_column_names is not None:
|
|
259
|
-
s.arrow_schema.actual_object_detection_label_column_names.bboxes_coordinates_column_name = ( # noqa: E501
|
|
260
|
-
schema.object_detection_actual_column_names.bounding_boxes_coordinates_column_name
|
|
261
|
-
)
|
|
262
|
-
s.arrow_schema.actual_object_detection_label_column_names.bboxes_categories_column_name = ( # noqa: E501
|
|
263
|
-
schema.object_detection_actual_column_names.categories_column_name
|
|
264
|
-
)
|
|
265
|
-
if (
|
|
266
|
-
schema.object_detection_actual_column_names.scores_column_name
|
|
267
|
-
is not None
|
|
268
|
-
):
|
|
269
|
-
s.arrow_schema.actual_object_detection_label_column_names.bboxes_scores_column_name = ( # noqa: E501
|
|
270
|
-
schema.object_detection_actual_column_names.scores_column_name
|
|
271
|
-
)
|
|
272
|
-
|
|
273
|
-
if schema.semantic_segmentation_actual_column_names is not None:
|
|
274
|
-
s.arrow_schema.actual_semantic_segmentation_label_column_names.polygons_coordinates_column_name = ( # noqa: E501
|
|
275
|
-
schema.semantic_segmentation_actual_column_names.polygon_coordinates_column_name
|
|
276
|
-
)
|
|
277
|
-
s.arrow_schema.actual_semantic_segmentation_label_column_names.polygons_categories_column_name = ( # noqa: E501
|
|
278
|
-
schema.semantic_segmentation_actual_column_names.categories_column_name
|
|
279
|
-
)
|
|
280
|
-
|
|
281
|
-
if schema.instance_segmentation_actual_column_names is not None:
|
|
282
|
-
s.arrow_schema.actual_instance_segmentation_label_column_names.polygons_coordinates_column_name = ( # noqa: E501
|
|
283
|
-
schema.instance_segmentation_actual_column_names.polygon_coordinates_column_name
|
|
284
|
-
)
|
|
285
|
-
s.arrow_schema.actual_instance_segmentation_label_column_names.polygons_categories_column_name = ( # noqa: E501
|
|
286
|
-
schema.instance_segmentation_actual_column_names.categories_column_name
|
|
287
|
-
)
|
|
288
|
-
if (
|
|
289
|
-
schema.instance_segmentation_actual_column_names.bounding_boxes_coordinates_column_name
|
|
290
|
-
is not None
|
|
291
|
-
):
|
|
292
|
-
s.arrow_schema.actual_instance_segmentation_label_column_names.bboxes_coordinates_column_name = ( # noqa: E501
|
|
293
|
-
schema.instance_segmentation_actual_column_names.bounding_boxes_coordinates_column_name
|
|
294
|
-
)
|
|
295
|
-
|
|
296
|
-
if model_type == ModelTypes.GENERATIVE_LLM:
|
|
297
|
-
if schema.prompt_template_column_names is not None:
|
|
298
|
-
s.arrow_schema.prompt_template_column_names.template_column_name = (
|
|
299
|
-
schema.prompt_template_column_names.template_column_name
|
|
300
|
-
)
|
|
301
|
-
s.arrow_schema.prompt_template_column_names.template_version_column_name = ( # noqa: E501
|
|
302
|
-
schema.prompt_template_column_names.template_version_column_name
|
|
303
|
-
)
|
|
304
|
-
if schema.llm_config_column_names is not None:
|
|
305
|
-
s.arrow_schema.llm_config_column_names.model_column_name = (
|
|
306
|
-
schema.llm_config_column_names.model_column_name
|
|
307
|
-
)
|
|
308
|
-
s.arrow_schema.llm_config_column_names.params_map_column_name = (
|
|
309
|
-
schema.llm_config_column_names.params_column_name
|
|
310
|
-
)
|
|
311
|
-
if schema.retrieved_document_ids_column_name is not None:
|
|
312
|
-
s.arrow_schema.retrieved_document_ids_column_name = (
|
|
313
|
-
schema.retrieved_document_ids_column_name
|
|
314
|
-
)
|
|
315
|
-
if model_type == ModelTypes.MULTI_CLASS:
|
|
316
|
-
if schema.prediction_score_column_name is not None:
|
|
317
|
-
s.arrow_schema.prediction_score_column_name = (
|
|
318
|
-
schema.prediction_score_column_name
|
|
319
|
-
)
|
|
320
|
-
if schema.multi_class_threshold_scores_column_name is not None:
|
|
321
|
-
s.arrow_schema.multi_class_threshold_scores_column_name = (
|
|
322
|
-
schema.multi_class_threshold_scores_column_name
|
|
323
|
-
)
|
|
324
|
-
if schema.actual_score_column_name is not None:
|
|
325
|
-
s.arrow_schema.actual_score_column_name = (
|
|
326
|
-
schema.actual_score_column_name
|
|
327
|
-
)
|
|
328
|
-
return s
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
def get_pb_schema_corpus(
|
|
332
|
-
schema: CorpusSchema,
|
|
333
|
-
model_id: str,
|
|
334
|
-
) -> pb2.Schema:
|
|
335
|
-
s = pb2.Schema()
|
|
336
|
-
s.constants.model_id = model_id
|
|
337
|
-
s.constants.environment = pb2.Schema.Environment.CORPUS
|
|
338
|
-
s.constants.model_type = pb2.Schema.ModelType.GENERATIVE_LLM
|
|
339
|
-
if schema.document_id_column_name is not None:
|
|
340
|
-
s.arrow_schema.document_column_names.id_column_name = (
|
|
341
|
-
schema.document_id_column_name
|
|
342
|
-
)
|
|
343
|
-
if schema.document_version_column_name is not None:
|
|
344
|
-
s.arrow_schema.document_column_names.version_column_name = (
|
|
345
|
-
schema.document_version_column_name
|
|
346
|
-
)
|
|
347
|
-
if schema.document_text_embedding_column_names is not None:
|
|
348
|
-
s.arrow_schema.document_column_names.text_column_name.vector_column_name = schema.document_text_embedding_column_names.vector_column_name # noqa: E501
|
|
349
|
-
s.arrow_schema.document_column_names.text_column_name.data_column_name = schema.document_text_embedding_column_names.data_column_name # noqa: E501
|
|
350
|
-
if (
|
|
351
|
-
schema.document_text_embedding_column_names.link_to_data_column_name
|
|
352
|
-
is not None
|
|
353
|
-
):
|
|
354
|
-
s.arrow_schema.document_column_names.text_column_name.link_to_data_column_name = schema.document_text_embedding_column_names.link_to_data_column_name # noqa: E501
|
|
355
|
-
return s
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
def get_pb_schema_tracing(
|
|
359
|
-
project_name: str,
|
|
360
|
-
) -> pb2.Schema:
|
|
361
|
-
s = pb2.Schema()
|
|
362
|
-
s.constants.model_id = project_name
|
|
363
|
-
s.constants.environment = pb2.Schema.Environment.TRACING
|
|
364
|
-
s.constants.model_type = pb2.Schema.ModelType.GENERATIVE_LLM
|
|
365
|
-
s.arize_spans.SetInParent()
|
|
366
|
-
return s
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
def message_to_dict(
|
|
370
|
-
msg: message.Message,
|
|
371
|
-
preserve_names: bool = True,
|
|
372
|
-
use_int_enums: bool = False,
|
|
373
|
-
):
|
|
374
|
-
return json_format.MessageToDict(
|
|
375
|
-
msg,
|
|
376
|
-
preserving_proto_field_name=preserve_names,
|
|
377
|
-
use_integers_for_enums=use_int_enums,
|
|
378
|
-
)
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
def get_pb_similarity_search_params(
|
|
382
|
-
similarity_params: SimilaritySearchParams,
|
|
383
|
-
) -> flight_exp_pb2.SimilaritySearchParams:
|
|
384
|
-
proto_params = flight_exp_pb2.SimilaritySearchParams()
|
|
385
|
-
proto_params.search_column_name = similarity_params.search_column_name
|
|
386
|
-
proto_params.threshold = similarity_params.threshold
|
|
387
|
-
for ref in similarity_params.references:
|
|
388
|
-
new_ref = proto_params.references.add()
|
|
389
|
-
new_ref.prediction_id = ref.prediction_id
|
|
390
|
-
new_ref.reference_column_name = ref.reference_column_name
|
|
391
|
-
if ref.prediction_timestamp:
|
|
392
|
-
new_ref.prediction_timestamp.FromDatetime(ref.prediction_timestamp)
|
|
393
|
-
|
|
394
|
-
return proto_params
|
|
395
|
-
|
|
396
28
|
|
|
397
29
|
def get_pb_dictionary(d):
|
|
398
30
|
if d is None:
|
arize/spans/client.py
CHANGED
|
@@ -11,6 +11,7 @@ from typing import TYPE_CHECKING, Any, Dict, List
|
|
|
11
11
|
import numpy as np
|
|
12
12
|
import pandas as pd
|
|
13
13
|
import pyarrow as pa
|
|
14
|
+
from google.protobuf import json_format, message
|
|
14
15
|
|
|
15
16
|
from arize._exporter.client import ArizeExportClient
|
|
16
17
|
from arize._flight.client import ArizeFlightClient, WriteSpanResponse
|
|
@@ -19,6 +20,7 @@ from arize._generated.protocol.flight.ingest_pb2 import (
|
|
|
19
20
|
WriteSpanAnnotationResponse,
|
|
20
21
|
WriteSpanEvaluationResponse,
|
|
21
22
|
)
|
|
23
|
+
from arize._generated.protocol.rec import public_pb2 as pb2
|
|
22
24
|
from arize.constants.spans import DEFAULT_DATETIME_FMT
|
|
23
25
|
from arize.exceptions.base import (
|
|
24
26
|
INVALID_ARROW_CONVERSION_MSG,
|
|
@@ -35,7 +37,6 @@ from arize.utils.dataframe import (
|
|
|
35
37
|
remove_extraneous_columns,
|
|
36
38
|
reset_dataframe_index,
|
|
37
39
|
)
|
|
38
|
-
from arize.utils.proto import get_pb_schema_tracing, message_to_dict
|
|
39
40
|
|
|
40
41
|
if TYPE_CHECKING:
|
|
41
42
|
import requests
|
|
@@ -226,7 +227,7 @@ class SpansClient:
|
|
|
226
227
|
log.error(f"Unexpected error creating Arrow table: {str(e)}")
|
|
227
228
|
raise
|
|
228
229
|
|
|
229
|
-
proto_schema =
|
|
230
|
+
proto_schema = _get_pb_schema_tracing(project_name=project_name)
|
|
230
231
|
# Create headers copy for the spans client
|
|
231
232
|
# Safe to mutate, returns a deep copy
|
|
232
233
|
headers = self._sdk_config.headers
|
|
@@ -363,7 +364,7 @@ class SpansClient:
|
|
|
363
364
|
raise
|
|
364
365
|
|
|
365
366
|
if force_http:
|
|
366
|
-
proto_schema =
|
|
367
|
+
proto_schema = _get_pb_schema_tracing(project_name=project_name)
|
|
367
368
|
# Create headers copy for the spans client
|
|
368
369
|
# Safe to mutate, returns a deep copy
|
|
369
370
|
headers = self._sdk_config.headers
|
|
@@ -422,7 +423,7 @@ class SpansClient:
|
|
|
422
423
|
)
|
|
423
424
|
|
|
424
425
|
# Convert Protocol Buffer SpanError objects to dictionaries for easier access
|
|
425
|
-
return
|
|
426
|
+
return _message_to_dict(response)
|
|
426
427
|
|
|
427
428
|
def update_annotations(
|
|
428
429
|
self,
|
|
@@ -636,7 +637,7 @@ class SpansClient:
|
|
|
636
637
|
)
|
|
637
638
|
|
|
638
639
|
# Convert Protocol Buffer SpanError objects to dictionaries for easier access
|
|
639
|
-
return
|
|
640
|
+
return _message_to_dict(response)
|
|
640
641
|
|
|
641
642
|
def update_metadata(
|
|
642
643
|
self,
|
|
@@ -967,7 +968,7 @@ class SpansClient:
|
|
|
967
968
|
)
|
|
968
969
|
|
|
969
970
|
# Convert Protocol Buffer SpanError objects to dictionaries for easier access
|
|
970
|
-
return
|
|
971
|
+
return _message_to_dict(response)
|
|
971
972
|
|
|
972
973
|
def export_to_df(
|
|
973
974
|
self,
|
|
@@ -1226,3 +1227,26 @@ def _log_flight_update_summary(
|
|
|
1226
1227
|
**err,
|
|
1227
1228
|
},
|
|
1228
1229
|
)
|
|
1230
|
+
|
|
1231
|
+
|
|
1232
|
+
def _get_pb_schema_tracing(
|
|
1233
|
+
project_name: str,
|
|
1234
|
+
) -> pb2.Schema:
|
|
1235
|
+
s = pb2.Schema()
|
|
1236
|
+
s.constants.model_id = project_name
|
|
1237
|
+
s.constants.environment = pb2.Schema.Environment.TRACING
|
|
1238
|
+
s.constants.model_type = pb2.Schema.ModelType.GENERATIVE_LLM
|
|
1239
|
+
s.arize_spans.SetInParent()
|
|
1240
|
+
return s
|
|
1241
|
+
|
|
1242
|
+
|
|
1243
|
+
def _message_to_dict(
|
|
1244
|
+
msg: message.Message,
|
|
1245
|
+
preserve_names: bool = True,
|
|
1246
|
+
use_int_enums: bool = False,
|
|
1247
|
+
):
|
|
1248
|
+
return json_format.MessageToDict(
|
|
1249
|
+
msg,
|
|
1250
|
+
preserving_proto_field_name=preserve_names,
|
|
1251
|
+
use_integers_for_enums=use_int_enums,
|
|
1252
|
+
)
|
arize/utils/arrow.py
CHANGED
|
@@ -39,7 +39,7 @@ def post_arrow_table(
|
|
|
39
39
|
|
|
40
40
|
logger.debug("Serializing schema")
|
|
41
41
|
base64_schema = base64.b64encode(proto_schema.SerializeToString())
|
|
42
|
-
pa_schema =
|
|
42
|
+
pa_schema = _append_to_pyarrow_metadata(
|
|
43
43
|
pa_table.schema, {"arize-schema": base64_schema}
|
|
44
44
|
)
|
|
45
45
|
|
|
@@ -68,7 +68,7 @@ def post_arrow_table(
|
|
|
68
68
|
try:
|
|
69
69
|
# Write arrow file
|
|
70
70
|
logger.debug(f"Writing table to temporary file: {outfile}")
|
|
71
|
-
|
|
71
|
+
_write_arrow_file(outfile, pa_table, pa_schema)
|
|
72
72
|
|
|
73
73
|
# Send to Arize
|
|
74
74
|
logger.debug(
|
|
@@ -104,7 +104,7 @@ def post_arrow_table(
|
|
|
104
104
|
)
|
|
105
105
|
|
|
106
106
|
|
|
107
|
-
def
|
|
107
|
+
def _append_to_pyarrow_metadata(
|
|
108
108
|
pa_schema: pa.Schema, new_metadata: Dict[str, Any]
|
|
109
109
|
):
|
|
110
110
|
# Ensure metadata is handled correctly, even if initially None.
|
|
@@ -125,7 +125,7 @@ def append_to_pyarrow_metadata(
|
|
|
125
125
|
return pa_schema.with_metadata(updated_metadata)
|
|
126
126
|
|
|
127
127
|
|
|
128
|
-
def
|
|
128
|
+
def _write_arrow_file(
|
|
129
129
|
path: str, pa_table: pa.Table, pa_schema: pa.Schema
|
|
130
130
|
) -> None:
|
|
131
131
|
with pa.OSFile(path, mode="wb") as sink, pa.ipc.RecordBatchStreamWriter(
|
arize/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "8.0.
|
|
1
|
+
__version__ = "8.0.0a15"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: arize
|
|
3
|
-
Version: 8.0.
|
|
3
|
+
Version: 8.0.0a15
|
|
4
4
|
Summary: A helper library to interact with Arize AI APIs
|
|
5
5
|
Project-URL: Homepage, https://arize.com
|
|
6
6
|
Project-URL: Documentation, https://docs.arize.com/arize
|
|
@@ -424,7 +424,7 @@ examples = [
|
|
|
424
424
|
]
|
|
425
425
|
```
|
|
426
426
|
|
|
427
|
-
If the number of examples (rows in dataframe, items in list) is too large, the client SDK will try to send the data via Arrow Flight via gRPC for better performance. If you want to force the data transfer to HTTP you can use the `force_http` flag. The response is
|
|
427
|
+
If the number of examples (rows in a dataframe, items in a list) is too large, the client SDK will try to send the data via Arrow Flight over gRPC for better performance. If you want to force the data transfer over HTTP, you can use the `force_http` flag. The response is a `Dataset` object.
|
|
428
428
|
|
|
429
429
|
```python
|
|
430
430
|
created_dataset = client.datasets.create(
|
|
@@ -434,11 +434,19 @@ created_dataset = client.datasets.create(
|
|
|
434
434
|
)
|
|
435
435
|
```
|
|
436
436
|
|
|
437
|
+
The `Dataset` object also comes with convenience methods, similar to the `List***` objects:
|
|
438
|
+
|
|
439
|
+
```python
|
|
440
|
+
# Get the response as a dictionary
|
|
441
|
+
dataset_dict = created_dataset.to_dict()
|
|
442
|
+
# Get the response in JSON format
|
|
443
|
+
dataset_json = created_dataset.to_json()
|
|
444
|
+
```
|
|
437
445
|
|
|
438
446
|
|
|
439
447
|
### Get Dataset by ID
|
|
440
448
|
|
|
441
|
-
To get a dataset by its ID use `client.datasets.get()`, you can optionally also pass the version ID of a particular version of interest of the dataset.
|
|
449
|
+
To get a dataset by its ID, use `client.datasets.get()`. You can optionally also pass the version ID of a particular dataset version of interest. The returned type is `Dataset`.
|
|
442
450
|
|
|
443
451
|
```python
|
|
444
452
|
dataset = client.datasets.get(
|
|
@@ -4,14 +4,14 @@ arize/client.py,sha256=H1VdHwtruq9koc37LPP1eSZEax32iQR0porczhCOLYg,6070
|
|
|
4
4
|
arize/config.py,sha256=_EucdoM6k-6DZSurwX_6_n7VIePcKcUccil-Iwk4JH0,6015
|
|
5
5
|
arize/logging.py,sha256=OahBaJRG-z5DPqWrj2_rbe2n0r4fMGOrXpxN_4M_i_w,7244
|
|
6
6
|
arize/types.py,sha256=z1yg5-brmTD4kVHDmmTVkYke53JpusXXeOOpdQw7rYg,69508
|
|
7
|
-
arize/version.py,sha256=
|
|
7
|
+
arize/version.py,sha256=xfGkuMmBI4vsziZkVsELqOGVBEOpe9m7wHXcYtd_5TA,25
|
|
8
8
|
arize/_exporter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
arize/_exporter/client.py,sha256=
|
|
9
|
+
arize/_exporter/client.py,sha256=k3xS-2wx_UlB5toI5RKBoy1bi3ONIxh4KQy4A4a2Omc,15822
|
|
10
10
|
arize/_exporter/validation.py,sha256=6ROu5p7uaolxQ93lO_Eiwv9NVw_uyi3E5T--C5Klo5Q,1021
|
|
11
11
|
arize/_exporter/parsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
12
|
arize/_exporter/parsers/tracing_data_parser.py,sha256=zVS-w8t1HJkz-AIC_JCdjPJ7gJXgFpfELfqNM_vK42E,5395
|
|
13
13
|
arize/_flight/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
-
arize/_flight/client.py,sha256=
|
|
14
|
+
arize/_flight/client.py,sha256=9f5oiMj9UwAGWpQU84QXkS6jpJNdQFdKItpAEuAh_Qk,9833
|
|
15
15
|
arize/_flight/types.py,sha256=OuLupzkGYt7r0PEzsX4NmXV9uq3lD11AeRaHHI5NsSw,146
|
|
16
16
|
arize/_generated/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
17
|
arize/_generated/api_client_README.md,sha256=Ww8o5NA3M07uLm8_SI51HDC8cPJO1tLTOqTU1Pnwo5s,4942
|
|
@@ -56,7 +56,7 @@ arize/constants/ml.py,sha256=X_vtKpt1AdhLoT2DWEyKDSXAVEuzjwGFacIbgUOpB3M,2358
|
|
|
56
56
|
arize/constants/model_mapping.json,sha256=OPE54rBATzmwRhx0tycsxnGae1jBhtqEmQqQvzleTSc,5725
|
|
57
57
|
arize/constants/spans.py,sha256=EfMgbEIK_2EUcvUY5BGnNAbS7bupBKePlI3j2L5T5CE,2532
|
|
58
58
|
arize/datasets/__init__.py,sha256=GVNsjaqzQt4x-nILE41BvWyZqfYAxPQB0oHZgIDW7ws,2289
|
|
59
|
-
arize/datasets/client.py,sha256=
|
|
59
|
+
arize/datasets/client.py,sha256=ykGa60Ms6mXGCegW4Oq5plEJgX_9rJjd6Nu8nEDBnwU,5065
|
|
60
60
|
arize/embeddings/__init__.py,sha256=6_C8908W_qDixkoBJl1wapgmQCzI8TPLH207kzbYsFA,156
|
|
61
61
|
arize/embeddings/auto_generator.py,sha256=ukZUJWRkiG9HFgSHXhr44rt2tdVHn1phb7_nOxYXWEg,4111
|
|
62
62
|
arize/embeddings/base_generators.py,sha256=HybEUAzeESswEDmkmvPayzFab1y8deg5X20HSphGp8Q,8855
|
|
@@ -78,7 +78,9 @@ arize/experiments/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
|
|
|
78
78
|
arize/experiments/client.py,sha256=2fDq0fr_h6Knn_9zgDAlAhSUCKUrKozGLOQRTInCr4c,344
|
|
79
79
|
arize/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
80
80
|
arize/models/bounded_executor.py,sha256=o-PJsDAXQdiJ9dc-jzGCHMhT0-QBY9bvl4Ckn1017Eo,1131
|
|
81
|
-
arize/models/
|
|
81
|
+
arize/models/casting.py,sha256=YVnN0UiddLUVJDnOiXe2YyAzC1THBKbnm_V0qKYORJo,12480
|
|
82
|
+
arize/models/client.py,sha256=xnOC6ViF2NJjgXc0AL5w0wy8HlNevEGVZO6SAk0vVKs,46500
|
|
83
|
+
arize/models/proto.py,sha256=9YkXn5Q6DuQP641fvBu-dlTx1ULQWMtH2wV7gCFDta8,17412
|
|
82
84
|
arize/models/stream_validation.py,sha256=PtmqWgRdCxVtTNkHHEHIM1S6ECbYLA1vuQQFBw_t3Lw,7118
|
|
83
85
|
arize/models/batch_validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
84
86
|
arize/models/batch_validation/errors.py,sha256=__I8l25zf4kGv6qgiwEm9LzGNgqmMSM8Fb88pBtyMxE,39990
|
|
@@ -86,7 +88,7 @@ arize/models/batch_validation/validator.py,sha256=acnGcMt-pETmPJUfYj5tIzIBvmBhWo
|
|
|
86
88
|
arize/models/surrogate_explainer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
87
89
|
arize/models/surrogate_explainer/mimic.py,sha256=MsMfhU9IhQJWm0kK6jpFkcTW6kw5IGJE3Kv94oOzMo0,5517
|
|
88
90
|
arize/spans/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
89
|
-
arize/spans/client.py,sha256=
|
|
91
|
+
arize/spans/client.py,sha256=xiXKnFtzzulngNVscK4TIwj_JMAckPPzA-yLjrVHR3k,47869
|
|
90
92
|
arize/spans/columns.py,sha256=BbB11jF4YHYfjrKbSd1r3K2F0AGA8KULTj1W3e2rwhM,12912
|
|
91
93
|
arize/spans/conversion.py,sha256=U9elK_znCADOcg5pOmq7srrdZwDSQRFApxN7jFZZ0X4,4038
|
|
92
94
|
arize/spans/validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -112,13 +114,11 @@ arize/spans/validation/spans/dataframe_form_validation.py,sha256=NqyaOYzZL5_XIHR
|
|
|
112
114
|
arize/spans/validation/spans/spans_validation.py,sha256=p6IjbQMtOhotGBfw3axj7yMWxb2pC47EAzJSB887nzs,2451
|
|
113
115
|
arize/spans/validation/spans/value_validation.py,sha256=H3qV96w6JQNCed_MxhWDas9Jf6vUj6RFabShcwf4jr4,19102
|
|
114
116
|
arize/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
115
|
-
arize/utils/arrow.py,sha256=
|
|
116
|
-
arize/utils/casting.py,sha256=KUrPUQN6qJEVe39nxbr0T-0GjAJLHjf4xWuzV71QezI,12468
|
|
117
|
+
arize/utils/arrow.py,sha256=C_UDeTW3ugB3rR--1o0dljEfxT727nMvqZV-c9WJRc8,5281
|
|
117
118
|
arize/utils/dataframe.py,sha256=I0FloPgNiqlKga32tMOvTE70598QA8Hhrgf-6zjYMAM,1120
|
|
118
|
-
arize/utils/proto.py,sha256=7PwggGH7iz1Ldwv0BCIxu9-WBeojNXUcT2pDU502Mto,34175
|
|
119
119
|
arize/utils/online_tasks/__init__.py,sha256=nDuTLUTYnZaWgyJoYR1P7O8ZKA-Nba7X6tJ9OislbWM,144
|
|
120
120
|
arize/utils/online_tasks/dataframe_preprocessor.py,sha256=YyeeeFu_FwCYImbYvBZvQIH_5TK2lHru8KSfqV893ps,8884
|
|
121
|
-
arize-8.0.
|
|
122
|
-
arize-8.0.
|
|
123
|
-
arize-8.0.
|
|
124
|
-
arize-8.0.
|
|
121
|
+
arize-8.0.0a15.dist-info/METADATA,sha256=bDqrnSLCMtT-zsdWSG9nakgVJ3v5BMtwYZECuIcSgm0,19157
|
|
122
|
+
arize-8.0.0a15.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
123
|
+
arize-8.0.0a15.dist-info/licenses/LICENSE.md,sha256=8vLN8Gms62NCBorxIv9MUvuK7myueb6_-dhXHPmm4H0,1479
|
|
124
|
+
arize-8.0.0a15.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|