arize-phoenix 2.0.0__py3-none-any.whl → 2.2.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of arize-phoenix might be problematic. Click here for more details.
- {arize_phoenix-2.0.0.dist-info → arize_phoenix-2.2.0rc0.dist-info}/METADATA +5 -1
- {arize_phoenix-2.0.0.dist-info → arize_phoenix-2.2.0rc0.dist-info}/RECORD +31 -29
- phoenix/__init__.py +2 -2
- phoenix/core/evals.py +29 -8
- phoenix/core/traces.py +45 -34
- phoenix/experimental/evals/__init__.py +4 -1
- phoenix/experimental/evals/evaluators.py +85 -8
- phoenix/experimental/evals/functions/classify.py +16 -41
- phoenix/experimental/evals/functions/executor.py +1 -0
- phoenix/experimental/evals/models/anthropic.py +171 -0
- phoenix/experimental/evals/models/vertex.py +155 -0
- phoenix/experimental/evals/templates/__init__.py +2 -0
- phoenix/experimental/evals/templates/default_templates.py +12 -0
- phoenix/experimental/evals/utils/__init__.py +64 -2
- phoenix/server/api/schema.py +24 -0
- phoenix/server/app.py +6 -5
- phoenix/server/main.py +6 -7
- phoenix/server/span_handler.py +7 -7
- phoenix/server/static/index.js +586 -499
- phoenix/server/templates/index.html +5 -1
- phoenix/server/trace_handler.py +56 -0
- phoenix/session/session.py +2 -1
- phoenix/trace/exporter.py +4 -3
- phoenix/trace/langchain/tracer.py +14 -4
- phoenix/trace/otel.py +409 -0
- phoenix/trace/semantic_conventions.py +2 -0
- phoenix/trace/v1/__init__.py +0 -4
- phoenix/version.py +1 -0
- phoenix/trace/v1/trace_pb2.py +0 -54
- phoenix/trace/v1/trace_pb2.pyi +0 -361
- phoenix/trace/v1/utils.py +0 -538
- {arize_phoenix-2.0.0.dist-info → arize_phoenix-2.2.0rc0.dist-info}/WHEEL +0 -0
- {arize_phoenix-2.0.0.dist-info → arize_phoenix-2.2.0rc0.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-2.0.0.dist-info → arize_phoenix-2.2.0rc0.dist-info}/licenses/LICENSE +0 -0
phoenix/trace/v1/utils.py
DELETED
|
@@ -1,538 +0,0 @@
|
|
|
1
|
-
from datetime import datetime, timezone
|
|
2
|
-
from itertools import chain
|
|
3
|
-
from typing import (
|
|
4
|
-
Any,
|
|
5
|
-
Dict,
|
|
6
|
-
Iterable,
|
|
7
|
-
Iterator,
|
|
8
|
-
Mapping,
|
|
9
|
-
Optional,
|
|
10
|
-
SupportsFloat,
|
|
11
|
-
Tuple,
|
|
12
|
-
Union,
|
|
13
|
-
cast,
|
|
14
|
-
)
|
|
15
|
-
|
|
16
|
-
from google.protobuf.json_format import MessageToDict
|
|
17
|
-
from google.protobuf.struct_pb2 import ListValue, Struct
|
|
18
|
-
from google.protobuf.timestamp_pb2 import Timestamp
|
|
19
|
-
from google.protobuf.wrappers_pb2 import BoolValue, FloatValue, StringValue
|
|
20
|
-
|
|
21
|
-
import phoenix.trace.v1 as pb
|
|
22
|
-
from phoenix.trace.schemas import (
|
|
23
|
-
MimeType,
|
|
24
|
-
Span,
|
|
25
|
-
SpanContext,
|
|
26
|
-
SpanEvent,
|
|
27
|
-
SpanException,
|
|
28
|
-
SpanID,
|
|
29
|
-
SpanKind,
|
|
30
|
-
SpanStatusCode,
|
|
31
|
-
TraceID,
|
|
32
|
-
)
|
|
33
|
-
from phoenix.trace.semantic_conventions import (
|
|
34
|
-
DOCUMENT_CONTENT,
|
|
35
|
-
DOCUMENT_ID,
|
|
36
|
-
DOCUMENT_METADATA,
|
|
37
|
-
DOCUMENT_SCORE,
|
|
38
|
-
EMBEDDING_EMBEDDINGS,
|
|
39
|
-
EMBEDDING_MODEL_NAME,
|
|
40
|
-
EMBEDDING_TEXT,
|
|
41
|
-
EMBEDDING_VECTOR,
|
|
42
|
-
EXCEPTION_ESCAPED,
|
|
43
|
-
EXCEPTION_MESSAGE,
|
|
44
|
-
EXCEPTION_STACKTRACE,
|
|
45
|
-
EXCEPTION_TYPE,
|
|
46
|
-
INPUT_MIME_TYPE,
|
|
47
|
-
INPUT_VALUE,
|
|
48
|
-
OUTPUT_MIME_TYPE,
|
|
49
|
-
OUTPUT_VALUE,
|
|
50
|
-
RETRIEVAL_DOCUMENTS,
|
|
51
|
-
)
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def encode(span: Span) -> pb.Span:
    """Serialize a ``Span`` into its protobuf counterpart.

    Retrieval, embedding, input and output attributes are pulled out of the
    span's attribute mapping and stored in their dedicated protobuf fields;
    whatever remains is stored as a generic struct. Events named
    ``"exception"`` are appended to ``exceptions``; all other events are
    appended to ``events``.

    Args:
        span: The span to serialize.

    Returns:
        The populated ``pb.Span`` message.
    """
    # Work on a copy so the span's own attribute mapping is left untouched.
    leftover: Mapping[str, Any] = dict(span.attributes)
    pb_retrieval, leftover = _excise_retrieval(leftover)
    pb_embedding, leftover = _excise_embedding(leftover)
    pb_input, leftover = _excise_input(leftover)
    pb_output, leftover = _excise_output(leftover)

    pb_status = pb.Span.Status(message=_maybe_str(span.status_message))
    # Any other status code leaves the message's default code in place.
    if span.status_code is SpanStatusCode.ERROR:
        pb_status.code = pb.Span.Status.Code.ERROR
    elif span.status_code is SpanStatusCode.OK:
        pb_status.code = pb.Span.Status.Code.OK

    pb_parent_id = None
    if span.parent_id:
        pb_parent_id = StringValue(value=str(span.parent_id))

    pb_span = pb.Span(
        start_time=_as_timestamp(span.start_time),
        end_time=_maybe_timestamp(span.end_time),
        status=pb_status,
        name=span.name,
        kind=span.span_kind.value,
        context=pb.Span.Context(
            trace_id=str(span.context.trace_id),
            span_id=str(span.context.span_id),
        ),
        parent_span_id=pb_parent_id,
        attributes=_maybe_struct(leftover),
        input=pb_input,
        output=pb_output,
        retrieval=pb_retrieval,
        embedding=pb_embedding,
    )

    for event in span.events:
        if event.name != "exception":
            pb_span.events.append(_encode_event(event))
        else:
            pb_span.exceptions.append(_encode_exception(cast(SpanException, event)))
    return pb_span
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
def decode(
    pb_span: pb.Span,
) -> Span:
    """Rebuild a ``Span`` from its protobuf representation.

    The dedicated ``input``, ``output``, ``retrieval`` and ``embedding``
    fields are merged back into the flat attribute dictionary, and plain
    events and exceptions are combined into one list sorted by timestamp.

    Args:
        pb_span: The protobuf span message to decode.

    Returns:
        The decoded ``Span``; ``conversation`` is always ``None``.
    """
    context = SpanContext(
        trace_id=TraceID(pb_span.context.trace_id),
        span_id=SpanID(pb_span.context.span_id),
    )

    parent_id = None
    if pb_span.HasField("parent_span_id"):
        parent_id = SpanID(pb_span.parent_span_id.value)

    start_time = pb_span.start_time.ToDatetime(timezone.utc)
    end_time = None
    if pb_span.HasField("end_time"):
        end_time = pb_span.end_time.ToDatetime(timezone.utc)

    attributes = MessageToDict(pb_span.attributes)
    # Merge order matches the field order: input, output, retrieval, embedding.
    field_decoders = (
        ("input", _decode_input),
        ("output", _decode_output),
        ("retrieval", _decode_retrieval),
        ("embedding", _decode_embedding),
    )
    for field_name, decode_field in field_decoders:
        if pb_span.HasField(field_name):
            attributes.update(decode_field(getattr(pb_span, field_name)))

    events = [_decode_event(pb_event) for pb_event in pb_span.events]
    events.extend(_decode_exception(pb_exception) for pb_exception in pb_span.exceptions)
    events.sort(key=lambda event: event.timestamp)

    pb_code = pb_span.status.code
    if pb_code == pb.Span.Status.Code.OK:
        status_code = SpanStatusCode.OK
    elif pb_code == pb.Span.Status.Code.ERROR:
        status_code = SpanStatusCode.ERROR
    else:
        status_code = SpanStatusCode.UNSET

    return Span(
        name=pb_span.name,
        context=context,
        parent_id=parent_id,
        span_kind=SpanKind(pb_span.kind.upper()),
        start_time=start_time,
        end_time=end_time,
        attributes=attributes,
        status_code=status_code,
        status_message=pb_span.status.message.value,
        conversation=None,
        events=events,
    )
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
def _excise_input(
    attributes: Mapping[str, Any],
) -> Tuple[Optional[pb.Span.IOValue], Dict[str, Any]]:
    """Split the input value and MIME type out of a span's attributes.

    Args:
        attributes: The attribute mapping; it is copied, never mutated.

    Returns:
        A pair ``(io_value, remaining)``. ``io_value`` is ``None`` when
        neither input key is present; ``remaining`` is a copy of
        ``attributes`` without the input keys.
    """
    remaining = dict(attributes)
    if INPUT_VALUE not in remaining and INPUT_MIME_TYPE not in remaining:
        return None, remaining

    value: Optional[str] = remaining.pop(INPUT_VALUE, None)
    assert value is None or isinstance(
        value, str
    ), f"{INPUT_VALUE} must be str, found {type(value)}"

    mime_type: Optional[MimeType] = remaining.pop(INPUT_MIME_TYPE, None)
    assert mime_type is None or isinstance(
        mime_type, MimeType
    ), f"{INPUT_MIME_TYPE} must be MimeType, found {type(mime_type)}"

    # A missing (or empty) value is encoded as the empty string.
    pb_io_value = _encode_io_value(value or "", mime_type)
    return pb_io_value, remaining
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
def _excise_output(
    attributes: Mapping[str, Any],
) -> Tuple[Optional[pb.Span.IOValue], Dict[str, Any]]:
    """Split the output value and MIME type out of a span's attributes.

    Args:
        attributes: The attribute mapping; it is copied, never mutated.

    Returns:
        A pair ``(io_value, remaining)``. ``io_value`` is ``None`` when
        neither output key is present; ``remaining`` is a copy of
        ``attributes`` without the output keys.
    """
    remaining = dict(attributes)
    if OUTPUT_VALUE not in remaining and OUTPUT_MIME_TYPE not in remaining:
        return None, remaining

    value: Optional[str] = remaining.pop(OUTPUT_VALUE, None)
    assert value is None or isinstance(
        value, str
    ), f"{OUTPUT_VALUE} must be str, found {type(value)}"

    mime_type: Optional[MimeType] = remaining.pop(OUTPUT_MIME_TYPE, None)
    assert mime_type is None or isinstance(
        mime_type, MimeType
    ), f"{OUTPUT_MIME_TYPE} must be MimeType, found {type(mime_type)}"

    # A missing (or empty) value is encoded as the empty string.
    pb_io_value = _encode_io_value(value or "", mime_type)
    return pb_io_value, remaining
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
def _excise_retrieval(
    attributes: Mapping[str, Any],
) -> Tuple[Optional[pb.Retrieval], Dict[str, Any]]:
    """Split the retrieval documents out of a span's attributes.

    Args:
        attributes: The attribute mapping; it is copied, never mutated.

    Returns:
        A pair ``(retrieval, remaining)``. ``retrieval`` is ``None`` when
        the documents key is absent; ``remaining`` is a copy of
        ``attributes`` without that key.
    """
    remaining = dict(attributes)
    if RETRIEVAL_DOCUMENTS not in remaining:
        return None, remaining

    docs: Optional[Iterable[Mapping[str, Any]]] = remaining.pop(RETRIEVAL_DOCUMENTS, None)
    assert docs is None or isinstance(
        docs, Iterable
    ), f"{RETRIEVAL_DOCUMENTS} must be Iterable, found {type(docs)}"

    # A key that is present but holds None is encoded as an empty retrieval.
    if docs is None:
        docs = ()
    return _encode_retrieval(documents=docs), remaining
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
def _excise_embedding(
    attributes: Mapping[str, Any],
) -> Tuple[Optional[pb.Embedding], Dict[str, Any]]:
    """Split the embedding attributes out of a span's attributes.

    Args:
        attributes: The attribute mapping; it is copied, never mutated.

    Returns:
        A pair ``(embedding, remaining)``. ``embedding`` is ``None`` when
        neither the embeddings key nor the model-name key is present;
        ``remaining`` is a copy of ``attributes`` without those keys.
    """
    _attributes = dict(attributes)
    if not (
        _attributes.keys()
        & {
            EMBEDDING_EMBEDDINGS,
            EMBEDDING_MODEL_NAME,
        }
    ):
        return None, _attributes
    embeddings: Optional[Iterable[Mapping[str, Any]]] = _attributes.pop(EMBEDDING_EMBEDDINGS, None)
    # The message now names the type that is actually checked (Iterable);
    # it previously claimed "Mapping".
    assert embeddings is None or isinstance(
        embeddings, Iterable
    ), f"{EMBEDDING_EMBEDDINGS} must be Iterable, found {type(embeddings)}"
    model_name: Optional[str] = _attributes.pop(EMBEDDING_MODEL_NAME, None)
    assert model_name is None or isinstance(
        model_name, str
    ), f"{EMBEDDING_MODEL_NAME} must be str, found {type(model_name)}"
    return (
        _encode_embedding(
            # Missing or empty embeddings are encoded as an empty sequence.
            embeddings=embeddings or (),
            model_name=model_name,
        ),
        _attributes,
    )
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
def _encode_event(
    span_event: SpanEvent,
) -> pb.Span.Event:
    """Serialize a non-exception span event into its protobuf message.

    Attributes go through ``_maybe_struct``, the same path used for span and
    exception attributes, so values that are generic mappings or iterables
    are converted to ``dict``/``list`` before being stored instead of being
    rejected by ``Struct.update``. Empty attributes leave the field unset.

    Args:
        span_event: The event to serialize.

    Returns:
        The populated ``pb.Span.Event`` message.

    Raises:
        TypeError: If an attribute value has a type ``_as_struct`` cannot
            convert.
    """
    return pb.Span.Event(
        name=span_event.name,
        timestamp=_as_timestamp(span_event.timestamp),
        attributes=_maybe_struct(span_event.attributes),
    )
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
def _decode_event(
    pb_span_event: pb.Span.Event,
) -> SpanEvent:
    """Rebuild a ``SpanEvent`` from its protobuf message.

    The timestamp is converted to a UTC-aware ``datetime`` and the attribute
    struct is converted to a plain dictionary.
    """
    occurred_at = pb_span_event.timestamp.ToDatetime(timezone.utc)
    event_attributes = MessageToDict(pb_span_event.attributes)
    return SpanEvent(
        name=pb_span_event.name,
        timestamp=occurred_at,
        attributes=event_attributes,
    )
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
def _encode_exception(
    span_exception: SpanException,
) -> pb.Span.Exception:
    """Serialize an exception event into its protobuf message.

    The message, type, escaped flag and stacktrace are removed from a copy
    of the event's attributes and stored in dedicated fields; the remaining
    attributes are stored as a generic struct.

    Args:
        span_exception: The exception event to serialize.

    Returns:
        The populated ``pb.Span.Exception`` message.
    """
    pb_timestamp = Timestamp()
    pb_timestamp.FromDatetime(span_exception.timestamp)

    # Pop from a copy so the event's own attributes are left untouched.
    extras: Dict[str, Any] = dict(span_exception.attributes)
    message: Optional[str] = extras.pop(EXCEPTION_MESSAGE, None)
    exc_type: Optional[str] = extras.pop(EXCEPTION_TYPE, None)
    escaped: Optional[bool] = extras.pop(EXCEPTION_ESCAPED, None)
    stacktrace: Optional[str] = extras.pop(EXCEPTION_STACKTRACE, None)

    assert message is None or isinstance(
        message, str
    ), f"{EXCEPTION_MESSAGE} must be str, found {type(message)}"
    assert exc_type is None or isinstance(
        exc_type, str
    ), f"{EXCEPTION_TYPE} must be str, found {type(exc_type)}"
    assert escaped is None or isinstance(
        escaped, bool
    ), f"{EXCEPTION_ESCAPED} must be bool, found {type(escaped)}"
    assert stacktrace is None or isinstance(
        stacktrace, str
    ), f"{EXCEPTION_STACKTRACE} must be str, found {type(stacktrace)}"

    return pb.Span.Exception(
        timestamp=pb_timestamp,
        message=_maybe_str(message),
        type=_maybe_str(exc_type),
        escaped=_maybe_bool(escaped),
        stacktrace=_maybe_str(stacktrace),
        attributes=_maybe_struct(extras),
    )
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
def _decode_exception(
    pb_span_exception: pb.Span.Exception,
) -> SpanException:
    """Rebuild a ``SpanException`` from its protobuf message.

    Wrapper fields that are unset decode to ``None``, except the message,
    which falls back to the empty string. Attributes stored in the generic
    struct are merged into the resulting event's attributes.
    """

    def _unwrap(field_name: str) -> Any:
        # Return the wrapped value, or None when the wrapper field is unset.
        if not pb_span_exception.HasField(field_name):
            return None
        return getattr(pb_span_exception, field_name).value

    message = _unwrap("message")
    exc_type = _unwrap("type")
    escaped = _unwrap("escaped")
    stacktrace = _unwrap("stacktrace")

    decoded = SpanException(
        timestamp=pb_span_exception.timestamp.ToDatetime(timezone.utc),
        message=message or "",
        exception_type=exc_type,
        exception_escaped=escaped,
        exception_stacktrace=stacktrace,
    )
    extra_attributes = MessageToDict(pb_span_exception.attributes)
    decoded.attributes.update(extra_attributes)
    return decoded
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
def _decode_input(
    pb_io_value: pb.Span.IOValue,
) -> Iterator[Tuple[str, Union[str, MimeType]]]:
    """Pair the decoded input value (and MIME type, if any) with its keys.

    ``zip`` stops at the shorter iterable, so the MIME-type pair is only
    produced when ``_decode_io_value`` yields a second item.
    """
    keys = (INPUT_VALUE, INPUT_MIME_TYPE)
    values = _decode_io_value(pb_io_value)
    return zip(keys, values)
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
def _decode_output(
    pb_io_value: pb.Span.IOValue,
) -> Iterator[Tuple[str, Union[str, MimeType]]]:
    """Pair the decoded output value (and MIME type, if any) with its keys.

    ``zip`` stops at the shorter iterable, so the MIME-type pair is only
    produced when ``_decode_io_value`` yields a second item.
    """
    keys = (OUTPUT_VALUE, OUTPUT_MIME_TYPE)
    values = _decode_io_value(pb_io_value)
    return zip(keys, values)
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
def _encode_io_value(
    io_value: str,
    mime_type: Optional[MimeType],
) -> pb.Span.IOValue:
    """Wrap an input/output string in a ``pb.Span.IOValue`` message.

    The MIME type field is set only for ``MimeType.JSON``; for any other
    value (including ``None``) it is left at the message's default.
    """
    if mime_type is not MimeType.JSON:
        return pb.Span.IOValue(value=io_value)
    return pb.Span.IOValue(
        value=io_value,
        mime_type=pb.Span.IOValue.MimeType.JSON,
    )
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
def _decode_io_value(
    pb_io_value: pb.Span.IOValue,
) -> Iterator[Union[str, MimeType]]:
    """Yield the value of an ``IOValue`` and, for JSON, its MIME type.

    Yields:
        The string value first, then ``MimeType.JSON`` only when the
        message's MIME type is JSON.
    """
    yield pb_io_value.value
    # Protobuf enum values are plain integers, so compare by equality;
    # an identity check only works by accident of small-int caching.
    if pb_io_value.mime_type == pb.Span.IOValue.MimeType.JSON:
        yield MimeType.JSON
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
def _encode_retrieval(
    documents: Iterable[Mapping[str, Any]],
) -> pb.Retrieval:
    """Build a ``pb.Retrieval`` message from an iterable of document mappings."""
    pb_documents = (_encode_retrieval_document(document) for document in documents)
    return pb.Retrieval(documents=pb_documents)
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
def _decode_retrieval(
    pb_retrieval: pb.Retrieval,
) -> Iterator[Tuple[str, Any]]:
    """Yield the single ``(RETRIEVAL_DOCUMENTS, documents)`` attribute pair.

    Each protobuf document is decoded into a plain dictionary.
    """
    documents = []
    for pb_document in pb_retrieval.documents:
        documents.append(dict(_decode_retrieval_document(pb_document)))
    yield RETRIEVAL_DOCUMENTS, documents
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
def _encode_retrieval_document(
    document: Mapping[str, Any],
) -> pb.Retrieval.Document:
    """Serialize one retrieved-document mapping into its protobuf message.

    The id, score, content and metadata entries are removed from a copy of
    the mapping and stored in dedicated fields; the remaining entries are
    stored as a generic struct.
    """
    extras: Dict[str, Any] = dict(document)
    doc_id: Optional[str] = extras.pop(DOCUMENT_ID, None)
    score: Optional[float] = extras.pop(DOCUMENT_SCORE, None)
    content: Optional[str] = extras.pop(DOCUMENT_CONTENT, None)
    metadata: Optional[Mapping[str, Any]] = extras.pop(DOCUMENT_METADATA, None)

    assert doc_id is None or isinstance(
        doc_id, str
    ), f"{DOCUMENT_ID} must be str, found {type(doc_id)}"
    assert score is None or isinstance(
        score, SupportsFloat
    ), f"{DOCUMENT_SCORE} must be float, found {type(score)}"
    assert content is None or isinstance(
        content, str
    ), f"{DOCUMENT_CONTENT} must be str, found {type(content)}"
    assert metadata is None or isinstance(
        metadata, Mapping
    ), f"{DOCUMENT_METADATA} must be Mapping, found {type(metadata)}"

    return pb.Retrieval.Document(
        id=_maybe_str(doc_id),
        score=_maybe_float(score),
        content=_maybe_str(content),
        metadata=_maybe_struct(metadata),
        attributes=_maybe_struct(extras),
    )
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
def _decode_retrieval_document(
    pb_document: pb.Retrieval.Document,
) -> Iterator[Tuple[str, Any]]:
    """Yield ``(key, value)`` pairs for the fields set on a protobuf document.

    Pairs are produced in the order id, score, content, metadata, followed
    by the entries of the generic attributes struct. Unset fields are
    skipped.
    """
    wrapped_fields = (
        (DOCUMENT_ID, "id"),
        (DOCUMENT_SCORE, "score"),
        (DOCUMENT_CONTENT, "content"),
    )
    for key, field_name in wrapped_fields:
        if pb_document.HasField(field_name):
            yield key, getattr(pb_document, field_name).value
    if pb_document.HasField("metadata"):
        yield DOCUMENT_METADATA, MessageToDict(pb_document.metadata)
    if pb_document.HasField("attributes"):
        extras = MessageToDict(pb_document.attributes)
        yield from extras.items()
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
def _encode_embedding(
    embeddings: Iterable[Mapping[str, Any]],
    model_name: Optional[str],
) -> pb.Embedding:
    """Build a ``pb.Embedding`` message from embedding mappings and a model name."""
    pb_embeddings = (_encode_embedding_embedding(item) for item in embeddings)
    return pb.Embedding(
        model_name=_maybe_str(model_name),
        embeddings=pb_embeddings,
    )
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
def _decode_embedding(
    pb_embedding: pb.Embedding,
) -> Iterator[Tuple[str, Any]]:
    """Yield the attribute pairs encoded in a ``pb.Embedding`` message.

    The model name is yielded only when it is set; the list of decoded
    embeddings is always yielded.
    """
    if pb_embedding.HasField("model_name"):
        yield EMBEDDING_MODEL_NAME, pb_embedding.model_name.value
    decoded = []
    for pb_item in pb_embedding.embeddings:
        decoded.append(dict(_decode_embedding_embedding(pb_item)))
    yield EMBEDDING_EMBEDDINGS, decoded
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
def _encode_embedding_embedding(
    embedding: Mapping[str, Any],
) -> pb.Embedding.Embedding:
    """Serialize one embedding mapping into its protobuf message.

    The vector and text entries are removed from a copy of the mapping and
    stored in dedicated fields; the remaining entries are stored as a
    generic struct. A missing vector is encoded as an empty sequence.
    """
    extras = dict(embedding)
    vector: Optional[Iterable[float]] = extras.pop(EMBEDDING_VECTOR, None)
    text: Optional[str] = extras.pop(EMBEDDING_TEXT, None)

    assert vector is None or isinstance(
        vector, Iterable
    ), f"{EMBEDDING_VECTOR} must be Iterable, found {type(vector)}"
    assert text is None or isinstance(
        text, str
    ), f"{EMBEDDING_TEXT} must be str, found {type(text)}"

    if vector is None:
        vector = ()
    return pb.Embedding.Embedding(
        vector=vector,
        text=_maybe_str(text),
        attributes=_maybe_struct(extras),
    )
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
def _decode_embedding_embedding(
    pb_embedding_embedding: pb.Embedding.Embedding,
) -> Iterator[Tuple[str, Any]]:
    """Yield the attribute pairs encoded in a single protobuf embedding.

    The vector is always yielded (as a list); the text and the entries of
    the generic attributes struct are yielded only when set.
    """
    vector = list(pb_embedding_embedding.vector)
    yield EMBEDDING_VECTOR, vector
    if pb_embedding_embedding.HasField("text"):
        yield EMBEDDING_TEXT, pb_embedding_embedding.text.value
    if pb_embedding_embedding.HasField("attributes"):
        extras = MessageToDict(pb_embedding_embedding.attributes)
        yield from extras.items()
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
def _maybe_str(obj: Optional[str]) -> Optional[StringValue]:
    """Wrap ``obj`` in a ``StringValue``; ``None`` and ``""`` both give ``None``."""
    if not obj:
        return None
    return StringValue(value=obj)
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
def _maybe_float(obj: Optional[float]) -> Optional[FloatValue]:
    """Wrap ``obj`` in a ``FloatValue``, or return ``None`` when it is ``None``."""
    if obj is None:
        return None
    return FloatValue(value=obj)
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
def _maybe_bool(obj: Optional[bool]) -> Optional[BoolValue]:
    """Wrap ``obj`` in a ``BoolValue``, or return ``None`` when it is ``None``."""
    if obj is None:
        return None
    return BoolValue(value=obj)
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
def _as_timestamp(obj: datetime) -> Timestamp:
    """Convert a ``datetime`` into a protobuf ``Timestamp`` message."""
    pb_timestamp = Timestamp()
    pb_timestamp.FromDatetime(obj)
    return pb_timestamp
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
def _maybe_timestamp(obj: Optional[datetime]) -> Optional[Timestamp]:
    """Convert ``obj`` to a protobuf ``Timestamp``; a falsy ``obj`` gives ``None``."""
    if not obj:
        return None
    return _as_timestamp(obj)
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
def _as_struct(obj: Mapping[str, Any]) -> Struct:
    """Convert a mapping into a protobuf ``Struct``.

    Top-level values of types that ``Struct`` does not accept directly are
    converted first: generic mappings become ``dict`` and other iterables
    become ``list``.

    Raises:
        TypeError: If a value is neither directly supported nor a mapping
            or iterable.
    """
    # The supported types below are based on _SetStructValue in protobuf 3.20
    # see https://github.com/protocolbuffers/protobuf/blob/5a3dac894157bf3618b2c906a8b9073b4cad62b6/python/google/protobuf/internal/well_known_types.py#L733C42  # noqa: E501
    directly_supported = (str, int, float, bool, list, dict, Struct, ListValue)
    result = Struct()
    for key, value in obj.items():
        if value is None or isinstance(value, directly_supported):
            result[key] = value
        elif isinstance(value, Mapping):
            result[key] = dict(value)
        elif isinstance(value, Iterable):
            # A use-case is when we have numpy.ndarray as a value, which can come from pyarrow.
            # Note that this doesn't handle numpy.ndarray with more than one dimension.
            result[key] = list(value)
        else:
            raise TypeError(f"Unsupported type {type(value)} for key {key}")
    return result
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
def _maybe_struct(obj: Optional[Mapping[str, Any]]) -> Optional[Struct]:
    """Convert ``obj`` to a ``Struct``; ``None`` or an empty mapping gives ``None``."""
    if not obj:
        return None
    return _as_struct(obj)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|