arize-phoenix 2.11.1__py3-none-any.whl → 3.0.1__py3-none-any.whl

This diff shows the changes between publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.

Note: this release of arize-phoenix has been flagged as potentially problematic.

@@ -1,432 +1,46 @@
-import json
+"""
+This module is defunct and will be removed in the future. It's currently
+maintaining a dummy class to avoid breaking any import code.
+"""
 import logging
-from copy import deepcopy
-from datetime import datetime, timezone
-from typing import Any, Dict, Iterable, Iterator, List, Mapping, Optional, Tuple, cast
-from uuid import UUID
+import sys
+from typing import Any, Iterator
 
-from langchain.callbacks.tracers.base import BaseTracer
-from langchain.callbacks.tracers.schemas import Run
-from langchain.load.dump import dumpd
-from langchain.schema.messages import BaseMessage
-
-from phoenix.trace.exporter import HttpExporter
-from phoenix.trace.schemas import MimeType, Span, SpanEvent, SpanException, SpanKind, SpanStatusCode
-from phoenix.trace.semantic_conventions import (
-    DOCUMENT_CONTENT,
-    DOCUMENT_METADATA,
-    INPUT_MIME_TYPE,
-    INPUT_VALUE,
-    LLM_FUNCTION_CALL,
-    LLM_INPUT_MESSAGES,
-    LLM_INVOCATION_PARAMETERS,
-    LLM_MODEL_NAME,
-    LLM_OUTPUT_MESSAGES,
-    LLM_PROMPT_TEMPLATE,
-    LLM_PROMPT_TEMPLATE_VARIABLES,
-    LLM_PROMPT_TEMPLATE_VERSION,
-    LLM_PROMPTS,
-    LLM_TOKEN_COUNT_COMPLETION,
-    LLM_TOKEN_COUNT_PROMPT,
-    LLM_TOKEN_COUNT_TOTAL,
-    MESSAGE_CONTENT,
-    MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON,
-    MESSAGE_FUNCTION_CALL_NAME,
-    MESSAGE_ROLE,
-    MESSAGE_TOOL_CALLS,
-    OUTPUT_MIME_TYPE,
-    OUTPUT_VALUE,
-    RETRIEVAL_DOCUMENTS,
-    TOOL_CALL_FUNCTION_ARGUMENTS_JSON,
-    TOOL_CALL_FUNCTION_NAME,
-    TOOL_DESCRIPTION,
-    TOOL_NAME,
-)
-from phoenix.trace.tracer import Tracer
-from phoenix.utilities.error_handling import graceful_fallback
+from phoenix.trace.schemas import Span
 
 logger = logging.getLogger(__name__)
 
-
-Message = Dict[str, Any]
-
-
-def _langchain_run_type_to_span_kind(run_type: str) -> SpanKind:
-    # TODO: LangChain is moving away from enums and to arbitrary strings
-    # for the run_type variable, so we may need to do the same
-    try:
-        return SpanKind(run_type.upper())
-    except ValueError:
-        return SpanKind.UNKNOWN
-
-
-def _serialize_json(obj: Any) -> str:
-    if isinstance(obj, datetime):
-        return obj.isoformat()
-    return str(obj)
-
-
-def _convert_io(obj: Optional[Dict[str, Any]]) -> Iterator[Any]:
-    if not obj:
-        return
-    if not isinstance(obj, dict):
-        raise ValueError(f"obj should be dict, but obj={obj}")
-    if len(obj) == 1 and isinstance(value := next(iter(obj.values())), str):
-        yield value
-    else:
-        yield json.dumps(obj, default=_serialize_json)
-        yield MimeType.JSON
-
-
-def _prompts(run_inputs: Dict[str, Any]) -> Iterator[Tuple[str, List[str]]]:
-    """Yields prompts if present."""
-    if "prompts" in run_inputs:
-        yield LLM_PROMPTS, run_inputs["prompts"]
-
-
-def _input_messages(run_inputs: Mapping[str, Any]) -> Iterator[Tuple[str, List[Message]]]:
-    """Yields chat messages if present."""
-    if not hasattr(run_inputs, "get"):
-        return
-    # There may be more than one set of messages. We'll use just the first set.
-    if not (multiple_messages := run_inputs.get("messages")):
-        return
-    assert isinstance(
-        multiple_messages, Iterable
-    ), f"expected Iterable, found {type(multiple_messages)}"
-    # This will only get the first set of messages.
-    if not (first_messages := next(iter(multiple_messages), None)):
-        return
-    assert isinstance(first_messages, Iterable), f"expected Iterable, found {type(first_messages)}"
-    parsed_messages = []
-    for message_data in first_messages:
-        assert hasattr(message_data, "get"), f"expected Mapping, found {type(message_data)}"
-        parsed_messages.append(_parse_message_data(message_data))
-    if parsed_messages:
-        yield LLM_INPUT_MESSAGES, parsed_messages
-
-
-def _output_messages(run_outputs: Mapping[str, Any]) -> Iterator[Tuple[str, List[Message]]]:
-    """Yields chat messages if present."""
-    if not hasattr(run_outputs, "get"):
-        return
-    # There may be more than one set of generations. We'll use just the first set.
-    if not (multiple_generations := run_outputs.get("generations")):
-        return
-    assert isinstance(
-        multiple_generations, Iterable
-    ), f"expected Iterable, found {type(multiple_generations)}"
-    # This will only get the first set of generations.
-    if not (first_generations := next(iter(multiple_generations), None)):
-        return
-    assert isinstance(
-        first_generations, Iterable
-    ), f"expected Iterable, found {type(first_generations)}"
-    parsed_messages = []
-    for generation in first_generations:
-        assert hasattr(generation, "get"), f"expected Mapping, found {type(generation)}"
-        if message_data := generation.get("message"):
-            assert hasattr(message_data, "get"), f"expected Mapping, found {type(message_data)}"
-            parsed_messages.append(_parse_message_data(message_data))
-    if parsed_messages:
-        yield LLM_OUTPUT_MESSAGES, parsed_messages
-
-
-def _parse_message_data(message_data: Mapping[str, Any]) -> Message:
-    """Parses message data to grab message role, content, etc."""
-    message_class_name = message_data["id"][-1]
-    if message_class_name.startswith("HumanMessage"):
-        role = "user"
-    elif message_class_name.startswith("AIMessage"):
-        role = "assistant"
-    elif message_class_name.startswith("SystemMessage"):
-        role = "system"
-    elif message_class_name.startswith("FunctionMessage"):
-        role = "function"
-    elif message_class_name.startswith("ChatMessage"):
-        role = message_data["kwargs"]["role"]
-    else:
-        raise ValueError(f"Cannot parse message of type: {message_class_name}")
-    parsed_message_data: Dict[str, Any] = {MESSAGE_ROLE: role}
-    if kwargs := message_data.get("kwargs"):
-        assert hasattr(kwargs, "get"), f"expected Mapping, found {type(kwargs)}"
-        if content := kwargs.get("content"):
-            assert isinstance(content, str), f"content must be str, found {type(content)}"
-            parsed_message_data[MESSAGE_CONTENT] = content
-        if additional_kwargs := kwargs.get("additional_kwargs"):
-            assert hasattr(
-                additional_kwargs, "get"
-            ), f"expected Mapping, found {type(additional_kwargs)}"
-            if function_call := additional_kwargs.get("function_call"):
-                assert hasattr(
-                    function_call, "get"
-                ), f"expected Mapping, found {type(function_call)}"
-                if name := function_call.get("name"):
-                    assert isinstance(name, str), f"name must be str, found {type(name)}"
-                    parsed_message_data[MESSAGE_FUNCTION_CALL_NAME] = name
-                if arguments := function_call.get("arguments"):
-                    assert isinstance(
-                        arguments, str
-                    ), f"arguments must be str, found {type(arguments)}"
-                    parsed_message_data[MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON] = arguments
-            if tool_calls := additional_kwargs.get("tool_calls"):
-                assert isinstance(
-                    tool_calls, Iterable
-                ), f"tool_calls must be Iterable, found {type(tool_calls)}"
-                message_tool_calls = []
-                for tool_call in tool_calls:
-                    if message_tool_call := dict(_get_tool_call(tool_call)):
-                        message_tool_calls.append(message_tool_call)
-                if message_tool_calls:
-                    parsed_message_data[MESSAGE_TOOL_CALLS] = message_tool_calls
-    return parsed_message_data
-
-
-def _get_tool_call(tool_call: Mapping[str, Any]) -> Iterator[Tuple[str, Any]]:
-    if function := tool_call.get("function"):
-        assert hasattr(function, "get"), f"expected Mapping, found {type(function)}"
-        if name := function.get("name"):
-            assert isinstance(name, str), f"name must be str, found {type(name)}"
-            yield TOOL_CALL_FUNCTION_NAME, name
-        if arguments := function.get("arguments"):
-            assert isinstance(arguments, str), f"arguments must be str, found {type(arguments)}"
-            yield TOOL_CALL_FUNCTION_ARGUMENTS_JSON, arguments
-
-
-def _prompt_template(run_serialized: Dict[str, Any]) -> Iterator[Tuple[str, Any]]:
-    """
-    A best-effort attempt to locate the PromptTemplate object among the
-    keyword arguments of a serialized object, e.g. an LLMChain object.
-    """
-    for obj in run_serialized.get("kwargs", {}).values():
-        if not isinstance(obj, dict) or "id" not in obj:
-            continue
-        # The `id` field of the object is a list indicating the path to the
-        # object's class in the LangChain package, e.g. `PromptTemplate` in
-        # the `langchain.prompts.prompt` module is represented as
-        # ["langchain", "prompts", "prompt", "PromptTemplate"]
-        if obj["id"][-1].endswith("PromptTemplate"):
-            kwargs = obj.get("kwargs", {})
-            if not (template := kwargs.get("template", "")):
-                continue
-            yield LLM_PROMPT_TEMPLATE, template
-            yield LLM_PROMPT_TEMPLATE_VARIABLES, kwargs.get("input_variables", [])
-            yield LLM_PROMPT_TEMPLATE_VERSION, "unknown"
-            break
-
-
-def _invocation_parameters(run: Dict[str, Any]) -> Iterator[Tuple[str, str]]:
-    """Yields invocation parameters if present."""
-    if run["run_type"] != "llm":
-        return
-    run_extra = run["extra"]
-    yield LLM_INVOCATION_PARAMETERS, json.dumps(run_extra.get("invocation_params", {}))
-
-
-def _model_name(run_extra: Dict[str, Any]) -> Iterator[Tuple[str, str]]:
-    """Yields model name if present."""
-    if not (invocation_params := run_extra.get("invocation_params")):
-        return
-    for key in ["model_name", "model"]:
-        if name := invocation_params.get(key):
-            yield LLM_MODEL_NAME, name
-            return
-
-
-def _token_counts(run_outputs: Dict[str, Any]) -> Iterator[Tuple[str, int]]:
-    """Yields token count information if present."""
-    try:
-        token_usage = run_outputs["llm_output"]["token_usage"]
-    except Exception:
-        return
-    for attribute_name, key in [
-        (LLM_TOKEN_COUNT_PROMPT, "prompt_tokens"),
-        (LLM_TOKEN_COUNT_COMPLETION, "completion_tokens"),
-        (LLM_TOKEN_COUNT_TOTAL, "total_tokens"),
-    ]:
-        if (token_count := token_usage.get(key)) is not None:
-            yield attribute_name, token_count
-
-
-def _function_calls(run_outputs: Dict[str, Any]) -> Iterator[Tuple[str, str]]:
-    """Yields function call information if present."""
-    try:
-        function_call_data = deepcopy(
-            run_outputs["generations"][0][0]["message"]["kwargs"]["additional_kwargs"][
-                "function_call"
-            ]
-        )
-        function_call_data["arguments"] = json.loads(function_call_data["arguments"])
-        yield LLM_FUNCTION_CALL, json.dumps(function_call_data)
-    except Exception:
-        pass
-
-
-def _tools(run: Dict[str, Any]) -> Iterator[Tuple[str, str]]:
-    """Yields tool attributes if present."""
-    if run["run_type"] != "tool":
-        return
-    run_serialized = run["serialized"]
-    if "name" in run_serialized:
-        yield TOOL_NAME, run_serialized["name"]
-    if "description" in run_serialized:
-        yield TOOL_DESCRIPTION, run_serialized["description"]
-    # TODO: tool parameters https://github.com/Arize-ai/phoenix/issues/1330
-
-
-def _retrieval_documents(
-    run: Dict[str, Any],
-) -> Iterator[Tuple[str, List[Any]]]:
-    if run["run_type"] != "retriever":
-        return
-    yield (
-        RETRIEVAL_DOCUMENTS,
-        [
-            {
-                DOCUMENT_CONTENT: document.get("page_content"),
-                DOCUMENT_METADATA: document.get("metadata") or {},
-            }
-            for document in (run.get("outputs") or {}).get("documents") or []
-        ],
-    )
-
-
-def _chat_model_start_fallback(
-    serialized: Dict[str, Any],
-    messages: List[List[BaseMessage]],
-    *,
-    run_id: UUID,
-    tags: Optional[List[str]] = None,
-    parent_run_id: Optional[UUID] = None,
-    metadata: Optional[Dict[str, Any]] = None,
-    **kwargs: Any,
-) -> None:
-    # Currently does nothing. If a functional fallback is implemented, new failures will not be
-    # caught
-    pass
+_DUMMY = "OpenInferenceTracer"
+_DEPRECATION_MESSAGE = (
+    f"`{__name__}.{_DUMMY}` is a defunct class in the current version of Phoenix, "
+    "and will be removed in the future. For a migration guide, see "
+    "https://github.com/Arize-ai/phoenix/blob/main/MIGRATION.md"
+)
 
 
-class OpenInferenceTracer(Tracer, BaseTracer):  # type: ignore
-    def __init__(self, *args: Any, **kwargs: Any) -> None:
-        super().__init__(*args, **kwargs)
-        self._exporter = self._exporter or HttpExporter()
+class _DummyObject:
+    def __init__(self, *_: Any, **__: Any) -> None:
+        logger.warning(_DEPRECATION_MESSAGE)
 
-    def _convert_run_to_spans(
-        self,
-        run: Dict[str, Any],
-        parent: Optional[Span] = None,
-    ) -> None:
-        attributes: Dict[str, Any] = {}
-        for io_key, io_attributes in {
-            "inputs": (INPUT_VALUE, INPUT_MIME_TYPE),
-            "outputs": (OUTPUT_VALUE, OUTPUT_MIME_TYPE),
-        }.items():
-            attributes.update(zip(io_attributes, _convert_io(run.get(io_key))))
-        attributes.update(_prompts(run["inputs"]))
-        attributes.update(_input_messages(run["inputs"]))
-        attributes.update(_output_messages(run["outputs"]))
-        attributes.update(_prompt_template(run["serialized"]))
-        attributes.update(_invocation_parameters(run))
-        attributes.update(_model_name(run["extra"]))
-        attributes.update(_token_counts(run["outputs"]))
-        attributes.update(_function_calls(run["outputs"]))
-        attributes.update(_tools(run))
-        attributes.update(_retrieval_documents(run))
-        events: List[SpanEvent] = []
-        if (error := run["error"]) is None:
-            status_code = SpanStatusCode.OK
-        else:
-            status_code = SpanStatusCode.ERROR
-            # Since there is only one error message, keep just the
-            # first error event.
-            error_event = next(
-                filter(
-                    lambda event: event["name"] == "error",
-                    run["events"],
-                )
-            )
-            events.append(
-                SpanException(
-                    message=error,
-                    timestamp=error_event["time"],
-                )
-            )
-        span_kind = (
-            SpanKind.AGENT
-            if "agent" in run["name"].lower()
-            else _langchain_run_type_to_span_kind(run["run_type"])
-        )
-        start_time = cast(datetime, run["start_time"])
-        end_time = cast(Optional[datetime], run.get("end_time"))
-        if _is_tz_naive(start_time):
-            start_time = start_time.replace(tzinfo=timezone.utc)
-        if end_time and _is_tz_naive(end_time):
-            end_time = end_time.replace(tzinfo=timezone.utc)
-        span = self.create_span(
-            name=run["name"],
-            span_kind=span_kind,
-            parent_id=None if parent is None else parent.context.span_id,
-            trace_id=None if parent is None else parent.context.trace_id,
-            start_time=start_time,
-            end_time=end_time,
-            status_code=status_code,
-            attributes=attributes,
-            events=events,
+    def get_spans(self) -> Iterator[Span]:
+        logger.warning(_DEPRECATION_MESSAGE)
+        logger.warning(
+            "`.get_spans()` is a defunct method that does nothing, and will be removed "
+            "in the future. For a migration guide, see "
+            "https://github.com/Arize-ai/phoenix/blob/main/MIGRATION.md"
         )
-        for child_run in run["child_runs"]:
-            self._convert_run_to_spans(child_run, span)
-
-    def _persist_run(self, run: Run) -> None:
-        # Note that this relies on `.dict()` from pydantic for the
-        # serialization of objects like `langchain.schema.Document`.
-        try:
-            self._convert_run_to_spans(run.dict())
-        except Exception:
-            logger.exception("Failed to convert run to spans")
+        return iter(())
 
-    @graceful_fallback(_chat_model_start_fallback)
-    def on_chat_model_start(
-        self,
-        serialized: Dict[str, Any],
-        messages: List[List[BaseMessage]],
-        *,
-        run_id: UUID,
-        tags: Optional[List[str]] = None,
-        parent_run_id: Optional[UUID] = None,
-        metadata: Optional[Dict[str, Any]] = None,
-        name: Optional[str] = None,
-        **kwargs: Any,
-    ) -> None:
-        """
-        Adds chat messages to the run inputs.
 
-        LangChain's BaseTracer class does not implement hooks for chat models and hence does not
-        record data such as the list of messages that were passed to the chat model.
+class _DefunctModule:
+    __all__ = (_DUMMY,)
 
-        For reference, see https://github.com/langchain-ai/langchain/pull/4499.
-        """
-
-        parent_run_id_ = str(parent_run_id) if parent_run_id else None
-        execution_order = self._get_execution_order(parent_run_id_)
-        start_time = datetime.utcnow()
-        if metadata:
-            kwargs.update({"metadata": metadata})
-        run = Run(
-            id=run_id,
-            parent_run_id=parent_run_id,
-            serialized=serialized,
-            inputs={"messages": [[dumpd(message) for message in batch] for batch in messages]},
-            extra=kwargs,
-            events=[{"name": "start", "time": start_time}],
-            start_time=start_time,
-            execution_order=execution_order,
-            child_execution_order=execution_order,
-            run_type="llm",
-            tags=tags,
-            name=name or "",
-        )
-        self._start_trace(run)
+    def __getattr__(self, name: str) -> Any:
+        if name == _DUMMY:
+            logger.warning(_DEPRECATION_MESSAGE)
+            return _DummyObject
+        raise AttributeError(f"module {__name__} has no attribute {name}")
 
 
-def _is_tz_naive(dt: datetime) -> bool:
-    return dt.tzinfo is None or dt.tzinfo.utcoffset(dt) is None
+# See e.g. https://stackoverflow.com/a/7668273
+sys.modules[__name__] = _DefunctModule()  # type: ignore
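
The last two lines of the new module rely on the fact that Python's import system only requires the object stored in sys.modules to support attribute access: once a plain instance replaces the module there, its __getattr__ intercepts every lookup, including `from ... import ...` statements in legacy code, which is how the shim can log a deprecation warning at import time. Below is a minimal, self-contained sketch of that pattern; the names legacy_mod and OldTracer are hypothetical stand-ins for illustration, not Phoenix's actual module or class.

import logging
import sys
from typing import Any, Iterator

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger("legacy_mod")

_MESSAGE = "`legacy_mod.OldTracer` is defunct and does nothing."  # hypothetical names


class _OldTracerStub:
    # Accepts any arguments so legacy constructor calls still succeed.
    def __init__(self, *_: Any, **__: Any) -> None:
        logger.warning(_MESSAGE)

    def get_spans(self) -> Iterator[Any]:
        logger.warning(_MESSAGE)
        return iter(())  # always empty: nothing is traced


class _DefunctModule:
    # A plain class suffices: the sys.modules entry only needs to support
    # attribute lookup, and __getattr__ provides exactly that hook.
    def __getattr__(self, name: str) -> Any:
        if name == "OldTracer":
            logger.warning(_MESSAGE)
            return _OldTracerStub
        raise AttributeError(f"module 'legacy_mod' has no attribute {name}")


# Install the stand-in; any subsequent import of "legacy_mod" is served
# straight from sys.modules and never touches the filesystem.
sys.modules["legacy_mod"] = _DefunctModule()  # type: ignore[assignment]

from legacy_mod import OldTracer  # resolved via _DefunctModule.__getattr__

tracer = OldTracer()  # logs the warning; the instance is inert
assert list(tracer.get_spans()) == []

Since Python 3.7, PEP 562 offers a lighter alternative (a module-level __getattr__ function) that intercepts attribute access without swapping the sys.modules entry; the replacement trick used in the diff works the same way on any version and keeps all of the shim's behavior in one class.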