arize-phoenix 0.0.32rc1__py3-none-any.whl → 0.0.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of arize-phoenix might be problematic.
Files changed (71)
  1. {arize_phoenix-0.0.32rc1.dist-info → arize_phoenix-0.0.33.dist-info}/METADATA +11 -5
  2. {arize_phoenix-0.0.32rc1.dist-info → arize_phoenix-0.0.33.dist-info}/RECORD +69 -40
  3. phoenix/__init__.py +3 -1
  4. phoenix/config.py +23 -1
  5. phoenix/core/model_schema.py +14 -37
  6. phoenix/core/model_schema_adapter.py +0 -1
  7. phoenix/core/traces.py +285 -0
  8. phoenix/datasets/dataset.py +14 -21
  9. phoenix/datasets/errors.py +4 -1
  10. phoenix/datasets/schema.py +1 -1
  11. phoenix/datetime_utils.py +87 -0
  12. phoenix/experimental/callbacks/__init__.py +0 -0
  13. phoenix/experimental/callbacks/langchain_tracer.py +228 -0
  14. phoenix/experimental/callbacks/llama_index_trace_callback_handler.py +364 -0
  15. phoenix/experimental/evals/__init__.py +33 -0
  16. phoenix/experimental/evals/functions/__init__.py +4 -0
  17. phoenix/experimental/evals/functions/binary.py +156 -0
  18. phoenix/experimental/evals/functions/common.py +31 -0
  19. phoenix/experimental/evals/functions/generate.py +50 -0
  20. phoenix/experimental/evals/models/__init__.py +4 -0
  21. phoenix/experimental/evals/models/base.py +130 -0
  22. phoenix/experimental/evals/models/openai.py +128 -0
  23. phoenix/experimental/evals/retrievals.py +2 -2
  24. phoenix/experimental/evals/templates/__init__.py +24 -0
  25. phoenix/experimental/evals/templates/default_templates.py +126 -0
  26. phoenix/experimental/evals/templates/template.py +107 -0
  27. phoenix/experimental/evals/utils/__init__.py +0 -0
  28. phoenix/experimental/evals/utils/downloads.py +33 -0
  29. phoenix/experimental/evals/utils/threads.py +27 -0
  30. phoenix/experimental/evals/utils/types.py +9 -0
  31. phoenix/experimental/evals/utils.py +33 -0
  32. phoenix/metrics/binning.py +0 -1
  33. phoenix/metrics/timeseries.py +2 -3
  34. phoenix/server/api/context.py +2 -0
  35. phoenix/server/api/input_types/SpanSort.py +60 -0
  36. phoenix/server/api/schema.py +85 -4
  37. phoenix/server/api/types/DataQualityMetric.py +10 -1
  38. phoenix/server/api/types/Dataset.py +2 -4
  39. phoenix/server/api/types/DatasetInfo.py +10 -0
  40. phoenix/server/api/types/ExportEventsMutation.py +4 -1
  41. phoenix/server/api/types/Functionality.py +15 -0
  42. phoenix/server/api/types/MimeType.py +16 -0
  43. phoenix/server/api/types/Model.py +3 -5
  44. phoenix/server/api/types/SortDir.py +13 -0
  45. phoenix/server/api/types/Span.py +229 -0
  46. phoenix/server/api/types/TimeSeries.py +9 -2
  47. phoenix/server/api/types/pagination.py +2 -0
  48. phoenix/server/app.py +24 -4
  49. phoenix/server/main.py +60 -24
  50. phoenix/server/span_handler.py +39 -0
  51. phoenix/server/static/index.js +956 -479
  52. phoenix/server/thread_server.py +10 -2
  53. phoenix/services.py +39 -16
  54. phoenix/session/session.py +99 -27
  55. phoenix/trace/exporter.py +71 -0
  56. phoenix/trace/filter.py +181 -0
  57. phoenix/trace/fixtures.py +23 -8
  58. phoenix/trace/schemas.py +59 -6
  59. phoenix/trace/semantic_conventions.py +141 -1
  60. phoenix/trace/span_json_decoder.py +60 -6
  61. phoenix/trace/span_json_encoder.py +1 -9
  62. phoenix/trace/trace_dataset.py +100 -8
  63. phoenix/trace/tracer.py +26 -3
  64. phoenix/trace/v1/__init__.py +522 -0
  65. phoenix/trace/v1/trace_pb2.py +52 -0
  66. phoenix/trace/v1/trace_pb2.pyi +351 -0
  67. phoenix/core/dimension_data_type.py +0 -6
  68. phoenix/core/dimension_type.py +0 -9
  69. {arize_phoenix-0.0.32rc1.dist-info → arize_phoenix-0.0.33.dist-info}/WHEEL +0 -0
  70. {arize_phoenix-0.0.32rc1.dist-info → arize_phoenix-0.0.33.dist-info}/licenses/IP_NOTICE +0 -0
  71. {arize_phoenix-0.0.32rc1.dist-info → arize_phoenix-0.0.33.dist-info}/licenses/LICENSE +0 -0
phoenix/experimental/callbacks/langchain_tracer.py (new file)
@@ -0,0 +1,228 @@
+ import json
+ import logging
+ from copy import deepcopy
+ from datetime import datetime
+ from typing import Any, Dict, Iterator, List, Optional, Tuple
+
+ from langchain.callbacks.tracers.base import BaseTracer
+ from langchain.callbacks.tracers.schemas import Run
+
+ from phoenix.trace.exporter import HttpExporter
+ from phoenix.trace.schemas import (
+     Span,
+     SpanEvent,
+     SpanException,
+     SpanKind,
+     SpanStatusCode,
+ )
+ from phoenix.trace.semantic_conventions import (
+     DOCUMENT_CONTENT,
+     DOCUMENT_METADATA,
+     INPUT_MIME_TYPE,
+     INPUT_VALUE,
+     LLM_FUNCTION_CALL,
+     LLM_INVOCATION_PARAMETERS,
+     LLM_MODEL_NAME,
+     LLM_PROMPT_TEMPLATE,
+     LLM_PROMPT_TEMPLATE_VARIABLES,
+     LLM_PROMPT_TEMPLATE_VERSION,
+     LLM_TOKEN_COUNT_COMPLETION,
+     LLM_TOKEN_COUNT_PROMPT,
+     LLM_TOKEN_COUNT_TOTAL,
+     OUTPUT_MIME_TYPE,
+     OUTPUT_VALUE,
+     RETRIEVAL_DOCUMENTS,
+     TOOL_DESCRIPTION,
+     TOOL_NAME,
+     MimeType,
+ )
+ from phoenix.trace.tracer import Tracer
+
+ logger = logging.getLogger(__name__)
+
+
+ def _langchain_run_type_to_span_kind(run_type: str) -> SpanKind:
+     # TODO: LangChain is moving away from enums and to arbitrary strings
+     # for the run_type variable, so we may need to do the same
+     try:
+         return SpanKind(run_type.upper())
+     except ValueError:
+         return SpanKind.UNKNOWN
+
+
+ def _serialize_json(obj: Any) -> str:
+     if isinstance(obj, datetime):
+         return obj.isoformat()
+     return str(obj)
+
+
+ def _convert_io(obj: Optional[Dict[str, Any]]) -> Iterator[Any]:
+     if not obj:
+         return
+     if not isinstance(obj, dict):
+         raise ValueError(f"obj should be dict, but obj={obj}")
+     if len(obj) == 1 and isinstance(value := next(iter(obj.values())), str):
+         yield value
+     else:
+         yield json.dumps(obj, default=_serialize_json)
+         yield MimeType.JSON
+
+
+ def _prompt_template(run_serialized: Dict[str, Any]) -> Iterator[Tuple[str, Any]]:
+     """
+     A best-effort attempt to locate the PromptTemplate object among the
+     keyword arguments of a serialized object, e.g. an LLMChain object.
+     """
+     for obj in run_serialized.get("kwargs", {}).values():
+         if not isinstance(obj, dict) or "id" not in obj:
+             continue
+         # The `id` field of the object is a list indicating the path to the
+         # object's class in the LangChain package, e.g. `PromptTemplate` in
+         # the `langchain.prompts.prompt` module is represented as
+         # ["langchain", "prompts", "prompt", "PromptTemplate"]
+         if obj["id"][-1].endswith("PromptTemplate"):
+             kwargs = obj.get("kwargs", {})
+             if not (template := kwargs.get("template", "")):
+                 continue
+             yield LLM_PROMPT_TEMPLATE, template
+             yield LLM_PROMPT_TEMPLATE_VARIABLES, kwargs.get("input_variables", [])
+             yield LLM_PROMPT_TEMPLATE_VERSION, "unknown"
+             break
+
+
+ def _invocation_parameters(run: Dict[str, Any]) -> Iterator[Tuple[str, str]]:
+     """Yields invocation parameters if present."""
+     if run["run_type"] != "llm":
+         return
+     run_extra = run["extra"]
+     yield LLM_INVOCATION_PARAMETERS, json.dumps(run_extra.get("invocation_params", {}))
+
+
+ def _model_name(run_extra: Dict[str, Any]) -> Iterator[Tuple[str, str]]:
+     """Yields model name if present."""
+     if not (invocation_params := run_extra.get("invocation_params")):
+         return
+     for key in ["model_name", "model"]:
+         if name := invocation_params.get(key):
+             yield LLM_MODEL_NAME, name
+             return
+
+
+ def _token_counts(run_outputs: Dict[str, Any]) -> Iterator[Tuple[str, int]]:
+     """Yields token count information if present."""
+     try:
+         token_usage = run_outputs["llm_output"]["token_usage"]
+     except Exception:
+         return
+     for attribute_name, key in [
+         (LLM_TOKEN_COUNT_PROMPT, "prompt_tokens"),
+         (LLM_TOKEN_COUNT_COMPLETION, "completion_tokens"),
+         (LLM_TOKEN_COUNT_TOTAL, "total_tokens"),
+     ]:
+         if (token_count := token_usage.get(key)) is not None:
+             yield attribute_name, token_count
+
+
+ def _function_calls(run_outputs: Dict[str, Any]) -> Iterator[Tuple[str, str]]:
+     """Yields function call information if present."""
+     try:
+         function_call_data = deepcopy(
+             run_outputs["generations"][0][0]["message"]["kwargs"]["additional_kwargs"][
+                 "function_call"
+             ]
+         )
+         function_call_data["arguments"] = json.loads(function_call_data["arguments"])
+         yield LLM_FUNCTION_CALL, json.dumps(function_call_data)
+     except Exception:
+         pass
+
+
+ def _tools(run: Dict[str, Any]) -> Iterator[Tuple[str, str]]:
+     """Yields tool attributes if present."""
+     if run["run_type"] != "tool":
+         return
+     run_serialized = run["serialized"]
+     if "name" in run_serialized:
+         yield TOOL_NAME, run_serialized["name"]
+     if "description" in run_serialized:
+         yield TOOL_DESCRIPTION, run_serialized["description"]
+     # TODO: tool parameters https://github.com/Arize-ai/phoenix/issues/1330
+
+
+ def _retrieval_documents(
+     run: Dict[str, Any],
+ ) -> Iterator[Tuple[str, List[Any]]]:
+     if run["run_type"] != "retriever":
+         return
+     yield RETRIEVAL_DOCUMENTS, [
+         {
+             DOCUMENT_CONTENT: document.get("page_content"),
+             DOCUMENT_METADATA: document.get("metadata") or {},
+         }
+         for document in (run.get("outputs") or {}).get("documents") or []
+     ]
+
+
+ class OpenInferenceTracer(Tracer, BaseTracer):
+     def __init__(self, *args: Any, **kwargs: Any) -> None:
+         super().__init__(*args, **kwargs)
+         self._exporter = self._exporter or HttpExporter()
+
+     def _convert_run_to_spans(
+         self,
+         run: Dict[str, Any],
+         parent: Optional[Span] = None,
+     ) -> None:
+         attributes: Dict[str, Any] = {}
+         for io_key, io_attributes in {
+             "inputs": (INPUT_VALUE, INPUT_MIME_TYPE),
+             "outputs": (OUTPUT_VALUE, OUTPUT_MIME_TYPE),
+         }.items():
+             attributes.update(zip(io_attributes, _convert_io(run.get(io_key))))
+         attributes.update(_prompt_template(run["serialized"]))
+         attributes.update(_invocation_parameters(run))
+         attributes.update(_model_name(run["extra"]))
+         attributes.update(_token_counts(run["outputs"]))
+         attributes.update(_function_calls(run["outputs"]))
+         attributes.update(_tools(run))
+         attributes.update(_retrieval_documents(run))
+         events: List[SpanEvent] = []
+         if (error := run["error"]) is None:
+             status_code = SpanStatusCode.OK
+         else:
+             status_code = SpanStatusCode.ERROR
+             # Since there is only one error message, keep just the
+             # first error event.
+             error_event = next(
+                 filter(
+                     lambda event: event["name"] == "error",
+                     run["events"],
+                 )
+             )
+             events.append(
+                 SpanException(
+                     message=error,
+                     timestamp=error_event["time"],
+                 )
+             )
+         span = self.create_span(
+             name=run["name"],
+             span_kind=_langchain_run_type_to_span_kind(run["run_type"]),
+             parent_id=None if parent is None else parent.context.span_id,
+             trace_id=None if parent is None else parent.context.trace_id,
+             start_time=run["start_time"],
+             end_time=run["end_time"],
+             status_code=status_code,
+             attributes=attributes,
+             events=events,
+         )
+         for child_run in run["child_runs"]:
+             self._convert_run_to_spans(child_run, span)
+
+     def _persist_run(self, run: Run) -> None:
+         # Note that this relies on `.dict()` from pydantic for the
+         # serialization of objects like `langchain.schema.Document`.
+         try:
+             self._convert_run_to_spans(run.dict())
+         except Exception:
+             logger.exception("Failed to convert run to spans")
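
For orientation (not part of the diff): a minimal usage sketch showing how the new tracer plugs into LangChain as a callback. The chain, prompt, and input text are illustrative; it assumes an OpenAI API key is configured and a Phoenix server is running to receive the spans exported by the default HttpExporter.

from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate

from phoenix.experimental.callbacks.langchain_tracer import OpenInferenceTracer

# OpenInferenceTracer subclasses LangChain's BaseTracer, so it can be passed
# anywhere LangChain accepts callbacks; each finished run is converted to
# OpenInference spans and exported.
tracer = OpenInferenceTracer()
chain = LLMChain(
    llm=OpenAI(),  # assumes OPENAI_API_KEY is set in the environment
    prompt=PromptTemplate.from_template("Summarize in one sentence: {text}"),
)
chain.run(text="LangChain records runs; the tracer converts them to spans.", callbacks=[tracer])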
phoenix/experimental/callbacks/llama_index_trace_callback_handler.py (new file)
@@ -0,0 +1,364 @@
+ """
+ Callback handler for emitting trace data in OpenInference tracing format.
+ OpenInference tracing is an open standard for capturing and storing
+ LLM Application execution logs.
+
+ It enables production LLMapp servers to seamlessly integrate with LLM
+ observability solutions such as Arize and Phoenix.
+
+ For more information on the specification, see
+ https://github.com/Arize-ai/open-inference-spec
+ """
+ import json
+ import logging
+ from collections import defaultdict
+ from datetime import datetime
+ from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, TypedDict, cast
+ from uuid import uuid4
+
+ from llama_index.callbacks.base_handler import BaseCallbackHandler
+ from llama_index.callbacks.schema import (
+     TIMESTAMP_FORMAT,
+     CBEvent,
+     CBEventType,
+     EventPayload,
+ )
+ from llama_index.llms.base import ChatMessage, ChatResponse
+ from llama_index.tools import ToolMetadata
+ from openai.openai_object import OpenAIObject
+
+ from phoenix.trace.exporter import HttpExporter
+ from phoenix.trace.schemas import Span, SpanID, SpanKind, SpanStatusCode
+ from phoenix.trace.semantic_conventions import (
+     DOCUMENT_CONTENT,
+     DOCUMENT_ID,
+     DOCUMENT_METADATA,
+     DOCUMENT_SCORE,
+     EMBEDDING_EMBEDDINGS,
+     EMBEDDING_MODEL_NAME,
+     EMBEDDING_TEXT,
+     EMBEDDING_VECTOR,
+     INPUT_MIME_TYPE,
+     INPUT_VALUE,
+     LLM_INVOCATION_PARAMETERS,
+     LLM_MESSAGES,
+     LLM_MODEL_NAME,
+     LLM_PROMPT,
+     LLM_TOKEN_COUNT_COMPLETION,
+     LLM_TOKEN_COUNT_PROMPT,
+     LLM_TOKEN_COUNT_TOTAL,
+     MESSAGE_CONTENT,
+     MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON,
+     MESSAGE_FUNCTION_CALL_NAME,
+     MESSAGE_NAME,
+     MESSAGE_ROLE,
+     OUTPUT_MIME_TYPE,
+     OUTPUT_VALUE,
+     RETRIEVAL_DOCUMENTS,
+     TOOL_DESCRIPTION,
+     TOOL_NAME,
+     TOOL_PARAMETERS,
+     MimeType,
+ )
+ from phoenix.trace.tracer import SpanExporter, Tracer
+
+ logger = logging.getLogger(__name__)
+ logger.addHandler(logging.NullHandler())
+
+ CBEventID = str
+ _LOCAL_TZINFO = datetime.now().astimezone().tzinfo
+
+
+ class CBEventData(TypedDict, total=False):
+     name: str
+     event_type: CBEventType
+     start_event: CBEvent
+     end_event: CBEvent
+     attributes: Dict[str, Any]
+
+
+ def payload_to_semantic_attributes(
+     event_type: CBEventType,
+     payload: Dict[str, Any],
+ ) -> Dict[str, Any]:
+     """
+     Converts a LLMapp payload to a dictionary of semantic conventions compliant attributes.
+     """
+     attributes: Dict[str, Any] = {}
+     if event_type in (CBEventType.NODE_PARSING, CBEventType.CHUNKING):
+         # TODO(maybe): handle these events
+         return attributes
+     if EventPayload.CHUNKS in payload and EventPayload.EMBEDDINGS in payload:
+         attributes[EMBEDDING_EMBEDDINGS] = [
+             {EMBEDDING_TEXT: text, EMBEDDING_VECTOR: vector}
+             for text, vector in zip(payload[EventPayload.CHUNKS], payload[EventPayload.EMBEDDINGS])
+         ]
+     if EventPayload.QUERY_STR in payload:
+         attributes[INPUT_VALUE] = payload[EventPayload.QUERY_STR]
+         attributes[INPUT_MIME_TYPE] = MimeType.TEXT
+     if EventPayload.NODES in payload:
+         attributes[RETRIEVAL_DOCUMENTS] = [
+             {
+                 DOCUMENT_ID: node_with_score.node.node_id,
+                 DOCUMENT_SCORE: node_with_score.score,
+                 DOCUMENT_CONTENT: node_with_score.node.text,
+                 DOCUMENT_METADATA: node_with_score.node.metadata,
+             }
+             for node_with_score in payload[EventPayload.NODES]
+         ]
+     if EventPayload.PROMPT in payload:
+         attributes[LLM_PROMPT] = payload[EventPayload.PROMPT]
+     if EventPayload.MESSAGES in payload:
+         messages = payload[EventPayload.MESSAGES]
+         # Messages is only relevant to the LLM invocation
+         if event_type is CBEventType.LLM:
+             attributes[LLM_MESSAGES] = [
+                 _message_payload_to_attributes(message_data) for message_data in messages
+             ]
+         elif event_type is CBEventType.AGENT_STEP and len(messages):
+             # the agent step contains a message that is actually the input
+             # akin to the query_str
+             attributes[INPUT_VALUE] = _message_payload_to_str(messages[0])
+     if response := (payload.get(EventPayload.RESPONSE) or payload.get(EventPayload.COMPLETION)):
+         attributes[OUTPUT_VALUE] = _get_response_content(response)
+         attributes[OUTPUT_MIME_TYPE] = MimeType.TEXT
+         if raw := getattr(response, "raw", None):
+             if isinstance(raw, OpenAIObject):
+                 usage = raw.usage
+                 attributes[LLM_TOKEN_COUNT_PROMPT] = usage.prompt_tokens
+                 attributes[LLM_TOKEN_COUNT_COMPLETION] = usage.completion_tokens
+                 attributes[LLM_TOKEN_COUNT_TOTAL] = usage.total_tokens
+     if EventPayload.TEMPLATE in payload:
+         ...
+     if event_type is CBEventType.RERANKING:
+         ...  # TODO
+         # if EventPayload.TOP_K in payload:
+         #     attributes[RERANKING_TOP_K] = payload[EventPayload.TOP_K]
+         # if EventPayload.MODEL_NAME in payload:
+         #     attributes[RERANKING_MODEL_NAME] = payload[EventPayload.MODEL_NAME]
+     if EventPayload.TOOL in payload:
+         tool_metadata = cast(ToolMetadata, payload.get(EventPayload.TOOL))
+         attributes[TOOL_NAME] = tool_metadata.name
+         attributes[TOOL_DESCRIPTION] = tool_metadata.description
+         attributes[TOOL_PARAMETERS] = tool_metadata.to_openai_function()["parameters"]
+     if EventPayload.SERIALIZED in payload:
+         serialized = payload[EventPayload.SERIALIZED]
+         if event_type is CBEventType.EMBEDDING:
+             if model_name := serialized.get("model_name"):
+                 attributes[EMBEDDING_MODEL_NAME] = model_name
+         if event_type is CBEventType.LLM:
+             if model_name := serialized.get("model"):
+                 attributes[LLM_MODEL_NAME] = model_name
+                 attributes[LLM_INVOCATION_PARAMETERS] = json.dumps(
+                     {
+                         "model": model_name,
+                         "temperature": serialized["temperature"],
+                         "max_tokens": serialized["max_tokens"],
+                         **serialized["additional_kwargs"],
+                     }
+                 )
+     return attributes
+
+
+ class OpenInferenceTraceCallbackHandler(BaseCallbackHandler):
+     """Callback handler for storing LLM application trace data in OpenInference format.
+     OpenInference is an open standard for capturing and storing AI model
+     inferences. It enables production LLMapp servers to seamlessly integrate
+     with LLM observability solutions such as Arize and Phoenix.
+
+     For more information on the specification, see
+     https://github.com/Arize-ai/open-inference-spec
+     """
+
+     def __init__(
+         self,
+         callback: Optional[Callable[[List[Span]], None]] = None,
+         exporter: Optional[SpanExporter] = HttpExporter(),
+     ) -> None:
+         super().__init__(event_starts_to_ignore=[], event_ends_to_ignore=[])
+         self._tracer = Tracer(on_append=callback, exporter=exporter)
+         self._event_id_to_event_data: Dict[CBEventID, CBEventData] = defaultdict(
+             lambda: CBEventData()
+         )
+
+     def on_event_start(
+         self,
+         event_type: CBEventType,
+         payload: Optional[Dict[str, Any]] = None,
+         event_id: CBEventID = "",
+         **kwargs: Any,
+     ) -> CBEventID:
+         event_id = event_id or str(uuid4())
+         event_data = self._event_id_to_event_data[event_id]
+         event_data["name"] = event_type.value
+         event_data["event_type"] = event_type
+         event_data["start_event"] = CBEvent(
+             event_type=event_type,
+             payload=payload,
+             id_=event_id,
+         )
+         event_data["attributes"] = {}
+         # Parse the payload to extract the parameters
+         if payload is not None:
+             event_data["attributes"].update(
+                 payload_to_semantic_attributes(event_type, payload),
+             )
+
+         return event_id
+
+     def on_event_end(
+         self,
+         event_type: CBEventType,
+         payload: Optional[Dict[str, Any]] = None,
+         event_id: CBEventID = "",
+         **kwargs: Any,
+     ) -> None:
+         event_data = self._event_id_to_event_data[event_id]
+         event_data.setdefault("name", event_type.value)
+         event_data.setdefault("event_type", event_type)
+         event_data["end_event"] = CBEvent(
+             event_type=event_type,
+             payload=payload,
+             id_=event_id,
+         )
+
+         # Parse the payload to extract the parameters
+         if payload is not None:
+             event_data["attributes"].update(
+                 payload_to_semantic_attributes(event_type, payload),
+             )
+
+     def start_trace(self, trace_id: Optional[str] = None) -> None:
+         self._event_id_to_event_data = defaultdict(lambda: CBEventData())
+
+     def end_trace(
+         self,
+         trace_id: Optional[str] = None,
+         trace_map: Optional[Dict[CBEventID, List[CBEventID]]] = None,
+     ) -> None:
+         if not trace_map:
+             return  # TODO: investigate when empty or None trace_map is passed
+         try:
+             _add_to_tracer(
+                 event_id_to_event_data=self._event_id_to_event_data,
+                 trace_map=trace_map,
+                 tracer=self._tracer,
+             )
+         except Exception:
+             logger.exception("OpenInferenceCallbackHandler trace processing failed")
+         self._event_id_to_event_data = defaultdict(lambda: CBEventData())
+
+     def get_spans(self) -> Iterator[Span]:
+         """
+         Returns the spans stored in the tracer. This is useful if you are running
+         LlamaIndex in a notebook environment and you want to inspect the spans.
+         """
+         return self._tracer.get_spans()
+
+
+ def _add_to_tracer(
+     event_id_to_event_data: Dict[CBEventID, CBEventData],
+     trace_map: Dict[CBEventID, List[CBEventID]],
+     tracer: Tracer,
+ ) -> None:
+     """Adds event data to the tracer, where it is converted to a span and stored in a buffer.
+
+     Args:
+         event_id_to_event_data (Dict[CBEventID, CBEventData]): A map of event IDs to event data.
+
+         trace_map (Dict[CBEventID, List[CBEventID]]): A map of parent event IDs to child event
+             IDs. The root event IDs are stored under the key "root".
+
+         tracer (Tracer): The tracer that stores spans.
+     """
+
+     trace_id = uuid4()
+     parent_child_id_stack: List[Tuple[Optional[SpanID], CBEventID]] = [
+         (None, root_event_id) for root_event_id in trace_map["root"]
+     ]
+     while parent_child_id_stack:
+         parent_span_id, event_id = parent_child_id_stack.pop()
+         event_data = event_id_to_event_data[event_id]
+         start_event = event_data["start_event"]
+         start_time_tz_naive = datetime.strptime(start_event.time, TIMESTAMP_FORMAT)
+         start_time_tz_aware = start_time_tz_naive.replace(tzinfo=_LOCAL_TZINFO)
+         end_event = event_data["end_event"]
+         end_time_tz_naive = datetime.strptime(end_event.time, TIMESTAMP_FORMAT)
+         end_time_tz_aware = end_time_tz_naive.replace(tzinfo=_LOCAL_TZINFO)
+         name = event_data["name"]
+         event_type = event_data["event_type"]
+         span_kind = _get_span_kind(event_type)
+         span = tracer.create_span(
+             name=name,
+             span_kind=span_kind,
+             trace_id=trace_id,
+             start_time=start_time_tz_aware,
+             end_time=end_time_tz_aware,
+             status_code=SpanStatusCode.OK,
+             status_message="",
+             parent_id=parent_span_id,
+             attributes=event_data["attributes"],
+             events=None,
+             conversation=None,
+         )
+         new_parent_span_id = span.context.span_id
+         for new_child_event_id in trace_map.get(event_id, []):
+             parent_child_id_stack.append((new_parent_span_id, new_child_event_id))
+
+
+ def _get_span_kind(event_type: CBEventType) -> SpanKind:
+     """Maps a CBEventType to a SpanKind.
+
+     Args:
+         event_type (CBEventType): LlamaIndex callback event type.
+
+     Returns:
+         SpanKind: The corresponding span kind.
+     """
+     return {
+         CBEventType.EMBEDDING: SpanKind.EMBEDDING,
+         CBEventType.LLM: SpanKind.LLM,
+         CBEventType.RETRIEVE: SpanKind.RETRIEVER,
+         CBEventType.FUNCTION_CALL: SpanKind.TOOL,
+         CBEventType.AGENT_STEP: SpanKind.AGENT,
+     }.get(event_type, SpanKind.CHAIN)
+
+
+ def _message_payload_to_attributes(message: Any) -> Dict[str, Optional[str]]:
+     if isinstance(message, ChatMessage):
+         message_attributes = {
+             MESSAGE_ROLE: message.role.value,
+             MESSAGE_CONTENT: message.content,
+         }
+         # Parse the kwargs to extract the function name and parameters for function calling
+         # NB: these additional kwargs exist both for 'agent' and 'function' roles
+         if "name" in message.additional_kwargs:
+             message_attributes[MESSAGE_NAME] = message.additional_kwargs["name"]
+         if "function_call" in message.additional_kwargs:
+             function_call = message.additional_kwargs["function_call"]
+             message_attributes[MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON] = function_call.arguments
+             message_attributes[MESSAGE_FUNCTION_CALL_NAME] = function_call.name
+         return message_attributes
+
+     return {
+         MESSAGE_ROLE: "user",  # assume user if not ChatMessage
+         MESSAGE_CONTENT: str(message),
+     }
+
+
+ def _message_payload_to_str(message: Any) -> Optional[str]:
+     """Converts a message payload to a string, if possible"""
+     if isinstance(message, ChatMessage):
+         return message.content
+
+     return str(message)
+
+
+ def _get_response_content(response: Any) -> str:
+     """
+     Gets content from response objects. This is needed since the string representation of some
+     response objects includes extra information in addition to the content itself.
+     """
+     if isinstance(response, ChatResponse):
+         return response.message.content or ""
+     return str(response)
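
Again for orientation (not part of the diff): a sketch of wiring the handler into LlamaIndex through its callback manager, against the llama_index API of this era. The ./data directory and the query string are placeholders; the handler converts accumulated callback events to spans on end_trace.

from llama_index import ServiceContext, SimpleDirectoryReader, VectorStoreIndex
from llama_index.callbacks import CallbackManager

from phoenix.experimental.callbacks.llama_index_trace_callback_handler import (
    OpenInferenceTraceCallbackHandler,
)

# Register the handler so every LlamaIndex callback event flows through it.
callback_handler = OpenInferenceTraceCallbackHandler()
service_context = ServiceContext.from_defaults(
    callback_manager=CallbackManager([callback_handler]),
)
documents = SimpleDirectoryReader("./data").load_data()  # placeholder corpus
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()
query_engine.query("What does OpenInference capture?")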
phoenix/experimental/evals/__init__.py (new file)
@@ -0,0 +1,33 @@
+ from .functions import llm_eval_binary, llm_generate, run_relevance_eval
+ from .models import OpenAiModel
+ from .retrievals import compute_precisions_at_k
+ from .templates import (
+     CODE_READABILITY_PROMPT_RAILS_MAP,
+     CODE_READABILITY_PROMPT_TEMPLATE_STR,
+     HALLUCINATION_PROMPT_RAILS_MAP,
+     HALLUCINATION_PROMPT_TEMPLATE_STR,
+     RAG_RELEVANCY_PROMPT_RAILS_MAP,
+     RAG_RELEVANCY_PROMPT_TEMPLATE_STR,
+     TOXICITY_PROMPT_RAILS_MAP,
+     TOXICITY_PROMPT_TEMPLATE_STR,
+     PromptTemplate,
+ )
+ from .utils.downloads import download_benchmark_dataset
+
+ __all__ = [
+     "compute_precisions_at_k",
+     "download_benchmark_dataset",
+     "llm_eval_binary",
+     "llm_generate",
+     "OpenAiModel",
+     "PromptTemplate",
+     "CODE_READABILITY_PROMPT_RAILS_MAP",
+     "CODE_READABILITY_PROMPT_TEMPLATE_STR",
+     "HALLUCINATION_PROMPT_RAILS_MAP",
+     "HALLUCINATION_PROMPT_TEMPLATE_STR",
+     "RAG_RELEVANCY_PROMPT_RAILS_MAP",
+     "RAG_RELEVANCY_PROMPT_TEMPLATE_STR",
+     "TOXICITY_PROMPT_TEMPLATE_STR",
+     "TOXICITY_PROMPT_RAILS_MAP",
+     "run_relevance_eval",
+ ]
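
A hedged sketch of how these exports are meant to compose. The llm_eval_binary keyword arguments and the OpenAiModel constructor parameter shown here are inferred from the exported names, not from this diff (the real signatures live in functions/binary.py and models/openai.py above), and the dataframe columns are placeholders for the template's variables.

import pandas as pd

from phoenix.experimental.evals import (
    RAG_RELEVANCY_PROMPT_RAILS_MAP,
    RAG_RELEVANCY_PROMPT_TEMPLATE_STR,
    OpenAiModel,
    llm_eval_binary,
)

# Each template ships with a rails map that pins the eval to a fixed label
# set, so the LLM's free-form output snaps to one of the allowed answers.
rails = list(RAG_RELEVANCY_PROMPT_RAILS_MAP.values())

df = pd.DataFrame({"query": ["..."], "reference": ["..."]})  # placeholder rows
labels = llm_eval_binary(  # keyword names assumed; see binary.py for the real signature
    dataframe=df,
    template=RAG_RELEVANCY_PROMPT_TEMPLATE_STR,
    model=OpenAiModel(model_name="gpt-4"),  # parameter name assumed
    rails=rails,
)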
phoenix/experimental/evals/functions/__init__.py (new file)
@@ -0,0 +1,4 @@
+ from .binary import llm_eval_binary, run_relevance_eval
+ from .generate import llm_generate
+
+ __all__ = ["llm_eval_binary", "run_relevance_eval", "llm_generate"]