arize-phoenix 0.0.32rc1__py3-none-any.whl → 0.0.33__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
Potentially problematic release. This version of arize-phoenix might be problematic; more details are available on the registry page.
- {arize_phoenix-0.0.32rc1.dist-info → arize_phoenix-0.0.33.dist-info}/METADATA +11 -5
- {arize_phoenix-0.0.32rc1.dist-info → arize_phoenix-0.0.33.dist-info}/RECORD +69 -40
- phoenix/__init__.py +3 -1
- phoenix/config.py +23 -1
- phoenix/core/model_schema.py +14 -37
- phoenix/core/model_schema_adapter.py +0 -1
- phoenix/core/traces.py +285 -0
- phoenix/datasets/dataset.py +14 -21
- phoenix/datasets/errors.py +4 -1
- phoenix/datasets/schema.py +1 -1
- phoenix/datetime_utils.py +87 -0
- phoenix/experimental/callbacks/__init__.py +0 -0
- phoenix/experimental/callbacks/langchain_tracer.py +228 -0
- phoenix/experimental/callbacks/llama_index_trace_callback_handler.py +364 -0
- phoenix/experimental/evals/__init__.py +33 -0
- phoenix/experimental/evals/functions/__init__.py +4 -0
- phoenix/experimental/evals/functions/binary.py +156 -0
- phoenix/experimental/evals/functions/common.py +31 -0
- phoenix/experimental/evals/functions/generate.py +50 -0
- phoenix/experimental/evals/models/__init__.py +4 -0
- phoenix/experimental/evals/models/base.py +130 -0
- phoenix/experimental/evals/models/openai.py +128 -0
- phoenix/experimental/evals/retrievals.py +2 -2
- phoenix/experimental/evals/templates/__init__.py +24 -0
- phoenix/experimental/evals/templates/default_templates.py +126 -0
- phoenix/experimental/evals/templates/template.py +107 -0
- phoenix/experimental/evals/utils/__init__.py +0 -0
- phoenix/experimental/evals/utils/downloads.py +33 -0
- phoenix/experimental/evals/utils/threads.py +27 -0
- phoenix/experimental/evals/utils/types.py +9 -0
- phoenix/experimental/evals/utils.py +33 -0
- phoenix/metrics/binning.py +0 -1
- phoenix/metrics/timeseries.py +2 -3
- phoenix/server/api/context.py +2 -0
- phoenix/server/api/input_types/SpanSort.py +60 -0
- phoenix/server/api/schema.py +85 -4
- phoenix/server/api/types/DataQualityMetric.py +10 -1
- phoenix/server/api/types/Dataset.py +2 -4
- phoenix/server/api/types/DatasetInfo.py +10 -0
- phoenix/server/api/types/ExportEventsMutation.py +4 -1
- phoenix/server/api/types/Functionality.py +15 -0
- phoenix/server/api/types/MimeType.py +16 -0
- phoenix/server/api/types/Model.py +3 -5
- phoenix/server/api/types/SortDir.py +13 -0
- phoenix/server/api/types/Span.py +229 -0
- phoenix/server/api/types/TimeSeries.py +9 -2
- phoenix/server/api/types/pagination.py +2 -0
- phoenix/server/app.py +24 -4
- phoenix/server/main.py +60 -24
- phoenix/server/span_handler.py +39 -0
- phoenix/server/static/index.js +956 -479
- phoenix/server/thread_server.py +10 -2
- phoenix/services.py +39 -16
- phoenix/session/session.py +99 -27
- phoenix/trace/exporter.py +71 -0
- phoenix/trace/filter.py +181 -0
- phoenix/trace/fixtures.py +23 -8
- phoenix/trace/schemas.py +59 -6
- phoenix/trace/semantic_conventions.py +141 -1
- phoenix/trace/span_json_decoder.py +60 -6
- phoenix/trace/span_json_encoder.py +1 -9
- phoenix/trace/trace_dataset.py +100 -8
- phoenix/trace/tracer.py +26 -3
- phoenix/trace/v1/__init__.py +522 -0
- phoenix/trace/v1/trace_pb2.py +52 -0
- phoenix/trace/v1/trace_pb2.pyi +351 -0
- phoenix/core/dimension_data_type.py +0 -6
- phoenix/core/dimension_type.py +0 -9
- {arize_phoenix-0.0.32rc1.dist-info → arize_phoenix-0.0.33.dist-info}/WHEEL +0 -0
- {arize_phoenix-0.0.32rc1.dist-info → arize_phoenix-0.0.33.dist-info}/licenses/IP_NOTICE +0 -0
- {arize_phoenix-0.0.32rc1.dist-info → arize_phoenix-0.0.33.dist-info}/licenses/LICENSE +0 -0
phoenix/experimental/callbacks/langchain_tracer.py (new file)
@@ -0,0 +1,228 @@
import json
import logging
from copy import deepcopy
from datetime import datetime
from typing import Any, Dict, Iterator, List, Optional, Tuple

from langchain.callbacks.tracers.base import BaseTracer
from langchain.callbacks.tracers.schemas import Run

from phoenix.trace.exporter import HttpExporter
from phoenix.trace.schemas import (
    Span,
    SpanEvent,
    SpanException,
    SpanKind,
    SpanStatusCode,
)
from phoenix.trace.semantic_conventions import (
    DOCUMENT_CONTENT,
    DOCUMENT_METADATA,
    INPUT_MIME_TYPE,
    INPUT_VALUE,
    LLM_FUNCTION_CALL,
    LLM_INVOCATION_PARAMETERS,
    LLM_MODEL_NAME,
    LLM_PROMPT_TEMPLATE,
    LLM_PROMPT_TEMPLATE_VARIABLES,
    LLM_PROMPT_TEMPLATE_VERSION,
    LLM_TOKEN_COUNT_COMPLETION,
    LLM_TOKEN_COUNT_PROMPT,
    LLM_TOKEN_COUNT_TOTAL,
    OUTPUT_MIME_TYPE,
    OUTPUT_VALUE,
    RETRIEVAL_DOCUMENTS,
    TOOL_DESCRIPTION,
    TOOL_NAME,
    MimeType,
)
from phoenix.trace.tracer import Tracer

logger = logging.getLogger(__name__)


def _langchain_run_type_to_span_kind(run_type: str) -> SpanKind:
    # TODO: LangChain is moving away from enums and to arbitrary strings
    # for the run_type variable, so we may need to do the same
    try:
        return SpanKind(run_type.upper())
    except ValueError:
        return SpanKind.UNKNOWN


def _serialize_json(obj: Any) -> str:
    if isinstance(obj, datetime):
        return obj.isoformat()
    return str(obj)


def _convert_io(obj: Optional[Dict[str, Any]]) -> Iterator[Any]:
    if not obj:
        return
    if not isinstance(obj, dict):
        raise ValueError(f"obj should be dict, but obj={obj}")
    if len(obj) == 1 and isinstance(value := next(iter(obj.values())), str):
        yield value
    else:
        yield json.dumps(obj, default=_serialize_json)
        yield MimeType.JSON


def _prompt_template(run_serialized: Dict[str, Any]) -> Iterator[Tuple[str, Any]]:
    """
    A best-effort attempt to locate the PromptTemplate object among the
    keyword arguments of a serialized object, e.g. an LLMChain object.
    """
    for obj in run_serialized.get("kwargs", {}).values():
        if not isinstance(obj, dict) or "id" not in obj:
            continue
        # The `id` field of the object is a list indicating the path to the
        # object's class in the LangChain package, e.g. `PromptTemplate` in
        # the `langchain.prompts.prompt` module is represented as
        # ["langchain", "prompts", "prompt", "PromptTemplate"]
        if obj["id"][-1].endswith("PromptTemplate"):
            kwargs = obj.get("kwargs", {})
            if not (template := kwargs.get("template", "")):
                continue
            yield LLM_PROMPT_TEMPLATE, template
            yield LLM_PROMPT_TEMPLATE_VARIABLES, kwargs.get("input_variables", [])
            yield LLM_PROMPT_TEMPLATE_VERSION, "unknown"
            break


def _invocation_parameters(run: Dict[str, Any]) -> Iterator[Tuple[str, str]]:
    """Yields invocation parameters if present."""
    if run["run_type"] != "llm":
        return
    run_extra = run["extra"]
    yield LLM_INVOCATION_PARAMETERS, json.dumps(run_extra.get("invocation_params", {}))


def _model_name(run_extra: Dict[str, Any]) -> Iterator[Tuple[str, str]]:
    """Yields model name if present."""
    if not (invocation_params := run_extra.get("invocation_params")):
        return
    for key in ["model_name", "model"]:
        if name := invocation_params.get(key):
            yield LLM_MODEL_NAME, name
            return


def _token_counts(run_outputs: Dict[str, Any]) -> Iterator[Tuple[str, int]]:
    """Yields token count information if present."""
    try:
        token_usage = run_outputs["llm_output"]["token_usage"]
    except Exception:
        return
    for attribute_name, key in [
        (LLM_TOKEN_COUNT_PROMPT, "prompt_tokens"),
        (LLM_TOKEN_COUNT_COMPLETION, "completion_tokens"),
        (LLM_TOKEN_COUNT_TOTAL, "total_tokens"),
    ]:
        if (token_count := token_usage.get(key)) is not None:
            yield attribute_name, token_count


def _function_calls(run_outputs: Dict[str, Any]) -> Iterator[Tuple[str, str]]:
    """Yields function call information if present."""
    try:
        function_call_data = deepcopy(
            run_outputs["generations"][0][0]["message"]["kwargs"]["additional_kwargs"][
                "function_call"
            ]
        )
        function_call_data["arguments"] = json.loads(function_call_data["arguments"])
        yield LLM_FUNCTION_CALL, json.dumps(function_call_data)
    except Exception:
        pass


def _tools(run: Dict[str, Any]) -> Iterator[Tuple[str, str]]:
    """Yields tool attributes if present."""
    if run["run_type"] != "tool":
        return
    run_serialized = run["serialized"]
    if "name" in run_serialized:
        yield TOOL_NAME, run_serialized["name"]
    if "description" in run_serialized:
        yield TOOL_DESCRIPTION, run_serialized["description"]
    # TODO: tool parameters https://github.com/Arize-ai/phoenix/issues/1330


def _retrieval_documents(
    run: Dict[str, Any],
) -> Iterator[Tuple[str, List[Any]]]:
    if run["run_type"] != "retriever":
        return
    yield RETRIEVAL_DOCUMENTS, [
        {
            DOCUMENT_CONTENT: document.get("page_content"),
            DOCUMENT_METADATA: document.get("metadata") or {},
        }
        for document in (run.get("outputs") or {}).get("documents") or []
    ]


class OpenInferenceTracer(Tracer, BaseTracer):
    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)
        self._exporter = self._exporter or HttpExporter()

    def _convert_run_to_spans(
        self,
        run: Dict[str, Any],
        parent: Optional[Span] = None,
    ) -> None:
        attributes: Dict[str, Any] = {}
        for io_key, io_attributes in {
            "inputs": (INPUT_VALUE, INPUT_MIME_TYPE),
            "outputs": (OUTPUT_VALUE, OUTPUT_MIME_TYPE),
        }.items():
            attributes.update(zip(io_attributes, _convert_io(run.get(io_key))))
        attributes.update(_prompt_template(run["serialized"]))
        attributes.update(_invocation_parameters(run))
        attributes.update(_model_name(run["extra"]))
        attributes.update(_token_counts(run["outputs"]))
        attributes.update(_function_calls(run["outputs"]))
        attributes.update(_tools(run))
        attributes.update(_retrieval_documents(run))
        events: List[SpanEvent] = []
        if (error := run["error"]) is None:
            status_code = SpanStatusCode.OK
        else:
            status_code = SpanStatusCode.ERROR
            # Since there is only one error message, keep just the
            # first error event.
            error_event = next(
                filter(
                    lambda event: event["name"] == "error",
                    run["events"],
                )
            )
            events.append(
                SpanException(
                    message=error,
                    timestamp=error_event["time"],
                )
            )
        span = self.create_span(
            name=run["name"],
            span_kind=_langchain_run_type_to_span_kind(run["run_type"]),
            parent_id=None if parent is None else parent.context.span_id,
            trace_id=None if parent is None else parent.context.trace_id,
            start_time=run["start_time"],
            end_time=run["end_time"],
            status_code=status_code,
            attributes=attributes,
            events=events,
        )
        for child_run in run["child_runs"]:
            self._convert_run_to_spans(child_run, span)

    def _persist_run(self, run: Run) -> None:
        # Note that this relies on `.dict()` from pydantic for the
        # serialization of objects like `langchain.schema.Document`.
        try:
            self._convert_run_to_spans(run.dict())
        except Exception:
            logger.exception("Failed to convert run to spans")
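Since the tracer subclasses LangChain's BaseTracer, it is attached like any other callback handler. A minimal usage sketch under that assumption; the chain, prompt, and model below are illustrative and not part of this diff:

from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate

from phoenix.experimental.callbacks.langchain_tracer import OpenInferenceTracer

# Pass the tracer via `callbacks` so each completed run is converted to
# OpenInference spans and exported over HTTP to a running Phoenix server.
tracer = OpenInferenceTracer()
chain = LLMChain(
    llm=OpenAI(),  # assumes OPENAI_API_KEY is set in the environment
    prompt=PromptTemplate.from_template("Summarize: {text}"),
)
chain.run(text="Phoenix traces LLM applications.", callbacks=[tracer])

Because `_persist_run` fires once per completed root run, the whole run tree (chain, LLM, tool, and retriever children) arrives as a single trace.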
phoenix/experimental/callbacks/llama_index_trace_callback_handler.py (new file)
@@ -0,0 +1,364 @@
"""
Callback handler for emitting trace data in OpenInference tracing format.
OpenInference tracing is an open standard for capturing and storing
LLM Application execution logs.

It enables production LLMapp servers to seamlessly integrate with LLM
observability solutions such as Arize and Phoenix.

For more information on the specification, see
https://github.com/Arize-ai/open-inference-spec
"""
import json
import logging
from collections import defaultdict
from datetime import datetime
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, TypedDict, cast
from uuid import uuid4

from llama_index.callbacks.base_handler import BaseCallbackHandler
from llama_index.callbacks.schema import (
    TIMESTAMP_FORMAT,
    CBEvent,
    CBEventType,
    EventPayload,
)
from llama_index.llms.base import ChatMessage, ChatResponse
from llama_index.tools import ToolMetadata
from openai.openai_object import OpenAIObject

from phoenix.trace.exporter import HttpExporter
from phoenix.trace.schemas import Span, SpanID, SpanKind, SpanStatusCode
from phoenix.trace.semantic_conventions import (
    DOCUMENT_CONTENT,
    DOCUMENT_ID,
    DOCUMENT_METADATA,
    DOCUMENT_SCORE,
    EMBEDDING_EMBEDDINGS,
    EMBEDDING_MODEL_NAME,
    EMBEDDING_TEXT,
    EMBEDDING_VECTOR,
    INPUT_MIME_TYPE,
    INPUT_VALUE,
    LLM_INVOCATION_PARAMETERS,
    LLM_MESSAGES,
    LLM_MODEL_NAME,
    LLM_PROMPT,
    LLM_TOKEN_COUNT_COMPLETION,
    LLM_TOKEN_COUNT_PROMPT,
    LLM_TOKEN_COUNT_TOTAL,
    MESSAGE_CONTENT,
    MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON,
    MESSAGE_FUNCTION_CALL_NAME,
    MESSAGE_NAME,
    MESSAGE_ROLE,
    OUTPUT_MIME_TYPE,
    OUTPUT_VALUE,
    RETRIEVAL_DOCUMENTS,
    TOOL_DESCRIPTION,
    TOOL_NAME,
    TOOL_PARAMETERS,
    MimeType,
)
from phoenix.trace.tracer import SpanExporter, Tracer

logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

CBEventID = str
_LOCAL_TZINFO = datetime.now().astimezone().tzinfo


class CBEventData(TypedDict, total=False):
    name: str
    event_type: CBEventType
    start_event: CBEvent
    end_event: CBEvent
    attributes: Dict[str, Any]


def payload_to_semantic_attributes(
    event_type: CBEventType,
    payload: Dict[str, Any],
) -> Dict[str, Any]:
    """
    Converts a LLMapp payload to a dictionary of semantic conventions compliant attributes.
    """
    attributes: Dict[str, Any] = {}
    if event_type in (CBEventType.NODE_PARSING, CBEventType.CHUNKING):
        # TODO(maybe): handle these events
        return attributes
    if EventPayload.CHUNKS in payload and EventPayload.EMBEDDINGS in payload:
        attributes[EMBEDDING_EMBEDDINGS] = [
            {EMBEDDING_TEXT: text, EMBEDDING_VECTOR: vector}
            for text, vector in zip(payload[EventPayload.CHUNKS], payload[EventPayload.EMBEDDINGS])
        ]
    if EventPayload.QUERY_STR in payload:
        attributes[INPUT_VALUE] = payload[EventPayload.QUERY_STR]
        attributes[INPUT_MIME_TYPE] = MimeType.TEXT
    if EventPayload.NODES in payload:
        attributes[RETRIEVAL_DOCUMENTS] = [
            {
                DOCUMENT_ID: node_with_score.node.node_id,
                DOCUMENT_SCORE: node_with_score.score,
                DOCUMENT_CONTENT: node_with_score.node.text,
                DOCUMENT_METADATA: node_with_score.node.metadata,
            }
            for node_with_score in payload[EventPayload.NODES]
        ]
    if EventPayload.PROMPT in payload:
        attributes[LLM_PROMPT] = payload[EventPayload.PROMPT]
    if EventPayload.MESSAGES in payload:
        messages = payload[EventPayload.MESSAGES]
        # Messages is only relevant to the LLM invocation
        if event_type is CBEventType.LLM:
            attributes[LLM_MESSAGES] = [
                _message_payload_to_attributes(message_data) for message_data in messages
            ]
        elif event_type is CBEventType.AGENT_STEP and len(messages):
            # the agent step contains a message that is actually the input
            # akin to the query_str
            attributes[INPUT_VALUE] = _message_payload_to_str(messages[0])
    if response := (payload.get(EventPayload.RESPONSE) or payload.get(EventPayload.COMPLETION)):
        attributes[OUTPUT_VALUE] = _get_response_content(response)
        attributes[OUTPUT_MIME_TYPE] = MimeType.TEXT
        if raw := getattr(response, "raw", None):
            if isinstance(raw, OpenAIObject):
                usage = raw.usage
                attributes[LLM_TOKEN_COUNT_PROMPT] = usage.prompt_tokens
                attributes[LLM_TOKEN_COUNT_COMPLETION] = usage.completion_tokens
                attributes[LLM_TOKEN_COUNT_TOTAL] = usage.total_tokens
    if EventPayload.TEMPLATE in payload:
        ...
    if event_type is CBEventType.RERANKING:
        ...  # TODO
        # if EventPayload.TOP_K in payload:
        #     attributes[RERANKING_TOP_K] = payload[EventPayload.TOP_K]
        # if EventPayload.MODEL_NAME in payload:
        #     attributes[RERANKING_MODEL_NAME] = payload[EventPayload.MODEL_NAME]
    if EventPayload.TOOL in payload:
        tool_metadata = cast(ToolMetadata, payload.get(EventPayload.TOOL))
        attributes[TOOL_NAME] = tool_metadata.name
        attributes[TOOL_DESCRIPTION] = tool_metadata.description
        attributes[TOOL_PARAMETERS] = tool_metadata.to_openai_function()["parameters"]
    if EventPayload.SERIALIZED in payload:
        serialized = payload[EventPayload.SERIALIZED]
        if event_type is CBEventType.EMBEDDING:
            if model_name := serialized.get("model_name"):
                attributes[EMBEDDING_MODEL_NAME] = model_name
        if event_type is CBEventType.LLM:
            if model_name := serialized.get("model"):
                attributes[LLM_MODEL_NAME] = model_name
                attributes[LLM_INVOCATION_PARAMETERS] = json.dumps(
                    {
                        "model": model_name,
                        "temperature": serialized["temperature"],
                        "max_tokens": serialized["max_tokens"],
                        **serialized["additional_kwargs"],
                    }
                )
    return attributes


class OpenInferenceTraceCallbackHandler(BaseCallbackHandler):
    """Callback handler for storing LLM application trace data in OpenInference format.
    OpenInference is an open standard for capturing and storing AI model
    inferences. It enables production LLMapp servers to seamlessly integrate
    with LLM observability solutions such as Arize and Phoenix.

    For more information on the specification, see
    https://github.com/Arize-ai/open-inference-spec
    """

    def __init__(
        self,
        callback: Optional[Callable[[List[Span]], None]] = None,
        exporter: Optional[SpanExporter] = HttpExporter(),
    ) -> None:
        super().__init__(event_starts_to_ignore=[], event_ends_to_ignore=[])
        self._tracer = Tracer(on_append=callback, exporter=exporter)
        self._event_id_to_event_data: Dict[CBEventID, CBEventData] = defaultdict(
            lambda: CBEventData()
        )

    def on_event_start(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: CBEventID = "",
        **kwargs: Any,
    ) -> CBEventID:
        event_id = event_id or str(uuid4())
        event_data = self._event_id_to_event_data[event_id]
        event_data["name"] = event_type.value
        event_data["event_type"] = event_type
        event_data["start_event"] = CBEvent(
            event_type=event_type,
            payload=payload,
            id_=event_id,
        )
        event_data["attributes"] = {}
        # Parse the payload to extract the parameters
        if payload is not None:
            event_data["attributes"].update(
                payload_to_semantic_attributes(event_type, payload),
            )

        return event_id

    def on_event_end(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: CBEventID = "",
        **kwargs: Any,
    ) -> None:
        event_data = self._event_id_to_event_data[event_id]
        event_data.setdefault("name", event_type.value)
        event_data.setdefault("event_type", event_type)
        event_data["end_event"] = CBEvent(
            event_type=event_type,
            payload=payload,
            id_=event_id,
        )

        # Parse the payload to extract the parameters
        if payload is not None:
            event_data["attributes"].update(
                payload_to_semantic_attributes(event_type, payload),
            )

    def start_trace(self, trace_id: Optional[str] = None) -> None:
        self._event_id_to_event_data = defaultdict(lambda: CBEventData())

    def end_trace(
        self,
        trace_id: Optional[str] = None,
        trace_map: Optional[Dict[CBEventID, List[CBEventID]]] = None,
    ) -> None:
        if not trace_map:
            return  # TODO: investigate when empty or None trace_map is passed
        try:
            _add_to_tracer(
                event_id_to_event_data=self._event_id_to_event_data,
                trace_map=trace_map,
                tracer=self._tracer,
            )
        except Exception:
            logger.exception("OpenInferenceCallbackHandler trace processing failed")
        self._event_id_to_event_data = defaultdict(lambda: CBEventData())

    def get_spans(self) -> Iterator[Span]:
        """
        Returns the spans stored in the tracer. This is useful if you are running
        LlamaIndex in a notebook environment and you want to inspect the spans.
        """
        return self._tracer.get_spans()


def _add_to_tracer(
    event_id_to_event_data: Dict[CBEventID, CBEventData],
    trace_map: Dict[CBEventID, List[CBEventID]],
    tracer: Tracer,
) -> None:
    """Adds event data to the tracer, where it is converted to a span and stored in a buffer.

    Args:
        event_id_to_event_data (Dict[CBEventID, CBEventData]): A map of event IDs to event data.

        trace_map (Dict[CBEventID, List[CBEventID]]): A map of parent event IDs to child event IDs.
        The root event IDs are stored under the key "root".

        tracer (Tracer): The tracer that stores spans.
    """

    trace_id = uuid4()
    parent_child_id_stack: List[Tuple[Optional[SpanID], CBEventID]] = [
        (None, root_event_id) for root_event_id in trace_map["root"]
    ]
    while parent_child_id_stack:
        parent_span_id, event_id = parent_child_id_stack.pop()
        event_data = event_id_to_event_data[event_id]
        start_event = event_data["start_event"]
        start_time_tz_naive = datetime.strptime(start_event.time, TIMESTAMP_FORMAT)
        start_time_tz_aware = start_time_tz_naive.replace(tzinfo=_LOCAL_TZINFO)
        end_event = event_data["end_event"]
        end_time_tz_naive = datetime.strptime(end_event.time, TIMESTAMP_FORMAT)
        end_time_tz_aware = end_time_tz_naive.replace(tzinfo=_LOCAL_TZINFO)
        name = event_data["name"]
        event_type = event_data["event_type"]
        span_kind = _get_span_kind(event_type)
        span = tracer.create_span(
            name=name,
            span_kind=span_kind,
            trace_id=trace_id,
            start_time=start_time_tz_aware,
            end_time=end_time_tz_aware,
            status_code=SpanStatusCode.OK,
            status_message="",
            parent_id=parent_span_id,
            attributes=event_data["attributes"],
            events=None,
            conversation=None,
        )
        new_parent_span_id = span.context.span_id
        for new_child_event_id in trace_map.get(event_id, []):
            parent_child_id_stack.append((new_parent_span_id, new_child_event_id))


def _get_span_kind(event_type: CBEventType) -> SpanKind:
    """Maps a CBEventType to a SpanKind.

    Args:
        event_type (CBEventType): LlamaIndex callback event type.

    Returns:
        SpanKind: The corresponding span kind.
    """
    return {
        CBEventType.EMBEDDING: SpanKind.EMBEDDING,
        CBEventType.LLM: SpanKind.LLM,
        CBEventType.RETRIEVE: SpanKind.RETRIEVER,
        CBEventType.FUNCTION_CALL: SpanKind.TOOL,
        CBEventType.AGENT_STEP: SpanKind.AGENT,
    }.get(event_type, SpanKind.CHAIN)


def _message_payload_to_attributes(message: Any) -> Dict[str, Optional[str]]:
    if isinstance(message, ChatMessage):
        message_attributes = {
            MESSAGE_ROLE: message.role.value,
            MESSAGE_CONTENT: message.content,
        }
        # Parse the kwargs to extract the function name and parameters for function calling
        # NB: these additional kwargs exist both for 'agent' and 'function' roles
        if "name" in message.additional_kwargs:
            message_attributes[MESSAGE_NAME] = message.additional_kwargs["name"]
        if "function_call" in message.additional_kwargs:
            function_call = message.additional_kwargs["function_call"]
            message_attributes[MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON] = function_call.arguments
            message_attributes[MESSAGE_FUNCTION_CALL_NAME] = function_call.name
        return message_attributes

    return {
        MESSAGE_ROLE: "user",  # assume user if not ChatMessage
        MESSAGE_CONTENT: str(message),
    }


def _message_payload_to_str(message: Any) -> Optional[str]:
    """Converts a message payload to a string, if possible"""
    if isinstance(message, ChatMessage):
        return message.content

    return str(message)


def _get_response_content(response: Any) -> str:
    """
    Gets content from response objects. This is needed since the string representation of some
    response objects includes extra information in addition to the content itself.
    """
    if isinstance(response, ChatResponse):
        return response.message.content or ""
    return str(response)
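A handler like this is registered through LlamaIndex's CallbackManager. A minimal sketch, assuming the ServiceContext-based LlamaIndex API of this era; the data directory and query are illustrative:

from llama_index import ServiceContext, SimpleDirectoryReader, VectorStoreIndex
from llama_index.callbacks import CallbackManager

from phoenix.experimental.callbacks.llama_index_trace_callback_handler import (
    OpenInferenceTraceCallbackHandler,
)

# Register the handler so that when `end_trace` fires, the buffered
# events are walked via the trace_map and converted to OpenInference spans.
callback_handler = OpenInferenceTraceCallbackHandler()
service_context = ServiceContext.from_defaults(
    callback_manager=CallbackManager([callback_handler])
)
documents = SimpleDirectoryReader("data").load_data()  # illustrative path
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
response = index.as_query_engine().query("What does this corpus cover?")

In a notebook, `callback_handler.get_spans()` then exposes the buffered spans for inspection, per the handler's own docstring.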
phoenix/experimental/evals/__init__.py (new file)
@@ -0,0 +1,33 @@
from .functions import llm_eval_binary, llm_generate, run_relevance_eval
from .models import OpenAiModel
from .retrievals import compute_precisions_at_k
from .templates import (
    CODE_READABILITY_PROMPT_RAILS_MAP,
    CODE_READABILITY_PROMPT_TEMPLATE_STR,
    HALLUCINATION_PROMPT_RAILS_MAP,
    HALLUCINATION_PROMPT_TEMPLATE_STR,
    RAG_RELEVANCY_PROMPT_RAILS_MAP,
    RAG_RELEVANCY_PROMPT_TEMPLATE_STR,
    TOXICITY_PROMPT_RAILS_MAP,
    TOXICITY_PROMPT_TEMPLATE_STR,
    PromptTemplate,
)
from .utils.downloads import download_benchmark_dataset

__all__ = [
    "compute_precisions_at_k",
    "download_benchmark_dataset",
    "llm_eval_binary",
    "llm_generate",
    "OpenAiModel",
    "PromptTemplate",
    "CODE_READABILITY_PROMPT_RAILS_MAP",
    "CODE_READABILITY_PROMPT_TEMPLATE_STR",
    "HALLUCINATION_PROMPT_RAILS_MAP",
    "HALLUCINATION_PROMPT_TEMPLATE_STR",
    "RAG_RELEVANCY_PROMPT_RAILS_MAP",
    "RAG_RELEVANCY_PROMPT_TEMPLATE_STR",
    "TOXICITY_PROMPT_TEMPLATE_STR",
    "TOXICITY_PROMPT_RAILS_MAP",
    "run_relevance_eval",
]
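The exports suggest a template-plus-rails evaluation workflow. A hedged sketch only: the exact `llm_eval_binary` signature and the template's input columns (assumed here to be "query" and "reference") should be checked against the module itself:

import pandas as pd

from phoenix.experimental.evals import (
    RAG_RELEVANCY_PROMPT_RAILS_MAP,
    RAG_RELEVANCY_PROMPT_TEMPLATE_STR,
    OpenAiModel,
    llm_eval_binary,
)

# One row per retrieval to grade; column names must match the template's
# input variables (assumed to be "query" and "reference" here).
df = pd.DataFrame(
    {
        "query": ["What is Phoenix?"],
        "reference": ["Phoenix is an open-source LLM observability library."],
    }
)
labels = llm_eval_binary(
    dataframe=df,
    template=RAG_RELEVANCY_PROMPT_TEMPLATE_STR,
    model=OpenAiModel(model_name="gpt-4"),  # assumes OPENAI_API_KEY is set
    rails=list(RAG_RELEVANCY_PROMPT_RAILS_MAP.values()),
)

The rails map constrains the model's raw output to a fixed label set (e.g. relevant/irrelevant), which is what makes the evaluation "binary".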