openlit 1.34.27__py3-none-any.whl → 1.34.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +38 -0
- openlit/__init__.py +22 -155
- openlit/_instrumentors.py +144 -0
- openlit/guard/all.py +3 -3
- openlit/instrumentation/chroma/utils.py +2 -2
- openlit/instrumentation/controlflow/controlflow.py +2 -2
- openlit/instrumentation/embedchain/embedchain.py +4 -4
- openlit/instrumentation/groq/__init__.py +4 -4
- openlit/instrumentation/haystack/__init__.py +57 -28
- openlit/instrumentation/haystack/async_haystack.py +54 -0
- openlit/instrumentation/haystack/haystack.py +35 -65
- openlit/instrumentation/haystack/utils.py +377 -0
- openlit/instrumentation/julep/async_julep.py +2 -2
- openlit/instrumentation/julep/julep.py +2 -2
- openlit/instrumentation/langchain_community/utils.py +2 -2
- openlit/instrumentation/llamaindex/__init__.py +165 -37
- openlit/instrumentation/llamaindex/async_llamaindex.py +53 -0
- openlit/instrumentation/llamaindex/llamaindex.py +32 -64
- openlit/instrumentation/llamaindex/utils.py +412 -0
- openlit/instrumentation/mem0/mem0.py +2 -2
- openlit/instrumentation/openai/__init__.py +24 -24
- openlit/instrumentation/openai/utils.py +10 -4
- openlit/instrumentation/pinecone/utils.py +2 -2
- openlit/instrumentation/qdrant/utils.py +2 -2
- openlit/instrumentation/together/__init__.py +8 -8
- openlit/semcov/__init__.py +79 -0
- {openlit-1.34.27.dist-info → openlit-1.34.28.dist-info}/METADATA +1 -1
- {openlit-1.34.27.dist-info → openlit-1.34.28.dist-info}/RECORD +30 -25
- {openlit-1.34.27.dist-info → openlit-1.34.28.dist-info}/LICENSE +0 -0
- {openlit-1.34.27.dist-info → openlit-1.34.28.dist-info}/WHEEL +0 -0
@@ -1,49 +1,78 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
"""
|
2
|
+
OpenLIT Haystack Instrumentation
|
3
|
+
"""
|
4
|
+
|
3
5
|
from typing import Collection
|
4
6
|
import importlib.metadata
|
5
7
|
from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
|
6
8
|
from wrapt import wrap_function_wrapper
|
7
9
|
|
8
|
-
from openlit.instrumentation.haystack.haystack import
|
10
|
+
from openlit.instrumentation.haystack.haystack import general_wrap
|
11
|
+
from openlit.instrumentation.haystack.async_haystack import async_general_wrap
|
9
12
|
|
10
13
|
_instruments = ("haystack-ai >= 2.0.0",)
|
11
14
|
|
12
|
-
WRAPPED_METHODS = [
|
13
|
-
{
|
14
|
-
"package": "haystack.components.joiners.document_joiner",
|
15
|
-
"object": "DocumentJoiner",
|
16
|
-
"endpoint": "haystack.join_data",
|
17
|
-
"wrapper": join_data,
|
18
|
-
}
|
19
|
-
]
|
20
|
-
|
21
15
|
class HaystackInstrumentor(BaseInstrumentor):
    """Instrumentor that installs OpenLIT tracing wrappers on Haystack.

    ``Pipeline.run`` and ``AsyncPipeline.run_async`` are always wrapped.
    The ``run`` method of individual components is wrapped only when the
    ``detailed_tracing`` keyword argument is truthy.
    """

    def instrumentation_dependencies(self) -> Collection[str]:
        """Return the package requirement this instrumentor targets."""
        return _instruments

    def _instrument(self, **kwargs):
        """Wrap the Haystack entry points with tracing wrappers.

        Keyword arguments read (all optional): ``environment``,
        ``application_name``, ``tracer``, ``pricing_info``,
        ``capture_message_content``, ``metrics_dict``, ``disable_metrics``
        and ``detailed_tracing``.

        Each target is wrapped independently: a target that cannot be
        wrapped (for example because the installed Haystack does not ship
        that module or class) is skipped and the remaining targets are
        still attempted.
        """
        # Function-scope import so this block does not depend on the
        # file-level import list.
        import logging

        log = logging.getLogger(__name__)

        version = importlib.metadata.version("haystack-ai")
        environment = kwargs.get("environment", "default")
        application_name = kwargs.get("application_name", "default")
        tracer = kwargs.get("tracer")
        pricing_info = kwargs.get("pricing_info", {})
        capture_message_content = kwargs.get("capture_message_content", False)
        metrics = kwargs.get("metrics_dict")
        disable_metrics = kwargs.get("disable_metrics")
        detailed_tracing = kwargs.get("detailed_tracing", False)

        # (module, attribute path, endpoint label, wrapper factory)
        # Pipeline operations are always instrumented.
        targets = [
            ("haystack", "Pipeline.run", "pipeline", general_wrap),
            ("haystack", "AsyncPipeline.run_async", "pipeline", async_general_wrap),
        ]

        # Component operations are instrumented only on request.
        if detailed_tracing:
            targets.extend([
                ("haystack.components.retrievers.in_memory",
                 "InMemoryBM25Retriever.run", "bm25_retriever", general_wrap),
                ("haystack.components.builders.prompt_builder",
                 "PromptBuilder.run", "prompt_builder", general_wrap),
                ("haystack.components.generators.openai",
                 "OpenAIGenerator.run", "openai_generator", general_wrap),
                ("haystack.components.generators.chat.openai",
                 "OpenAIChatGenerator.run", "openai_chat_generator", general_wrap),
                ("haystack.components.embedders.openai_text_embedder",
                 "OpenAITextEmbedder.run", "text_embedder", general_wrap),
                ("haystack.components.embedders.openai_document_embedder",
                 "OpenAIDocumentEmbedder.run", "document_embedder", general_wrap),
            ])

        for module, method, endpoint, factory in targets:
            try:
                wrap_function_wrapper(
                    module, method,
                    factory(endpoint, version, environment, application_name,
                            tracer, pricing_info, capture_message_content,
                            metrics, disable_metrics)
                )
            except Exception as exc:  # pylint: disable=broad-exception-caught
                # Best effort: instrumentation must never break the host
                # application, but leave a trace of what was skipped.
                log.debug("Skipping Haystack instrumentation of %s.%s: %s",
                          module, method, exc)

    def _uninstrument(self, **kwargs):
        """No-op: the wrappers installed by ``_instrument`` are not removed."""
        pass
|
@@ -0,0 +1,54 @@
|
|
1
|
+
"""
|
2
|
+
Haystack async wrapper
|
3
|
+
"""
|
4
|
+
|
5
|
+
import time
|
6
|
+
from opentelemetry.trace import SpanKind
|
7
|
+
from opentelemetry import context as context_api
|
8
|
+
from openlit.__helpers import handle_exception
|
9
|
+
from openlit.instrumentation.haystack.utils import (
|
10
|
+
process_haystack_response,
|
11
|
+
OPERATION_MAP,
|
12
|
+
set_server_address_and_port,
|
13
|
+
)
|
14
|
+
|
15
|
+
def async_general_wrap(endpoint, version, environment, application_name,
    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
    """Build an async tracing wrapper for one Haystack operation.

    The returned coroutine function has the ``(wrapped, instance, args,
    kwargs)`` signature expected by ``wrap_function_wrapper``. It awaits the
    wrapped callable inside a CLIENT span and hands the result to
    ``process_haystack_response``. ``pricing_info`` is accepted but not
    used here.
    """

    async def wrapper(wrapped, instance, args, kwargs):
        """Trace one awaited call of the wrapped Haystack method."""

        # When instrumentation is suppressed in the current context,
        # call straight through without creating a span.
        suppressed = context_api.get_value(context_api._SUPPRESS_INSTRUMENTATION_KEY)
        if suppressed:
            return await wrapped(*args, **kwargs)

        operation_type = OPERATION_MAP.get(endpoint, "framework")
        server_address, server_port = set_server_address_and_port(instance)

        # "<operation> <endpoint>"; for the pipeline endpoint this reads
        # "<operation> pipeline".
        span_name = f"{operation_type} {endpoint}"

        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
            start_time = time.time()
            result = await wrapped(*args, **kwargs)

            try:
                result = process_haystack_response(
                    result, operation_type, server_address, server_port,
                    environment, application_name, metrics, start_time, span,
                    capture_message_content, disable_metrics, version,
                    instance, args, endpoint=endpoint, **kwargs
                )
            except Exception as e:
                # Telemetry failures are recorded on the span; the caller
                # still receives the wrapped call's own result.
                handle_exception(span, e)

            return result

    return wrapper
|
@@ -1,84 +1,54 @@
|
|
1
|
-
# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument
|
2
1
|
"""
|
3
|
-
|
2
|
+
Haystack sync wrapper
|
4
3
|
"""
|
5
4
|
|
6
|
-
import
|
7
|
-
from opentelemetry.trace import SpanKind
|
8
|
-
from opentelemetry
|
5
|
+
import time
|
6
|
+
from opentelemetry.trace import SpanKind
|
7
|
+
from opentelemetry import context as context_api
|
9
8
|
from openlit.__helpers import handle_exception
|
10
|
-
from openlit.
|
9
|
+
from openlit.instrumentation.haystack.utils import (
|
10
|
+
process_haystack_response,
|
11
|
+
OPERATION_MAP,
|
12
|
+
set_server_address_and_port,
|
13
|
+
)
|
11
14
|
|
12
|
-
|
13
|
-
|
15
|
+
def general_wrap(endpoint, version, environment, application_name,
    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
    """Build a synchronous tracing wrapper for one Haystack operation.

    The returned function has the ``(wrapped, instance, args, kwargs)``
    signature expected by ``wrap_function_wrapper``. It calls the wrapped
    callable inside a CLIENT span and hands the result to
    ``process_haystack_response``. ``pricing_info`` is accepted but not
    used here.
    """

    def wrapper(wrapped, instance, args, kwargs):
        """Trace one call of the wrapped Haystack method."""

        # When instrumentation is suppressed in the current context,
        # call straight through without creating a span.
        suppressed = context_api.get_value(context_api._SUPPRESS_INSTRUMENTATION_KEY)
        if suppressed:
            return wrapped(*args, **kwargs)

        operation_type = OPERATION_MAP.get(endpoint, "framework")
        server_address, server_port = set_server_address_and_port(instance)

        # "<operation> <endpoint>"; for the pipeline endpoint this reads
        # "<operation> pipeline".
        span_name = f"{operation_type} {endpoint}"

        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
            start_time = time.time()
            result = wrapped(*args, **kwargs)

            try:
                result = process_haystack_response(
                    result, operation_type, server_address, server_port,
                    environment, application_name, metrics, start_time, span,
                    capture_message_content, disable_metrics, version,
                    instance, args, endpoint=endpoint, **kwargs
                )
            except Exception as e:
                # Telemetry failures are recorded on the span; the caller
                # still receives the wrapped call's own result.
                handle_exception(span, e)

            return result

    return wrapper
|
@@ -0,0 +1,377 @@
|
|
1
|
+
"""
|
2
|
+
Haystack utilities
|
3
|
+
"""
|
4
|
+
import time
|
5
|
+
import json
|
6
|
+
from typing import Dict, Any
|
7
|
+
from opentelemetry.trace import Status, StatusCode
|
8
|
+
from openlit.__helpers import common_framework_span_attributes, record_framework_metrics
|
9
|
+
from openlit.semcov import SemanticConvention
|
10
|
+
|
11
|
+
# Optimized operation mapping - minimal and fast
|
12
|
+
# Endpoint label -> operation type recorded on the span. The wrappers fall
# back to "framework" for labels that are not listed here.
OPERATION_MAP = dict(
    pipeline=SemanticConvention.GEN_AI_OPERATION_TYPE_FRAMEWORK,
    bm25_retriever=SemanticConvention.GEN_AI_OPERATION_TYPE_RETRIEVE,
    prompt_builder=SemanticConvention.GEN_AI_OPERATION_TYPE_FRAMEWORK,
    openai_generator=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
    openai_chat_generator=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
    text_embedder=SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
    document_embedder=SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
)
|
21
|
+
|
22
|
+
def set_server_address_and_port(instance):
    """Return the ``(address, port)`` pair reported on Haystack spans.

    ``instance`` is not inspected; the result is always the fixed pair
    ``("localhost", 8080)``.
    """
    address, port = "localhost", 8080
    return address, port
|
25
|
+
|
26
|
+
def object_count(obj):
    """Return ``len(obj)`` for sized objects and ``1`` for anything else.

    Objects without ``__len__`` count as a single item. If ``len`` itself
    fails (for example a ``__len__`` that raises) the object is also
    counted as one item.
    """
    try:
        return len(obj) if hasattr(obj, "__len__") else 1
    except Exception:  # pylint: disable=broad-exception-caught
        # Deliberately not a bare ``except``: KeyboardInterrupt and
        # SystemExit must keep propagating.
        return 1
|
32
|
+
|
33
|
+
def extract_component_technical_details(instance, args, kwargs, endpoint) -> Dict[str, Any]:
    """Collect introspection details about a component instance.

    Possible keys of the returned dict: ``class_name``, ``module_name``,
    ``input_types``, ``output_types``, ``input_spec``, ``output_spec`` and
    ``runtime_input_types``. Introspection is best effort: if anything
    raises, whatever was gathered up to that point is returned.
    ``endpoint`` is accepted but not used.
    """
    details = {}

    def _type_label(type_info):
        # Falsy type info is reported as "Any".
        return str(type_info) if type_info else "Any"

    try:
        if hasattr(instance, "__class__"):
            component_cls = instance.__class__
            details["class_name"] = component_cls.__name__
            details["module_name"] = component_cls.__module__

        has_config = hasattr(instance, "_component_config")
        config = instance._component_config if has_config else None

        # Declared input types, falling back to the annotations of ``run``.
        if has_config and hasattr(config, "input_types"):
            details["input_types"] = {
                name: _type_label(type_info)
                for name, type_info in config.input_types.items()
            }
        elif hasattr(instance, "run") and hasattr(instance.run, "__annotations__"):
            details["input_types"] = {
                param: str(annotation)
                for param, annotation in instance.run.__annotations__.items()
                if param != "return"
            }

        # Declared output types.
        if has_config and hasattr(config, "output_types"):
            details["output_types"] = {
                name: _type_label(type_info)
                for name, type_info in config.output_types.items()
            }

        # Per-socket specifications.
        if has_config:
            if hasattr(config, "input_sockets"):
                details["input_spec"] = {
                    socket_name: {
                        "type": str(getattr(socket, "type", "Any")),
                        "default_value": str(getattr(socket, "default_value", None)),
                        "is_optional": getattr(socket, "is_optional", False),
                    }
                    for socket_name, socket in config.input_sockets.items()
                }

            if hasattr(config, "output_sockets"):
                details["output_spec"] = {
                    socket_name: {
                        "type": str(getattr(socket, "type", "Any")),
                        "is_list": getattr(socket, "is_list", False),
                    }
                    for socket_name, socket in config.output_sockets.items()
                }

        # Type names (not values) of the arguments of this call.
        if args or kwargs:
            runtime_inputs = {
                f"arg_{position}": type(argument).__name__
                for position, argument in enumerate(args or ())
            }
            if kwargs:
                runtime_inputs.update(
                    (key, type(value).__name__) for key, value in kwargs.items()
                )
            details["runtime_input_types"] = runtime_inputs

    except Exception:
        # Introspection must never break the traced call.
        pass

    return details
|
105
|
+
|
106
|
+
def extract_pipeline_metadata(instance, args, kwargs) -> Dict[str, Any]:
    """Collect pipeline-level metadata for telemetry.

    Possible keys of the returned dict: ``component_count``,
    ``connections``, ``components``, ``max_runs_per_component`` and
    ``input_data``. Extraction is best effort: if anything raises, whatever
    was gathered up to that point is returned.

    Args:
        instance: The pipeline object; its ``graph`` (or ``_graph``)
            attribute is inspected when present.
        args: Positional arguments of the traced call; ``args[0]`` is
            treated as the pipeline input when it is a dict.
        kwargs: Keyword arguments of the traced call; ``kwargs["data"]`` is
            used as the pipeline input when no positional input was given.
    """
    metadata = {}

    try:
        # Resolve the graph without leaving the name unbound: a pipeline
        # object exposing neither attribute must not abort the rest of the
        # extraction (max runs, input data).
        graph = getattr(instance, "graph", None)
        if graph is None:
            graph = getattr(instance, "_graph", None)

        if graph is not None:
            has_nodes = hasattr(graph, "nodes")

            if has_nodes:
                metadata["component_count"] = len(graph.nodes())

            # Edges as (source, target, data) triples.
            connections = []
            if hasattr(graph, "edges"):
                for source, target, data in graph.edges(data=True):
                    connections.append({
                        "source": source,
                        "target": target,
                        "data": str(data) if data else None
                    })
            metadata["connections"] = connections

            # Component names with the type of the stored instance.
            if has_nodes:
                components = []
                for node in graph.nodes():
                    node_data = graph.nodes[node]
                    if not hasattr(node_data, "get"):
                        node_data = {}
                    component = node_data.get("instance")
                    components.append({
                        "name": node,
                        "type": str(type(component)) if component is not None else "unknown"
                    })
                metadata["components"] = components

        if hasattr(instance, "max_runs_per_component"):
            metadata["max_runs_per_component"] = instance.max_runs_per_component

        # Pipeline input: first positional argument, else ``data=...``.
        if args:
            input_data = args[0]
        else:
            input_data = (kwargs or {}).get("data")

        if isinstance(input_data, dict):
            sanitized_input = {}
            for key, value in input_data.items():
                if isinstance(value, (str, int, float, bool)):
                    sanitized_input[key] = value
                elif isinstance(value, dict):
                    # Nested values are stringified and capped at 100 chars.
                    sanitized_input[key] = {k: str(v)[:100] for k, v in value.items()}
                else:
                    # Anything else is reported by type name only.
                    # (``str(type(value)).__name__`` raised AttributeError.)
                    sanitized_input[key] = type(value).__name__
            metadata["input_data"] = sanitized_input

    except Exception:  # pylint: disable=broad-exception-caught
        # Metadata is optional; never break the traced call over it.
        pass

    return metadata
|
170
|
+
|
171
|
+
def extract_component_connections(instance) -> Dict[str, Any]:
    """Describe which peers are attached to a component's sockets.

    Reads ``instance._component_config``. When it has ``input_sockets`` the
    result gets a ``"senders"`` list built from each socket's ``_senders``;
    when it has ``output_sockets`` the result gets a ``"receivers"`` list
    built from each socket's ``_receivers``. Every entry is
    ``{"component": str(peer), "socket": socket_name}``. Extraction is best
    effort: on any error the entries gathered so far are returned.
    """
    found = {}

    def _peers(sockets, peer_attr):
        # One entry per peer of every socket whose peer list is non-empty.
        return [
            {"component": str(peer), "socket": socket_name}
            for socket_name, socket in sockets.items()
            for peer in (getattr(socket, peer_attr, None) or ())
        ]

    try:
        has_config = hasattr(instance, "_component_config")

        if has_config and hasattr(instance._component_config, "input_sockets"):
            found["senders"] = _peers(instance._component_config.input_sockets, "_senders")

        if has_config and hasattr(instance._component_config, "output_sockets"):
            found["receivers"] = _peers(instance._component_config.output_sockets, "_receivers")

    except Exception:
        # Connection details are optional; ignore introspection failures.
        pass

    return found
|
207
|
+
|
208
|
+
def process_haystack_response(response, operation_type, server_address, server_port,
    environment, application_name, metrics, start_time, span,
    capture_message_content, disable_metrics, version, instance=None,
    args=None, endpoint=None, **kwargs):
    """Record telemetry for a finished Haystack call and return its response.

    Args:
        response: Value returned by the wrapped call; returned unchanged.
        operation_type: Operation type written to the span and metrics.
        server_address, server_port: Endpoint reported on the span.
        environment, application_name, version: Deployment identifiers
            forwarded to ``common_framework_span_attributes``.
        metrics: Metrics container forwarded to ``record_framework_metrics``.
        start_time: ``time.time()`` value taken before the wrapped call.
        span: Span that receives the attributes and the final OK status.
        capture_message_content: When truthy, prompt/completion text
            (capped at 500 characters) is recorded on the span.
        disable_metrics: When truthy, no metrics are recorded.
        instance: Object the wrapped method was called on, if any.
        args: Positional arguments of the wrapped call.
        endpoint: Endpoint label such as ``"pipeline"`` or
            ``"bm25_retriever"``; may be ``None``.
        **kwargs: Keyword arguments of the wrapped call.

    Returns:
        ``response``, unchanged.
    """

    end_time = time.time()

    # ``endpoint`` defaults to None; the substring checks further down need
    # a string (``"retriever" in None`` raises TypeError).
    endpoint_name = endpoint or ""

    # Minimal ad-hoc scope object carrying the attributes handed to the
    # shared helper.
    scope = type("Scope", (), {
        "_span": span,
        "_server_address": server_address,
        "_server_port": server_port,
        "_start_time": start_time,
        "_end_time": end_time
    })()
    common_framework_span_attributes(
        scope,
        SemanticConvention.GEN_AI_SYSTEM_HAYSTACK,
        server_address, server_port, environment, application_name,
        version, endpoint, instance
    )

    span.set_attribute(SemanticConvention.GEN_AI_OPERATION, operation_type)

    # Component introspection details.
    if instance:
        tech_details = extract_component_technical_details(instance, args, kwargs, endpoint)

        if tech_details.get("class_name"):
            span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_COMPONENT_CLASS_NAME,
                               tech_details["class_name"])

        for detail_key, attribute in (
            ("input_types", SemanticConvention.GEN_AI_FRAMEWORK_COMPONENT_INPUT_TYPES),
            ("output_types", SemanticConvention.GEN_AI_FRAMEWORK_COMPONENT_OUTPUT_TYPES),
            ("input_spec", SemanticConvention.GEN_AI_FRAMEWORK_COMPONENT_INPUT_SPEC),
            ("output_spec", SemanticConvention.GEN_AI_FRAMEWORK_COMPONENT_OUTPUT_SPEC),
        ):
            if tech_details.get(detail_key):
                span.set_attribute(attribute, json.dumps(tech_details[detail_key]))

        connections = extract_component_connections(instance)
        for connection_key, attribute in (
            ("senders", SemanticConvention.GEN_AI_FRAMEWORK_COMPONENT_SENDERS),
            ("receivers", SemanticConvention.GEN_AI_FRAMEWORK_COMPONENT_RECEIVERS),
        ):
            if connections.get(connection_key):
                span.set_attribute(attribute, json.dumps(connections[connection_key]))

    # Pipeline level.
    if endpoint == "pipeline" and isinstance(response, dict):
        span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_CONTEXT_COUNT, len(response))

        if instance:
            pipeline_metadata = extract_pipeline_metadata(instance, args, kwargs)

            if pipeline_metadata.get("component_count"):
                span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_PIPELINE_COMPONENT_COUNT,
                                   pipeline_metadata["component_count"])

            if pipeline_metadata.get("max_runs_per_component"):
                span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_PIPELINE_MAX_RUNS,
                                   pipeline_metadata["max_runs_per_component"])

            for metadata_key, attribute in (
                ("connections", SemanticConvention.GEN_AI_FRAMEWORK_COMPONENT_CONNECTIONS),
                ("components", SemanticConvention.GEN_AI_FRAMEWORK_PIPELINE_METADATA),
                ("input_data", SemanticConvention.GEN_AI_FRAMEWORK_PIPELINE_INPUT_DATA),
            ):
                if pipeline_metadata.get(metadata_key):
                    span.set_attribute(attribute, json.dumps(pipeline_metadata[metadata_key]))

        # Compact summary of the pipeline output.
        if response:
            sanitized_output = {}
            for key, value in response.items():
                if isinstance(value, (str, int, float, bool)):
                    sanitized_output[key] = value
                elif isinstance(value, dict) and "replies" in value:
                    sanitized_output[key] = f"{len(value['replies'])} replies"
                else:
                    # Report other values by type name only.
                    # (``str(type(value)).__name__`` raised AttributeError,
                    # which aborted the rest of this function.)
                    sanitized_output[key] = type(value).__name__
            span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_PIPELINE_OUTPUT_DATA,
                               json.dumps(sanitized_output))

        # First LLM/generator entry that carries replies.
        for key, value in response.items():
            if key in ["llm", "generator"] and isinstance(value, dict) and "replies" in value:
                replies = value["replies"]
                if replies and capture_message_content:
                    span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION,
                                       str(replies[0])[:500])
                break

    # Retriever level.
    elif "retriever" in endpoint_name and isinstance(response, dict) and "documents" in response:
        docs = response["documents"]
        span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_RETRIEVAL_COUNT, object_count(docs))
        span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_DOCUMENTS_COUNT, object_count(docs))

        if instance:
            span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_COMPONENT_NAME, endpoint)
            span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_COMPONENT_TYPE, "retriever")

    # Generator level.
    elif "generator" in endpoint_name:
        if instance:
            span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_COMPONENT_NAME, endpoint)
            span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_COMPONENT_TYPE, "generator")

        if args and capture_message_content:
            span.set_attribute(SemanticConvention.GEN_AI_PROMPT, str(args[0])[:500])

        if isinstance(response, dict) and "replies" in response:
            replies = response["replies"]
            span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_CONTEXT_COUNT,
                               object_count(replies))

    # Prompt builder level.
    elif endpoint == "prompt_builder":
        if instance:
            span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_COMPONENT_NAME, endpoint)
            span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_COMPONENT_TYPE, "prompt_builder")

        if kwargs and capture_message_content:
            for key, value in kwargs.items():
                if key in ["documents", "question"] and value:
                    span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_CONTEXT_COUNT,
                                       object_count([value]))
                    break

    # Every traced component call is reported as a single visit.
    if endpoint != "pipeline" and instance:
        span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_COMPONENT_VISITS, 1)

    # Duration and status.
    execution_time = end_time - start_time
    span.set_attribute(SemanticConvention.GEN_AI_CLIENT_OPERATION_DURATION, execution_time)

    if endpoint == "pipeline":
        span.set_attribute(SemanticConvention.GEN_AI_FRAMEWORK_PIPELINE_EXECUTION_TIME,
                           execution_time)

    span.set_status(Status(StatusCode.OK))

    # Metrics.
    if not disable_metrics:
        record_framework_metrics(metrics, operation_type, SemanticConvention.GEN_AI_SYSTEM_HAYSTACK,
            server_address, server_port, environment, application_name, start_time, end_time)

    return response
|