genai-otel-instrument 0.1.2.dev0__py3-none-any.whl → 0.1.4.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of genai-otel-instrument might be problematic. Click here for more details.
- genai_otel/__version__.py +2 -2
- genai_otel/auto_instrument.py +18 -1
- genai_otel/config.py +3 -0
- genai_otel/cost_calculator.py +136 -11
- genai_otel/cost_enrichment_processor.py +177 -0
- genai_otel/instrumentors/base.py +74 -42
- genai_otel/instrumentors/cohere_instrumentor.py +80 -16
- genai_otel/instrumentors/huggingface_instrumentor.py +138 -13
- genai_otel/instrumentors/mistralai_instrumentor.py +249 -37
- genai_otel/instrumentors/ollama_instrumentor.py +104 -35
- genai_otel/instrumentors/replicate_instrumentor.py +59 -14
- genai_otel/instrumentors/togetherai_instrumentor.py +120 -16
- genai_otel/instrumentors/vertexai_instrumentor.py +79 -15
- genai_otel/llm_pricing.json +866 -586
- {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.4.dev0.dist-info}/METADATA +45 -8
- {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.4.dev0.dist-info}/RECORD +20 -19
- {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.4.dev0.dist-info}/WHEEL +0 -0
- {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.4.dev0.dist-info}/entry_points.txt +0 -0
- {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.4.dev0.dist-info}/licenses/LICENSE +0 -0
- {genai_otel_instrument-0.1.2.dev0.dist-info → genai_otel_instrument-0.1.4.dev0.dist-info}/top_level.txt +0 -0
|
@@ -2,11 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
This instrumentor automatically traces calls to Ollama models for both
|
|
4
4
|
generation and chat functionalities, capturing relevant attributes such as
|
|
5
|
-
the model name.
|
|
5
|
+
the model name and token usage.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import logging
|
|
9
|
-
from typing import Dict, Optional
|
|
9
|
+
from typing import Any, Dict, Optional
|
|
10
10
|
|
|
11
11
|
from ..config import OTelConfig
|
|
12
12
|
from .base import BaseInstrumentor
|
|
@@ -22,8 +22,8 @@ class OllamaInstrumentor(BaseInstrumentor):
|
|
|
22
22
|
super().__init__()
|
|
23
23
|
self._ollama_available = False
|
|
24
24
|
self._ollama_module = None
|
|
25
|
-
self._original_generate = None
|
|
26
|
-
self._original_chat = None
|
|
25
|
+
self._original_generate = None
|
|
26
|
+
self._original_chat = None
|
|
27
27
|
self._check_availability()
|
|
28
28
|
|
|
29
29
|
def _check_availability(self):
|
|
@@ -46,38 +46,107 @@ class OllamaInstrumentor(BaseInstrumentor):
|
|
|
46
46
|
if not self._ollama_available or self._ollama_module is None:
|
|
47
47
|
return
|
|
48
48
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
49
|
+
try:
|
|
50
|
+
# Store original methods and wrap them
|
|
51
|
+
self._original_generate = self._ollama_module.generate
|
|
52
|
+
self._original_chat = self._ollama_module.chat
|
|
53
|
+
|
|
54
|
+
# Wrap generate method
|
|
55
|
+
wrapped_generate = self.create_span_wrapper(
|
|
56
|
+
span_name="ollama.generate",
|
|
57
|
+
extract_attributes=self._extract_generate_attributes,
|
|
58
|
+
)(self._original_generate)
|
|
59
|
+
self._ollama_module.generate = wrapped_generate
|
|
60
|
+
|
|
61
|
+
# Wrap chat method
|
|
62
|
+
wrapped_chat = self.create_span_wrapper(
|
|
63
|
+
span_name="ollama.chat",
|
|
64
|
+
extract_attributes=self._extract_chat_attributes,
|
|
65
|
+
)(self._original_chat)
|
|
66
|
+
self._ollama_module.chat = wrapped_chat
|
|
67
|
+
|
|
68
|
+
self._instrumented = True
|
|
69
|
+
logger.info("Ollama instrumentation enabled")
|
|
70
|
+
|
|
71
|
+
except Exception as e:
|
|
72
|
+
logger.error("Failed to instrument Ollama: %s", e, exc_info=True)
|
|
73
|
+
if config.fail_on_error:
|
|
74
|
+
raise
|
|
75
|
+
|
|
76
|
+
def _extract_generate_attributes(self, instance: Any, args: Any, kwargs: Any) -> Dict[str, Any]:
|
|
77
|
+
"""Extract attributes from Ollama generate call.
|
|
78
|
+
|
|
79
|
+
Args:
|
|
80
|
+
instance: The client instance (None for module-level functions).
|
|
81
|
+
args: Positional arguments.
|
|
82
|
+
kwargs: Keyword arguments.
|
|
83
|
+
|
|
84
|
+
Returns:
|
|
85
|
+
Dict[str, Any]: Dictionary of attributes to set on the span.
|
|
86
|
+
"""
|
|
87
|
+
attrs = {}
|
|
88
|
+
model = kwargs.get("model", "unknown")
|
|
89
|
+
|
|
90
|
+
attrs["gen_ai.system"] = "ollama"
|
|
91
|
+
attrs["gen_ai.request.model"] = model
|
|
92
|
+
attrs["gen_ai.operation.name"] = "generate"
|
|
93
|
+
|
|
94
|
+
return attrs
|
|
95
|
+
|
|
96
|
+
def _extract_chat_attributes(self, instance: Any, args: Any, kwargs: Any) -> Dict[str, Any]:
|
|
97
|
+
"""Extract attributes from Ollama chat call.
|
|
98
|
+
|
|
99
|
+
Args:
|
|
100
|
+
instance: The client instance (None for module-level functions).
|
|
101
|
+
args: Positional arguments.
|
|
102
|
+
kwargs: Keyword arguments.
|
|
103
|
+
|
|
104
|
+
Returns:
|
|
105
|
+
Dict[str, Any]: Dictionary of attributes to set on the span.
|
|
106
|
+
"""
|
|
107
|
+
attrs = {}
|
|
108
|
+
model = kwargs.get("model", "unknown")
|
|
109
|
+
messages = kwargs.get("messages", [])
|
|
110
|
+
|
|
111
|
+
attrs["gen_ai.system"] = "ollama"
|
|
112
|
+
attrs["gen_ai.request.model"] = model
|
|
113
|
+
attrs["gen_ai.operation.name"] = "chat"
|
|
114
|
+
attrs["gen_ai.request.message_count"] = len(messages)
|
|
115
|
+
|
|
116
|
+
return attrs
|
|
72
117
|
|
|
73
|
-
|
|
74
|
-
|
|
118
|
+
def _extract_usage(self, result) -> Optional[Dict[str, int]]:
|
|
119
|
+
"""Extract token usage from Ollama response.
|
|
75
120
|
|
|
76
|
-
|
|
77
|
-
|
|
121
|
+
Ollama responses include:
|
|
122
|
+
- prompt_eval_count: Input tokens
|
|
123
|
+
- eval_count: Output tokens
|
|
78
124
|
|
|
79
|
-
|
|
80
|
-
|
|
125
|
+
Args:
|
|
126
|
+
result: The API response object or dictionary.
|
|
81
127
|
|
|
82
|
-
|
|
83
|
-
|
|
128
|
+
Returns:
|
|
129
|
+
Optional[Dict[str, int]]: Dictionary with token counts or None.
|
|
130
|
+
"""
|
|
131
|
+
try:
|
|
132
|
+
# Handle both dict and object responses
|
|
133
|
+
if isinstance(result, dict):
|
|
134
|
+
prompt_tokens = result.get("prompt_eval_count", 0)
|
|
135
|
+
completion_tokens = result.get("eval_count", 0)
|
|
136
|
+
elif hasattr(result, "prompt_eval_count") and hasattr(result, "eval_count"):
|
|
137
|
+
prompt_tokens = getattr(result, "prompt_eval_count", 0)
|
|
138
|
+
completion_tokens = getattr(result, "eval_count", 0)
|
|
139
|
+
else:
|
|
140
|
+
return None
|
|
141
|
+
|
|
142
|
+
if prompt_tokens == 0 and completion_tokens == 0:
|
|
143
|
+
return None
|
|
144
|
+
|
|
145
|
+
return {
|
|
146
|
+
"prompt_tokens": prompt_tokens,
|
|
147
|
+
"completion_tokens": completion_tokens,
|
|
148
|
+
"total_tokens": prompt_tokens + completion_tokens,
|
|
149
|
+
}
|
|
150
|
+
except Exception as e:
|
|
151
|
+
logger.debug("Failed to extract usage from Ollama response: %s", e)
|
|
152
|
+
return None
|
|
@@ -2,41 +2,86 @@
|
|
|
2
2
|
|
|
3
3
|
This instrumentor automatically traces calls to Replicate models, capturing
|
|
4
4
|
relevant attributes such as the model name.
|
|
5
|
+
|
|
6
|
+
Note: Replicate uses hardware-based pricing (per second of GPU/CPU time),
|
|
7
|
+
not token-based pricing. Cost tracking is not applicable as the pricing model
|
|
8
|
+
is fundamentally different from token-based LLM APIs.
|
|
5
9
|
"""
|
|
6
10
|
|
|
7
|
-
|
|
11
|
+
import logging
|
|
12
|
+
from typing import Any, Dict, Optional
|
|
8
13
|
|
|
9
14
|
from ..config import OTelConfig
|
|
10
15
|
from .base import BaseInstrumentor
|
|
11
16
|
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
12
19
|
|
|
13
20
|
class ReplicateInstrumentor(BaseInstrumentor):
|
|
14
|
-
"""Instrumentor for Replicate
|
|
21
|
+
"""Instrumentor for Replicate.
|
|
22
|
+
|
|
23
|
+
Note: Replicate uses hardware-based pricing ($/second), not token-based.
|
|
24
|
+
Cost tracking returns None as pricing is based on execution time and hardware type.
|
|
25
|
+
"""
|
|
15
26
|
|
|
16
27
|
def instrument(self, config: OTelConfig):
    """Patch ``replicate.run`` with a tracing wrapper when the SDK is importable.

    The configuration is stored on the instrumentor first. A missing
    ``replicate`` package is logged at debug level and skipped; any other
    failure is logged as an error and re-raised only when
    ``config.fail_on_error`` is set.

    Args:
        config: The instrumentation configuration.
    """
    self.config = config
    try:
        import replicate

        run_fn = replicate.run

        # Build the decorator, then apply it to the untouched function.
        traced = self.create_span_wrapper(
            span_name="replicate.run",
            extract_attributes=self._extract_run_attributes,
        )
        replicate.run = traced(run_fn)

        self._instrumented = True
        logger.info("Replicate instrumentation enabled")
    except ImportError:
        logger.debug("Replicate library not installed, instrumentation will be skipped")
    except Exception as e:
        logger.error("Failed to instrument Replicate: %s", e, exc_info=True)
        if config.fail_on_error:
            raise
|
|
32
51
|
|
|
33
|
-
|
|
34
|
-
|
|
52
|
+
def _extract_run_attributes(self, instance: Any, args: Any, kwargs: Any) -> Dict[str, Any]:
|
|
53
|
+
"""Extract attributes from Replicate run call.
|
|
35
54
|
|
|
36
|
-
|
|
55
|
+
Args:
|
|
56
|
+
instance: The instance (None for module-level functions).
|
|
57
|
+
args: Positional arguments (first arg is typically the model).
|
|
58
|
+
kwargs: Keyword arguments.
|
|
37
59
|
|
|
38
|
-
|
|
39
|
-
|
|
60
|
+
Returns:
|
|
61
|
+
Dict[str, Any]: Dictionary of attributes to set on the span.
|
|
62
|
+
"""
|
|
63
|
+
attrs = {}
|
|
64
|
+
model = args[0] if args else kwargs.get("model", "unknown")
|
|
65
|
+
|
|
66
|
+
attrs["gen_ai.system"] = "replicate"
|
|
67
|
+
attrs["gen_ai.request.model"] = model
|
|
68
|
+
attrs["gen_ai.operation.name"] = "run"
|
|
69
|
+
|
|
70
|
+
return attrs
|
|
40
71
|
|
|
41
72
|
def _extract_usage(self, result) -> Optional[Dict[str, int]]:
|
|
73
|
+
"""Extract token usage from Replicate response.
|
|
74
|
+
|
|
75
|
+
Note: Replicate uses hardware-based pricing ($/second of GPU/CPU time),
|
|
76
|
+
not token-based pricing. Returns None as the pricing model is incompatible
|
|
77
|
+
with token-based cost calculation.
|
|
78
|
+
|
|
79
|
+
Args:
|
|
80
|
+
result: The API response.
|
|
81
|
+
|
|
82
|
+
Returns:
|
|
83
|
+
None: Replicate uses hardware-based pricing, not token-based.
|
|
84
|
+
"""
|
|
85
|
+
# Replicate uses hardware-based pricing ($/second), not tokens
|
|
86
|
+
# Cannot track costs with token-based calculator
|
|
42
87
|
return None
|
|
@@ -1,42 +1,146 @@
|
|
|
1
1
|
"""OpenTelemetry instrumentor for the Together AI SDK.
|
|
2
2
|
|
|
3
3
|
This instrumentor automatically traces completion calls to Together AI models,
|
|
4
|
-
capturing relevant attributes such as the model name.
|
|
4
|
+
capturing relevant attributes such as the model name and token usage.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
import logging
|
|
8
|
+
from typing import Any, Dict, Optional
|
|
8
9
|
|
|
9
10
|
from ..config import OTelConfig
|
|
10
11
|
from .base import BaseInstrumentor
|
|
11
12
|
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
12
15
|
|
|
13
16
|
class TogetherAIInstrumentor(BaseInstrumentor):
|
|
14
17
|
"""Instrumentor for Together AI"""
|
|
15
18
|
|
|
16
19
|
def instrument(self, config: OTelConfig):
    """Instrument the Together AI SDK if it is importable.

    Two SDK shapes are handled:

    * client-based API (``together.Together``): ``__init__`` is patched so
      every newly constructed client gets its chat-completion method wrapped;
    * older ``together.Complete`` API: ``Complete.create`` is wrapped directly.

    A failure while instrumenting an individual client is logged and only
    propagated when ``config.fail_on_error`` is set, so telemetry problems
    cannot break the application's own ``Together()`` construction.

    Args:
        config: The instrumentation configuration.
    """
    self.config = config
    try:
        import functools

        import together

        # Instrument chat completions (newer API)
        if hasattr(together, "Together"):
            # This is the newer Together SDK with client-based API
            original_init = together.Together.__init__

            # wraps keeps the constructor's name, docstring and signature
            # visible to introspection after patching.
            @functools.wraps(original_init)
            def wrapped_init(instance, *args, **kwargs):
                original_init(instance, *args, **kwargs)
                try:
                    self._instrument_client(instance)
                except Exception as e:
                    logger.error(
                        "Failed to instrument Together AI client: %s", e, exc_info=True
                    )
                    if config.fail_on_error:
                        raise

            together.Together.__init__ = wrapped_init
            self._instrumented = True
            logger.info("Together AI instrumentation enabled (client-based API)")
        # Fallback to older Complete API if available
        elif hasattr(together, "Complete"):
            original_complete = together.Complete.create

            wrapped_complete = self.create_span_wrapper(
                span_name="together.complete",
                extract_attributes=self._extract_complete_attributes,
            )(original_complete)

            together.Complete.create = wrapped_complete
            self._instrumented = True
            logger.info("Together AI instrumentation enabled (Complete API)")
        else:
            logger.debug("Together AI library exposes no supported API, instrumentation skipped")

    except ImportError:
        logger.debug("Together AI library not installed, instrumentation will be skipped")
    except Exception as e:
        logger.error("Failed to instrument Together AI: %s", e, exc_info=True)
        if config.fail_on_error:
            raise
|
|
56
|
+
|
|
57
|
+
def _instrument_client(self, client):
|
|
58
|
+
"""Instrument Together AI client methods."""
|
|
59
|
+
if hasattr(client, "chat") and hasattr(client.chat, "completions"):
|
|
60
|
+
original_create = client.chat.completions.create
|
|
61
|
+
|
|
62
|
+
wrapped_create = self.create_span_wrapper(
|
|
63
|
+
span_name="together.chat.completion",
|
|
64
|
+
extract_attributes=self._extract_chat_attributes,
|
|
65
|
+
)(original_create)
|
|
66
|
+
|
|
67
|
+
client.chat.completions.create = wrapped_create
|
|
68
|
+
|
|
69
|
+
def _extract_chat_attributes(self, instance: Any, args: Any, kwargs: Any) -> Dict[str, Any]:
|
|
70
|
+
"""Extract attributes from Together AI chat completion call.
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
instance: The client instance.
|
|
74
|
+
args: Positional arguments.
|
|
75
|
+
kwargs: Keyword arguments.
|
|
76
|
+
|
|
77
|
+
Returns:
|
|
78
|
+
Dict[str, Any]: Dictionary of attributes to set on the span.
|
|
79
|
+
"""
|
|
80
|
+
attrs = {}
|
|
81
|
+
model = kwargs.get("model", "unknown")
|
|
82
|
+
messages = kwargs.get("messages", [])
|
|
83
|
+
|
|
84
|
+
attrs["gen_ai.system"] = "together"
|
|
85
|
+
attrs["gen_ai.request.model"] = model
|
|
86
|
+
attrs["gen_ai.operation.name"] = "chat"
|
|
87
|
+
attrs["gen_ai.request.message_count"] = len(messages)
|
|
88
|
+
|
|
89
|
+
# Optional parameters
|
|
90
|
+
if "temperature" in kwargs:
|
|
91
|
+
attrs["gen_ai.request.temperature"] = kwargs["temperature"]
|
|
92
|
+
if "top_p" in kwargs:
|
|
93
|
+
attrs["gen_ai.request.top_p"] = kwargs["top_p"]
|
|
94
|
+
if "max_tokens" in kwargs:
|
|
95
|
+
attrs["gen_ai.request.max_tokens"] = kwargs["max_tokens"]
|
|
96
|
+
|
|
97
|
+
return attrs
|
|
98
|
+
|
|
99
|
+
def _extract_complete_attributes(self, instance: Any, args: Any, kwargs: Any) -> Dict[str, Any]:
|
|
100
|
+
"""Extract attributes from Together AI complete call.
|
|
101
|
+
|
|
102
|
+
Args:
|
|
103
|
+
instance: The instance (None for class methods).
|
|
104
|
+
args: Positional arguments.
|
|
105
|
+
kwargs: Keyword arguments.
|
|
106
|
+
|
|
107
|
+
Returns:
|
|
108
|
+
Dict[str, Any]: Dictionary of attributes to set on the span.
|
|
109
|
+
"""
|
|
110
|
+
attrs = {}
|
|
111
|
+
model = kwargs.get("model", "unknown")
|
|
112
|
+
|
|
113
|
+
attrs["gen_ai.system"] = "together"
|
|
114
|
+
attrs["gen_ai.request.model"] = model
|
|
115
|
+
attrs["gen_ai.operation.name"] = "complete"
|
|
116
|
+
|
|
117
|
+
return attrs
|
|
40
118
|
|
|
41
119
|
def _extract_usage(self, result) -> Optional[Dict[str, int]]:
|
|
42
|
-
|
|
120
|
+
"""Extract token usage from Together AI response.
|
|
121
|
+
|
|
122
|
+
Together AI uses OpenAI-compatible format with usage field containing:
|
|
123
|
+
- prompt_tokens: Input tokens
|
|
124
|
+
- completion_tokens: Output tokens
|
|
125
|
+
- total_tokens: Total tokens
|
|
126
|
+
|
|
127
|
+
Args:
|
|
128
|
+
result: The API response object.
|
|
129
|
+
|
|
130
|
+
Returns:
|
|
131
|
+
Optional[Dict[str, int]]: Dictionary with token counts or None.
|
|
132
|
+
"""
|
|
133
|
+
try:
|
|
134
|
+
# Handle OpenAI-compatible response format
|
|
135
|
+
if hasattr(result, "usage") and result.usage:
|
|
136
|
+
usage = result.usage
|
|
137
|
+
return {
|
|
138
|
+
"prompt_tokens": getattr(usage, "prompt_tokens", 0),
|
|
139
|
+
"completion_tokens": getattr(usage, "completion_tokens", 0),
|
|
140
|
+
"total_tokens": getattr(usage, "total_tokens", 0),
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
return None
|
|
144
|
+
except Exception as e:
|
|
145
|
+
logger.debug("Failed to extract usage from Together AI response: %s", e)
|
|
146
|
+
return None
|
|
@@ -1,42 +1,106 @@
|
|
|
1
1
|
"""OpenTelemetry instrumentor for Google Vertex AI SDK.
|
|
2
2
|
|
|
3
3
|
This instrumentor automatically traces content generation calls to Vertex AI models,
|
|
4
|
-
capturing relevant attributes such as the model name.
|
|
4
|
+
capturing relevant attributes such as the model name and token usage.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
import logging
|
|
8
|
+
from typing import Any, Dict, Optional
|
|
8
9
|
|
|
9
10
|
from ..config import OTelConfig
|
|
10
11
|
from .base import BaseInstrumentor
|
|
11
12
|
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
12
15
|
|
|
13
16
|
class VertexAIInstrumentor(BaseInstrumentor):
|
|
14
17
|
"""Instrumentor for Google Vertex AI"""
|
|
15
18
|
|
|
16
19
|
def instrument(self, config: OTelConfig):
    """Patch ``GenerativeModel.generate_content`` with a tracing wrapper.

    The configuration is stored on the instrumentor first. If the Vertex AI
    SDK cannot be imported the step is skipped with a debug log; any other
    failure is logged as an error and re-raised only when
    ``config.fail_on_error`` is set.

    Args:
        config: The instrumentation configuration.
    """
    self.config = config
    try:
        from vertexai.preview.generative_models import GenerativeModel

        untraced_generate = GenerativeModel.generate_content

        # Build the decorator, then apply it to the untouched method.
        traced = self.create_span_wrapper(
            span_name="vertexai.generate_content",
            extract_attributes=self._extract_generate_attributes,
        )
        GenerativeModel.generate_content = traced(untraced_generate)

        self._instrumented = True
        logger.info("Vertex AI instrumentation enabled")
    except ImportError:
        logger.debug("Vertex AI library not installed, instrumentation will be skipped")
    except Exception as e:
        logger.error("Failed to instrument Vertex AI: %s", e, exc_info=True)
        if config.fail_on_error:
            raise
|
|
32
43
|
|
|
33
|
-
|
|
34
|
-
|
|
44
|
+
def _extract_generate_attributes(self, instance: Any, args: Any, kwargs: Any) -> Dict[str, Any]:
|
|
45
|
+
"""Extract attributes from Vertex AI generate_content call.
|
|
35
46
|
|
|
36
|
-
|
|
47
|
+
Args:
|
|
48
|
+
instance: The GenerativeModel instance.
|
|
49
|
+
args: Positional arguments.
|
|
50
|
+
kwargs: Keyword arguments.
|
|
37
51
|
|
|
38
|
-
|
|
39
|
-
|
|
52
|
+
Returns:
|
|
53
|
+
Dict[str, Any]: Dictionary of attributes to set on the span.
|
|
54
|
+
"""
|
|
55
|
+
attrs = {}
|
|
56
|
+
model_name = getattr(instance, "_model_name", "unknown")
|
|
57
|
+
|
|
58
|
+
attrs["gen_ai.system"] = "vertexai"
|
|
59
|
+
attrs["gen_ai.request.model"] = model_name
|
|
60
|
+
attrs["gen_ai.operation.name"] = "generate_content"
|
|
61
|
+
|
|
62
|
+
return attrs
|
|
40
63
|
|
|
41
64
|
def _extract_usage(self, result) -> Optional[Dict[str, int]]:
|
|
42
|
-
|
|
65
|
+
"""Extract token usage from Vertex AI response.
|
|
66
|
+
|
|
67
|
+
Vertex AI responses include usage_metadata with:
|
|
68
|
+
- prompt_token_count: Input tokens
|
|
69
|
+
- candidates_token_count: Output tokens
|
|
70
|
+
- total_token_count: Total tokens
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
result: The API response object.
|
|
74
|
+
|
|
75
|
+
Returns:
|
|
76
|
+
Optional[Dict[str, int]]: Dictionary with token counts or None.
|
|
77
|
+
"""
|
|
78
|
+
try:
|
|
79
|
+
# Handle response with usage_metadata
|
|
80
|
+
if hasattr(result, "usage_metadata") and result.usage_metadata:
|
|
81
|
+
usage_metadata = result.usage_metadata
|
|
82
|
+
|
|
83
|
+
# Try snake_case first (Python SDK style)
|
|
84
|
+
prompt_tokens = getattr(usage_metadata, "prompt_token_count", None)
|
|
85
|
+
candidates_tokens = getattr(usage_metadata, "candidates_token_count", None)
|
|
86
|
+
total_tokens = getattr(usage_metadata, "total_token_count", None)
|
|
87
|
+
|
|
88
|
+
# Fallback to camelCase (REST API style)
|
|
89
|
+
if prompt_tokens is None:
|
|
90
|
+
prompt_tokens = getattr(usage_metadata, "promptTokenCount", 0)
|
|
91
|
+
if candidates_tokens is None:
|
|
92
|
+
candidates_tokens = getattr(usage_metadata, "candidatesTokenCount", 0)
|
|
93
|
+
if total_tokens is None:
|
|
94
|
+
total_tokens = getattr(usage_metadata, "totalTokenCount", 0)
|
|
95
|
+
|
|
96
|
+
if prompt_tokens or candidates_tokens:
|
|
97
|
+
return {
|
|
98
|
+
"prompt_tokens": int(prompt_tokens or 0),
|
|
99
|
+
"completion_tokens": int(candidates_tokens or 0),
|
|
100
|
+
"total_tokens": int(total_tokens or 0),
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
return None
|
|
104
|
+
except Exception as e:
|
|
105
|
+
logger.debug("Failed to extract usage from Vertex AI response: %s", e)
|
|
106
|
+
return None
|