genai-otel-instrument 0.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. genai_otel/__init__.py +132 -0
  2. genai_otel/__version__.py +34 -0
  3. genai_otel/auto_instrument.py +602 -0
  4. genai_otel/cli.py +92 -0
  5. genai_otel/config.py +333 -0
  6. genai_otel/cost_calculator.py +467 -0
  7. genai_otel/cost_enriching_exporter.py +207 -0
  8. genai_otel/cost_enrichment_processor.py +174 -0
  9. genai_otel/evaluation/__init__.py +76 -0
  10. genai_otel/evaluation/bias_detector.py +364 -0
  11. genai_otel/evaluation/config.py +261 -0
  12. genai_otel/evaluation/hallucination_detector.py +525 -0
  13. genai_otel/evaluation/pii_detector.py +356 -0
  14. genai_otel/evaluation/prompt_injection_detector.py +262 -0
  15. genai_otel/evaluation/restricted_topics_detector.py +316 -0
  16. genai_otel/evaluation/span_processor.py +962 -0
  17. genai_otel/evaluation/toxicity_detector.py +406 -0
  18. genai_otel/exceptions.py +17 -0
  19. genai_otel/gpu_metrics.py +516 -0
  20. genai_otel/instrumentors/__init__.py +71 -0
  21. genai_otel/instrumentors/anthropic_instrumentor.py +134 -0
  22. genai_otel/instrumentors/anyscale_instrumentor.py +27 -0
  23. genai_otel/instrumentors/autogen_instrumentor.py +394 -0
  24. genai_otel/instrumentors/aws_bedrock_instrumentor.py +94 -0
  25. genai_otel/instrumentors/azure_openai_instrumentor.py +69 -0
  26. genai_otel/instrumentors/base.py +919 -0
  27. genai_otel/instrumentors/bedrock_agents_instrumentor.py +398 -0
  28. genai_otel/instrumentors/cohere_instrumentor.py +140 -0
  29. genai_otel/instrumentors/crewai_instrumentor.py +311 -0
  30. genai_otel/instrumentors/dspy_instrumentor.py +661 -0
  31. genai_otel/instrumentors/google_ai_instrumentor.py +310 -0
  32. genai_otel/instrumentors/groq_instrumentor.py +106 -0
  33. genai_otel/instrumentors/guardrails_ai_instrumentor.py +510 -0
  34. genai_otel/instrumentors/haystack_instrumentor.py +503 -0
  35. genai_otel/instrumentors/huggingface_instrumentor.py +399 -0
  36. genai_otel/instrumentors/hyperbolic_instrumentor.py +236 -0
  37. genai_otel/instrumentors/instructor_instrumentor.py +425 -0
  38. genai_otel/instrumentors/langchain_instrumentor.py +340 -0
  39. genai_otel/instrumentors/langgraph_instrumentor.py +328 -0
  40. genai_otel/instrumentors/llamaindex_instrumentor.py +36 -0
  41. genai_otel/instrumentors/mistralai_instrumentor.py +315 -0
  42. genai_otel/instrumentors/ollama_instrumentor.py +197 -0
  43. genai_otel/instrumentors/ollama_server_metrics_poller.py +336 -0
  44. genai_otel/instrumentors/openai_agents_instrumentor.py +291 -0
  45. genai_otel/instrumentors/openai_instrumentor.py +260 -0
  46. genai_otel/instrumentors/pydantic_ai_instrumentor.py +362 -0
  47. genai_otel/instrumentors/replicate_instrumentor.py +87 -0
  48. genai_otel/instrumentors/sambanova_instrumentor.py +196 -0
  49. genai_otel/instrumentors/togetherai_instrumentor.py +146 -0
  50. genai_otel/instrumentors/vertexai_instrumentor.py +106 -0
  51. genai_otel/llm_pricing.json +1676 -0
  52. genai_otel/logging_config.py +45 -0
  53. genai_otel/mcp_instrumentors/__init__.py +14 -0
  54. genai_otel/mcp_instrumentors/api_instrumentor.py +144 -0
  55. genai_otel/mcp_instrumentors/base.py +105 -0
  56. genai_otel/mcp_instrumentors/database_instrumentor.py +336 -0
  57. genai_otel/mcp_instrumentors/kafka_instrumentor.py +31 -0
  58. genai_otel/mcp_instrumentors/manager.py +139 -0
  59. genai_otel/mcp_instrumentors/redis_instrumentor.py +31 -0
  60. genai_otel/mcp_instrumentors/vector_db_instrumentor.py +265 -0
  61. genai_otel/metrics.py +148 -0
  62. genai_otel/py.typed +2 -0
  63. genai_otel/server_metrics.py +197 -0
  64. genai_otel_instrument-0.1.24.dist-info/METADATA +1404 -0
  65. genai_otel_instrument-0.1.24.dist-info/RECORD +69 -0
  66. genai_otel_instrument-0.1.24.dist-info/WHEEL +5 -0
  67. genai_otel_instrument-0.1.24.dist-info/entry_points.txt +2 -0
  68. genai_otel_instrument-0.1.24.dist-info/licenses/LICENSE +680 -0
  69. genai_otel_instrument-0.1.24.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1404 @@
1
+ Metadata-Version: 2.4
2
+ Name: genai-otel-instrument
3
+ Version: 0.1.24
4
+ Summary: Comprehensive OpenTelemetry auto-instrumentation for LLM/GenAI applications
5
+ Author-email: Kshitij Thakkar <kshitijthakkar@rocketmail.com>
6
+ License: AGPL-3.0-or-later
7
+ Project-URL: Homepage, https://github.com/Mandark-droid/genai_otel_instrument
8
+ Project-URL: Repository, https://github.com/Mandark-droid/genai_otel_instrument
9
+ Project-URL: Documentation, https://github.com/Mandark-droid/genai_otel_instrument#readme
10
+ Project-URL: Issues, https://github.com/Mandark-droid/genai_otel_instrument/issues
11
+ Project-URL: Changelog, https://github.com/Mandark-droid/genai_otel_instrument/blob/main/CHANGELOG.md
12
+ Keywords: opentelemetry,observability,llm,genai,instrumentation,tracing,metrics,monitoring
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
16
+ Classifier: Topic :: System :: Monitoring
17
+ Classifier: License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)
18
+ Classifier: Operating System :: OS Independent
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Requires-Python: >=3.9
25
+ Description-Content-Type: text/markdown
26
+ License-File: LICENSE
27
+ Requires-Dist: opentelemetry-api<2.0.0,>=1.20.0
28
+ Requires-Dist: opentelemetry-sdk<2.0.0,>=1.20.0
29
+ Requires-Dist: opentelemetry-instrumentation>=0.41b0
30
+ Requires-Dist: opentelemetry-semantic-conventions<1.0.0,>=0.45b0
31
+ Requires-Dist: opentelemetry-exporter-otlp>=1.20.0
32
+ Requires-Dist: opentelemetry-instrumentation-requests>=0.41b0
33
+ Requires-Dist: opentelemetry-instrumentation-httpx>=0.41b0
34
+ Requires-Dist: requests>=2.20.0
35
+ Requires-Dist: wrapt>=1.14.0
36
+ Requires-Dist: httpx>=0.23.0
37
+ Requires-Dist: opentelemetry-instrumentation-mysql>=0.41b0
38
+ Requires-Dist: mysql-connector-python>=9.1.0
39
+ Requires-Dist: opentelemetry-instrumentation-psycopg2>=0.41b0
40
+ Requires-Dist: psycopg2-binary>=2.9.0
41
+ Requires-Dist: opentelemetry-instrumentation-redis>=0.41b0
42
+ Requires-Dist: redis
43
+ Requires-Dist: opentelemetry-instrumentation-pymongo>=0.41b0
44
+ Requires-Dist: pymongo
45
+ Requires-Dist: opentelemetry-instrumentation-sqlalchemy>=0.41b0
46
+ Requires-Dist: sqlalchemy>=1.4.0
47
+ Requires-Dist: opentelemetry-instrumentation-kafka-python>=0.41b0
48
+ Requires-Dist: kafka-python
49
+ Provides-Extra: openinference
50
+ Requires-Dist: openinference-instrumentation==0.1.31; extra == "openinference"
51
+ Requires-Dist: openinference-instrumentation-litellm==0.1.19; extra == "openinference"
52
+ Requires-Dist: openinference-instrumentation-mcp==1.3.0; extra == "openinference"
53
+ Requires-Dist: openinference-instrumentation-smolagents==0.1.11; extra == "openinference"
54
+ Requires-Dist: litellm>=1.0.0; extra == "openinference"
55
+ Provides-Extra: evaluation
56
+ Requires-Dist: presidio-analyzer>=2.2.0; extra == "evaluation"
57
+ Requires-Dist: presidio-anonymizer>=2.2.0; extra == "evaluation"
58
+ Requires-Dist: spacy>=3.0.0; extra == "evaluation"
59
+ Requires-Dist: detoxify>=0.5.0; extra == "evaluation"
60
+ Requires-Dist: google-api-python-client>=2.0.0; extra == "evaluation"
61
+ Provides-Extra: gpu
62
+ Requires-Dist: nvidia-ml-py>=11.495.46; extra == "gpu"
63
+ Requires-Dist: codecarbon>=2.3.0; extra == "gpu"
64
+ Provides-Extra: co2
65
+ Requires-Dist: codecarbon>=2.3.0; extra == "co2"
66
+ Provides-Extra: openai
67
+ Requires-Dist: openai>=1.0.0; extra == "openai"
68
+ Provides-Extra: anthropic
69
+ Requires-Dist: anthropic>=0.18.0; extra == "anthropic"
70
+ Provides-Extra: google
71
+ Requires-Dist: google-generativeai>=0.3.0; extra == "google"
72
+ Provides-Extra: aws
73
+ Requires-Dist: boto3>=1.28.0; extra == "aws"
74
+ Provides-Extra: azure
75
+ Requires-Dist: azure-ai-openai>=1.0.0; extra == "azure"
76
+ Provides-Extra: cohere
77
+ Requires-Dist: cohere>=4.0.0; extra == "cohere"
78
+ Provides-Extra: mistral
79
+ Requires-Dist: mistralai>=0.4.2; extra == "mistral"
80
+ Provides-Extra: together
81
+ Requires-Dist: together>=0.2.0; extra == "together"
82
+ Provides-Extra: groq
83
+ Requires-Dist: groq>=0.4.0; extra == "groq"
84
+ Provides-Extra: ollama
85
+ Requires-Dist: ollama>=0.1.0; extra == "ollama"
86
+ Provides-Extra: replicate
87
+ Requires-Dist: replicate>=0.15.0; extra == "replicate"
88
+ Provides-Extra: sambanova
89
+ Requires-Dist: sambanova>=1.0.0; extra == "sambanova"
90
+ Provides-Extra: langchain
91
+ Requires-Dist: langchain>=0.1.0; extra == "langchain"
92
+ Provides-Extra: langgraph
93
+ Requires-Dist: langgraph>=0.1.0; extra == "langgraph"
94
+ Provides-Extra: llamaindex
95
+ Requires-Dist: llama-index>=0.9.0; extra == "llamaindex"
96
+ Provides-Extra: huggingface
97
+ Requires-Dist: transformers>=4.30.0; extra == "huggingface"
98
+ Provides-Extra: crewai
99
+ Requires-Dist: crewai>=0.1.0; extra == "crewai"
100
+ Provides-Extra: autogen
101
+ Requires-Dist: pyautogen>=0.2.0; extra == "autogen"
102
+ Provides-Extra: haystack
103
+ Requires-Dist: haystack-ai>=2.0.0; extra == "haystack"
104
+ Provides-Extra: dspy
105
+ Requires-Dist: dspy-ai>=2.0.0; extra == "dspy"
106
+ Provides-Extra: guardrails
107
+ Requires-Dist: guardrails-ai>=0.4.0; extra == "guardrails"
108
+ Provides-Extra: instructor
109
+ Requires-Dist: instructor>=1.0.0; extra == "instructor"
110
+ Provides-Extra: pydantic-ai
111
+ Requires-Dist: pydantic-ai>=0.0.1; extra == "pydantic-ai"
112
+ Provides-Extra: vertexai
113
+ Requires-Dist: google-cloud-aiplatform>=1.0.0; extra == "vertexai"
114
+ Provides-Extra: databases
115
+ Requires-Dist: opentelemetry-instrumentation-sqlalchemy>=0.41b0; extra == "databases"
116
+ Requires-Dist: sqlalchemy>=1.4.0; extra == "databases"
117
+ Requires-Dist: opentelemetry-instrumentation-redis>=0.41b0; extra == "databases"
118
+ Requires-Dist: redis; extra == "databases"
119
+ Requires-Dist: opentelemetry-instrumentation-pymongo>=0.41b0; extra == "databases"
120
+ Requires-Dist: pymongo; extra == "databases"
121
+ Requires-Dist: opentelemetry-instrumentation-psycopg2>=0.41b0; extra == "databases"
122
+ Requires-Dist: psycopg2-binary>=2.9.0; extra == "databases"
123
+ Requires-Dist: opentelemetry-instrumentation-mysql>=0.41b0; extra == "databases"
124
+ Requires-Dist: mysql-connector-python>=9.1.0; extra == "databases"
125
+ Provides-Extra: messaging
126
+ Requires-Dist: opentelemetry-instrumentation-kafka-python>=0.41b0; extra == "messaging"
127
+ Requires-Dist: kafka-python; extra == "messaging"
128
+ Provides-Extra: vector-dbs
129
+ Requires-Dist: pinecone>=3.0.0; extra == "vector-dbs"
130
+ Requires-Dist: weaviate-client>=3.0.0; extra == "vector-dbs"
131
+ Requires-Dist: qdrant-client>=1.0.0; extra == "vector-dbs"
132
+ Requires-Dist: chromadb>=0.4.0; extra == "vector-dbs"
133
+ Requires-Dist: pymilvus>=2.3.0; extra == "vector-dbs"
134
+ Requires-Dist: faiss-cpu>=1.7.0; extra == "vector-dbs"
135
+ Provides-Extra: all-providers
136
+ Requires-Dist: openai>=1.0.0; extra == "all-providers"
137
+ Requires-Dist: anthropic>=0.18.0; extra == "all-providers"
138
+ Requires-Dist: google-generativeai>=0.3.0; extra == "all-providers"
139
+ Requires-Dist: boto3>=1.28.0; extra == "all-providers"
140
+ Requires-Dist: azure-ai-openai>=1.0.0; extra == "all-providers"
141
+ Requires-Dist: cohere>=4.0.0; extra == "all-providers"
142
+ Requires-Dist: mistralai>=0.4.2; extra == "all-providers"
143
+ Requires-Dist: together>=0.2.0; extra == "all-providers"
144
+ Requires-Dist: groq>=0.4.0; extra == "all-providers"
145
+ Requires-Dist: ollama>=0.1.0; extra == "all-providers"
146
+ Requires-Dist: replicate>=0.15.0; extra == "all-providers"
147
+ Requires-Dist: sambanova>=1.0.0; extra == "all-providers"
148
+ Requires-Dist: langchain>=0.1.0; extra == "all-providers"
149
+ Requires-Dist: langgraph>=0.1.0; extra == "all-providers"
150
+ Requires-Dist: llama-index>=0.9.0; extra == "all-providers"
151
+ Requires-Dist: transformers>=4.30.0; extra == "all-providers"
152
+ Requires-Dist: litellm>=1.0.0; extra == "all-providers"
153
+ Requires-Dist: crewai>=0.1.0; extra == "all-providers"
154
+ Requires-Dist: pyautogen>=0.2.0; extra == "all-providers"
155
+ Requires-Dist: haystack-ai>=2.0.0; extra == "all-providers"
156
+ Requires-Dist: dspy-ai>=2.0.0; extra == "all-providers"
157
+ Requires-Dist: guardrails-ai>=0.4.0; extra == "all-providers"
158
+ Requires-Dist: instructor>=1.0.0; extra == "all-providers"
159
+ Requires-Dist: pydantic-ai>=0.0.1; extra == "all-providers"
160
+ Requires-Dist: google-cloud-aiplatform>=1.0.0; extra == "all-providers"
161
+ Provides-Extra: all-mcp
162
+ Requires-Dist: opentelemetry-instrumentation-sqlalchemy>=0.41b0; extra == "all-mcp"
163
+ Requires-Dist: opentelemetry-instrumentation-redis>=0.41b0; extra == "all-mcp"
164
+ Requires-Dist: opentelemetry-instrumentation-pymongo>=0.41b0; extra == "all-mcp"
165
+ Requires-Dist: opentelemetry-instrumentation-psycopg2>=0.41b0; extra == "all-mcp"
166
+ Requires-Dist: opentelemetry-instrumentation-mysql>=0.41b0; extra == "all-mcp"
167
+ Requires-Dist: opentelemetry-instrumentation-kafka-python>=0.41b0; extra == "all-mcp"
168
+ Requires-Dist: pinecone>=3.0.0; extra == "all-mcp"
169
+ Requires-Dist: weaviate-client>=3.0.0; extra == "all-mcp"
170
+ Requires-Dist: qdrant-client>=1.0.0; extra == "all-mcp"
171
+ Requires-Dist: chromadb>=0.4.0; extra == "all-mcp"
172
+ Requires-Dist: pymilvus>=2.3.0; extra == "all-mcp"
173
+ Requires-Dist: faiss-cpu>=1.7.0; extra == "all-mcp"
174
+ Requires-Dist: sqlalchemy; extra == "all-mcp"
175
+ Provides-Extra: all
176
+ Requires-Dist: openai>=1.0.0; extra == "all"
177
+ Requires-Dist: anthropic>=0.18.0; extra == "all"
178
+ Requires-Dist: google-generativeai>=0.3.0; extra == "all"
179
+ Requires-Dist: boto3>=1.28.0; extra == "all"
180
+ Requires-Dist: azure-ai-openai>=1.0.0; extra == "all"
181
+ Requires-Dist: cohere>=4.0.0; extra == "all"
182
+ Requires-Dist: mistralai>=0.4.2; extra == "all"
183
+ Requires-Dist: together>=0.2.0; extra == "all"
184
+ Requires-Dist: groq>=0.4.0; extra == "all"
185
+ Requires-Dist: ollama>=0.1.0; extra == "all"
186
+ Requires-Dist: replicate>=0.15.0; extra == "all"
187
+ Requires-Dist: sambanova>=1.0.0; extra == "all"
188
+ Requires-Dist: langchain>=0.1.0; extra == "all"
189
+ Requires-Dist: langgraph>=0.1.0; extra == "all"
190
+ Requires-Dist: llama-index>=0.9.0; extra == "all"
191
+ Requires-Dist: transformers>=4.30.0; extra == "all"
192
+ Requires-Dist: nvidia-ml-py>=11.495.46; extra == "all"
193
+ Requires-Dist: crewai>=0.1.0; extra == "all"
194
+ Requires-Dist: pyautogen>=0.2.0; extra == "all"
195
+ Requires-Dist: haystack-ai>=2.0.0; extra == "all"
196
+ Requires-Dist: dspy-ai>=2.0.0; extra == "all"
197
+ Requires-Dist: guardrails-ai>=0.4.0; extra == "all"
198
+ Requires-Dist: instructor>=1.0.0; extra == "all"
199
+ Requires-Dist: pydantic-ai>=0.0.1; extra == "all"
200
+ Requires-Dist: google-cloud-aiplatform>=1.0.0; extra == "all"
201
+ Requires-Dist: opentelemetry-instrumentation-sqlalchemy>=0.41b0; extra == "all"
202
+ Requires-Dist: opentelemetry-instrumentation-redis>=0.41b0; extra == "all"
203
+ Requires-Dist: opentelemetry-instrumentation-pymongo>=0.41b0; extra == "all"
204
+ Requires-Dist: opentelemetry-instrumentation-psycopg2>=0.41b0; extra == "all"
205
+ Requires-Dist: opentelemetry-instrumentation-mysql>=0.41b0; extra == "all"
206
+ Requires-Dist: opentelemetry-instrumentation-kafka-python>=0.41b0; extra == "all"
207
+ Requires-Dist: pinecone>=3.0.0; extra == "all"
208
+ Requires-Dist: weaviate-client>=3.0.0; extra == "all"
209
+ Requires-Dist: qdrant-client>=1.0.0; extra == "all"
210
+ Requires-Dist: chromadb>=0.4.0; extra == "all"
211
+ Requires-Dist: pymilvus>=2.3.0; extra == "all"
212
+ Requires-Dist: faiss-cpu>=1.7.0; extra == "all"
213
+ Requires-Dist: sqlalchemy; extra == "all"
214
+ Requires-Dist: presidio-analyzer>=2.2.0; extra == "all"
215
+ Requires-Dist: presidio-anonymizer>=2.2.0; extra == "all"
216
+ Requires-Dist: spacy>=3.0.0; extra == "all"
217
+ Requires-Dist: detoxify>=0.5.0; extra == "all"
218
+ Requires-Dist: google-api-python-client>=2.0.0; extra == "all"
219
+ Provides-Extra: dev
220
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
221
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
222
+ Requires-Dist: pytest-mock>=3.10.0; extra == "dev"
223
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
224
+ Requires-Dist: black>=23.0.0; extra == "dev"
225
+ Requires-Dist: isort>=5.12.0; extra == "dev"
226
+ Requires-Dist: pylint>=2.17.0; extra == "dev"
227
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
228
+ Requires-Dist: build>=0.10.0; extra == "dev"
229
+ Requires-Dist: twine>=4.0.0; extra == "dev"
230
+ Dynamic: license-file
231
+
232
+ # TraceVerde
233
+
234
+ <div align="center">
235
+ <img src="https://raw.githubusercontent.com/Mandark-droid/genai_otel_instrument/main/.github/images/Logo.jpg" alt="TraceVerde - GenAI OpenTelemetry Instrumentation Logo" width="400"/>
236
+ </div>
237
+
238
+ <br/>
239
+
240
+ [![PyPI version](https://badge.fury.io/py/genai-otel-instrument.svg)](https://badge.fury.io/py/genai-otel-instrument)
241
+ [![Python Versions](https://img.shields.io/pypi/pyversions/genai-otel-instrument.svg)](https://pypi.org/project/genai-otel-instrument/)
242
+ [![License](https://img.shields.io/badge/License-AGPL%203.0-blue.svg)](https://www.gnu.org/licenses/agpl-3.0)
243
+ [![Downloads](https://static.pepy.tech/badge/genai-otel-instrument)](https://pepy.tech/project/genai-otel-instrument)
244
+ [![Downloads/Month](https://static.pepy.tech/badge/genai-otel-instrument/month)](https://pepy.tech/project/genai-otel-instrument)
245
+
246
+ [![GitHub Stars](https://img.shields.io/github/stars/Mandark-droid/genai_otel_instrument?style=social)](https://github.com/Mandark-droid/genai_otel_instrument)
247
+ [![GitHub Forks](https://img.shields.io/github/forks/Mandark-droid/genai_otel_instrument?style=social)](https://github.com/Mandark-droid/genai_otel_instrument)
248
+ [![GitHub Issues](https://img.shields.io/github/issues/Mandark-droid/genai_otel_instrument)](https://github.com/Mandark-droid/genai_otel_instrument/issues)
249
+ [![GitHub Pull Requests](https://img.shields.io/github/issues-pr/Mandark-droid/genai_otel_instrument)](https://github.com/Mandark-droid/genai_otel_instrument/pulls)
250
+
251
+ [![Code Coverage](https://img.shields.io/badge/coverage-90%25-brightgreen.svg)](https://github.com/Mandark-droid/genai_otel_instrument)
252
+ [![Code Style: Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
253
+ [![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
254
+ [![Type Checked: mypy](https://img.shields.io/badge/type%20checked-mypy-blue.svg)](http://mypy-lang.org/)
255
+
256
+ [![OpenTelemetry](https://img.shields.io/badge/OpenTelemetry-1.20%2B-blueviolet)](https://opentelemetry.io/)
257
+ [![Semantic Conventions](https://img.shields.io/badge/OTel%20Semconv-GenAI%20v1.28-orange)](https://opentelemetry.io/docs/specs/semconv/gen-ai/)
258
+ [![CI/CD](https://img.shields.io/badge/CI%2FCD-GitHub%20Actions-2088FF?logo=github-actions&logoColor=white)](https://github.com/Mandark-droid/genai_otel_instrument/actions)
259
+
260
+ ---
261
+
262
+ <div align="center">
263
+ <img src="https://raw.githubusercontent.com/Mandark-droid/genai_otel_instrument/main/.github/images/Landing_Page.jpg" alt="GenAI OpenTelemetry Instrumentation Overview" width="800"/>
264
+ </div>
265
+
266
+ ---
267
+
268
+ Production-ready OpenTelemetry instrumentation for GenAI/LLM applications with zero-code setup.
269
+
270
+ ## Features
271
+
272
+ 🚀 **Zero-Code Instrumentation** - Just install and set env vars
273
+ 🤖 **17+ LLM Providers** - OpenAI, Anthropic, Google, AWS, Azure, SambaNova, Hyperbolic, and more
274
+ 🤝 **Multi-Agent Frameworks** - CrewAI, LangGraph, OpenAI Agents SDK, AutoGen, Pydantic AI for agent orchestration
275
+ 🔧 **MCP Tool Support** - Auto-instrument databases, APIs, caches, vector DBs
276
+ 💰 **Cost Tracking** - Automatic cost calculation for both streaming and non-streaming requests
277
+ ⚡ **Streaming Support** - Full observability for streaming responses with TTFT/TBT metrics and cost tracking
278
+ 🎮 **GPU Metrics** - Real-time GPU utilization, memory, temperature, power, and electricity cost tracking
279
+ 🛡️ **PII Detection** (NEW) - Automatic PII detection with GDPR/HIPAA/PCI-DSS compliance modes
280
+ ☢️ **Toxicity Detection** (NEW) - Detect harmful content with Perspective API and Detoxify
281
+ ⚖️ **Bias Detection** (NEW) - Identify demographic and other biases in prompts and responses
282
+ 📊 **Complete Observability** - Traces, metrics, and rich span attributes
283
+ ➕ **Service Instance ID & Environment** - Identify your services and environments
284
+ ⏱️ **Configurable Exporter Timeout** - Set timeout for OTLP exporter
285
+ 🔗 **OpenInference Instrumentors** - Smolagents, MCP, and LiteLLM instrumentation
286
+
287
+ ## Quick Start
288
+
289
+ ### Installation
290
+
291
+ ```bash
292
+ pip install genai-otel-instrument
293
+ ```
294
+
295
+ ### Usage
296
+
297
+ **Option 1: Environment Variables (No code changes)**
298
+
299
+ ```bash
300
+ export OTEL_SERVICE_NAME=my-llm-app
301
+ export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
302
+ python your_app.py
303
+ ```
304
+
305
+ **Option 2: One line of code**
306
+
307
+ ```python
308
+ import genai_otel
309
+ genai_otel.instrument()
310
+
311
+ # Your existing code works unchanged
312
+ import openai
313
+ client = openai.OpenAI()
314
+ response = client.chat.completions.create(...)
315
+ ```
316
+
317
+ **Option 3: CLI wrapper**
318
+
319
+ ```bash
320
+ genai-instrument python your_app.py
321
+ ```
322
+
323
+ For a more comprehensive demonstration of various LLM providers and MCP tools, refer to `example_usage.py` in the project root. Note that running this example requires setting up relevant API keys and external services (e.g., databases, Redis, Pinecone).
324
+
325
+ ## What Gets Instrumented?
326
+
327
+ ### LLM Providers (Auto-detected)
328
+ - **With Full Cost Tracking**: OpenAI, Anthropic, Google AI, AWS Bedrock, Azure OpenAI, Cohere, Mistral AI, Together AI, Groq, Ollama, Vertex AI, SambaNova, Hyperbolic
329
+ - **Hardware/Local Pricing**: Replicate (hardware-based $/second), HuggingFace (local execution with estimated costs)
330
+ - **HuggingFace Support**: `pipeline()`, `AutoModelForCausalLM.generate()`, `AutoModelForSeq2SeqLM.generate()`, `InferenceClient` API calls
331
+ - **Other Providers**: Anyscale
332
+ - **Special Configuration**: Hyperbolic (requires OTLP gRPC exporter - see `examples/hyperbolic_example.py`)
333
+
334
+ ### Frameworks
335
+ - **LangChain** (chains, agents, tools)
336
+ - **LlamaIndex** (query engines, indices)
337
+ - **Haystack** (modular NLP pipelines with RAG support)
338
+ - **DSPy** (Stanford NLP declarative LM programming with automatic optimization)
339
+ - **Instructor** (Pydantic-based structured output extraction with validation and retries)
340
+ - **Guardrails AI** (input/output validation guards with on-fail policies: reask, fix, filter, refrain)
341
+
342
+ ### Multi-Agent Frameworks (NEW)
343
+ - **OpenAI Agents SDK** (agent orchestration with handoffs, sessions, guardrails)
344
+ - **CrewAI** (role-based multi-agent collaboration with crews and tasks)
345
+ - **LangGraph** (stateful workflows with graph-based orchestration)
346
+ - **AutoGen** (Microsoft multi-agent conversations with group chats)
347
+ - **Pydantic AI** (type-safe agents with Pydantic validation and multi-provider support)
348
+ - **AWS Bedrock Agents** (managed agent runtime with knowledge bases and RAG)
349
+
350
+ ### MCP Tools (Model Context Protocol)
351
+ - **Databases**: PostgreSQL, MySQL, MongoDB, SQLAlchemy
352
+ - **Caching**: Redis
353
+ - **Message Queues**: Apache Kafka
354
+ - **Vector Databases**: Pinecone, Weaviate, Qdrant, ChromaDB, Milvus, FAISS
355
+ - **APIs**: HTTP/REST requests (requests, httpx)
356
+
357
+ ### OpenInference (Optional - Python 3.10+ only)
358
+ - Smolagents - HuggingFace smolagents framework tracing
359
+ - MCP - Model Context Protocol instrumentation
360
+ - LiteLLM - Multi-provider LLM proxy
361
+
362
+ **Cost Enrichment:** OpenInference instrumentors are automatically enriched with cost tracking! When cost tracking is enabled (`GENAI_ENABLE_COST_TRACKING=true`), a custom `CostEnrichmentSpanProcessor` extracts model and token usage from OpenInference spans and adds cost attributes (`gen_ai.usage.cost.total`, `gen_ai.usage.cost.prompt`, `gen_ai.usage.cost.completion`) using our comprehensive pricing database of 340+ models across 20+ providers.
363
+
364
+ The processor supports OpenInference semantic conventions:
365
+ - Model: `llm.model_name`, `embedding.model_name`
366
+ - Tokens: `llm.token_count.prompt`, `llm.token_count.completion`
367
+ - Operations: `openinference.span.kind` (LLM, EMBEDDING, CHAIN, RETRIEVER, etc.)
368
+
369
+ **Note:** OpenInference instrumentors require Python >= 3.10. Install with:
370
+ ```bash
371
+ pip install genai-otel-instrument[openinference]
372
+ ```
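+ 
+ A minimal sketch of the flow described above (assuming the `openinference` extra is installed and provider credentials are configured; the model name is illustrative):
+ 
+ ```python
+ import os
+ 
+ # Cost tracking must be enabled before instrument() runs
+ os.environ["GENAI_ENABLE_COST_TRACKING"] = "true"
+ 
+ import genai_otel
+ genai_otel.instrument()  # activates the OpenInference instrumentors when available
+ 
+ import litellm
+ 
+ # The LiteLLM call is traced by OpenInference; the CostEnrichmentSpanProcessor
+ # reads llm.model_name and llm.token_count.* from the span and adds
+ # gen_ai.usage.cost.total / .prompt / .completion using llm_pricing.json.
+ response = litellm.completion(
+     model="gpt-4o-mini",  # illustrative model
+     messages=[{"role": "user", "content": "Hello"}],
+ )
+ ```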
373
+
374
+ ## Screenshots
375
+
376
+ See the instrumentation in action across different LLM providers and observability backends.
377
+
378
+ ### OpenAI Instrumentation
379
+ Full trace capture for OpenAI API calls with token usage, costs, and latency metrics.
380
+
381
+ <div align="center">
382
+ <img src="https://raw.githubusercontent.com/Mandark-droid/genai_otel_instrument/main/.github/images/Screenshots/Traces_OpenAI.png" alt="OpenAI Traces" width="900"/>
383
+ </div>
384
+
385
+ ### Ollama (Local LLM) Instrumentation
386
+ Zero-code instrumentation for local models running on Ollama with comprehensive observability.
387
+
388
+ <div align="center">
389
+ <img src="https://raw.githubusercontent.com/Mandark-droid/genai_otel_instrument/main/.github/images/Screenshots/Traces_Ollama.png" alt="Ollama Traces" width="900"/>
390
+ </div>
391
+
392
+ ### HuggingFace Transformers
393
+ Direct instrumentation of HuggingFace Transformers with automatic token counting and cost estimation.
394
+
395
+ <div align="center">
396
+ <img src="https://raw.githubusercontent.com/Mandark-droid/genai_otel_instrument/main/.github/images/Screenshots/Trace_HuggingFace_Transformer_Models.png" alt="HuggingFace Transformer Traces" width="900"/>
397
+ </div>
398
+
399
+ ### SmolAgents Framework
400
+ Complete agent workflow tracing with tool calls, iterations, and cost breakdown.
401
+
402
+ <div align="center">
403
+ <img src="https://raw.githubusercontent.com/Mandark-droid/genai_otel_instrument/main/.github/images/Screenshots/Traces_SmolAgent_with_tool_calls.png" alt="SmolAgent Traces with Tool Calls" width="900"/>
404
+ </div>
405
+
406
+ ### GPU Metrics Collection
407
+ Real-time GPU utilization, memory, temperature, and power consumption metrics.
408
+
409
+ <div align="center">
410
+ <img src="https://raw.githubusercontent.com/Mandark-droid/genai_otel_instrument/main/.github/images/Screenshots/GPU_Metrics.png" alt="GPU Metrics Dashboard" width="900"/>
411
+ </div>
412
+
413
+ ### Additional Screenshots
414
+
415
+ - **[Token Cost Breakdown](.github/images/Screenshots/Traces_SmolAgent_Token_Cost_breakdown.png)** - Detailed token usage and cost analysis for SmolAgent workflows
416
+ - **[OpenSearch Dashboard](.github/images/Screenshots/GENAI_OpenSearch_output.png)** - GenAI metrics visualization in OpenSearch/Kibana
417
+
418
+ ---
419
+
420
+ ## Demo Video
421
+
422
+ Watch a comprehensive walkthrough of GenAI OpenTelemetry Auto-Instrumentation in action, demonstrating setup, configuration, and real-time observability across multiple LLM providers.
423
+
424
+ <div align="center">
425
+
426
+ **🎥 [Watch Demo Video](https://youtu.be/YOUR_VIDEO_ID_HERE)**
427
+ *(Coming Soon)*
428
+
429
+ </div>
430
+
431
+ ---
432
+
433
+ ## Cost Tracking Coverage
434
+
435
+ The library includes comprehensive cost tracking with pricing data for **340+ models** across **20+ providers**:
436
+
437
+ ### Providers with Full Token-Based Cost Tracking
438
+ - **OpenAI**: GPT-4o, GPT-4 Turbo, GPT-3.5 Turbo, o1/o3 series, embeddings, audio, vision (35+ models)
439
+ - **Anthropic**: Claude 3.5 Sonnet/Opus/Haiku, Claude 3 series (10+ models)
440
+ - **Google AI**: Gemini 1.5/2.0 Pro/Flash, PaLM 2 (12+ models)
441
+ - **AWS Bedrock**: Amazon Titan, Claude, Llama, Mistral models (20+ models)
442
+ - **Azure OpenAI**: Same as OpenAI with Azure-specific pricing
443
+ - **Cohere**: Command R/R+, Command Light, Embed v3/v2 (8+ models)
444
+ - **Mistral AI**: Mistral Large/Medium/Small, Mixtral, embeddings (8+ models)
445
+ - **Together AI**: DeepSeek-R1, Llama 3.x, Qwen, Mixtral (25+ models)
446
+ - **Groq**: Llama 3.x series, Mixtral, Gemma models (15+ models)
447
+ - **Ollama**: Local models with token tracking (pricing via cost estimation)
448
+ - **Vertex AI**: Gemini models via Google Cloud with usage metadata extraction
449
+
450
+ ### Special Pricing Models
451
+ - **Replicate**: Hardware-based pricing ($/second of GPU/CPU time) - not token-based
452
+ - **HuggingFace Transformers**: Local model execution with estimated costs based on parameter count
453
+ - Supports `pipeline()`, `AutoModelForCausalLM.generate()`, `AutoModelForSeq2SeqLM.generate()`
454
+ - Cost estimation uses GPU/compute resource pricing tiers (tiny/small/medium/large)
455
+ - Automatic token counting from tensor shapes
456
+
457
+ ### Pricing Features
458
+ - **Differential Pricing**: Separate rates for prompt tokens vs. completion tokens
459
+ - **Reasoning Tokens**: Special pricing for OpenAI o1/o3 reasoning tokens
460
+ - **Cache Pricing**: Anthropic prompt caching costs (read/write)
461
+ - **Granular Cost Metrics**: Per-request cost breakdown by token type
462
+ - **Auto-Updated Pricing**: Pricing data maintained in `llm_pricing.json`
463
+ - **Custom Pricing**: Add pricing for custom/proprietary models via environment variable
464
+
465
+ ### Adding Custom Model Pricing
466
+
467
+ For custom or proprietary models not in `llm_pricing.json`, you can provide custom pricing via the `GENAI_CUSTOM_PRICING_JSON` environment variable:
468
+
469
+ ```bash
470
+ # For chat models
471
+ export GENAI_CUSTOM_PRICING_JSON='{"chat":{"my-custom-model":{"promptPrice":0.001,"completionPrice":0.002}}}'
472
+
473
+ # For embeddings
474
+ export GENAI_CUSTOM_PRICING_JSON='{"embeddings":{"my-custom-embeddings":0.00005}}'
475
+
476
+ # For multiple categories
477
+ export GENAI_CUSTOM_PRICING_JSON='{
478
+ "chat": {
479
+ "my-custom-chat": {"promptPrice": 0.001, "completionPrice": 0.002}
480
+ },
481
+ "embeddings": {
482
+ "my-custom-embed": 0.00005
483
+ },
484
+ "audio": {
485
+ "my-custom-tts": 0.02
486
+ }
487
+ }'
488
+ ```
489
+
490
+ **Pricing Format:**
491
+ - **Chat models**: `{"promptPrice": <$/1k tokens>, "completionPrice": <$/1k tokens>}`
492
+ - **Embeddings**: Single number for price per 1k tokens
493
+ - **Audio**: Price per 1k characters (TTS) or per second (STT)
494
+ - **Images**: Nested structure with quality/size pricing (see `llm_pricing.json` for examples)
495
+
496
+ **Hybrid Pricing:** Custom prices are merged with default pricing from `llm_pricing.json`. If you provide custom pricing for an existing model, the custom price overrides the default.
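+ 
+ As a sketch, the same custom pricing can also be injected from Python before calling `instrument()`; with per-1k-token prices, the expected charge is `prompt_tokens/1000 * promptPrice + completion_tokens/1000 * completionPrice` (the model name and prices below are hypothetical):
+ 
+ ```python
+ import json
+ import os
+ 
+ # Hypothetical custom chat model, priced per 1k tokens as described above
+ custom_pricing = {
+     "chat": {
+         "my-custom-model": {"promptPrice": 0.001, "completionPrice": 0.002}
+     }
+ }
+ os.environ["GENAI_CUSTOM_PRICING_JSON"] = json.dumps(custom_pricing)
+ 
+ import genai_otel
+ genai_otel.instrument()
+ 
+ # Expected cost for 1,500 prompt tokens and 500 completion tokens:
+ # 1500/1000 * 0.001 + 500/1000 * 0.002 = 0.0025 USD
+ ```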
497
+
498
+ **Coverage Statistics**: As of v0.1.3, 89% test coverage with 415 passing tests, including comprehensive cost calculation validation and cost enrichment processor tests (supporting both GenAI and OpenInference semantic conventions).
499
+
500
+ ## Collected Telemetry
501
+
502
+ ### Traces
503
+ Every LLM call, database query, API request, and vector search is traced with full context propagation.
504
+
505
+ ### Metrics
506
+
507
+ **GenAI Metrics:**
508
+ - `gen_ai.requests` - Request counts by provider and model
509
+ - `gen_ai.client.token.usage` - Token usage (prompt/completion)
510
+ - `gen_ai.client.operation.duration` - Request latency histogram (optimized buckets for LLM workloads)
511
+ - `gen_ai.usage.cost` - Total estimated costs in USD
512
+ - `gen_ai.usage.cost.prompt` - Prompt tokens cost (granular)
513
+ - `gen_ai.usage.cost.completion` - Completion tokens cost (granular)
514
+ - `gen_ai.usage.cost.reasoning` - Reasoning tokens cost (OpenAI o1 models)
515
+ - `gen_ai.usage.cost.cache_read` - Cache read cost (Anthropic)
516
+ - `gen_ai.usage.cost.cache_write` - Cache write cost (Anthropic)
517
+ - `gen_ai.client.errors` - Error counts by operation and type
518
+ - `gen_ai.gpu.*` - GPU utilization, memory, temperature, power (ObservableGauges)
519
+ - `gen_ai.co2.emissions` - CO2 emissions tracking with codecarbon integration (opt-in via `GENAI_ENABLE_CO2_TRACKING`)
520
+ - `gen_ai.power.cost` - Cumulative electricity cost in USD based on GPU power consumption (configurable via `GENAI_POWER_COST_PER_KWH`)
521
+ - `gen_ai.server.ttft` - Time to First Token for streaming responses (histogram, 1ms-10s buckets)
522
+ - `gen_ai.server.tbt` - Time Between Tokens for streaming responses (histogram, 10ms-2.5s buckets)
523
+
524
+ **CO2 Tracking Options:**
525
+ - **Automatic (codecarbon)**: Uses region-based carbon intensity data for accurate emissions calculation
526
+ - **Manual**: Uses `GENAI_CARBON_INTENSITY` value (gCO2e/kWh) for calculation
527
+ - Set `GENAI_CO2_USE_MANUAL=true` to force manual calculation even when codecarbon is installed
528
+
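+ For example, the CO2 and electricity-cost metrics above can be enabled like this (a sketch; the values are illustrative):
+ 
+ ```python
+ import os
+ 
+ # Set before genai_otel.instrument() runs
+ os.environ["GENAI_ENABLE_GPU_METRICS"] = "true"    # GPU power draw feeds gen_ai.power.cost
+ os.environ["GENAI_ENABLE_CO2_TRACKING"] = "true"   # emit gen_ai.co2.emissions
+ os.environ["GENAI_POWER_COST_PER_KWH"] = "0.30"    # electricity price in USD/kWh
+ os.environ["GENAI_CO2_USE_MANUAL"] = "true"        # force the manual calculation path
+ os.environ["GENAI_CARBON_INTENSITY"] = "400"       # gCO2e/kWh used by manual mode
+ 
+ import genai_otel
+ genai_otel.instrument()
+ ```
+ 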
529
+ **MCP Metrics (Database Operations):**
530
+ - `mcp.requests` - Number of MCP/database requests
531
+ - `mcp.client.operation.duration` - Operation duration histogram (1ms to 10s buckets)
532
+ - `mcp.request.size` - Request payload size histogram (100B to 5MB buckets)
533
+ - `mcp.response.size` - Response payload size histogram (100B to 5MB buckets)
534
+
535
+ ### Span Attributes
536
+ **Core Attributes:**
537
+ - `gen_ai.system` - Provider name (e.g., "openai")
538
+ - `gen_ai.operation.name` - Operation type (e.g., "chat")
539
+ - `gen_ai.request.model` - Model identifier
540
+ - `gen_ai.usage.prompt_tokens` / `gen_ai.usage.input_tokens` - Input tokens (dual emission supported)
541
+ - `gen_ai.usage.completion_tokens` / `gen_ai.usage.output_tokens` - Output tokens (dual emission supported)
542
+ - `gen_ai.usage.total_tokens` - Total tokens
543
+
544
+ **Request Parameters:**
545
+ - `gen_ai.request.temperature` - Temperature setting
546
+ - `gen_ai.request.top_p` - Top-p sampling
547
+ - `gen_ai.request.max_tokens` - Max tokens requested
548
+ - `gen_ai.request.frequency_penalty` - Frequency penalty
549
+ - `gen_ai.request.presence_penalty` - Presence penalty
550
+
551
+ **Response Attributes:**
552
+ - `gen_ai.response.id` - Response ID from provider
553
+ - `gen_ai.response.model` - Actual model used (may differ from request)
554
+ - `gen_ai.response.finish_reasons` - Array of finish reasons
555
+
556
+ **Tool/Function Calls:**
557
+ - `llm.tools` - JSON-serialized tool definitions
558
+ - `llm.output_messages.{choice}.message.tool_calls.{index}.tool_call.id` - Tool call ID
559
+ - `llm.output_messages.{choice}.message.tool_calls.{index}.tool_call.function.name` - Function name
560
+ - `llm.output_messages.{choice}.message.tool_calls.{index}.tool_call.function.arguments` - Function arguments
561
+
562
+ **Cost Attributes (granular):**
563
+ - `gen_ai.usage.cost.total` - Total cost
564
+ - `gen_ai.usage.cost.prompt` - Prompt tokens cost
565
+ - `gen_ai.usage.cost.completion` - Completion tokens cost
566
+ - `gen_ai.usage.cost.reasoning` - Reasoning tokens cost (o1 models)
567
+ - `gen_ai.usage.cost.cache_read` - Cache read cost (Anthropic)
568
+ - `gen_ai.usage.cost.cache_write` - Cache write cost (Anthropic)
569
+
570
+ **Streaming Attributes:**
571
+ - `gen_ai.server.ttft` - Time to First Token (seconds) for streaming responses
572
+ - `gen_ai.streaming.token_count` - Total number of chunks in streaming response
573
+ - `gen_ai.usage.prompt_tokens` - Actual prompt tokens (extracted from final chunk)
574
+ - `gen_ai.usage.completion_tokens` - Actual completion tokens (extracted from final chunk)
575
+ - `gen_ai.usage.total_tokens` - Total tokens (extracted from final chunk)
576
+ - `gen_ai.usage.cost.total` - Total cost for streaming request
577
+ - `gen_ai.usage.cost.prompt` - Prompt tokens cost for streaming request
578
+ - `gen_ai.usage.cost.completion` - Completion tokens cost for streaming request
579
+ - All granular cost attributes (reasoning, cache_read, cache_write) also available for streaming
580
+
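+ As a sketch of where these come from (the model name is illustrative, and usage extraction from the final chunk depends on what the provider returns):
+ 
+ ```python
+ import genai_otel
+ genai_otel.instrument()
+ 
+ from openai import OpenAI
+ 
+ client = OpenAI()
+ 
+ # The instrumented client measures TTFT at the first chunk and TBT between
+ # chunks; token and cost attributes are filled in from the final chunk when
+ # the provider includes usage data.
+ stream = client.chat.completions.create(
+     model="gpt-4o-mini",
+     messages=[{"role": "user", "content": "Stream a short answer"}],
+     stream=True,
+ )
+ for chunk in stream:
+     if chunk.choices and chunk.choices[0].delta.content:
+         print(chunk.choices[0].delta.content, end="")
+ ```
+ 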
581
+ **Content Events (opt-in):**
582
+ - `gen_ai.prompt.{index}` events with role and content
583
+ - `gen_ai.completion.{index}` events with role and content
584
+
585
+ **Additional:**
586
+ - Database, vector DB, and API attributes from MCP instrumentation
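+ 
+ Taken together, a single instrumented chat call typically carries an attribute set along these lines (all values here are illustrative, not real output):
+ 
+ ```python
+ # Illustrative attributes on one chat-completion span
+ example_span_attributes = {
+     "gen_ai.system": "openai",
+     "gen_ai.operation.name": "chat",
+     "gen_ai.request.model": "gpt-4o-mini",
+     "gen_ai.request.temperature": 0.2,
+     "gen_ai.response.finish_reasons": ["stop"],
+     "gen_ai.usage.prompt_tokens": 42,
+     "gen_ai.usage.completion_tokens": 128,
+     "gen_ai.usage.total_tokens": 170,
+     "gen_ai.usage.cost.prompt": 0.000006,
+     "gen_ai.usage.cost.completion": 0.000077,
+     "gen_ai.usage.cost.total": 0.000083,
+ }
+ ```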
587
+
588
+ ## Configuration
589
+
590
+ ### Environment Variables
591
+
592
+ ```bash
593
+ # Required
594
+ OTEL_SERVICE_NAME=my-app
595
+ OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
596
+
597
+ # Optional
598
+ OTEL_EXPORTER_OTLP_HEADERS=x-api-key=secret
599
+ GENAI_ENABLE_GPU_METRICS=true
600
+ GENAI_ENABLE_COST_TRACKING=true
601
+ GENAI_ENABLE_MCP_INSTRUMENTATION=true
602
+ GENAI_GPU_COLLECTION_INTERVAL=5 # GPU metrics collection interval in seconds (default: 5)
603
+ OTEL_SERVICE_INSTANCE_ID=instance-1 # Optional service instance id
604
+ OTEL_ENVIRONMENT=production # Optional environment
605
+ OTEL_EXPORTER_OTLP_TIMEOUT=60 # Timeout for OTLP exporter in seconds (default: 60)
606
+ OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf # Protocol: "http/protobuf" (default) or "grpc"
607
+
608
+ # Semantic conventions (NEW)
609
+ OTEL_SEMCONV_STABILITY_OPT_IN=gen_ai # "gen_ai" for new conventions only, "gen_ai/dup" for dual emission
610
+ GENAI_ENABLE_CONTENT_CAPTURE=false # WARNING: May capture sensitive data. Enable with caution.
611
+
612
+ # Logging configuration
613
+ GENAI_OTEL_LOG_LEVEL=INFO # DEBUG, INFO, WARNING, ERROR, CRITICAL. Logs are written to 'logs/genai_otel.log' with rotation (10 files, 10MB each).
614
+
615
+ # Error handling
616
+ GENAI_FAIL_ON_ERROR=false # true to fail fast, false to continue on errors
617
+ ```
618
+
619
+ ### Programmatic Configuration
620
+
621
+ ```python
622
+ import genai_otel
623
+
624
+ genai_otel.instrument(
625
+     service_name="my-app",
626
+     endpoint="http://localhost:4318",
627
+     enable_gpu_metrics=True,
628
+     enable_cost_tracking=True,
629
+     enable_mcp_instrumentation=True
630
+ )
631
+ ```
632
+
633
+ ### Sample Environment File (`sample.env`)
634
+
635
+ A `sample.env` file is included in the project root directory. This file contains commented-out examples of all supported environment variables, along with their default values or expected formats. You can copy this file to `.env` and uncomment/modify the variables to configure the instrumentation for your specific needs.
636
+
637
+ ## Advanced Features
638
+
639
+ ### Session and User Tracking
640
+
641
+ Track user sessions and identify users across multiple LLM requests for better analytics, debugging, and cost attribution.
642
+
643
+ **Configuration:**
644
+
645
+ ```python
646
+ import genai_otel
647
+ from genai_otel import OTelConfig
648
+
649
+ # Define extractor functions
650
+ def extract_session_id(instance, args, kwargs):
651
+ """Extract session ID from request metadata."""
652
+ # Option 1: From kwargs metadata
653
+ metadata = kwargs.get("metadata", {})
654
+ return metadata.get("session_id")
655
+
656
+ # Option 2: From custom headers
657
+ # headers = kwargs.get("headers", {})
658
+ # return headers.get("X-Session-ID")
659
+
660
+ # Option 3: From thread-local storage
661
+ # import threading
662
+ # return getattr(threading.current_thread(), "session_id", None)
663
+
664
+ def extract_user_id(instance, args, kwargs):
665
+ """Extract user ID from request metadata."""
666
+ metadata = kwargs.get("metadata", {})
667
+ return metadata.get("user_id")
668
+
669
+ # Configure with extractors
670
+ config = OTelConfig(
671
+ service_name="my-rag-app",
672
+ endpoint="http://localhost:4318",
673
+ session_id_extractor=extract_session_id,
674
+ user_id_extractor=extract_user_id,
675
+ )
676
+
677
+ genai_otel.instrument(config)
678
+ ```
679
+
680
+ **Usage:**
681
+
682
+ ```python
683
+ from openai import OpenAI
684
+
685
+ client = OpenAI()
686
+
687
+ # Pass session and user info via metadata
688
+ response = client.chat.completions.create(
689
+ model="gpt-3.5-turbo",
690
+ messages=[{"role": "user", "content": "What is OpenTelemetry?"}],
691
+ extra_body={"metadata": {"session_id": "sess_12345", "user_id": "user_alice"}}
692
+ )
693
+ ```
694
+
695
+ **Span Attributes Added:**
696
+ - `session.id` - Unique session identifier for tracking conversations
697
+ - `user.id` - User identifier for per-user analytics and cost tracking
698
+
699
+ **Use Cases:**
700
+ - Track multi-turn conversations across requests
701
+ - Analyze usage patterns per user
702
+ - Debug session-specific issues
703
+ - Calculate per-user costs and quotas
704
+ - Build user-specific dashboards
705
+
706
+ ### RAG and Embedding Attributes
707
+
708
+ Enhanced observability for Retrieval-Augmented Generation (RAG) workflows, including embedding generation and document retrieval.
709
+
710
+ **Helper Methods:**
711
+
712
+ The `BaseInstrumentor` provides helper methods to add RAG-specific attributes to your spans:
713
+
714
+ ```python
715
+ from opentelemetry import trace
716
+ from genai_otel.instrumentors.base import BaseInstrumentor
717
+
718
+ # Get your instrumentor instance (or create spans manually)
719
+ tracer = trace.get_tracer(__name__)
720
+
721
+ # 1. Embedding Attributes
722
+ with tracer.start_as_current_span("embedding.create") as span:
723
+     # Your embedding logic (assumes an existing OpenAI `client`)
724
+     embedding_response = client.embeddings.create(
725
+         model="text-embedding-3-small",
726
+         input="OpenTelemetry provides observability"
727
+     )
728
+
729
+     # Add embedding attributes (if using BaseInstrumentor)
730
+     # instrumentor.add_embedding_attributes(
731
+     #     span,
732
+     #     model="text-embedding-3-small",
733
+     #     input_text="OpenTelemetry provides observability",
734
+     #     vector=embedding_response.data[0].embedding
735
+     # )
736
+
737
+     # Or manually set attributes
738
+     span.set_attribute("embedding.model_name", "text-embedding-3-small")
739
+     span.set_attribute("embedding.text", "OpenTelemetry provides observability"[:500])
740
+     span.set_attribute("embedding.vector.dimension", len(embedding_response.data[0].embedding))
741
+
742
+ # 2. Retrieval Attributes
743
+ with tracer.start_as_current_span("retrieval.search") as span:
744
+     # Your retrieval logic
745
+     retrieved_docs = [
746
+         {
747
+             "id": "doc_001",
748
+             "score": 0.95,
749
+             "content": "OpenTelemetry is an observability framework...",
750
+             "metadata": {"source": "docs.opentelemetry.io", "category": "intro"}
751
+         },
752
+         # ... more documents
753
+     ]
754
+
755
+     # Add retrieval attributes (if using BaseInstrumentor)
756
+     # instrumentor.add_retrieval_attributes(
757
+     #     span,
758
+     #     documents=retrieved_docs,
759
+     #     query="What is OpenTelemetry?",
760
+     #     max_docs=5
761
+     # )
762
+
763
+     # Or manually set attributes
764
+     span.set_attribute("retrieval.query", "What is OpenTelemetry?"[:500])
765
+     span.set_attribute("retrieval.document_count", len(retrieved_docs))
766
+
767
+     for i, doc in enumerate(retrieved_docs[:5]):  # Limit to 5 docs
768
+         prefix = f"retrieval.documents.{i}.document"
769
+         span.set_attribute(f"{prefix}.id", doc["id"])
770
+         span.set_attribute(f"{prefix}.score", doc["score"])
771
+         span.set_attribute(f"{prefix}.content", doc["content"][:500])
772
+
773
+         # Add metadata
774
+         for key, value in doc.get("metadata", {}).items():
775
+             span.set_attribute(f"{prefix}.metadata.{key}", str(value))
776
+ ```
777
+
778
+ **Embedding Attributes:**
779
+ - `embedding.model_name` - Embedding model used
780
+ - `embedding.text` - Input text (truncated to 500 chars)
781
+ - `embedding.vector` - Embedding vector (optional, if configured)
782
+ - `embedding.vector.dimension` - Vector dimensions
783
+
784
+ **Retrieval Attributes:**
785
+ - `retrieval.query` - Search query (truncated to 500 chars)
786
+ - `retrieval.document_count` - Number of documents retrieved
787
+ - `retrieval.documents.{i}.document.id` - Document ID
788
+ - `retrieval.documents.{i}.document.score` - Relevance score
789
+ - `retrieval.documents.{i}.document.content` - Document content (truncated to 500 chars)
790
+ - `retrieval.documents.{i}.document.metadata.*` - Custom metadata fields
791
+
792
+ **Safeguards:**
793
+ - Text content truncated to 500 characters to avoid span size explosion
794
+ - Document count limited to 5 by default (configurable via `max_docs`)
795
+ - Metadata values truncated to prevent excessive attribute counts
796
+
797
+ **Complete RAG Workflow Example:**
798
+
799
+ See `examples/phase4_session_rag_tracking.py` for a comprehensive demonstration of:
800
+ - Session and user tracking across RAG pipeline
801
+ - Embedding attribute capture
802
+ - Retrieval attribute capture
803
+ - End-to-end RAG workflow with full observability
804
+
805
+ **Use Cases:**
806
+ - Monitor retrieval quality and relevance scores
807
+ - Debug RAG pipeline performance
808
+ - Track embedding model usage
809
+ - Analyze document retrieval patterns
810
+ - Optimize vector search configurations
811
+
812
+ ## Example: Full-Stack GenAI App
813
+
814
+ ```python
815
+ import genai_otel
816
+ genai_otel.instrument()
817
+
818
+ import openai
819
+ import pinecone
820
+ import redis
821
+ import psycopg2
822
+
823
+ # All of these are automatically instrumented:
824
+
825
+ # Cache check
826
+ cache = redis.Redis().get('key')
827
+
828
+ # Vector search
829
+ pinecone_index = pinecone.Index("embeddings")
830
+ results = pinecone_index.query(vector=[...], top_k=5)
831
+
832
+ # Database query
833
+ conn = psycopg2.connect("dbname=mydb")
834
+ cursor = conn.cursor()
835
+ cursor.execute("SELECT * FROM context")
836
+
837
+ # LLM call with full context
838
+ client = openai.OpenAI()
839
+ response = client.chat.completions.create(
840
+ model="gpt-4",
841
+ messages=[...]
842
+ )
843
+
844
+ # You get:
845
+ # ✓ Distributed traces across all services
846
+ # ✓ Cost tracking for the LLM call
847
+ # ✓ Performance metrics for DB, cache, vector DB
848
+ # ✓ GPU metrics if using local models
849
+ # ✓ Complete observability with zero manual instrumentation
850
+ ```
851
+
852
+ ## Backend Integration
853
+
854
+ Works with any OpenTelemetry-compatible backend:
855
+ - Jaeger, Zipkin
856
+ - Prometheus, Grafana
857
+ - Datadog, New Relic, Honeycomb
858
+ - AWS X-Ray, Google Cloud Trace
859
+ - Elastic APM, Splunk
860
+ - Self-hosted OTEL Collector
861
+
862
+ ## Project Structure
863
+
864
+ ```bash
865
+ genai-otel-instrument/
866
+ ├── setup.py
867
+ ├── MANIFEST.in
868
+ ├── README.md
869
+ ├── LICENSE
870
+ ├── example_usage.py
871
+ └── genai_otel/
872
+     ├── __init__.py
873
+     ├── config.py
874
+     ├── auto_instrument.py
875
+     ├── cli.py
876
+     ├── cost_calculator.py
877
+     ├── gpu_metrics.py
878
+     ├── instrumentors/
879
+     │   ├── __init__.py
880
+     │   ├── base.py
881
+     │   └── (other instrumentor files)
882
+     └── mcp_instrumentors/
883
+         ├── __init__.py
884
+         ├── manager.py
885
+         └── (other mcp files)
886
+ ```
887
+
888
+ ## Roadmap
889
+
890
+ ### v0.2.0 Release (In Progress) - Q1 2026
891
+
892
+ We're implementing significant enhancements for this release, focusing on evaluation metrics and safety guardrails alongside completing OpenTelemetry semantic convention compliance.
893
+
894
+ **✅ Completed Features:**
895
+ - **PII Detection** - Automatic detection and handling of personally identifiable information with Microsoft Presidio
896
+ - Three modes: detect, redact, or block
897
+ - GDPR, HIPAA, and PCI-DSS compliance modes
898
+ - 15+ entity types (email, phone, SSN, credit cards, IP addresses, etc.)
899
+ - Span attributes and metrics for PII detections
900
+ - Example: `examples/pii_detection_example.py`
901
+
902
+ - **Toxicity Detection** - Monitor and alert on toxic or harmful content
903
+ - Dual detection methods: Perspective API (cloud) and Detoxify (local)
904
+ - Six toxicity categories: toxicity, severe_toxicity, identity_attack, insult, profanity, threat
905
+ - Automatic fallback from Perspective API to Detoxify
906
+ - Configurable threshold and blocking mode
907
+ - Batch processing support
908
+ - Span attributes and metrics for toxicity detections
909
+ - Example: `examples/toxicity_detection_example.py`
910
+
911
+ - **Bias Detection** - Identify demographic and other biases in prompts and responses
912
+ - 8 bias types: gender, race, ethnicity, religion, age, disability, sexual_orientation, political
913
+ - Pattern-based detection (always available, no external dependencies)
914
+ - Optional ML-based detection with Fairlearn
915
+ - Configurable threshold and blocking mode
916
+ - Batch processing and statistics generation
917
+ - Span attributes and metrics for bias detections
918
+ - Example: `examples/bias_detection_example.py`
919
+
920
+ - **Prompt Injection Detection** - Protect against prompt manipulation attacks
921
+ - 6 injection types: instruction_override, role_playing, jailbreak, context_switching, system_extraction, encoding_obfuscation
922
+ - Pattern-based detection (always available)
923
+ - Configurable threshold and blocking mode
924
+ - Automatic security blocking for high-risk prompts
925
+ - Span attributes and metrics for injection attempts
926
+ - Example: `examples/comprehensive_evaluation_example.py`
927
+
928
+ - **Restricted Topics Detection** - Monitor and block sensitive topics
929
+ - 9 topic categories: medical_advice, legal_advice, financial_advice, violence, self_harm, illegal_activities, adult_content, personal_information, political_manipulation
930
+ - Pattern-based topic classification
931
+ - Configurable topic blacklists
932
+ - Industry-specific content filters
933
+ - Span attributes and metrics for topic violations
934
+ - Example: `examples/comprehensive_evaluation_example.py`
935
+
936
+ - **Hallucination Detection** - Track factual accuracy and groundedness
937
+ - Factual claim extraction and validation
938
+ - Hedge word detection for uncertainty
939
+ - Citation and attribution tracking
940
+ - Context contradiction detection
941
+ - Unsupported claims identification
942
+ - Span attributes and metrics for hallucination risks
943
+ - Example: `examples/comprehensive_evaluation_example.py`
944
+
945
+ **Implementation:**
946
+ ```python
947
+ import genai_otel
948
+
949
+ # Enable all 6 evaluation features
950
+ genai_otel.instrument(
951
+     # Detection & Safety
952
+     enable_pii_detection=True,
953
+     enable_toxicity_detection=True,
954
+     enable_bias_detection=True,
955
+     enable_prompt_injection_detection=True,
956
+     enable_restricted_topics=True,
957
+     enable_hallucination_detection=True,
958
+
959
+     # Configure thresholds
960
+     pii_threshold=0.8,
961
+     toxicity_threshold=0.7,
962
+     bias_threshold=0.5,
963
+     prompt_injection_threshold=0.7,
964
+     restricted_topics_threshold=0.5,
965
+     hallucination_threshold=0.6,
966
+ )
967
+ ```
968
+
969
+ **All Features Completed! ✅**
970
+
971
+ - **Restricted Topics** - ✅ COMPLETED - Block sensitive or inappropriate topics
972
+ - Configurable topic blacklists (legal, medical, financial advice)
973
+ - Industry-specific content filters
974
+ - Topic detection with confidence scoring
975
+ - Custom topic definition support
976
+
977
+ - **Sensitive Information Protection** - ✅ COMPLETED - Prevent PII leakage
978
+ - ✅ PII detection (emails, phone numbers, SSN, credit cards, IPs, and more)
979
+ - ✅ Automatic redaction or blocking modes
980
+ - ✅ Compliance modes (GDPR, HIPAA, PCI-DSS)
981
+ - ✅ Data leak prevention metrics
982
+ - ✅ Microsoft Presidio integration with regex fallback
983
+
984
+ **Implementation:**
985
+ ```python
986
+ import genai_otel
987
+
988
+ # Configure guardrails (PII Detection is LIVE!)
989
+ genai_otel.instrument(
990
+     # PII Detection (✅ AVAILABLE NOW)
991
+     enable_pii_detection=True,
992
+     pii_mode="redact",  # "detect", "redact", or "block"
993
+     pii_threshold=0.7,
994
+     pii_gdpr_mode=True,  # Enable GDPR compliance
995
+     pii_hipaa_mode=True,  # Enable HIPAA compliance
996
+     pii_pci_dss_mode=True,  # Enable PCI-DSS compliance
997
+
998
+     # Also available:
999
+     enable_prompt_injection_detection=True,
1000
+     enable_restricted_topics=True,
1001
+     restricted_topics=["medical_advice", "legal_advice", "financial_advice"],
1002
+ )
1003
+ ```
1004
+
1005
+ **Metrics Added:**
1006
+ - ✅ `genai.evaluation.pii.detections` - PII detection events (by location and mode)
1007
+ - ✅ `genai.evaluation.pii.entities` - PII entities detected by type
1008
+ - ✅ `genai.evaluation.pii.blocked` - Requests/responses blocked due to PII
1009
+ - ✅ `genai.evaluation.toxicity.detections` - Toxicity detection events
1010
+ - ✅ `genai.evaluation.toxicity.categories` - Toxicity by category
1011
+ - ✅ `genai.evaluation.toxicity.blocked` - Blocked due to toxicity
1012
+ - ✅ `genai.evaluation.toxicity.score` - Toxicity score distribution (histogram)
1013
+ - ✅ `genai.evaluation.bias.detections` - Bias detection events (by location)
1014
+ - ✅ `genai.evaluation.bias.types` - Bias detections by type
1015
+ - ✅ `genai.evaluation.bias.blocked` - Requests/responses blocked due to bias
1016
+ - ✅ `genai.evaluation.bias.score` - Bias score distribution (histogram)
1017
+ - ✅ `genai.evaluation.prompt_injection.detections` - Injection attempts detected
1018
+ - ✅ `genai.evaluation.prompt_injection.types` - Injection attempts by type
1019
+ - ✅ `genai.evaluation.prompt_injection.blocked` - Blocked due to injection
1020
+ - ✅ `genai.evaluation.prompt_injection.score` - Injection score distribution (histogram)
1021
+ - ✅ `genai.evaluation.restricted_topics.detections` - Restricted topics detected
1022
+ - ✅ `genai.evaluation.restricted_topics.types` - Detections by topic
1023
+ - ✅ `genai.evaluation.restricted_topics.blocked` - Blocked due to restricted topics
1024
+ - ✅ `genai.evaluation.restricted_topics.score` - Topic score distribution (histogram)
1025
+ - ✅ `genai.evaluation.hallucination.detections` - Hallucination risks detected
1026
+ - ✅ `genai.evaluation.hallucination.indicators` - Detections by indicator type
1027
+ - ✅ `genai.evaluation.hallucination.score` - Hallucination score distribution (histogram)
1028
+
1029
+ **Span Attributes:**
+ - ✅ `evaluation.pii.prompt.detected` - PII detected in prompt (boolean)
+ - ✅ `evaluation.pii.response.detected` - PII detected in response (boolean)
+ - ✅ `evaluation.pii.*.entity_count` - Number of PII entities found
+ - ✅ `evaluation.pii.*.entity_types` - Types of PII detected (array)
+ - ✅ `evaluation.pii.*.score` - Detection confidence score
+ - ✅ `evaluation.pii.*.redacted` - Redacted text (in redact mode)
+ - ✅ `evaluation.pii.*.blocked` - Whether blocked due to PII (boolean)
+ - ✅ `evaluation.toxicity.prompt.detected` - Toxicity in prompt (boolean)
+ - ✅ `evaluation.toxicity.response.detected` - Toxicity in response (boolean)
+ - ✅ `evaluation.toxicity.*.max_score` - Maximum toxicity score
+ - ✅ `evaluation.toxicity.*.categories` - Toxic categories detected (array)
+ - ✅ `evaluation.toxicity.*.<category>_score` - Individual category scores
+ - ✅ `evaluation.toxicity.*.blocked` - Whether blocked due to toxicity
+ - ✅ `evaluation.bias.prompt.detected` - Bias detected in prompt (boolean)
+ - ✅ `evaluation.bias.response.detected` - Bias detected in response (boolean)
+ - ✅ `evaluation.bias.*.max_score` - Maximum bias score
+ - ✅ `evaluation.bias.*.detected_biases` - Bias types detected (array)
+ - ✅ `evaluation.bias.*.<bias_type>_score` - Individual bias type scores
+ - ✅ `evaluation.bias.*.<bias_type>_patterns` - Matched patterns for each bias type
+ - ✅ `evaluation.bias.*.blocked` - Whether blocked due to bias
+ - ✅ `evaluation.prompt_injection.detected` - Injection attempt detected (boolean)
+ - ✅ `evaluation.prompt_injection.score` - Injection risk score
+ - ✅ `evaluation.prompt_injection.types` - Injection types detected (array)
+ - ✅ `evaluation.prompt_injection.*_patterns` - Matched patterns by injection type
+ - ✅ `evaluation.prompt_injection.blocked` - Whether blocked due to injection
+ - ✅ `evaluation.restricted_topics.prompt.detected` - Restricted topic in prompt (boolean)
+ - ✅ `evaluation.restricted_topics.response.detected` - Restricted topic in response (boolean)
+ - ✅ `evaluation.restricted_topics.*.max_score` - Maximum topic score
+ - ✅ `evaluation.restricted_topics.*.topics` - Detected topics (array)
+ - ✅ `evaluation.restricted_topics.*.<topic>_score` - Individual topic scores
+ - ✅ `evaluation.restricted_topics.*.blocked` - Whether blocked due to topic
+ - ✅ `evaluation.hallucination.detected` - Hallucination risk detected (boolean)
+ - ✅ `evaluation.hallucination.score` - Hallucination risk score
+ - ✅ `evaluation.hallucination.indicators` - Indicators found (array)
+ - ✅ `evaluation.hallucination.hedge_words_count` - Count of uncertainty markers
+ - ✅ `evaluation.hallucination.citation_count` - Count of citations found
+ - ✅ `evaluation.hallucination.unsupported_claims` - List of unsupported claims (limited)
+
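+ These attributes appear on the instrumented spans, so one convenient way to act on them downstream is a custom `SpanProcessor`. The sketch below is illustrative only (the processor class and the choice of attributes to watch are not part of this package); it uses only standard OpenTelemetry SDK types and the attribute names listed above.
+
+ ```python
+ # Illustrative sketch: alert on spans that were blocked by an evaluation check.
+ import logging
+
+ from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor
+
+ logger = logging.getLogger(__name__)
+
+ BLOCK_ATTRS = (
+     "evaluation.pii.prompt.blocked",
+     "evaluation.toxicity.response.blocked",
+     "evaluation.prompt_injection.blocked",
+ )
+
+ class EvaluationAlertProcessor(SpanProcessor):  # hypothetical helper class
+     def on_end(self, span: ReadableSpan) -> None:
+         attrs = span.attributes or {}
+         if any(attrs.get(key) for key in BLOCK_ATTRS):
+             logger.warning(
+                 "span %r blocked by evaluation; injection score=%s",
+                 span.name,
+                 attrs.get("evaluation.prompt_injection.score"),
+             )
+ ```
+
+ A processor like this would be registered on the SDK `TracerProvider` alongside your exporter; the attribute names themselves come straight from the list above.
+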
+ #### 🔄 Migration Support
+
+ **Backward Compatibility:**
+ - All new features are opt-in via configuration
+ - Existing instrumentation continues to work unchanged
+ - Gradual migration path for new semantic conventions
+
+ **Version Support:**
+ - Python 3.9+ (evaluation features require 3.10+)
+ - OpenTelemetry SDK 1.20.0+
+ - Backward compatible with existing dashboards
+
+ ### 2026-2027 Roadmap
+
+ Our roadmap focuses on comprehensive LLM observability, from RAG evaluation to enterprise governance.
+
+ ---
+
+ ### v0.3.0 - RAG & Retrieval Observability (Q1-Q2 2026)
+
+ **🎯 Goal:** Complete monitoring and optimization for RAG applications
+
+ **RAG Evaluation Metrics**
+ - **Retrieval Quality Metrics**
+   - Context relevance scoring (how relevant are the retrieved documents)
+   - Retrieval precision & recall (did we retrieve the right documents)
+   - MRR (Mean Reciprocal Rank) for ranked results (see the MRR/NDCG sketch after this list)
+   - NDCG (Normalized Discounted Cumulative Gain)
+   - Semantic similarity between query and retrieved chunks
+
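+ MRR and NDCG here follow the standard information-retrieval definitions. The sketch below is a plain worked reference for those two formulas; it does not depend on, or represent, any API in this package:
+
+ ```python
+ import math
+
+ def mean_reciprocal_rank(ranked_relevance: list) -> float:
+     """MRR over queries: mean of 1/rank of the first relevant hit (0 if none)."""
+     total = 0.0
+     for rels in ranked_relevance:
+         total += next((1.0 / (i + 1) for i, r in enumerate(rels) if r), 0.0)
+     return total / len(ranked_relevance)
+
+ def ndcg_at_k(gains: list, k: int) -> float:
+     """NDCG@k with the usual log2 discount; `gains` are graded relevances in ranked order."""
+     def dcg(values):
+         return sum(g / math.log2(i + 2) for i, g in enumerate(values[:k]))
+     ideal = dcg(sorted(gains, reverse=True))
+     return dcg(gains) / ideal if ideal > 0 else 0.0
+
+ # First query: relevant document at rank 1; second query: at rank 3.
+ print(mean_reciprocal_rank([[1, 0, 0], [0, 0, 1]]))  # (1 + 1/3) / 2 ≈ 0.667
+ print(ndcg_at_k([3.0, 1.0, 2.0], k=3))               # ≈ 0.97
+ ```
+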
+ - **Answer Groundedness Metrics**
+   - Citation accuracy (claims backed by sources)
+   - Hallucination vs grounded statements ratio
+   - Answer-context alignment scoring
+   - Faithfulness metrics (answer faithful to context)
+
+ - **RAG Pipeline Tracing**
+   - Query understanding and rewriting traces
+   - Retrieval step instrumentation (vector DB queries)
+   - Re-ranking step metrics
+   - Context compression tracking
+   - Generation step with attribution
+
+ **Vector Database Monitoring**
+ - Embedding quality metrics (cosine similarity distributions; see the sketch below)
+ - Index performance (latency, throughput)
+ - Semantic drift detection (embedding space changes over time)
+ - Vector DB integration: Pinecone, Weaviate, Qdrant, Milvus, ChromaDB
+ - Cache hit rates and efficiency
+
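+ For the embedding-quality bullet, the idea is simply to summarize the distribution of cosine similarities between a query embedding and the retrieved chunk embeddings. A tiny NumPy illustration, with no vector-DB API assumed:
+
+ ```python
+ import numpy as np
+
+ def cosine_similarity_stats(query: np.ndarray, chunks: np.ndarray) -> dict:
+     """Summarize cosine similarity between one query embedding and N chunk embeddings."""
+     q = query / np.linalg.norm(query)
+     c = chunks / np.linalg.norm(chunks, axis=1, keepdims=True)
+     sims = c @ q
+     return {"mean": float(sims.mean()), "p05": float(np.percentile(sims, 5)), "max": float(sims.max())}
+
+ # Random 384-dimensional vectors stand in for a real embedding model's output.
+ rng = np.random.default_rng(0)
+ print(cosine_similarity_stats(rng.normal(size=384), rng.normal(size=(10, 384))))
+ ```
+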
+ ---
+
+ ### v0.4.0 - Prompt Engineering & Optimization (Q2-Q3 2026)
+
+ **🎯 Goal:** Production-grade prompt lifecycle management
+
+ **Prompt Management**
+ - **Versioning & Registry**
+   - Prompt version control with Git-like semantics
+   - Centralized prompt registry
+   - Rollback capabilities
+   - Change history and diff tracking
+
+ - **A/B Testing Framework**
+   - Multi-variant prompt testing
+   - Automatic traffic splitting
+   - Statistical significance testing (see the sketch after this list)
+   - Winner selection algorithms
+
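+ For the significance-testing bullet, the usual baseline is a two-proportion z-test over the success rates of two prompt variants. A worked sketch of that standard formula, not an API from this package:
+
+ ```python
+ import math
+
+ def two_proportion_z(successes_a: int, n_a: int, successes_b: int, n_b: int) -> float:
+     """z statistic for comparing the success rates of two prompt variants."""
+     p_a, p_b = successes_a / n_a, successes_b / n_b
+     pooled = (successes_a + successes_b) / (n_a + n_b)
+     se = math.sqrt(pooled * (1 - pooled) * (1 / n_a + 1 / n_b))
+     return (p_a - p_b) / se
+
+ # Variant A: 230/500 good answers, variant B: 260/500.
+ # |z| > 1.96 would indicate significance at the 5% level (two-sided).
+ print(two_proportion_z(230, 500, 260, 500))  # ≈ -1.90, not yet significant
+ ```
+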
+ - **Optimization Engine**
+   - Automatic prompt optimization suggestions
+   - Few-shot example selection
+   - Chain-of-thought template optimization
+   - Token usage optimization recommendations
+
+ **Prompt Analytics**
+ - Performance by prompt template
+ - Cost per prompt version
+ - Success rate tracking
+ - User satisfaction correlation
+ - Conversion metrics by prompt
+
+ ---
+
+ ### v0.5.0 - Human Feedback & Active Learning (Q3 2026)
+
+ **🎯 Goal:** Close the loop with human feedback integration
+
+ **Feedback Collection**
+ - **Multi-Channel Feedback**
+   - Thumbs up/down collection
+   - Star ratings (1-5 scale)
+   - Free-text feedback
+   - Issue categorization
+   - Custom feedback schemas
+
+ - **Feedback API & SDKs**
+   - REST API for feedback submission
+   - JavaScript/Python SDKs
+   - React components for UI
+   - Slack/Discord integrations
+
+ **Active Learning Pipeline**
+ - Feedback → Dataset → Fine-tuning workflow
+ - Automatic dataset curation from feedback
+ - Export to fine-tuning formats (JSONL, Parquet; see the JSONL sketch below)
+ - Integration with training platforms
+ - RLHF (Reinforcement Learning from Human Feedback) support
+
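+ The JSONL export format mentioned above is simply one JSON object per line. The field names below are illustrative, not a fixed schema from this package:
+
+ ```python
+ import json
+ from pathlib import Path
+
+ def export_feedback_jsonl(records: list, path: str) -> None:
+     """Write curated feedback records as one JSON object per line (JSONL)."""
+     with Path(path).open("w", encoding="utf-8") as fh:
+         for rec in records:
+             fh.write(json.dumps(rec, ensure_ascii=False) + "\n")
+
+ export_feedback_jsonl(
+     [{"prompt": "Summarize the invoice", "completion": "...", "rating": 4, "label": "helpful"}],
+     "feedback_dataset.jsonl",
+ )
+ ```
+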
+ **Analytics & Insights**
+ - Feedback trends and patterns
+ - Issue clustering and categorization
+ - User satisfaction scores (CSAT, NPS)
+ - Feedback-based model comparison
+ - Root cause analysis for negative feedback
+
+ ---
+
+ ### v0.6.0 - Advanced Agent Observability (Q4 2026)
+
+ **🎯 Goal:** Deep visibility into complex multi-agent systems
+
+ **Multi-Agent Tracing**
+ - **Agent Workflow Visualization**
+   - Agent collaboration graphs
+   - Communication pattern analysis
+   - Handoff tracking and optimization
+   - Deadlock and bottleneck detection
+
+ - **Agent Performance Metrics**
+   - Per-agent success rates
+   - Agent utilization and load balancing
+   - Task completion times
+   - Agent-to-agent latency
+
+ - **Advanced Agent Patterns**
+   - Hierarchical agent systems
+   - Swarm intelligence monitoring
+   - Autonomous agent chains
+   - Agent memory and state tracking
+
+ **Tool & Function Calling**
+ - Tool invocation traces
+ - Tool success/failure rates
+ - Tool latency and cost
+ - Tool chain optimization
+ - Error propagation analysis
+
+ ---
+
+ ### v0.7.0 - Custom Evaluators & Extensibility (Q1 2027)
+
+ **🎯 Goal:** Flexible evaluation framework for any use case
+
+ **Custom Evaluator Framework**
+ - **SDK for Custom Metrics**
+   - Python decorator-based evaluators
+   - Async evaluation support
+   - Batch evaluation APIs
+   - Streaming evaluation
+
+ - **Evaluator Marketplace**
+   - Community-contributed evaluators
+   - Domain-specific evaluators (medical, legal, finance)
+   - Language-specific evaluators
+   - Industry benchmark evaluators
+
+ **Evaluation Orchestration**
+ - Parallel evaluation execution
+ - Conditional evaluation chains
+ - Evaluation result caching
+ - Scheduled batch evaluations
+ - Integration with CI/CD pipelines
+
+ **Pre-built Evaluator Library**
+ - Answer correctness (exact match, F1, BLEU, ROUGE; see the sketch after this list)
+ - Semantic similarity (embeddings-based)
+ - Code execution evaluators
+ - SQL query validation
+ - JSON schema validation
+ - Regex pattern matching
+ - Custom business rule evaluators
+
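+ For the answer-correctness bullet, exact match and token-level F1 are the classic reference formulas (BLEU/ROUGE would typically come from existing libraries). A small worked sketch, independent of any evaluator API in this package:
+
+ ```python
+ def exact_match(prediction: str, reference: str) -> float:
+     return float(prediction.strip().lower() == reference.strip().lower())
+
+ def token_f1(prediction: str, reference: str) -> float:
+     """Token-overlap F1 as used in extractive QA evaluation."""
+     pred, ref = prediction.lower().split(), reference.lower().split()
+     common = sum(min(pred.count(t), ref.count(t)) for t in set(pred) & set(ref))
+     if common == 0:
+         return 0.0
+     precision, recall = common / len(pred), common / len(ref)
+     return 2 * precision * recall / (precision + recall)
+
+ print(exact_match("Paris", "paris"))                                       # 1.0
+ print(token_f1("the capital is Paris", "Paris is the capital of France"))  # 0.8
+ ```
+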
+ ---
+
+ ### v0.8.0 - Multi-Modal & Advanced Models (Q2 2027)
+
+ **🎯 Goal:** Support for next-generation AI capabilities
+
+ **Multi-Modal Observability**
+ - **Vision Models (GPT-4V, Claude 3, Gemini Vision)**
+   - Image input/output tracking
+   - Image quality metrics
+   - OCR accuracy monitoring
+   - Visual question answering evaluation
+
+ - **Audio Models (Whisper, ElevenLabs, etc.)**
+   - Audio transcription accuracy
+   - Speech synthesis quality
+   - Audio processing latency
+   - WER (Word Error Rate) tracking (see the sketch after this list)
+
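+ WER is the standard edit-distance-based rate: substitutions plus deletions plus insertions, divided by the number of reference words. A small worked sketch, not tied to any transcription API:
+
+ ```python
+ def word_error_rate(reference: str, hypothesis: str) -> float:
+     """WER = (substitutions + deletions + insertions) / number of reference words."""
+     ref, hyp = reference.split(), hypothesis.split()
+     # Word-level Levenshtein distance via dynamic programming.
+     dist = [[0] * (len(hyp) + 1) for _ in range(len(ref) + 1)]
+     for i in range(len(ref) + 1):
+         dist[i][0] = i
+     for j in range(len(hyp) + 1):
+         dist[0][j] = j
+     for i in range(1, len(ref) + 1):
+         for j in range(1, len(hyp) + 1):
+             cost = 0 if ref[i - 1] == hyp[j - 1] else 1
+             dist[i][j] = min(dist[i - 1][j] + 1,         # deletion
+                              dist[i][j - 1] + 1,         # insertion
+                              dist[i - 1][j - 1] + cost)  # substitution
+     return dist[len(ref)][len(hyp)] / len(ref)
+
+ print(word_error_rate("turn the volume up", "turn volume up please"))  # 2/4 = 0.5
+ ```
+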
+ - **Video Models**
+   - Video understanding metrics
+   - Frame-by-frame analysis
+   - Video generation monitoring
+
+ **Advanced Model Types**
+ - **Code Generation Models (Codex, CodeLlama)**
+   - Code syntax validation
+   - Execution success rates
+   - Security vulnerability detection
+   - Code quality metrics
+
+ - **Reasoning Models (o1, o3)**
+   - Reasoning step tracking
+   - Logical consistency checking
+   - Multi-hop reasoning evaluation
+
+ ---
+
+ ### v0.9.0 - Production Debugging & Optimization (Q3 2027)
+
+ **🎯 Goal:** Powerful tools for production issue resolution
+
+ **Trace Replay & Debugging**
+ - **Replay Capabilities**
+   - Request replay from traces
+   - Environment reconstruction
+   - Deterministic replay for debugging
+   - Step-by-step execution debugging
+
+ - **Issue Reproduction**
+   - One-click issue reproduction
+   - Local environment setup from trace
+   - Integration with IDEs (VS Code, PyCharm)
+
+ **Performance Optimization**
+ - **Caching Layer Monitoring**
+   - Semantic caching effectiveness
+   - Cache hit/miss ratios
+   - Cache invalidation patterns
+   - LRU/LFU cache optimization
+
+ - **Token Optimization**
+   - Automatic prompt compression suggestions
+   - Redundancy detection
+   - Context pruning recommendations
+   - Cost vs quality trade-offs
+
+ - **Latency Optimization**
+   - Bottleneck identification
+   - Parallel execution opportunities
+   - Streaming optimization
+   - Model selection recommendations
+
+ ---
+
+ ### v1.0.0 - Enterprise & Governance (Q4 2027)
+
+ **🎯 Goal:** Enterprise-ready platform with compliance and governance
+
+ **Enterprise Features**
+ - **Multi-Tenancy**
+   - Tenant isolation
+   - Resource quotas and limits
+   - Tenant-specific configurations
+   - Cross-tenant analytics (with permissions)
+
+ - **Access Control**
+   - Role-based access control (RBAC)
+   - Attribute-based access control (ABAC)
+   - API key management
+   - SSO/SAML integration
+   - Audit logging
+
+ **Compliance & Governance**
+ - **Audit & Compliance**
+   - Complete audit trails
+   - Compliance reporting (SOC 2, GDPR, HIPAA)
+   - Data retention policies
+   - Right to deletion (GDPR Article 17)
+   - Data lineage tracking
+
+ - **Policy Enforcement**
+   - Custom policy rules
+   - Automated policy violation detection
+   - Remediation workflows
+   - Compliance dashboards
+
+ **SLA & Reliability**
+ - SLA monitoring and alerting
+ - Uptime tracking
+ - Error budget management (see the worked example after this list)
+ - Incident management integration
+ - On-call scheduling integration
+
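+ The error-budget arithmetic behind that bullet is simple: the budget is the allowed failure fraction (1 - SLO) over the measurement window. A quick worked example:
+
+ ```python
+ def error_budget_minutes(slo: float, window_days: int = 30) -> float:
+     """Allowed downtime for an availability SLO over a rolling window."""
+     return (1 - slo) * window_days * 24 * 60
+
+ print(error_budget_minutes(0.999))   # ≈ 43.2 minutes per 30 days
+ print(error_budget_minutes(0.9995))  # ≈ 21.6 minutes per 30 days
+ ```
+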
+ ---
+
+ ## Community & Contributions
+
+ We're building the future of LLM observability together! 🚀
+
+ **How to Influence the Roadmap:**
+ - 🌟 Star us on GitHub to show support
+ - 💬 Join discussions on feature prioritization
+ - 🐛 Report bugs and request features via Issues
+ - 🔧 Contribute code via Pull Requests
+ - 📖 Improve documentation and examples
+ - 🎤 Share your use cases and feedback
+
+ **Priority is determined by:**
+ 1. Community feedback and votes (👍 reactions on issues)
+ 2. Industry trends and adoption
+ 3. Integration partnerships
+ 4. Security and compliance requirements
+ 5. Developer experience improvements
+
+ See [Contributing.md](Contributing.md) for detailed contribution guidelines.
+
+ **Join our Community:**
+ - GitHub Discussions: [Share ideas and questions]
+ - Discord: [Coming soon - Real-time chat]
+ - Twitter/X: [@genai_otel]
+ - Blog: [Technical deep-dives and updates]
+
+ ---
+
+ ## License
+
+ TraceVerde is licensed under the GNU Affero General Public License v3.0 or later (AGPL-3.0-or-later).
+
+ Copyright (C) 2025 Kshitij Thakkar
+
+ This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+
+ See the [LICENSE](LICENSE) file for the full license text.