cortexhub-0.1.0-py3-none-any.whl

cortexhub/telemetry/otel.py ADDED
@@ -0,0 +1,481 @@
+ """OpenTelemetry-based telemetry for CortexHub.
+
+ This module implements the OTLP-based telemetry as specified in TELEMETRY_OTEL_DESIGN.md.
+
+ Key features:
+ - OTel spans instead of custom events
+ - BatchSpanProcessor for batching, retry, backpressure
+ - OTLP/HTTP (JSON) transport
+ - Guardrail findings as span events
+ - Privacy mode support
+
+ Usage:
+     from cortexhub.telemetry.otel import OTelTelemetry
+
+     telemetry = OTelTelemetry(
+         agent_id="customer_support",
+         api_key="chk_...",
+         backend_url="https://api.cortexhub.io",
+         privacy=True,
+     )
+
+     # Create span for tool call
+     with telemetry.trace_tool_call("process_refund", args={"amount": 75}) as span:
+         result = tool.invoke(args)
+         telemetry.record_tool_result(span, success=True, result=result)
+ """
+
+ import json
+ import os
+ import uuid
+ from datetime import datetime, timezone
+ from typing import Any
+ from contextlib import contextmanager
+
+ import structlog
+
+ from opentelemetry import trace
+ from opentelemetry.sdk.trace import TracerProvider
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor
+ from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
+ from opentelemetry.sdk.resources import Resource
+ from opentelemetry.trace import Span, Status, StatusCode, SpanKind
+
+ from cortexhub.version import __version__
+
+ logger = structlog.get_logger(__name__)
+
+
+ class OTelTelemetry:
+     """OpenTelemetry-based telemetry for CortexHub.
+
+     This class manages OTel tracing with proper batching, retry, and backpressure.
+     """
+
+     def __init__(
+         self,
+         agent_id: str,
+         api_key: str | None = None,
+         backend_url: str = "https://api.cortexhub.io",
+         privacy: bool = True,
+         session_id: str | None = None,
+     ):
+         """Initialize OTel telemetry.
+
+         Args:
+             agent_id: Unique identifier for this agent
+             api_key: CortexHub API key (read from CORTEXHUB_API_KEY if not provided)
+             backend_url: CortexHub backend URL
+             privacy: If True (default), no raw data is sent. If False, raw data is included.
+             session_id: Session identifier (auto-generated if not provided)
+         """
+         self.agent_id = agent_id
+         self.api_key = api_key or os.getenv("CORTEXHUB_API_KEY")
+         self.backend_url = backend_url.rstrip("/")
+         self.privacy = privacy
+         self.session_id = session_id or self._generate_session_id()
+
+         # Create resource with agent/project metadata
+         self.resource = Resource.create({
+             "service.name": "cortexhub-sdk",
+             "service.version": __version__,
+             "cortexhub.agent.id": agent_id,
+             "cortexhub.privacy.mode": "enabled" if privacy else "disabled",
+         })
+
+         # Create tracer provider
+         self.provider = TracerProvider(resource=self.resource)
+
+         # Configure OTLP exporter if an API key is provided
+         if self.api_key:
+             exporter = OTLPSpanExporter(
+                 endpoint=f"{self.backend_url}/v1/traces",
+                 headers={"X-API-Key": self.api_key},
+             )
+
+             # Add batch processor (handles queuing, retry, and backpressure)
+             processor = BatchSpanProcessor(
+                 exporter,
+                 max_queue_size=2048,  # Max spans held in the queue
+                 max_export_batch_size=1,  # Export spans one at a time (near real-time, at the cost of batching)
+                 schedule_delay_millis=250,  # Near real-time export
+                 export_timeout_millis=30000,  # 30 second timeout
+             )
+             self.provider.add_span_processor(processor)
+
+             logger.info(
+                 "OTel telemetry initialized with backend export",
+                 backend_url=self.backend_url,
+                 agent_id=agent_id,
+                 privacy="enabled" if privacy else "DISABLED",
+             )
+         else:
+             logger.info(
+                 "OTel telemetry initialized (local only - no API key)",
+                 agent_id=agent_id,
+             )
+
+         # Set as global tracer provider
+         trace.set_tracer_provider(self.provider)
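+         # Note: trace.set_tracer_provider() only honors the first provider
+         # installed in a process; the OTel SDK warns on and ignores later
+         # overrides, so construct a single OTelTelemetry instance per process.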
+
+         # Get tracer
+         self.tracer = trace.get_tracer("cortexhub", __version__)
+
+         if not privacy:
+             logger.warning(
+                 "⚠️ PRIVACY MODE DISABLED - Raw inputs/outputs will be sent to backend",
+                 warning="DO NOT USE IN PRODUCTION",
+                 use_case="Testing policies, redaction, and approval workflows",
+             )
+
+     def _generate_session_id(self) -> str:
+         """Generate a unique session ID."""
+         timestamp = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S")
+         random_suffix = uuid.uuid4().hex[:8]
+         return f"{timestamp}-{random_suffix}"
+
+     @contextmanager
+     def trace_tool_call(
+         self,
+         tool_name: str,
+         tool_description: str | None = None,
+         arg_names: list[str] | None = None,
+         args: dict | None = None,
+         framework: str = "unknown",
+     ):
+         """Start a span for a tool call.
+
+         Usage:
+             with telemetry.trace_tool_call("process_refund", args={"amount": 75}) as span:
+                 result = tool.invoke(args)
+                 telemetry.record_tool_result(span, success=True, result=result)
+
+         Args:
+             tool_name: Name of the tool being invoked
+             tool_description: Human-readable description of the tool
+             arg_names: List of argument names (extracted from args if not provided)
+             args: Tool arguments (only sent if privacy=False)
+             framework: Framework name (langchain, openai_agents, etc.)
+
+         Yields:
+             OTel Span for the tool call
+         """
+         span = self.tracer.start_span(
+             name="tool.invoke",
+             kind=SpanKind.INTERNAL,
+         )
+
+         try:
+             # Set standard attributes
+             span.set_attribute("cortexhub.session.id", self.session_id)
+             span.set_attribute("cortexhub.agent.id", self.agent_id)
+             span.set_attribute("cortexhub.tool.name", tool_name)
+             span.set_attribute("cortexhub.tool.framework", framework)
+
+             if tool_description:
+                 span.set_attribute("cortexhub.tool.description", tool_description)
+
+             # Extract arg names if not provided
+             if arg_names is None and args:
+                 arg_names = list(args.keys())
+
+             if arg_names:
+                 span.set_attribute("cortexhub.tool.arg_names", arg_names)
+
+             # Raw data only if privacy disabled
+             if not self.privacy and args:
+                 span.set_attribute("cortexhub.raw.args", json.dumps(args, default=str))
+
+             yield span
+
+         except Exception as e:
+             span.set_status(Status(StatusCode.ERROR, str(e)))
+             span.set_attribute("cortexhub.error.message", str(e))
+             raise
+         finally:
+             span.end()
+
+     @contextmanager
+     def trace_llm_call(
+         self,
+         model: str,
+         prompt: str | None = None,
+     ):
+         """Start a span for an LLM call.
+
+         Usage:
+             with telemetry.trace_llm_call("gpt-4o-mini", prompt=messages) as span:
+                 response = llm.invoke(messages)
+                 telemetry.record_llm_result(span, response=response.content)
+
+         Args:
+             model: Model name (e.g., "gpt-4o-mini")
+             prompt: Prompt content (only for guardrail checking, sent only if privacy=False)
+
+         Yields:
+             OTel Span for the LLM call
+         """
+         span = self.tracer.start_span(
+             name="llm.call",
+             kind=SpanKind.CLIENT,
+         )
+
+         try:
+             # Set standard attributes (following gen_ai.* conventions)
+             span.set_attribute("cortexhub.session.id", self.session_id)
+             span.set_attribute("cortexhub.agent.id", self.agent_id)
+             span.set_attribute("gen_ai.request.model", model)
+
+             # Raw data only if privacy disabled
+             if not self.privacy and prompt:
+                 span.set_attribute("cortexhub.raw.prompt", prompt)
+
+             yield span
+
+         except Exception as e:
+             span.set_status(Status(StatusCode.ERROR, str(e)))
+             span.set_attribute("cortexhub.error.message", str(e))
+             raise
+         finally:
+             span.end()
+
+     def record_tool_result(
+         self,
+         span: Span,
+         success: bool,
+         result: Any = None,
+         error: str | None = None,
+         latency_ms: float | None = None,
+     ) -> None:
+         """Record the result of a tool call.
+
+         Args:
+             span: The span to record the result on
+             success: Whether the tool call succeeded
+             result: Tool result (only sent if privacy=False)
+             error: Error message if failed
+             latency_ms: Execution latency in milliseconds
+         """
+         span.set_attribute("cortexhub.result.success", success)
+
+         if latency_ms is not None:
+             span.set_attribute("cortexhub.latency_ms", latency_ms)
+
+         if success:
+             span.set_status(Status(StatusCode.OK))
+             if not self.privacy and result is not None:
+                 span.set_attribute("cortexhub.raw.result", json.dumps(result, default=str))
+         else:
+             span.set_status(Status(StatusCode.ERROR, error or "Unknown error"))
+             if error:
+                 span.set_attribute("cortexhub.error.message", error)
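+
+     # Example (illustrative caller): timing a tool call with time.perf_counter()
+     # and recording the outcome on the active span:
+     #
+     #     start = time.perf_counter()
+     #     with telemetry.trace_tool_call("process_refund", args=args) as span:
+     #         result = tool.invoke(args)
+     #         telemetry.record_tool_result(
+     #             span,
+     #             success=True,
+     #             result=result,
+     #             latency_ms=(time.perf_counter() - start) * 1000,
+     #         )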
+
+     def record_llm_result(
+         self,
+         span: Span,
+         response: str | None = None,
+         prompt_tokens: int | None = None,
+         completion_tokens: int | None = None,
+         latency_ms: float | None = None,
+     ) -> None:
+         """Record the result of an LLM call.
+
+         Args:
+             span: The span to record the result on
+             response: LLM response content (only sent if privacy=False)
+             prompt_tokens: Number of prompt tokens
+             completion_tokens: Number of completion tokens
+             latency_ms: LLM call latency in milliseconds
+         """
+         span.set_status(Status(StatusCode.OK))
+
+         if prompt_tokens is not None:
+             span.set_attribute("gen_ai.usage.prompt_tokens", prompt_tokens)
+         if completion_tokens is not None:
+             span.set_attribute("gen_ai.usage.completion_tokens", completion_tokens)
+         if latency_ms is not None:
+             span.set_attribute("cortexhub.latency_ms", latency_ms)
+
+         if not self.privacy and response:
+             span.set_attribute("cortexhub.raw.response", response)
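+
+     # Example (illustrative, assuming an OpenAI-style client whose response
+     # exposes usage.prompt_tokens / usage.completion_tokens):
+     #
+     #     with telemetry.trace_llm_call("gpt-4o-mini", prompt=prompt) as span:
+     #         response = client.chat.completions.create(model="gpt-4o-mini", messages=messages)
+     #         telemetry.record_llm_result(
+     #             span,
+     #             response=response.choices[0].message.content,
+     #             prompt_tokens=response.usage.prompt_tokens,
+     #             completion_tokens=response.usage.completion_tokens,
+     #         )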
+
+     def add_guardrail_event(
+         self,
+         span: Span,
+         event_name: str,
+         attributes: dict[str, Any],
+     ) -> None:
+         """Add a guardrail finding as a span event.
+
+         Args:
+             span: The span to add the event to
+             event_name: Event name (e.g., "guardrail.pii_in_prompt")
+             attributes: Event attributes
+         """
+         span.add_event(event_name, attributes=attributes)
+
+     def add_pii_finding(
+         self,
+         span: Span,
+         content_type: str,  # "prompt" or "response"
+         pii_types: list[str],
+         count: int,
+         counts_per_type: dict[str, int] | None = None,
+     ) -> None:
+         """Add a PII detection finding as a span event.
+
+         Args:
+             span: The span to add the event to
+             content_type: Where PII was found ("prompt" or "response")
+             pii_types: List of unique PII types detected
+             count: Total number of PII instances found
+             counts_per_type: Optional dict mapping type to count (e.g., {"email_address": 3})
+         """
+         attributes: dict[str, Any] = {
+             "pii.detected": True,
+             "pii.count": count,
+             "pii.types": pii_types,
+         }
+
+         # Add per-type counts for backend aggregation
+         if counts_per_type:
+             # Store as JSON string since OTLP attributes don't support nested objects
+             attributes["pii.counts_per_type"] = json.dumps(counts_per_type)
+
+         span.add_event(f"guardrail.pii_in_{content_type}", attributes=attributes)
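+
+     # Example (illustrative): two email addresses and one phone number found
+     # in a prompt are recorded as a single "guardrail.pii_in_prompt" event:
+     #
+     #     telemetry.add_pii_finding(
+     #         span,
+     #         content_type="prompt",
+     #         pii_types=["email_address", "phone_number"],
+     #         count=3,
+     #         counts_per_type={"email_address": 2, "phone_number": 1},
+     #     )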
+
+     def add_secrets_finding(
+         self,
+         span: Span,
+         content_type: str,  # "prompt" or "response"
+         secret_types: list[str],
+         count: int,
+         counts_per_type: dict[str, int] | None = None,
+     ) -> None:
+         """Add a secrets detection finding as a span event.
+
+         Args:
+             span: The span to add the event to
+             content_type: Where secrets were found ("prompt" or "response")
+             secret_types: List of unique secret types detected
+             count: Total number of secrets found
+             counts_per_type: Optional dict mapping type to count (e.g., {"api_key": 2})
+         """
+         attributes: dict[str, Any] = {
+             "secrets.detected": True,
+             "secrets.count": count,
+             "secrets.types": secret_types,
+         }
+
+         # Add per-type counts for backend aggregation
+         if counts_per_type:
+             attributes["secrets.counts_per_type"] = json.dumps(counts_per_type)
+
+         span.add_event(f"guardrail.secrets_in_{content_type}", attributes=attributes)
+
+     def add_prompt_manipulation_finding(
+         self,
+         span: Span,
+         patterns: list[str],
+     ) -> None:
+         """Add a prompt manipulation detection finding as a span event.
+
+         Args:
+             span: The span to add the event to
+             patterns: List of manipulation patterns detected
+         """
+         span.add_event(
+             "guardrail.prompt_manipulation",
+             attributes={
+                 "manipulation.detected": True,
+                 "manipulation.patterns": patterns,
+             },
+         )
+
+     def add_policy_decision(
+         self,
+         span: Span,
+         effect: str,
+         policy_id: str | None = None,
+         reasoning: str | None = None,
+     ) -> None:
+         """Add a policy decision as a span event.
+
+         Args:
+             span: The span to add the event to
+             effect: Policy effect ("allow", "deny", "escalate")
+             policy_id: ID of the policy that matched
+             reasoning: Explanation for the decision
+         """
+         attributes: dict[str, Any] = {
+             "policy.effect": effect,
+         }
+         if policy_id:
+             attributes["policy.id"] = policy_id
+         if reasoning:
+             attributes["policy.reasoning"] = reasoning
+
+         span.add_event("policy.decision", attributes=attributes)
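+
+     # Example (illustrative; "refund_limit" is a hypothetical policy ID):
+     #
+     #     telemetry.add_policy_decision(
+     #         span,
+     #         effect="deny",
+     #         policy_id="refund_limit",
+     #         reasoning="Refund amount exceeds the configured limit",
+     #     )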
+
+     def shutdown(self) -> None:
+         """Flush pending spans and shut down."""
+         if hasattr(self.provider, "shutdown"):
+             self.provider.shutdown()
+         logger.info("OTel telemetry shutdown complete")
+
+     def force_flush(self, timeout_millis: int = 30000) -> bool:
+         """Force-flush all pending spans.
+
+         Args:
+             timeout_millis: Timeout in milliseconds
+
+         Returns:
+             True if the flush succeeded, False otherwise
+         """
+         if hasattr(self.provider, "force_flush"):
+             return self.provider.force_flush(timeout_millis)
+         return True
+
+
+ # Singleton instance for easy access
+ _telemetry_instance: OTelTelemetry | None = None
+
+
+ def init_telemetry(
+     agent_id: str,
+     api_key: str | None = None,
+     backend_url: str = "https://api.cortexhub.io",
+     privacy: bool = True,
+ ) -> OTelTelemetry:
+     """Initialize the global OTel telemetry instance.
+
+     Args:
+         agent_id: Unique identifier for this agent
+         api_key: CortexHub API key
+         backend_url: CortexHub backend URL
+         privacy: If True (default), no raw data is sent
+
+     Returns:
+         OTelTelemetry instance
+     """
+     global _telemetry_instance
+     _telemetry_instance = OTelTelemetry(
+         agent_id=agent_id,
+         api_key=api_key,
+         backend_url=backend_url,
+         privacy=privacy,
+     )
+     return _telemetry_instance
+
+
+ def get_telemetry() -> OTelTelemetry | None:
+     """Get the global OTel telemetry instance."""
+     return _telemetry_instance
+
+
+ def shutdown_telemetry() -> None:
+     """Shut down the global OTel telemetry instance."""
+     global _telemetry_instance
+     if _telemetry_instance:
+         _telemetry_instance.shutdown()
+     _telemetry_instance = None
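+
+
+ # Typical lifecycle (illustrative):
+ #
+ #     init_telemetry(agent_id="customer_support", api_key="chk_...")
+ #     ...                   # run the agent; spans export in the background
+ #     shutdown_telemetry()  # flush remaining spans before process exit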
cortexhub/version.py ADDED
@@ -0,0 +1,3 @@
+ """Version information for CortexHub SDK."""
+
+ __version__ = "0.1.0"