kalibr 1.0.25__py3-none-any.whl → 1.1.2a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kalibr/__init__.py +170 -3
- kalibr/__main__.py +3 -203
- kalibr/capsule_middleware.py +108 -0
- kalibr/cli/__init__.py +5 -0
- kalibr/cli/capsule_cmd.py +174 -0
- kalibr/cli/deploy_cmd.py +114 -0
- kalibr/cli/main.py +67 -0
- kalibr/cli/run.py +203 -0
- kalibr/cli/serve.py +59 -0
- kalibr/client.py +293 -0
- kalibr/collector.py +173 -0
- kalibr/context.py +132 -0
- kalibr/cost_adapter.py +222 -0
- kalibr/decorators.py +140 -0
- kalibr/instrumentation/__init__.py +13 -0
- kalibr/instrumentation/anthropic_instr.py +282 -0
- kalibr/instrumentation/base.py +108 -0
- kalibr/instrumentation/google_instr.py +281 -0
- kalibr/instrumentation/openai_instr.py +265 -0
- kalibr/instrumentation/registry.py +153 -0
- kalibr/kalibr.py +144 -230
- kalibr/kalibr_app.py +53 -314
- kalibr/middleware/__init__.py +5 -0
- kalibr/middleware/auto_tracer.py +356 -0
- kalibr/models.py +41 -0
- kalibr/redaction.py +44 -0
- kalibr/schemas.py +116 -0
- kalibr/simple_tracer.py +258 -0
- kalibr/tokens.py +52 -0
- kalibr/trace_capsule.py +296 -0
- kalibr/trace_models.py +201 -0
- kalibr/tracer.py +354 -0
- kalibr/types.py +25 -93
- kalibr/utils.py +198 -0
- kalibr-1.1.2a0.dist-info/METADATA +236 -0
- kalibr-1.1.2a0.dist-info/RECORD +48 -0
- kalibr-1.1.2a0.dist-info/entry_points.txt +2 -0
- kalibr-1.1.2a0.dist-info/licenses/LICENSE +21 -0
- kalibr-1.1.2a0.dist-info/top_level.txt +4 -0
- kalibr_crewai/__init__.py +65 -0
- kalibr_crewai/callbacks.py +539 -0
- kalibr_crewai/instrumentor.py +513 -0
- kalibr_langchain/__init__.py +47 -0
- kalibr_langchain/async_callback.py +850 -0
- kalibr_langchain/callback.py +1064 -0
- kalibr_openai_agents/__init__.py +43 -0
- kalibr_openai_agents/processor.py +554 -0
- kalibr/deployment.py +0 -41
- kalibr/packager.py +0 -43
- kalibr/runtime_router.py +0 -138
- kalibr/schema_generators.py +0 -159
- kalibr/validator.py +0 -70
- kalibr-1.0.25.data/data/examples/README.md +0 -173
- kalibr-1.0.25.data/data/examples/basic_kalibr_example.py +0 -66
- kalibr-1.0.25.data/data/examples/enhanced_kalibr_example.py +0 -347
- kalibr-1.0.25.dist-info/METADATA +0 -231
- kalibr-1.0.25.dist-info/RECORD +0 -19
- kalibr-1.0.25.dist-info/entry_points.txt +0 -2
- kalibr-1.0.25.dist-info/licenses/LICENSE +0 -11
- kalibr-1.0.25.dist-info/top_level.txt +0 -1
- {kalibr-1.0.25.dist-info → kalibr-1.1.2a0.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Auto-Tracer Middleware
|
|
3
|
+
Automatically traces all requests through Kalibr runtime
|
|
4
|
+
Phase 3B - Runtime Host Integration
|
|
5
|
+
Phase 3D - Capsule Auto-Emission
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import atexit
|
|
9
|
+
import json
|
|
10
|
+
import os
|
|
11
|
+
import queue
|
|
12
|
+
import threading
|
|
13
|
+
import time
|
|
14
|
+
import uuid
|
|
15
|
+
from datetime import datetime, timezone
|
|
16
|
+
from typing import Any, Callable, Dict, List
|
|
17
|
+
|
|
18
|
+
import httpx
|
|
19
|
+
from starlette.middleware.base import BaseHTTPMiddleware
|
|
20
|
+
from starlette.requests import Request
|
|
21
|
+
from starlette.responses import Response
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class AutoTracerMiddleware(BaseHTTPMiddleware):
    """
    Middleware that automatically traces all requests.

    Features:
    - Captures every request/response
    - Generates trace events without @trace decorator
    - Batches events for efficient submission
    - Auto-flushes on shutdown
    - Context token propagation
    """

    # Endpoints that should never be traced (infra/health/docs).
    _SKIP_PATHS = frozenset({"/health", "/docs", "/openapi.json", "/redoc"})

    def __init__(
        self,
        app,
        agent_name: str = "unknown",
        runtime_id: str = None,
        context_token: str = None,
        collector_url: str = None,
        api_key: str = None,
        tenant_id: str = None,
        max_events: int = 100,
        flush_interval: int = 30,
    ):
        """Initialize the middleware.

        Args:
            app: The ASGI app being wrapped (passed to BaseHTTPMiddleware).
            agent_name: Logical name of the agent; used as model_id/model_name.
            runtime_id: Runtime identifier; falls back to KALIBR_RUNTIME_ID env
                var, then a fresh UUID.
            context_token: Context propagation token; falls back to
                KALIBR_CONTEXT_TOKEN env var, then a fresh UUID.
            collector_url: Ingest endpoint; falls back to KALIBR_COLLECTOR_URL,
                then a localhost default.
            api_key: Sent as X-API-Key; falls back to KALIBR_API_KEY.
            tenant_id: Tenant label; falls back to KALIBR_TENANT_ID, then "default".
            max_events: Buffer size that triggers a flush (KALIBR_MAX_EVENTS
                env var overrides).
            flush_interval: Seconds between background flushes
                (KALIBR_FLUSH_INTERVAL env var overrides).
        """
        super().__init__(app)

        # Runtime metadata
        self.agent_name = agent_name
        self.runtime_id = runtime_id or os.getenv("KALIBR_RUNTIME_ID", str(uuid.uuid4()))
        self.context_token = context_token or os.getenv("KALIBR_CONTEXT_TOKEN", str(uuid.uuid4()))

        # Collector config
        self.collector_url = collector_url or os.getenv(
            "KALIBR_COLLECTOR_URL", "http://localhost:8001/api/ingest"
        )
        self.api_key = api_key or os.getenv("KALIBR_API_KEY", "")
        self.tenant_id = tenant_id or os.getenv("KALIBR_TENANT_ID", "default")

        # Buffering config (environment variables override constructor args)
        self.max_events = int(os.getenv("KALIBR_MAX_EVENTS", max_events))
        self.flush_interval = int(os.getenv("KALIBR_FLUSH_INTERVAL", flush_interval))

        # Event buffer; the Queue is thread-safe, the lock guards the counters
        # and capsule aggregates below.
        self.events = queue.Queue()
        self.event_count = 0
        self.lock = threading.Lock()

        # Phase 3D: Capsule emission tracking (guarded by self.lock)
        self.capsule_events: List[Dict[str, Any]] = []
        self.total_cost = 0.0
        self.total_latency = 0
        self.last_capsule_emission = time.time()

        # Background flusher (handles both traces and capsules)
        self.flusher_thread = threading.Thread(target=self._background_flusher, daemon=True)
        self.flusher_thread.start()

        # Register shutdown handler so buffered data is not lost on exit
        atexit.register(self.flush_all)

        print(
            f"✅ AutoTracerMiddleware initialized: runtime_id={self.runtime_id}, context_token={self.context_token}"
        )

    @staticmethod
    def _iso_utc(dt: datetime) -> str:
        """Format an aware UTC datetime as ISO 8601 with a 'Z' suffix.

        Bug fix: the previous code appended "Z" to ``isoformat()`` output,
        which already ends in "+00:00" for aware UTC datetimes, producing
        malformed timestamps like ``2024-01-01T00:00:00+00:00Z``.
        """
        return dt.isoformat().replace("+00:00", "Z")

    async def dispatch(self, request: Request, call_next: Callable) -> Response:
        """
        Intercept and trace every request.
        Phase 3: Creates OpenTelemetry span for context propagation to SDK calls.

        The trace event is recorded in a ``finally`` block so that BOTH
        successful and failing requests are captured; exceptions are
        re-raised so the application's error handling is unaffected.
        """
        # Skip tracing for health/docs endpoints
        if request.url.path in self._SKIP_PATHS:
            return await call_next(request)

        # Generate trace metadata
        trace_id = str(uuid.uuid4())
        span_id = str(uuid.uuid4())
        start_time = time.time()
        ts_start = datetime.now(timezone.utc)

        # Extract parent context from headers (for chaining)
        parent_context_token = request.headers.get("X-Kalibr-Context-Token")
        parent_trace_id = request.headers.get("X-Kalibr-Trace-ID")

        # Phase 3: Create OpenTelemetry span for HTTP request.
        # This enables SDK calls within the request to be linked as child spans.
        # Imported locally to avoid a hard import-time dependency cycle.
        from kalibr.context import clear_otel_request_context, set_otel_request_context
        from opentelemetry import trace as otel_trace

        tracer = otel_trace.get_tracer("kalibr.http")

        with tracer.start_as_current_span(
            f"{request.method} {request.url.path}",
            kind=otel_trace.SpanKind.SERVER,
            attributes={
                "http.method": request.method,
                "http.url": str(request.url),
                "http.path": request.url.path,
                "kalibr.runtime_id": self.runtime_id,
                "kalibr.context_token": self.context_token,
                "kalibr.agent_name": self.agent_name,
            },
        ) as http_span:
            # Get OpenTelemetry trace/span IDs (hex-encoded per OTel convention)
            span_context = http_span.get_span_context()
            otel_trace_id = format(span_context.trace_id, "032x")
            otel_span_id = format(span_context.span_id, "016x")

            # Set context for SDK instrumentation to inherit
            set_otel_request_context(
                context_token=self.context_token, trace_id=otel_trace_id, span_id=otel_span_id
            )

            # Process request
            try:
                response = await call_next(request)
                status = "success"
                error_type = ""
                error_message = ""

                # Set HTTP span attributes
                http_span.set_attribute("http.status_code", response.status_code)

            except Exception as e:
                status = "error"
                error_type = type(e).__name__
                error_message = str(e)

                # Set error on HTTP span
                http_span.set_status(otel_trace.Status(otel_trace.StatusCode.ERROR))
                http_span.set_attribute("error.type", error_type)
                http_span.set_attribute("error.message", error_message)
                http_span.record_exception(e)

                # Re-raise to not swallow exceptions
                raise
            finally:
                # Clear context at end of request
                clear_otel_request_context()
                # Record the trace event for both success and error outcomes.
                self._record_event(
                    request=request,
                    trace_id=trace_id,
                    span_id=span_id,
                    parent_trace_id=parent_trace_id,
                    parent_context_token=parent_context_token,
                    ts_start=ts_start,
                    start_time=start_time,
                    status=status,
                    error_type=error_type,
                    error_message=error_message,
                )

            # Only reached when call_next did not raise.
            return response

    def _record_event(
        self,
        request: Request,
        trace_id: str,
        span_id: str,
        parent_trace_id: str,
        parent_context_token: str,
        ts_start: datetime,
        start_time: float,
        status: str,
        error_type: str,
        error_message: str,
    ) -> None:
        """Build a trace event, buffer it, and update capsule aggregates.

        Called from dispatch()'s ``finally`` block. Spawns a background
        flush when the buffer reaches ``max_events``.
        """
        # Calculate metrics
        end_time = time.time()
        duration_ms = int((end_time - start_time) * 1000)
        ts_end = datetime.now(timezone.utc)

        # Create trace event
        event = {
            "schema_version": "1.0",
            "trace_id": trace_id,
            "span_id": span_id,
            "parent_id": parent_trace_id or "",
            "tenant_id": self.tenant_id,
            "ts_start": self._iso_utc(ts_start),
            "ts_end": self._iso_utc(ts_end),
            "timestamp": self._iso_utc(ts_end),
            "environment": os.getenv("KALIBR_ENVIRONMENT", "production"),
            "runtime_env": "kalibr_auto_tracer",
            "provider": "runtime",
            "model_id": self.agent_name,
            "model_name": self.agent_name,
            "operation": request.method.lower(),
            "endpoint": request.url.path,
            "duration_ms": duration_ms,
            "latency_ms": duration_ms,
            # HTTP-level events carry no token/cost data themselves.
            "input_tokens": 0,
            "output_tokens": 0,
            "total_tokens": 0,
            "cost_usd": 0.0,
            "cost_est_usd": 0.0,
            "status": status,
            "error_type": error_type,
            "error_message": error_message,
            # Phase 3 metadata
            "runtime_id": self.runtime_id,
            "context_token": self.context_token,
            "parent_context_token": parent_context_token or "",
        }

        # Add to buffer
        self.events.put(event)

        # Counters and capsule aggregates share self.lock so concurrent
        # requests cannot corrupt them.
        with self.lock:
            self.event_count += 1

            # Phase 3D: Add to capsule tracking
            self.capsule_events.append(
                {
                    "trace_id": trace_id,
                    "operation": request.method.lower(),
                    "endpoint": request.url.path,
                    "duration_ms": duration_ms,
                    "cost_usd": event["cost_est_usd"],
                    "status": status,
                    "timestamp": self._iso_utc(ts_end),
                }
            )
            self.total_cost += event["cost_est_usd"]
            self.total_latency += duration_ms

            flush_needed = self.event_count >= self.max_events

        # Check if flush needed; run off-thread so the request isn't blocked.
        if flush_needed:
            threading.Thread(target=self.flush_all, daemon=True).start()

    def _background_flusher(self):
        """
        Background thread that flushes events and capsules periodically.
        Phase 3D: Dual trigger - interval OR count
        """
        while True:
            time.sleep(self.flush_interval)

            # Check if flush needed (interval OR count)
            should_flush = False
            with self.lock:
                time_since_last = time.time() - self.last_capsule_emission
                if time_since_last >= self.flush_interval or self.event_count >= self.max_events:
                    should_flush = True

            if should_flush:
                self.flush_all()

    def flush_events(self):
        """
        Flush buffered trace events to collector.

        The queue is drained while holding the lock, but the HTTP POST is
        performed OUTSIDE the lock so a slow collector cannot stall request
        handling. On failure the events are re-queued (and the counter
        restored) for the next flush attempt.
        """
        with self.lock:
            if self.event_count == 0:
                return
            events_to_send = []
            while not self.events.empty():
                try:
                    events_to_send.append(self.events.get_nowait())
                except queue.Empty:
                    break
            # Reset counter now; restored below if the send fails.
            self.event_count = 0

        if not events_to_send:
            return

        # Send to collector
        try:
            # Send as JSON dict instead of NDJSON string.
            # Backend expects: {"events": [event_dict]}
            payload = {"events": events_to_send}

            with httpx.Client(timeout=10.0) as client:
                response = client.post(
                    self.collector_url,
                    json=payload,  # Sends as JSON object, not string
                    headers={
                        "X-API-Key": self.api_key,
                        "Content-Type": "application/json",
                    },
                )
                response.raise_for_status()

            print(f"✅ Flushed {len(events_to_send)} trace events to collector")

        except Exception as e:
            # Best-effort telemetry: never let a flush failure break the app.
            print(f"⚠️ Failed to flush events: {e}")
            # Re-queue events for retry
            for event in events_to_send:
                self.events.put(event)
            with self.lock:
                self.event_count += len(events_to_send)

    def emit_capsule(self):
        """
        Phase 3D: Emit accumulated traces as a capsule.
        Auto-posts to /api/ingest/capsule with aggregated metrics.

        The capsule snapshot is taken (and counters reset) under the lock;
        the HTTP POST happens outside it.
        """
        with self.lock:
            if not self.capsule_events:
                return

            # Build capsule payload
            capsule = {
                "trace_id": self.capsule_events[0]["trace_id"],
                "runtime_id": self.runtime_id,
                "agent_name": self.agent_name,
                "context_token": self.context_token,
                "timestamp": self._iso_utc(datetime.now(timezone.utc)),
                "aggregate_cost_usd": round(self.total_cost, 6),
                "aggregate_latency_ms": self.total_latency,
                "last_n_hops": self.capsule_events[-5:],  # Last 5 hops
                "tenant_id": self.tenant_id,
                "metadata": {
                    "runtime_provider": "local",
                    "total_events": len(self.capsule_events),
                    "emission_reason": "auto_flush",
                },
            }

            # Reset capsule tracking
            events_count = len(self.capsule_events)
            self.capsule_events = []
            self.total_cost = 0.0
            self.total_latency = 0
            self.last_capsule_emission = time.time()

        # Send capsule to backend (best-effort; failures are logged only)
        try:
            capsule_url = self.collector_url.replace("/api/ingest", "/api/ingest/capsule")

            with httpx.Client(timeout=10.0) as client:
                response = client.post(
                    capsule_url,
                    json=capsule,
                    headers={
                        "X-API-Key": self.api_key,
                        "Content-Type": "application/json",
                    },
                )
                response.raise_for_status()

            print(
                f"📦 Emitted capsule: {events_count} events, cost=${capsule['aggregate_cost_usd']:.6f}, latency={capsule['aggregate_latency_ms']}ms"
            )

        except Exception as e:
            print(f"⚠️ Failed to emit capsule: {e}")

    def flush_all(self):
        """
        Phase 3D: Flush both trace events and emit capsule.
        Called on shutdown or when thresholds reached.
        """
        # Flush individual trace events first
        self.flush_events()

        # Then emit capsule
        self.emit_capsule()
|
kalibr/models.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Data models for Kalibr SDK."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Any, Dict, Optional
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class TraceConfig:
    """Configuration for tracing.

    Describes how a single traced call should be labelled. All fields are
    optional; unset fields fall back to generic defaults.
    """

    operation: str = "model_call"  # logical operation name for the trace
    endpoint: Optional[str] = None  # endpoint being called, if known
    provider: Optional[str] = None  # provider label (presumably vendor name — confirm against callers)
    model_id: Optional[str] = None  # identifier of the model invoked, if known
    environment: str = "prod"  # deployment environment label
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class EventData:
    """Event data structure.

    Flat record describing one traced call: identity (trace/span/parent),
    tenancy, timing, provider/model labels, token counts, and content
    hashes. All fields have defaults so partially-populated events can be
    constructed incrementally.
    """

    schema_version: str = "1.0"  # version of this event schema
    trace_id: str = ""  # identifier shared by all spans of one trace
    span_id: str = ""  # identifier of this individual span
    parent_id: Optional[str] = None  # parent span id, if this span has one
    tenant_id: str = ""  # owning tenant
    environment: str = "prod"  # deployment environment label
    ts_start: Optional[datetime] = None  # call start time
    ts_end: Optional[datetime] = None  # call end time
    endpoint: str = ""  # endpoint that was called
    operation: str = "model_call"  # logical operation name
    provider: str = "unknown"  # provider label
    model_id: str = "unknown"  # model identifier
    input_tokens: int = 0  # tokens consumed by the prompt
    output_tokens: int = 0  # tokens produced by the response
    duration_ms: int = 0  # wall-clock duration in milliseconds
    status: str = "200"  # outcome code (HTTP-style string)
    prompt_hash: str = ""  # hash of the (redacted) prompt
    response_hash: str = ""  # hash of the (redacted) response
    metadata: Dict[str, Any] = field(default_factory=dict)  # free-form extras
|
kalibr/redaction.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Client-side redaction and hashing."""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import re
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
# PII patterns: (regex, replacement placeholder) pairs applied in order.
# NOTE: these are heuristics and may over-match (e.g. the IP pattern also
# matches dotted version strings).
PII_PATTERNS = [
    # Fixed: the TLD class was previously "[A-Z|a-z]{2,}" — inside a
    # character class "|" is a literal pipe, not alternation, so strings
    # like "user@host.|x" incorrectly matched as emails.
    (r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b", "[EMAIL]"),
    (r"\b\d{3}[-.]?\d{3}[-.]?\d{4}\b", "[PHONE]"),
    (r"\b\d{3}-\d{2}-\d{4}\b", "[SSN]"),
    (r"\b(?:\d{4}[-\s]?){3}\d{4}\b", "[CREDIT_CARD]"),
    (r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b", "[IP_ADDRESS]"),
]

# Compiled once at import time; case-insensitive so mixed-case PII is caught.
COMPILED_PATTERNS = [(re.compile(p, re.IGNORECASE), repl) for p, repl in PII_PATTERNS]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def redact_text(text: str) -> str:
    """Replace recognized PII substrings in *text* with placeholder tags.

    Applies every compiled pattern in order; empty/None input is returned
    unchanged.
    """
    if not text:
        return text

    result = text
    for regex, tag in COMPILED_PATTERNS:
        result = regex.sub(tag, result)
    return result
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def hash_text(text: str) -> str:
    """Return the hex SHA-256 digest of *text*.

    Empty or falsy input maps to a fixed sentinel of 64 zeros rather than
    the hash of the empty string.
    """
    if text:
        return hashlib.sha256(text.encode("utf-8")).hexdigest()
    return "0" * 64
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def redact_and_hash(text: Optional[str]) -> str:
    """Redact PII from *text* and return the SHA-256 hex digest of the result.

    Falsy input short-circuits to the 64-zero sentinel used by hash_text.
    """
    if not text:
        return "0" * 64
    return hash_text(redact_text(text))
|
kalibr/schemas.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"""Schema generation for multiple AI model ecosystems"""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from typing import Any, Dict, List
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def get_base_url() -> str:
    """Auto-detect base URL based on environment.

    Precedence: explicit KALIBR_BASE_URL override, then Fly.io
    (FLY_APP_NAME), then Render (RENDER_EXTERNAL_URL), then localhost.
    """
    # Each entry: (env var to probe, template applied to its value).
    detectors = (
        ("KALIBR_BASE_URL", "{}"),          # custom override
        ("FLY_APP_NAME", "https://{}.fly.dev"),  # Fly.io app name -> URL
        ("RENDER_EXTERNAL_URL", "{}"),      # Render exposes a full URL
    )
    for env_var, template in detectors:
        value = os.getenv(env_var)
        if value:
            return template.format(value)

    # Default to localhost
    return "http://localhost:8000"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def generate_mcp_schema(actions: List[Dict[str, Any]], base_url: str) -> Dict[str, Any]:
    """Generate Claude MCP schema.

    Each action dict (keys: name, description, schema) becomes one MCP tool
    whose server URL points at this host's /proxy/<name> endpoint.
    """
    tools = [
        {
            "name": entry["name"],
            "description": entry["description"],
            "input_schema": entry["schema"],
            "server": {"url": f"{base_url}/proxy/{entry['name']}"},
        }
        for entry in actions
    ]

    return {"mcp": "1.0", "name": "kalibr-enhanced", "tools": tools}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def generate_gemini_schema(actions: List[Dict[str, Any]], base_url: str) -> Dict[str, Any]:
    """Generate Gemini Extensions schema.

    Each action dict (keys: name, description, schema) becomes one Gemini
    function entry with a server URL at this host's /proxy/<name> endpoint.
    """
    functions = [
        {
            "name": entry["name"],
            "description": entry["description"],
            "parameters": entry["schema"],
            "server": {"url": f"{base_url}/proxy/{entry['name']}"},
        }
        for entry in actions
    ]

    return {
        "gemini_extension": "1.0",
        "name": "kalibr_enhanced",
        "description": "Enhanced Kalibr API for Gemini integration",
        "functions": functions,
    }
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def generate_copilot_schema(actions: List[Dict[str, Any]], base_url: str) -> Dict[str, Any]:
    """Generate Copilot Plugins schema.

    Each action dict (keys: name, description, schema) becomes one POST API
    entry routed through this host's /proxy/<name> endpoint.
    """

    def _api_entry(action: Dict[str, Any]) -> Dict[str, Any]:
        # One POST endpoint description per action.
        return {
            "name": action["name"],
            "description": action["description"],
            "url": f"{base_url}/proxy/{action['name']}",
            "method": "POST",
            "request_schema": action["schema"],
            "response_schema": {"type": "object", "description": "API response"},
        }

    return {
        "schema_version": "v1",
        "name_for_model": "kalibr_enhanced",
        "name_for_human": "Enhanced Kalibr API",
        "description_for_model": "Enhanced Kalibr API with advanced capabilities",
        "description_for_human": "API for advanced AI model integrations",
        "auth": {"type": "none"},
        "api": {"type": "openapi", "url": f"{base_url}/openapi.json"},
        "apis": [_api_entry(action) for action in actions],
    }
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def get_supported_models() -> Dict[str, Any]:
    """Return list of supported AI models.

    The result is a static catalogue: one entry per ecosystem with the
    endpoint where its schema can be fetched and the schema format name.
    """
    # (display name, provider, schema endpoint, schema format)
    catalogue = (
        ("GPT Actions", "OpenAI", "/gpt-actions.json", "OpenAPI 3.1.0"),
        ("Claude MCP", "Anthropic", "/mcp.json", "MCP 1.0"),
        ("Gemini Extensions", "Google", "/schemas/gemini", "Gemini Extension 1.0"),
        ("Copilot Plugins", "Microsoft", "/schemas/copilot", "Copilot Plugin v1"),
    )
    return {
        "supported_models": [
            {
                "name": name,
                "provider": provider,
                "schema_endpoint": endpoint,
                "format": fmt,
            }
            for name, provider, endpoint, fmt in catalogue
        ],
        "version": "1.0.28",
    }
|