cerberus-django 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cerberus_django/__init__.py +13 -0
- cerberus_django/middleware.py +410 -0
- cerberus_django/structs.py +23 -0
- cerberus_django/utils.py +70 -0
- cerberus_django-0.1.4.dist-info/METADATA +299 -0
- cerberus_django-0.1.4.dist-info/RECORD +8 -0
- cerberus_django-0.1.4.dist-info/WHEEL +4 -0
- cerberus_django-0.1.4.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Cerberus Django - HTTP request metrics middleware
|
|
3
|
+
|
|
4
|
+
A Django middleware for capturing and streaming HTTP request metrics
|
|
5
|
+
to a backend analytics server via WebSocket.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .middleware import CerberusMiddleware
|
|
9
|
+
from .structs import CoreData
|
|
10
|
+
from .utils import hash_pii
|
|
11
|
+
|
|
12
|
+
__version__ = "0.1.4"
|
|
13
|
+
__all__ = ["CerberusMiddleware", "CoreData", "hash_pii", "__version__"]
|
|
@@ -0,0 +1,410 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Cerberus Django Middleware
|
|
3
|
+
|
|
4
|
+
Captures HTTP request metrics and sends them asynchronously to a backend
|
|
5
|
+
analytics server via WebSocket.
|
|
6
|
+
|
|
7
|
+
This middleware is designed to work in both WSGI (synchronous) and ASGI
|
|
8
|
+
(asynchronous) Django deployments without requiring an event loop at import time.
|
|
9
|
+
|
|
10
|
+
Architecture:
|
|
11
|
+
- Middleware (sync): Captures request data and puts it in a thread-safe queue
|
|
12
|
+
- Background thread: Runs its own event loop to process queue and send via WebSocket
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from .structs import CoreData
|
|
16
|
+
from .utils import fetch_secret_key
|
|
17
|
+
from django.conf import settings
|
|
18
|
+
import asyncio
|
|
19
|
+
import json
|
|
20
|
+
import os
|
|
21
|
+
import logging
|
|
22
|
+
import threading
|
|
23
|
+
import queue as thread_queue
|
|
24
|
+
from datetime import datetime, timezone
|
|
25
|
+
import websockets
|
|
26
|
+
|
|
27
|
+
# Module-level logger; handlers and levels are left to the host application.
logger = logging.getLogger(__name__)

# Verbose diagnostics are opt-in: set CERBERUS_DEBUG to true/1/yes
# (case-insensitive). The value is read once, at import time.
DEBUG_ENABLED = os.getenv('CERBERUS_DEBUG', 'false').lower() in ('true', '1', 'yes')

# Upper bound on buffered events. Without a bound the queue can grow without
# limit whenever events are produced faster than the background sender drains
# them, and put_nowait() could never raise queue.Full.
_EVENT_QUEUE_MAXSIZE = 10000

# Thread-safe queue for events (no event loop required at import time).
# Producers call put_nowait(); the background thread consumes.
event_queue = thread_queue.Queue(maxsize=_EVENT_QUEUE_MAXSIZE)

# Background thread management: the single sender thread (or None before it
# is started) and the lock that serializes its creation.
_background_thread = None
_thread_lock = threading.Lock()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class AsyncWebSocketClient:
    """WebSocket client for sending events to the backend.

    All coroutines are meant to run on one event loop (the background
    thread's), which is why the asyncio lock is created lazily instead of in
    ``__init__``: the constructor may run where no loop exists yet.
    """

    def __init__(self, ws_url, api_key, client_id):
        """Store connection settings; performs no I/O.

        Args:
            ws_url: URL handed to ``websockets.connect``.
            api_key: Value sent as ``api_key`` in every payload.
            client_id: Value sent as ``client_id`` in every payload.
        """
        self.ws_url = ws_url
        self.api_key = api_key
        self.client_id = client_id
        self.websocket = None  # Live connection, or None while disconnected
        self._async_lock = None  # Created lazily within event loop context

    async def _get_lock(self):
        """Get or create async lock within the event loop context."""
        if self._async_lock is None:
            self._async_lock = asyncio.Lock()
        return self._async_lock

    async def connect(self):
        """Establish WebSocket connection to the backend.

        Failures are logged, not raised; ``self.websocket`` is left as None
        so the caller can detect that no connection is available.
        """
        try:
            if DEBUG_ENABLED:
                logger.info(f"[Cerberus] Connecting to WebSocket: {self.ws_url}")
            self.websocket = await websockets.connect(self.ws_url)
            if DEBUG_ENABLED:
                logger.info("[Cerberus] WebSocket connected successfully")
        except Exception as e:
            self.websocket = None
            logger.error(f"[Cerberus] Failed to connect to WebSocket: {e}")

    def _build_payload(self, event_data):
        """Map an event object onto the dict layout sent to the backend.

        Args:
            event_data: Object exposing the CoreData attributes read below.

        Returns:
            A dict ready for ``json.dumps``; key order is the wire order.
        """
        return {
            'api_key': self.api_key,
            'client_id': self.client_id,
            'token': event_data.token,
            'remote_addr': event_data.source_ip,  # Backend expects 'remote_addr'
            'endpoint': event_data.endpoint,
            'scheme': event_data.scheme,
            'method': event_data.method,
            'timestamp': event_data.timestamp,
            'custom_data': event_data.custom_data,
            # Additional request details
            'headers': event_data.headers,
            'query_params': event_data.query_params,
            'body': event_data.body,
            'user_agent': event_data.user_agent,
        }

    async def send(self, event_data):
        """Send event data to backend via WebSocket.

        Connects on demand, sends one JSON message and waits up to five
        seconds for a single reply. Errors are logged and never raised; the
        event is dropped when no connection can be established or when it
        cannot be serialized.

        Args:
            event_data: CoreData object to send
        """
        lock = await self._get_lock()
        async with lock:
            # Connect if not already connected
            if self.websocket is None:
                await self.connect()

            if not self.websocket:
                # connect() has already logged the failure.
                return

            try:
                payload = self._build_payload(event_data)
                json_data = json.dumps(payload)
            except (AttributeError, TypeError, ValueError, RecursionError) as e:
                # A malformed or non-serializable event is a data problem,
                # not a transport problem: drop the event but keep the
                # healthy connection open.
                logger.error(f"[Cerberus] Error sending data: {e}")
                return

            if DEBUG_ENABLED:
                # Never write credentials to the log: mask them before
                # producing the truncated preview.
                preview = json.dumps({**payload, 'api_key': '***', 'token': '***'})
                logger.info(f"[Cerberus] Sending event to backend: {preview[:200]}...")

            try:
                await self.websocket.send(json_data)

                # Wait for acknowledgment
                response = await asyncio.wait_for(self.websocket.recv(), timeout=5.0)

                if DEBUG_ENABLED:
                    logger.info(f"[Cerberus] Backend response: {response}")

            except asyncio.TimeoutError:
                # NOTE(review): the connection is kept after a timeout, so a
                # late reply may be read as the acknowledgment of the next
                # event -- confirm this is acceptable for the backend protocol.
                logger.warning("[Cerberus] Timeout waiting for backend response")
            except websockets.exceptions.ConnectionClosed:
                logger.warning("[Cerberus] WebSocket connection closed, will reconnect on next send")
                self.websocket = None
            except Exception as e:
                logger.error(f"[Cerberus] Error sending data: {e}")
                if self.websocket:
                    try:
                        await self.websocket.close()
                    except Exception:
                        # Best-effort close; the connection is discarded
                        # either way.
                        pass
                    self.websocket = None
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
# Shared AsyncWebSocketClient instance: assigned by CerberusMiddleware.__init__
# and read by the background queue processor. Stays None until configured.
WS_CLIENT = None
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _queue_get_with_timeout():
    """Wait up to one second for the next item on ``event_queue``.

    Kept as a zero-argument function so it can be passed directly to
    ``loop.run_in_executor`` while still applying a timeout.

    Returns:
        The next queued item.

    Raises:
        queue.Empty: If nothing is available within the timeout.
    """
    wait_seconds = 1.0
    return event_queue.get(True, wait_seconds)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
async def _process_queue_async():
    """Async coroutine that processes events from the thread-safe queue.

    Runs continuously in the background thread's event loop: each iteration
    pulls one item off ``event_queue`` via the default executor (so the
    blocking ``get`` does not stall the loop) and hands it to ``WS_CLIENT``.
    A ``None`` item is the shutdown signal and ends the coroutine.

    Every item taken from the queue, including the shutdown signal, is
    matched by exactly one ``task_done()`` so ``event_queue.join()`` can
    complete.
    """
    if DEBUG_ENABLED:
        logger.info("[Cerberus] Background queue processor started")

    # We are inside a coroutine, so a running loop is guaranteed to exist.
    loop = asyncio.get_running_loop()

    while True:
        try:
            # Use run_in_executor to get from sync queue without blocking event loop
            data = await loop.run_in_executor(None, _queue_get_with_timeout)
        except thread_queue.Empty:
            # No events available, continue waiting
            continue
        except Exception as e:
            logger.error(f"[Cerberus] Error getting from queue: {e}")
            # Back off so a persistent failure cannot become a busy loop
            # that floods the log.
            await asyncio.sleep(1.0)
            continue

        # Check for shutdown signal (None means stop)
        if data is None:
            # Balance the get() for the sentinel before leaving.
            event_queue.task_done()
            if DEBUG_ENABLED:
                logger.info("[Cerberus] Received shutdown signal, stopping processor")
            break

        try:
            # Read the module global on every iteration so a client assigned
            # after this coroutine started is picked up.
            if WS_CLIENT:
                if DEBUG_ENABLED:
                    logger.info(f"[Cerberus] Processing event for endpoint: {data.endpoint}")
                await WS_CLIENT.send(data)
            else:
                logger.warning("[Cerberus] WebSocket client not initialized, skipping event")
        except Exception as e:
            logger.error(f"[Cerberus] Failed to send event: {e}")
        finally:
            event_queue.task_done()
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _run_event_loop_in_thread():
    """Thread target: own a private event loop and run the queue processor on it.

    A fresh loop is created and installed for the calling thread, the
    processor coroutine is run until it returns, and the loop is always
    closed afterwards. Errors escaping the processor are logged, not raised.
    """
    worker_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(worker_loop)

    if DEBUG_ENABLED:
        logger.info("[Cerberus] Background thread started with new event loop")

    try:
        processor = _process_queue_async()
        worker_loop.run_until_complete(processor)
    except Exception as exc:
        logger.error(f"[Cerberus] Background event loop error: {exc}")
    finally:
        worker_loop.close()
        if DEBUG_ENABLED:
            logger.info("[Cerberus] Background thread event loop closed")
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def ensure_background_thread():
    """Make sure the event-sender thread is running, starting it if needed.

    The check and the start happen under ``_thread_lock`` so concurrent
    callers cannot start two threads. The thread is created as a daemon, so
    it does not keep the process alive at exit.
    """
    global _background_thread

    with _thread_lock:
        current = _background_thread
        if current is not None and current.is_alive():
            return

        worker = threading.Thread(
            name="cerberus-event-sender",
            target=_run_event_loop_in_thread,
            daemon=True,  # Auto-shutdown when main process exits
        )
        _background_thread = worker
        worker.start()

        if DEBUG_ENABLED:
            logger.info("[Cerberus] Started background event sender thread")
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def _extract_headers(request):
|
|
237
|
+
"""Extract HTTP headers from Django request.
|
|
238
|
+
|
|
239
|
+
Converts Django's META dict (with HTTP_ prefixed headers) to a clean dict.
|
|
240
|
+
Only includes actual HTTP headers, not server variables.
|
|
241
|
+
|
|
242
|
+
Args:
|
|
243
|
+
request: Django HttpRequest object
|
|
244
|
+
|
|
245
|
+
Returns:
|
|
246
|
+
Dict of header name -> value
|
|
247
|
+
"""
|
|
248
|
+
headers = {}
|
|
249
|
+
for key, value in request.META.items():
|
|
250
|
+
if key.startswith('HTTP_'):
|
|
251
|
+
# Convert HTTP_CONTENT_TYPE to Content-Type
|
|
252
|
+
header_name = key[5:].replace('_', '-').title()
|
|
253
|
+
headers[header_name] = value
|
|
254
|
+
elif key in ('CONTENT_TYPE', 'CONTENT_LENGTH'):
|
|
255
|
+
# These don't have HTTP_ prefix but are still headers
|
|
256
|
+
header_name = key.replace('_', '-').title()
|
|
257
|
+
headers[header_name] = value
|
|
258
|
+
return headers if headers else None
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _extract_query_params(request):
|
|
262
|
+
"""Extract query parameters from Django request.
|
|
263
|
+
|
|
264
|
+
Args:
|
|
265
|
+
request: Django HttpRequest object
|
|
266
|
+
|
|
267
|
+
Returns:
|
|
268
|
+
Dict of query param name -> value (or list of values if multiple)
|
|
269
|
+
"""
|
|
270
|
+
if not request.GET:
|
|
271
|
+
return None
|
|
272
|
+
|
|
273
|
+
params = {}
|
|
274
|
+
for key in request.GET:
|
|
275
|
+
values = request.GET.getlist(key)
|
|
276
|
+
params[key] = values[0] if len(values) == 1 else values
|
|
277
|
+
return params
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def _extract_body(request):
|
|
281
|
+
"""Extract request body from Django request.
|
|
282
|
+
|
|
283
|
+
Only attempts to parse JSON bodies. Returns None for non-JSON content.
|
|
284
|
+
|
|
285
|
+
Args:
|
|
286
|
+
request: Django HttpRequest object
|
|
287
|
+
|
|
288
|
+
Returns:
|
|
289
|
+
Parsed JSON body as dict, or None
|
|
290
|
+
"""
|
|
291
|
+
if request.method not in ('POST', 'PUT', 'PATCH'):
|
|
292
|
+
return None
|
|
293
|
+
|
|
294
|
+
content_type = request.content_type or ''
|
|
295
|
+
if 'application/json' not in content_type:
|
|
296
|
+
return None
|
|
297
|
+
|
|
298
|
+
try:
|
|
299
|
+
if request.body:
|
|
300
|
+
return json.loads(request.body.decode('utf-8'))
|
|
301
|
+
except (json.JSONDecodeError, UnicodeDecodeError):
|
|
302
|
+
pass
|
|
303
|
+
|
|
304
|
+
return None
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
class CerberusMiddleware:
    """Django middleware for capturing and sending HTTP request metrics.

    Compatible with both WSGI and ASGI Django deployments.

    Configuration via CERBERUS_CONFIG in Django settings:
    - token: API key for authentication
    - client_id: Client identifier
    - ws_url: WebSocket URL for event_ingest backend
    - backend_url: HTTP URL for fetching secret key (optional)
    - secret_key: HMAC key for PII hashing (optional, auto-fetched if backend_url set)

    What is queued for every request: the configured token, the raw
    ``REMOTE_ADDR``, path, HTTPS flag, method, a UTC timestamp, custom
    metrics, request headers, query parameters, the parsed JSON body and the
    User-Agent. This class does not hash or redact any of these values; the
    ``secret_key`` is only stored in the config.

    Custom metrics can be supplied in two ways, merged into ``custom_data``
    (response values win on key conflicts):
    - by adding keys to ``request.cerberus_metrics`` while handling the request
    - by placing a ``_cerberus_metrics`` entry in a dict-valued ``response.data``
    """

    def __init__(self, get_response):
        """Read configuration, set up the shared WebSocket client, start the sender thread.

        Args:
            get_response: The next callable in Django's middleware chain.
        """
        global WS_CLIENT

        self.get_response = get_response
        # When CERBERUS_CONFIG is defined this is the settings dict itself,
        # so the secret_key assignment below is visible through settings too.
        self.config = getattr(settings, 'CERBERUS_CONFIG', {})

        if DEBUG_ENABLED:
            logger.info("[Cerberus] Middleware initializing...")
            logger.info(f"[Cerberus] Config keys: {list(self.config.keys())}")

        # Auto-fetch secret_key from backend if not configured locally
        if 'secret_key' not in self.config and 'backend_url' in self.config:
            if DEBUG_ENABLED:
                logger.info(f"[Cerberus] Fetching secret key from backend: {self.config['backend_url']}")
            secret_key = fetch_secret_key(
                self.config['backend_url'],
                self.config.get('token', '')
            )
            if secret_key:
                self.config['secret_key'] = secret_key
                logger.info(f"[Cerberus] Successfully fetched secret key from {self.config['backend_url']}")
            else:
                logger.warning("[Cerberus] Failed to fetch secret key. PII will not be hashed.")

        # Initialize WebSocket client
        if 'ws_url' in self.config and 'token' in self.config and 'client_id' in self.config:
            WS_CLIENT = AsyncWebSocketClient(
                self.config['ws_url'],
                self.config['token'],
                self.config['client_id']
            )
            if DEBUG_ENABLED:
                logger.info(f"[Cerberus] WebSocket client initialized: {self.config['ws_url']}")
        else:
            logger.warning("[Cerberus] WebSocket client not initialized. Missing ws_url, token, or client_id in CERBERUS_CONFIG")

        # Start background thread for processing events
        ensure_background_thread()

    @staticmethod
    def _collect_metrics(request, response):
        """Combine custom metrics attached to the request and to the response.

        Args:
            request: Request object; its ``cerberus_metrics`` dict, if any,
                is read but not modified.
            response: Response object; a ``_cerberus_metrics`` entry is
                removed from a dict-valued ``response.data`` when present.

        Returns:
            The response metrics, the request metrics, or a merged dict in
            which response values override request values; ``{}`` when
            neither source provided anything.
        """
        metrics = {}
        if hasattr(response, 'data') and isinstance(response.data, dict):
            if '_cerberus_metrics' in response.data:
                # NOTE(review): only the data dict is changed here; content
                # already rendered from it is not touched -- confirm against
                # the response class in use.
                metrics = response.data.pop('_cerberus_metrics')

        request_metrics = getattr(request, 'cerberus_metrics', None)
        if isinstance(request_metrics, dict) and request_metrics:
            if not metrics:
                metrics = dict(request_metrics)
            elif isinstance(metrics, dict):
                metrics = {**request_metrics, **metrics}
        return metrics

    def __call__(self, request):
        """Process a request and queue metrics for async transmission.

        This method is synchronous and does not require an event loop.
        Events are placed in a thread-safe queue and processed by the
        background thread.

        Args:
            request: The incoming Django HttpRequest.

        Returns:
            The response produced by the rest of the middleware chain.
        """
        # Give views a place to attach custom metrics for this request.
        request.cerberus_metrics = {}

        # Extract request data BEFORE processing (body can only be read once)
        headers = _extract_headers(request)
        query_params = _extract_query_params(request)
        body = _extract_body(request)
        user_agent = request.META.get('HTTP_USER_AGENT')

        # Process the request
        response = self.get_response(request)

        # Gather custom metrics from both the request and the response.
        metrics = self._collect_metrics(request, response)

        # Get source IP address (may be None when REMOTE_ADDR is absent)
        source_ip = request.META.get('REMOTE_ADDR')

        # Create the event data with current timestamp
        d = CoreData(
            token=self.config.get('token', ''),
            source_ip=source_ip,
            endpoint=request.path,
            scheme=request.scheme == 'https',
            method=request.method,
            timestamp=datetime.now(timezone.utc).isoformat(),
            custom_data=metrics,
            headers=headers,
            query_params=query_params,
            body=body,
            user_agent=user_agent,
        )

        # Queue the event (non-blocking)
        try:
            event_queue.put_nowait(d)
            if DEBUG_ENABLED:
                logger.info(f"[Cerberus] Queued event: {request.method} {request.path}")
        except thread_queue.Full:
            logger.warning("[Cerberus] Event queue full, dropping event")

        return response
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
from typing import Dict, Optional
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@dataclass
class CoreData:
    """Data structure for HTTP request metrics.

    One instance describes one handled request. Field order is part of the
    interface because instances may be built positionally.
    """
    token: str  # API token identifying the sending application
    source_ip: Optional[str]  # Client address; None when the server did not provide one
    endpoint: str  # Request path
    scheme: bool  # True when the request was made over HTTPS
    method: str  # HTTP method
    timestamp: str  # ISO 8601 format timestamp
    custom_data: Optional[Dict] = None  # Application-supplied metrics

    # Additional request details
    headers: Optional[Dict] = None
    query_params: Optional[Dict] = None
    body: Optional[object] = None  # Parsed JSON body: any JSON value, not only a dict
    user_agent: Optional[str] = None
|
cerberus_django/utils.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import hmac
|
|
2
|
+
import hashlib
|
|
3
|
+
import requests
|
|
4
|
+
import os
|
|
5
|
+
import logging
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
# Module-level logger; handlers and levels are left to the host application.
logger = logging.getLogger(__name__)

# Verbose diagnostics are opt-in through the CERBERUS_DEBUG environment
# variable (true/1/yes, case-insensitive), read once at import time.
_debug_flag = os.environ.get('CERBERUS_DEBUG', 'false')
DEBUG_ENABLED = _debug_flag.lower() in ('true', '1', 'yes')
|
|
13
|
+
|
|
14
|
+
def hash_pii(value, secret_key):
    """Pseudonymize a PII value with a keyed HMAC-SHA256 digest.

    The same value and key always give the same digest, so hashed values
    can still be grouped and counted.

    Args:
        value: The PII to hash (e.g., an IP address) as str or bytes.
            ``None`` is passed through unchanged.
        secret_key: HMAC key as str or bytes
            (from CERBERUS_CONFIG['secret_key']).

    Returns:
        Hex-encoded HMAC digest string, or None when ``value`` is None.
    """
    if value is None:
        return None

    # hmac works on bytes; text inputs are encoded as UTF-8 first.
    message = value.encode('utf-8') if isinstance(value, str) else value
    key = secret_key.encode('utf-8') if isinstance(secret_key, str) else secret_key

    mac = hmac.new(key, msg=message, digestmod=hashlib.sha256)
    return mac.hexdigest()
|
|
35
|
+
|
|
36
|
+
def fetch_secret_key(backend_url: str, api_key: str, timeout: int = 5) -> Optional[str]:
    """
    Fetch the shared HMAC secret key from the backend server.

    Issues ``GET <backend_url>/api/secret-key`` with the API key in the
    ``X-API-Key`` header and reads ``secret_key`` from the JSON response.
    Network errors, HTTP error statuses, invalid JSON and unexpected
    response shapes are all logged and reported as ``None``; this function
    does not raise for them.

    Args:
        backend_url: Base URL of the backend server (e.g., 'https://cerberus.example.com')
        api_key: Client API key for authentication
        timeout: Request timeout in seconds (default: 5)

    Returns:
        The secret key string, or None if the fetch fails or the response
        does not contain a non-empty string under ``secret_key``
    """
    try:
        url = f"{backend_url.rstrip('/')}/api/secret-key"
        if DEBUG_ENABLED:
            logger.info(f"[Cerberus] Making HTTP request to fetch secret key: {url}")

        response = requests.get(
            url,
            headers={'X-API-Key': api_key},
            timeout=timeout
        )

        if DEBUG_ENABLED:
            logger.info(f"[Cerberus] Secret key fetch response: {response.status_code}")

        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError also covers JSON decoding failures that are not
        # reported as a RequestException.
        logger.error(f"[Cerberus] Failed to fetch secret key from {backend_url}: {e}")
        return None

    # The body is remote input: accept only the documented shape so that a
    # list, string or number cannot raise AttributeError in the caller.
    if not isinstance(data, dict):
        logger.error(f"[Cerberus] Failed to fetch secret key from {backend_url}: unexpected response format")
        return None

    secret_key = data.get('secret_key')
    if not isinstance(secret_key, str) or not secret_key:
        return None
    return secret_key
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cerberus-django
|
|
3
|
+
Version: 0.1.4
|
|
4
|
+
Summary: Django middleware for capturing and streaming HTTP request metrics via WebSocket
|
|
5
|
+
Project-URL: Homepage, https://github.com/gpotrock/cerberus
|
|
6
|
+
Project-URL: Documentation, https://github.com/gpotrock/cerberus#readme
|
|
7
|
+
Project-URL: Repository, https://github.com/gpotrock/cerberus.git
|
|
8
|
+
Project-URL: Issues, https://github.com/gpotrock/cerberus/issues
|
|
9
|
+
Author: Griffin Potrock
|
|
10
|
+
License-Expression: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: analytics,async,django,metrics,middleware,monitoring,websocket
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Environment :: Web Environment
|
|
15
|
+
Classifier: Framework :: Django
|
|
16
|
+
Classifier: Framework :: Django :: 4.0
|
|
17
|
+
Classifier: Framework :: Django :: 4.1
|
|
18
|
+
Classifier: Framework :: Django :: 4.2
|
|
19
|
+
Classifier: Framework :: Django :: 5.0
|
|
20
|
+
Classifier: Intended Audience :: Developers
|
|
21
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
22
|
+
Classifier: Operating System :: OS Independent
|
|
23
|
+
Classifier: Programming Language :: Python :: 3
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
26
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
27
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
28
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: WSGI :: Middleware
|
|
29
|
+
Classifier: Topic :: System :: Monitoring
|
|
30
|
+
Requires-Python: >=3.9
|
|
31
|
+
Requires-Dist: django>=4.0
|
|
32
|
+
Requires-Dist: requests>=2.28.0
|
|
33
|
+
Requires-Dist: websockets>=12.0
|
|
34
|
+
Provides-Extra: dev
|
|
35
|
+
Requires-Dist: black>=23.0; extra == 'dev'
|
|
36
|
+
Requires-Dist: mypy>=1.0; extra == 'dev'
|
|
37
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == 'dev'
|
|
38
|
+
Requires-Dist: pytest-django>=4.5; extra == 'dev'
|
|
39
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
40
|
+
Requires-Dist: ruff>=0.1; extra == 'dev'
|
|
41
|
+
Description-Content-Type: text/markdown
|
|
42
|
+
|
|
43
|
+
# Cerberus Django
|
|
44
|
+
|
|
45
|
+
[PyPI version](https://badge.fury.io/py/cerberus-django)
|
|
46
|
+
[Python versions](https://pypi.org/project/cerberus-django/)
|
|
47
|
+
[Django 4.0+](https://www.djangoproject.com/)
|
|
48
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
49
|
+
|
|
50
|
+
A Django middleware for capturing and streaming HTTP request metrics to a backend analytics server via WebSocket. Designed for high-performance, non-blocking operation in both WSGI and ASGI environments.
|
|
51
|
+
|
|
52
|
+
## Features
|
|
53
|
+
|
|
54
|
+
- **Non-blocking**: Events are queued and sent asynchronously via a background thread
|
|
55
|
+
- **WSGI & ASGI Compatible**: Works with both synchronous and asynchronous Django deployments
|
|
56
|
+
- **Privacy-First**: Built-in HMAC-SHA256 hashing for PII (IP addresses) before transmission
|
|
57
|
+
- **Custom Metrics**: Attach application-specific metrics to any request
|
|
58
|
+
- **Automatic Reconnection**: WebSocket client handles connection failures gracefully
|
|
59
|
+
- **Zero Configuration Required**: Sensible defaults with optional customization
|
|
60
|
+
|
|
61
|
+
## Installation
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
pip install cerberus-django
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Quick Start
|
|
68
|
+
|
|
69
|
+
### 1. Add to Django Settings
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
# settings.py
|
|
73
|
+
|
|
74
|
+
INSTALLED_APPS = [
|
|
75
|
+
# ... your apps
|
|
76
|
+
]
|
|
77
|
+
|
|
78
|
+
MIDDLEWARE = [
|
|
79
|
+
'django.middleware.security.SecurityMiddleware',
|
|
80
|
+
# ... other middleware
|
|
81
|
+
'cerberus_django.CerberusMiddleware', # Add Cerberus
|
|
82
|
+
]
|
|
83
|
+
|
|
84
|
+
# Cerberus Configuration
|
|
85
|
+
CERBERUS_CONFIG = {
|
|
86
|
+
'ws_url': 'wss://your-analytics-server.com/ws/events',
|
|
87
|
+
'token': 'your-api-key',
|
|
88
|
+
'client_id': 'your-client-id',
|
|
89
|
+
}
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### 2. That's It!
|
|
93
|
+
|
|
94
|
+
Cerberus will now capture metrics for every HTTP request and send them to your analytics backend.
|
|
95
|
+
|
|
96
|
+
## Configuration
|
|
97
|
+
|
|
98
|
+
All configuration is done via the `CERBERUS_CONFIG` dictionary in your Django settings:
|
|
99
|
+
|
|
100
|
+
| Key | Required | Description |
|
|
101
|
+
|-----|----------|-------------|
|
|
102
|
+
| `ws_url` | Yes | WebSocket URL for the analytics backend |
|
|
103
|
+
| `token` | Yes | API key for authentication |
|
|
104
|
+
| `client_id` | Yes | Unique identifier for your application |
|
|
105
|
+
| `backend_url` | No | HTTP URL to auto-fetch the HMAC secret key |
|
|
106
|
+
| `secret_key` | No | HMAC secret key for PII hashing (auto-fetched if `backend_url` is set) |
|
|
107
|
+
|
|
108
|
+
### Example Configurations
|
|
109
|
+
|
|
110
|
+
**Basic (no PII hashing):**
|
|
111
|
+
```python
|
|
112
|
+
CERBERUS_CONFIG = {
|
|
113
|
+
'ws_url': 'wss://analytics.example.com/ws/events',
|
|
114
|
+
'token': 'sk-your-api-key',
|
|
115
|
+
'client_id': 'my-django-app',
|
|
116
|
+
}
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
**With automatic secret key fetching:**
|
|
120
|
+
```python
|
|
121
|
+
CERBERUS_CONFIG = {
|
|
122
|
+
'ws_url': 'wss://analytics.example.com/ws/events',
|
|
123
|
+
'token': 'sk-your-api-key',
|
|
124
|
+
'client_id': 'my-django-app',
|
|
125
|
+
'backend_url': 'https://analytics.example.com', # Will fetch secret from /api/secret-key
|
|
126
|
+
}
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
**With manual secret key:**
|
|
130
|
+
```python
|
|
131
|
+
CERBERUS_CONFIG = {
|
|
132
|
+
'ws_url': 'wss://analytics.example.com/ws/events',
|
|
133
|
+
'token': 'sk-your-api-key',
|
|
134
|
+
'client_id': 'my-django-app',
|
|
135
|
+
'secret_key': 'your-hmac-secret-key', # For consistent PII hashing
|
|
136
|
+
}
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Custom Metrics
|
|
140
|
+
|
|
141
|
+
Attach custom metrics to any request by adding them to the response:
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
from rest_framework.decorators import api_view
|
|
145
|
+
from rest_framework.response import Response
|
|
146
|
+
|
|
147
|
+
@api_view(['GET'])
|
|
148
|
+
def my_endpoint(request):
|
|
149
|
+
# Your business logic
|
|
150
|
+
items = process_items()
|
|
151
|
+
|
|
152
|
+
response = Response({'items': items})
|
|
153
|
+
|
|
154
|
+
# Add custom metrics (will be included in the event)
|
|
155
|
+
response.data['_cerberus_metrics'] = {
|
|
156
|
+
'items_processed': len(items),
|
|
157
|
+
'cache_hit': True,
|
|
158
|
+
'processing_time_ms': 42,
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
return response
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
The `_cerberus_metrics` key is automatically extracted from the response and included in the event payload. It will not be sent to the client.
|
|
165
|
+
|
|
166
|
+
## Event Payload
|
|
167
|
+
|
|
168
|
+
Each event sent to your analytics backend includes:
|
|
169
|
+
|
|
170
|
+
```json
|
|
171
|
+
{
|
|
172
|
+
"api_key": "your-api-key",
|
|
173
|
+
"client_id": "your-client-id",
|
|
174
|
+
"token": "your-api-key",
|
|
175
|
+
"source_ip": "hashed-ip-address",
|
|
176
|
+
"endpoint": "/api/users/",
|
|
177
|
+
"scheme": true,
|
|
178
|
+
"method": "GET",
|
|
179
|
+
"custom_data": {
|
|
180
|
+
"items_processed": 10,
|
|
181
|
+
"cache_hit": true
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## Privacy & Security
|
|
187
|
+
|
|
188
|
+
### PII Hashing
|
|
189
|
+
|
|
190
|
+
When a `secret_key` is configured, source IP addresses are hashed using HMAC-SHA256 before transmission:
|
|
191
|
+
|
|
192
|
+
- **Consistent**: Same IP always produces the same hash (enabling analytics)
|
|
193
|
+
- **Irreversible**: Original IP cannot be recovered from the hash
|
|
194
|
+
- **Secure**: Uses cryptographically strong HMAC-SHA256
|
|
195
|
+
|
|
196
|
+
### What's Captured
|
|
197
|
+
|
|
198
|
+
| Field | Description | Privacy |
|
|
199
|
+
|-------|-------------|---------|
|
|
200
|
+
| `source_ip` | Client IP address | Hashed if `secret_key` configured |
|
|
201
|
+
| `endpoint` | Request path | Sent as-is |
|
|
202
|
+
| `method` | HTTP method (GET, POST, etc.) | Sent as-is |
|
|
203
|
+
| `scheme` | Whether HTTPS was used | Sent as-is |
|
|
204
|
+
| `custom_data` | Your custom metrics | Sent as-is |
|
|
205
|
+
|
|
206
|
+
### What's NOT Captured
|
|
207
|
+
|
|
208
|
+
- Request/response bodies
|
|
209
|
+
- HTTP headers
|
|
210
|
+
- Query parameters
|
|
211
|
+
- Cookies or session data
|
|
212
|
+
- Authentication tokens
|
|
213
|
+
|
|
214
|
+
## Debug Mode
|
|
215
|
+
|
|
216
|
+
Enable debug logging to troubleshoot issues:
|
|
217
|
+
|
|
218
|
+
```bash
|
|
219
|
+
export CERBERUS_DEBUG=true
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
Or in your Django settings:
|
|
223
|
+
|
|
224
|
+
```python
|
|
225
|
+
import os
|
|
226
|
+
os.environ['CERBERUS_DEBUG'] = 'true'
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
This will log:
|
|
230
|
+
- Middleware initialization
|
|
231
|
+
- WebSocket connection attempts
|
|
232
|
+
- Events being queued and sent
|
|
233
|
+
- Any errors encountered
|
|
234
|
+
|
|
235
|
+
## Architecture
|
|
236
|
+
|
|
237
|
+
```
|
|
238
|
+
┌─────────────────────────┐ ┌──────────────────────────────┐
|
|
239
|
+
│ Django Request │ │ Background Thread │
|
|
240
|
+
│ (WSGI or ASGI) │ │ (Daemon) │
|
|
241
|
+
├─────────────────────────┤ ├──────────────────────────────┤
|
|
242
|
+
│ CerberusMiddleware │ │ Event Loop │
|
|
243
|
+
│ └── queue.put(event) │────▶│ └── WebSocket.send() │
|
|
244
|
+
└─────────────────────────┘ └──────────────────────────────┘
|
|
245
|
+
│ │
|
|
246
|
+
│ Thread-safe Queue │ Async WebSocket
|
|
247
|
+
└────────────────────────────────────┘
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
- **Middleware**: Runs synchronously in the request/response cycle
|
|
251
|
+
- **Queue**: Thread-safe `queue.Queue` for passing events
|
|
252
|
+
- **Background Thread**: Daemon thread with its own event loop for async WebSocket communication
|
|
253
|
+
|
|
254
|
+
This architecture ensures:
|
|
255
|
+
- No blocking of HTTP requests
|
|
256
|
+
- No event loop conflicts in WSGI mode
|
|
257
|
+
- Automatic cleanup when the process exits (daemon thread)
|
|
258
|
+
|
|
259
|
+
## Requirements
|
|
260
|
+
|
|
261
|
+
- Python 3.9+
|
|
262
|
+
- Django 4.0+
|
|
263
|
+
- websockets 12.0+
|
|
264
|
+
- requests 2.28+
|
|
265
|
+
|
|
266
|
+
## Development
|
|
267
|
+
|
|
268
|
+
```bash
|
|
269
|
+
# Clone the repository
|
|
270
|
+
git clone https://github.com/gpotrock/cerberus.git
|
|
271
|
+
cd cerberus
|
|
272
|
+
|
|
273
|
+
# Install development dependencies
|
|
274
|
+
pip install -e ".[dev]"
|
|
275
|
+
|
|
276
|
+
# Run tests
|
|
277
|
+
pytest
|
|
278
|
+
|
|
279
|
+
# Format code
|
|
280
|
+
black src/
|
|
281
|
+
ruff check src/ --fix
|
|
282
|
+
|
|
283
|
+
# Type checking
|
|
284
|
+
mypy src/
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
## License
|
|
288
|
+
|
|
289
|
+
MIT License - see [LICENSE](LICENSE) for details.
|
|
290
|
+
|
|
291
|
+
## Contributing
|
|
292
|
+
|
|
293
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
294
|
+
|
|
295
|
+
1. Fork the repository
|
|
296
|
+
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
|
|
297
|
+
3. Commit your changes (`git commit -m 'Add some amazing feature'`)
|
|
298
|
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
|
299
|
+
5. Open a Pull Request
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
cerberus_django/__init__.py,sha256=JwXQyLUPFIxYh74vMXsy_okGtElL9gMxMNJnliZonCw,370
|
|
2
|
+
cerberus_django/middleware.py,sha256=dM0iX23Ns4EaxKdO_QtwhSh4ruUvkk1EKaXJt8ehElw,14309
|
|
3
|
+
cerberus_django/structs.py,sha256=KXZNuJJcUJjugIdApsdLRUyVO9PmEokm988WRma9wyc,584
|
|
4
|
+
cerberus_django/utils.py,sha256=yTCjXnz2lB84u4mVewFArN3x_PlMtbP4_yyMT6gEUVM,2121
|
|
5
|
+
cerberus_django-0.1.4.dist-info/METADATA,sha256=t88duqCl6QhI0uhwoGSEgfp1FrBrj_W_fewW8yXp6lE,9427
|
|
6
|
+
cerberus_django-0.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
7
|
+
cerberus_django-0.1.4.dist-info/licenses/LICENSE,sha256=L4F2m4zdMOZJqtVJoi6yyhbVviQsohkrNkXoND5uJPk,1072
|
|
8
|
+
cerberus_django-0.1.4.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Griffin Potrock
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|