cerberus-django 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,120 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ *.egg-info/
24
+ .installed.cfg
25
+ *.egg
26
+ MANIFEST
27
+
28
+ # PyInstaller
29
+ *.manifest
30
+ *.spec
31
+
32
+ # Installer logs
33
+ pip-log.txt
34
+ pip-delete-this-directory.txt
35
+
36
+ # Unit test / coverage reports
37
+ htmlcov/
38
+ .tox/
39
+ .nox/
40
+ .coverage
41
+ .coverage.*
42
+ .cache
43
+ nosetests.xml
44
+ coverage.xml
45
+ *.cover
46
+ .hypothesis/
47
+ .pytest_cache/
48
+
49
+ # Translations
50
+ *.mo
51
+ *.pot
52
+
53
+ # Django stuff:
54
+ *.log
55
+ local_settings.py
56
+ db.sqlite3
57
+ db.sqlite3-journal
58
+
59
+ # Flask stuff:
60
+ instance/
61
+ .webassets-cache
62
+
63
+ # Scrapy stuff:
64
+ .scrapy
65
+
66
+ # Sphinx documentation
67
+ docs/_build/
68
+
69
+ # PyBuilder
70
+ target/
71
+
72
+ # Jupyter Notebook
73
+ .ipynb_checkpoints
74
+
75
+ # pyenv
76
+ .python-version
77
+
78
+ # celery beat schedule file
79
+ celerybeat-schedule
80
+
81
+ # SageMath parsed files
82
+ *.sage.py
83
+
84
+ # Environments
85
+ .env
86
+ .venv
87
+ env/
88
+ venv/
89
+ ENV/
90
+ env.bak/
91
+ venv.bak/
92
+
93
+ # Spyder project settings
94
+ .spyderproject
95
+ .spyproject
96
+
97
+ # Rope project settings
98
+ .ropeproject
99
+
100
+ # mkdocs documentation
101
+ /site
102
+
103
+ # mypy
104
+ .mypy_cache/
105
+ .dmypy.json
106
+ dmypy.json
107
+
108
+ # Pyre type checker
109
+ .pyre/
110
+
111
+ # IDE
112
+ .vscode/
113
+ .idea/
114
+ *.swp
115
+ *.swo
116
+ *~
117
+ .DS_Store
118
+
119
+ # Project-specific
120
+ *.pyc
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Griffin Potrock
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,299 @@
1
+ Metadata-Version: 2.4
2
+ Name: cerberus-django
3
+ Version: 0.1.4
4
+ Summary: Django middleware for capturing and streaming HTTP request metrics via WebSocket
5
+ Project-URL: Homepage, https://github.com/gpotrock/cerberus
6
+ Project-URL: Documentation, https://github.com/gpotrock/cerberus#readme
7
+ Project-URL: Repository, https://github.com/gpotrock/cerberus.git
8
+ Project-URL: Issues, https://github.com/gpotrock/cerberus/issues
9
+ Author: Griffin Potrock
10
+ License-Expression: MIT
11
+ License-File: LICENSE
12
+ Keywords: analytics,async,django,metrics,middleware,monitoring,websocket
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Environment :: Web Environment
15
+ Classifier: Framework :: Django
16
+ Classifier: Framework :: Django :: 4.0
17
+ Classifier: Framework :: Django :: 4.1
18
+ Classifier: Framework :: Django :: 4.2
19
+ Classifier: Framework :: Django :: 5.0
20
+ Classifier: Intended Audience :: Developers
21
+ Classifier: License :: OSI Approved :: MIT License
22
+ Classifier: Operating System :: OS Independent
23
+ Classifier: Programming Language :: Python :: 3
24
+ Classifier: Programming Language :: Python :: 3.9
25
+ Classifier: Programming Language :: Python :: 3.10
26
+ Classifier: Programming Language :: Python :: 3.11
27
+ Classifier: Programming Language :: Python :: 3.12
28
+ Classifier: Topic :: Internet :: WWW/HTTP :: WSGI :: Middleware
29
+ Classifier: Topic :: System :: Monitoring
30
+ Requires-Python: >=3.9
31
+ Requires-Dist: django>=4.0
32
+ Requires-Dist: requests>=2.28.0
33
+ Requires-Dist: websockets>=12.0
34
+ Provides-Extra: dev
35
+ Requires-Dist: black>=23.0; extra == 'dev'
36
+ Requires-Dist: mypy>=1.0; extra == 'dev'
37
+ Requires-Dist: pytest-asyncio>=0.21; extra == 'dev'
38
+ Requires-Dist: pytest-django>=4.5; extra == 'dev'
39
+ Requires-Dist: pytest>=7.0; extra == 'dev'
40
+ Requires-Dist: ruff>=0.1; extra == 'dev'
41
+ Description-Content-Type: text/markdown
42
+
43
+ # Cerberus Django
44
+
45
+ [![PyPI version](https://badge.fury.io/py/cerberus-django.svg)](https://badge.fury.io/py/cerberus-django)
46
+ [![Python Versions](https://img.shields.io/pypi/pyversions/cerberus-django.svg)](https://pypi.org/project/cerberus-django/)
47
+ [![Django Versions](https://img.shields.io/badge/django-4.0%20%7C%204.1%20%7C%204.2%20%7C%205.0-blue.svg)](https://www.djangoproject.com/)
48
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
49
+
50
+ A Django middleware for capturing and streaming HTTP request metrics to a backend analytics server via WebSocket. Designed for high-performance, non-blocking operation in both WSGI and ASGI environments.
51
+
52
+ ## Features
53
+
54
+ - **Non-blocking**: Events are queued and sent asynchronously via a background thread
55
+ - **WSGI & ASGI Compatible**: Works with both synchronous and asynchronous Django deployments
56
+ - **Privacy-First**: Built-in HMAC-SHA256 hashing for PII (IP addresses) before transmission
57
+ - **Custom Metrics**: Attach application-specific metrics to any request
58
+ - **Automatic Reconnection**: WebSocket client handles connection failures gracefully
59
+ - **Minimal Configuration**: Only `ws_url`, `token`, and `client_id` are required; all other settings are optional
60
+
61
+ ## Installation
62
+
63
+ ```bash
64
+ pip install cerberus-django
65
+ ```
66
+
67
+ ## Quick Start
68
+
69
+ ### 1. Add to Django Settings
70
+
71
+ ```python
72
+ # settings.py
73
+
74
+ INSTALLED_APPS = [
75
+ # ... your apps
76
+ ]
77
+
78
+ MIDDLEWARE = [
79
+ 'django.middleware.security.SecurityMiddleware',
80
+ # ... other middleware
81
+ 'cerberus_django.CerberusMiddleware', # Add Cerberus
82
+ ]
83
+
84
+ # Cerberus Configuration
85
+ CERBERUS_CONFIG = {
86
+ 'ws_url': 'wss://your-analytics-server.com/ws/events',
87
+ 'token': 'your-api-key',
88
+ 'client_id': 'your-client-id',
89
+ }
90
+ ```
91
+
92
+ ### 2. That's It!
93
+
94
+ Cerberus will now capture metrics for every HTTP request and send them to your analytics backend.
95
+
96
+ ## Configuration
97
+
98
+ All configuration is done via the `CERBERUS_CONFIG` dictionary in your Django settings:
99
+
100
+ | Key | Required | Description |
101
+ |-----|----------|-------------|
102
+ | `ws_url` | Yes | WebSocket URL for the analytics backend |
103
+ | `token` | Yes | API key for authentication |
104
+ | `client_id` | Yes | Unique identifier for your application |
105
+ | `backend_url` | No | HTTP URL to auto-fetch the HMAC secret key |
106
+ | `secret_key` | No | HMAC secret key for PII hashing (auto-fetched if `backend_url` is set) |
107
+
108
+ ### Example Configurations
109
+
110
+ **Basic (no PII hashing):**
111
+ ```python
112
+ CERBERUS_CONFIG = {
113
+ 'ws_url': 'wss://analytics.example.com/ws/events',
114
+ 'token': 'sk-your-api-key',
115
+ 'client_id': 'my-django-app',
116
+ }
117
+ ```
118
+
119
+ **With automatic secret key fetching:**
120
+ ```python
121
+ CERBERUS_CONFIG = {
122
+ 'ws_url': 'wss://analytics.example.com/ws/events',
123
+ 'token': 'sk-your-api-key',
124
+ 'client_id': 'my-django-app',
125
+ 'backend_url': 'https://analytics.example.com', # Will fetch secret from /api/secret-key
126
+ }
127
+ ```
128
+
129
+ **With manual secret key:**
130
+ ```python
131
+ CERBERUS_CONFIG = {
132
+ 'ws_url': 'wss://analytics.example.com/ws/events',
133
+ 'token': 'sk-your-api-key',
134
+ 'client_id': 'my-django-app',
135
+ 'secret_key': 'your-hmac-secret-key', # For consistent PII hashing
136
+ }
137
+ ```
138
+
139
+ ## Custom Metrics
140
+
141
+ Attach custom metrics to any request by adding them to the response:
142
+
143
+ ```python
144
+ from rest_framework.decorators import api_view
145
+ from rest_framework.response import Response
146
+
147
+ @api_view(['GET'])
148
+ def my_endpoint(request):
149
+ # Your business logic
150
+ items = process_items()
151
+
152
+ response = Response({'items': items})
153
+
154
+ # Add custom metrics (will be included in the event)
155
+ response.data['_cerberus_metrics'] = {
156
+ 'items_processed': len(items),
157
+ 'cache_hit': True,
158
+ 'processing_time_ms': 42,
159
+ }
160
+
161
+ return response
162
+ ```
163
+
164
+ The `_cerberus_metrics` key is automatically extracted from the response and included in the event payload. It will not be sent to the client.
165
+
166
+ ## Event Payload
167
+
168
+ Each event sent to your analytics backend includes:
169
+
170
+ ```json
171
+ {
172
+ "api_key": "your-api-key",
173
+ "client_id": "your-client-id",
174
+ "token": "your-api-key",
175
+ "source_ip": "hashed-ip-address",
176
+ "endpoint": "/api/users/",
177
+ "scheme": true,
178
+ "method": "GET",
179
+ "custom_data": {
180
+ "items_processed": 10,
181
+ "cache_hit": true
182
+ }
183
+ }
184
+ ```
185
+
186
+ ## Privacy & Security
187
+
188
+ ### PII Hashing
189
+
190
+ When a `secret_key` is configured, source IP addresses are hashed using HMAC-SHA256 before transmission:
191
+
192
+ - **Consistent**: Same IP always produces the same hash (enabling analytics)
193
+ - **Irreversible**: Original IP cannot be recovered from the hash
194
+ - **Secure**: Uses cryptographically strong HMAC-SHA256
195
+
196
+ ### What's Captured
197
+
198
+ | Field | Description | Privacy |
199
+ |-------|-------------|---------|
200
+ | `source_ip` | Client IP address | Hashed if `secret_key` configured |
201
+ | `endpoint` | Request path | Sent as-is |
202
+ | `method` | HTTP method (GET, POST, etc.) | Sent as-is |
203
+ | `scheme` | Whether HTTPS was used | Sent as-is |
204
+ | `custom_data` | Your custom metrics | Sent as-is |
205
+
206
+ ### What's NOT Captured
207
+
208
+ - Request/response bodies
209
+ - HTTP headers
210
+ - Query parameters
211
+ - Cookies or session data
212
+ - Authentication tokens
213
+
214
+ ## Debug Mode
215
+
216
+ Enable debug logging to troubleshoot issues:
217
+
218
+ ```bash
219
+ export CERBERUS_DEBUG=true
220
+ ```
221
+
222
+ Or in your Django settings (the flag is read once, when `cerberus_django` is first imported, so set it before then):
223
+
224
+ ```python
225
+ import os
226
+ os.environ['CERBERUS_DEBUG'] = 'true'
227
+ ```
228
+
229
+ This will log:
230
+ - Middleware initialization
231
+ - WebSocket connection attempts
232
+ - Events being queued and sent
233
+ - Any errors encountered
234
+
235
+ ## Architecture
236
+
237
+ ```
238
+ ┌─────────────────────────┐ ┌──────────────────────────────┐
239
+ │ Django Request │ │ Background Thread │
240
+ │ (WSGI or ASGI) │ │ (Daemon) │
241
+ ├─────────────────────────┤ ├──────────────────────────────┤
242
+ │ CerberusMiddleware │ │ Event Loop │
243
+ │ └── queue.put(event) │────▶│ └── WebSocket.send() │
244
+ └─────────────────────────┘ └──────────────────────────────┘
245
+ │ │
246
+ │ Thread-safe Queue │ Async WebSocket
247
+ └────────────────────────────────────┘
248
+ ```
249
+
250
+ - **Middleware**: Runs synchronously in the request/response cycle
251
+ - **Queue**: Thread-safe `queue.Queue` for passing events
252
+ - **Background Thread**: Daemon thread with its own event loop for async WebSocket communication
253
+
254
+ This architecture ensures:
255
+ - No blocking of HTTP requests
256
+ - No event loop conflicts in WSGI mode
257
+ - Automatic cleanup when the process exits (daemon thread)
258
+
259
+ ## Requirements
260
+
261
+ - Python 3.9+
262
+ - Django 4.0+
263
+ - websockets 12.0+
264
+ - requests 2.28+
265
+
266
+ ## Development
267
+
268
+ ```bash
269
+ # Clone the repository
270
+ git clone https://github.com/gpotrock/cerberus.git
271
+ cd cerberus
272
+
273
+ # Install development dependencies
274
+ pip install -e ".[dev]"
275
+
276
+ # Run tests
277
+ pytest
278
+
279
+ # Format code
280
+ black src/
281
+ ruff check src/ --fix
282
+
283
+ # Type checking
284
+ mypy src/
285
+ ```
286
+
287
+ ## License
288
+
289
+ MIT License - see [LICENSE](LICENSE) for details.
290
+
291
+ ## Contributing
292
+
293
+ Contributions are welcome! Please feel free to submit a Pull Request.
294
+
295
+ 1. Fork the repository
296
+ 2. Create your feature branch (`git checkout -b feature/amazing-feature`)
297
+ 3. Commit your changes (`git commit -m 'Add some amazing feature'`)
298
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
299
+ 5. Open a Pull Request
@@ -0,0 +1,257 @@
1
+ # Cerberus Django
2
+
3
+ [![PyPI version](https://badge.fury.io/py/cerberus-django.svg)](https://badge.fury.io/py/cerberus-django)
4
+ [![Python Versions](https://img.shields.io/pypi/pyversions/cerberus-django.svg)](https://pypi.org/project/cerberus-django/)
5
+ [![Django Versions](https://img.shields.io/badge/django-4.0%20%7C%204.1%20%7C%204.2%20%7C%205.0-blue.svg)](https://www.djangoproject.com/)
6
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
7
+
8
+ A Django middleware for capturing and streaming HTTP request metrics to a backend analytics server via WebSocket. Designed for high-performance, non-blocking operation in both WSGI and ASGI environments.
9
+
10
+ ## Features
11
+
12
+ - **Non-blocking**: Events are queued and sent asynchronously via a background thread
13
+ - **WSGI & ASGI Compatible**: Works with both synchronous and asynchronous Django deployments
14
+ - **Privacy-First**: Built-in HMAC-SHA256 hashing for PII (IP addresses) before transmission
15
+ - **Custom Metrics**: Attach application-specific metrics to any request
16
+ - **Automatic Reconnection**: WebSocket client handles connection failures gracefully
17
+ - **Minimal Configuration**: Only `ws_url`, `token`, and `client_id` are required; all other settings are optional
18
+
19
+ ## Installation
20
+
21
+ ```bash
22
+ pip install cerberus-django
23
+ ```
24
+
25
+ ## Quick Start
26
+
27
+ ### 1. Add to Django Settings
28
+
29
+ ```python
30
+ # settings.py
31
+
32
+ INSTALLED_APPS = [
33
+ # ... your apps
34
+ ]
35
+
36
+ MIDDLEWARE = [
37
+ 'django.middleware.security.SecurityMiddleware',
38
+ # ... other middleware
39
+ 'cerberus_django.CerberusMiddleware', # Add Cerberus
40
+ ]
41
+
42
+ # Cerberus Configuration
43
+ CERBERUS_CONFIG = {
44
+ 'ws_url': 'wss://your-analytics-server.com/ws/events',
45
+ 'token': 'your-api-key',
46
+ 'client_id': 'your-client-id',
47
+ }
48
+ ```
49
+
50
+ ### 2. That's It!
51
+
52
+ Cerberus will now capture metrics for every HTTP request and send them to your analytics backend.
53
+
54
+ ## Configuration
55
+
56
+ All configuration is done via the `CERBERUS_CONFIG` dictionary in your Django settings:
57
+
58
+ | Key | Required | Description |
59
+ |-----|----------|-------------|
60
+ | `ws_url` | Yes | WebSocket URL for the analytics backend |
61
+ | `token` | Yes | API key for authentication |
62
+ | `client_id` | Yes | Unique identifier for your application |
63
+ | `backend_url` | No | HTTP URL to auto-fetch the HMAC secret key |
64
+ | `secret_key` | No | HMAC secret key for PII hashing (auto-fetched if `backend_url` is set) |
65
+
66
+ ### Example Configurations
67
+
68
+ **Basic (no PII hashing):**
69
+ ```python
70
+ CERBERUS_CONFIG = {
71
+ 'ws_url': 'wss://analytics.example.com/ws/events',
72
+ 'token': 'sk-your-api-key',
73
+ 'client_id': 'my-django-app',
74
+ }
75
+ ```
76
+
77
+ **With automatic secret key fetching:**
78
+ ```python
79
+ CERBERUS_CONFIG = {
80
+ 'ws_url': 'wss://analytics.example.com/ws/events',
81
+ 'token': 'sk-your-api-key',
82
+ 'client_id': 'my-django-app',
83
+ 'backend_url': 'https://analytics.example.com', # Will fetch secret from /api/secret-key
84
+ }
85
+ ```
86
+
87
+ **With manual secret key:**
88
+ ```python
89
+ CERBERUS_CONFIG = {
90
+ 'ws_url': 'wss://analytics.example.com/ws/events',
91
+ 'token': 'sk-your-api-key',
92
+ 'client_id': 'my-django-app',
93
+ 'secret_key': 'your-hmac-secret-key', # For consistent PII hashing
94
+ }
95
+ ```
96
+
97
+ ## Custom Metrics
98
+
99
+ Attach custom metrics to any request by adding them to the response:
100
+
101
+ ```python
102
+ from rest_framework.decorators import api_view
103
+ from rest_framework.response import Response
104
+
105
+ @api_view(['GET'])
106
+ def my_endpoint(request):
107
+ # Your business logic
108
+ items = process_items()
109
+
110
+ response = Response({'items': items})
111
+
112
+ # Add custom metrics (will be included in the event)
113
+ response.data['_cerberus_metrics'] = {
114
+ 'items_processed': len(items),
115
+ 'cache_hit': True,
116
+ 'processing_time_ms': 42,
117
+ }
118
+
119
+ return response
120
+ ```
121
+
122
+ The `_cerberus_metrics` key is automatically extracted from the response and included in the event payload. It will not be sent to the client.
123
+
124
+ ## Event Payload
125
+
126
+ Each event sent to your analytics backend includes:
127
+
128
+ ```json
129
+ {
130
+ "api_key": "your-api-key",
131
+ "client_id": "your-client-id",
132
+ "token": "your-api-key",
133
+ "source_ip": "hashed-ip-address",
134
+ "endpoint": "/api/users/",
135
+ "scheme": true,
136
+ "method": "GET",
137
+ "custom_data": {
138
+ "items_processed": 10,
139
+ "cache_hit": true
140
+ }
141
+ }
142
+ ```
143
+
144
+ ## Privacy & Security
145
+
146
+ ### PII Hashing
147
+
148
+ When a `secret_key` is configured, source IP addresses are hashed using HMAC-SHA256 before transmission:
149
+
150
+ - **Consistent**: Same IP always produces the same hash (enabling analytics)
151
+ - **Irreversible**: Original IP cannot be recovered from the hash
152
+ - **Secure**: Uses cryptographically strong HMAC-SHA256
153
+
154
+ ### What's Captured
155
+
156
+ | Field | Description | Privacy |
157
+ |-------|-------------|---------|
158
+ | `source_ip` | Client IP address | Hashed if `secret_key` configured |
159
+ | `endpoint` | Request path | Sent as-is |
160
+ | `method` | HTTP method (GET, POST, etc.) | Sent as-is |
161
+ | `scheme` | Whether HTTPS was used | Sent as-is |
162
+ | `custom_data` | Your custom metrics | Sent as-is |
163
+
164
+ ### What's NOT Captured
165
+
166
+ - Request/response bodies
167
+ - HTTP headers
168
+ - Query parameters
169
+ - Cookies or session data
170
+ - Authentication tokens
171
+
172
+ ## Debug Mode
173
+
174
+ Enable debug logging to troubleshoot issues:
175
+
176
+ ```bash
177
+ export CERBERUS_DEBUG=true
178
+ ```
179
+
180
+ Or in your Django settings (the flag is read once, when `cerberus_django` is first imported, so set it before then):
181
+
182
+ ```python
183
+ import os
184
+ os.environ['CERBERUS_DEBUG'] = 'true'
185
+ ```
186
+
187
+ This will log:
188
+ - Middleware initialization
189
+ - WebSocket connection attempts
190
+ - Events being queued and sent
191
+ - Any errors encountered
192
+
193
+ ## Architecture
194
+
195
+ ```
196
+ ┌─────────────────────────┐ ┌──────────────────────────────┐
197
+ │ Django Request │ │ Background Thread │
198
+ │ (WSGI or ASGI) │ │ (Daemon) │
199
+ ├─────────────────────────┤ ├──────────────────────────────┤
200
+ │ CerberusMiddleware │ │ Event Loop │
201
+ │ └── queue.put(event) │────▶│ └── WebSocket.send() │
202
+ └─────────────────────────┘ └──────────────────────────────┘
203
+ │ │
204
+ │ Thread-safe Queue │ Async WebSocket
205
+ └────────────────────────────────────┘
206
+ ```
207
+
208
+ - **Middleware**: Runs synchronously in the request/response cycle
209
+ - **Queue**: Thread-safe `queue.Queue` for passing events
210
+ - **Background Thread**: Daemon thread with its own event loop for async WebSocket communication
211
+
212
+ This architecture ensures:
213
+ - No blocking of HTTP requests
214
+ - No event loop conflicts in WSGI mode
215
+ - Automatic cleanup when the process exits (daemon thread)
216
+
217
+ ## Requirements
218
+
219
+ - Python 3.9+
220
+ - Django 4.0+
221
+ - websockets 12.0+
222
+ - requests 2.28+
223
+
224
+ ## Development
225
+
226
+ ```bash
227
+ # Clone the repository
228
+ git clone https://github.com/gpotrock/cerberus.git
229
+ cd cerberus
230
+
231
+ # Install development dependencies
232
+ pip install -e ".[dev]"
233
+
234
+ # Run tests
235
+ pytest
236
+
237
+ # Format code
238
+ black src/
239
+ ruff check src/ --fix
240
+
241
+ # Type checking
242
+ mypy src/
243
+ ```
244
+
245
+ ## License
246
+
247
+ MIT License - see [LICENSE](LICENSE) for details.
248
+
249
+ ## Contributing
250
+
251
+ Contributions are welcome! Please feel free to submit a Pull Request.
252
+
253
+ 1. Fork the repository
254
+ 2. Create your feature branch (`git checkout -b feature/amazing-feature`)
255
+ 3. Commit your changes (`git commit -m 'Add some amazing feature'`)
256
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
257
+ 5. Open a Pull Request
@@ -0,0 +1,11 @@
1
#!/bin/bash
# Build the package from a clean tree and upload it to PyPI.
#
# The project directory defaults to the author's checkout and can be
# overridden by exporting CERBERUS_PROJECT_DIR.

# Stop at the first failure. Without this, a failed `cd` would let the
# `rm -rf` below run in whatever directory the script was started from, and
# a failed build could still be followed by an upload of partial artifacts.
set -euo pipefail

PROJECT_DIR="${CERBERUS_PROJECT_DIR:-/Users/griff/Documents/cerberus_code/cerberus}"
cd "$PROJECT_DIR"

# Clean old builds (the ./ prefix keeps odd file names from being parsed as options)
rm -rf dist/ build/ ./*.egg-info src/*.egg-info

# Build the package
python -m build

# Upload to PyPI
python -m twine upload dist/*
@@ -0,0 +1,95 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "cerberus-django"
7
+ version = "0.1.4"
8
+ description = "Django middleware for capturing and streaming HTTP request metrics via WebSocket"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ authors = [
12
+ { name = "Griffin Potrock" }
13
+ ]
14
+ keywords = [
15
+ "django",
16
+ "middleware",
17
+ "metrics",
18
+ "analytics",
19
+ "monitoring",
20
+ "websocket",
21
+ "async",
22
+ ]
23
+ classifiers = [
24
+ "Development Status :: 4 - Beta",
25
+ "Environment :: Web Environment",
26
+ "Framework :: Django",
27
+ "Framework :: Django :: 4.0",
28
+ "Framework :: Django :: 4.1",
29
+ "Framework :: Django :: 4.2",
30
+ "Framework :: Django :: 5.0",
31
+ "Intended Audience :: Developers",
32
+ "License :: OSI Approved :: MIT License",
33
+ "Operating System :: OS Independent",
34
+ "Programming Language :: Python :: 3",
35
+ "Programming Language :: Python :: 3.9",
36
+ "Programming Language :: Python :: 3.10",
37
+ "Programming Language :: Python :: 3.11",
38
+ "Programming Language :: Python :: 3.12",
39
+ "Topic :: Internet :: WWW/HTTP :: WSGI :: Middleware",
40
+ "Topic :: System :: Monitoring",
41
+ ]
42
+ requires-python = ">=3.9"
43
+ dependencies = [
44
+ "django>=4.0",
45
+ "websockets>=12.0",
46
+ "requests>=2.28.0",
47
+ ]
48
+
49
+ [project.optional-dependencies]
50
+ dev = [
51
+ "pytest>=7.0",
52
+ "pytest-asyncio>=0.21",
53
+ "pytest-django>=4.5",
54
+ "black>=23.0",
55
+ "ruff>=0.1",
56
+ "mypy>=1.0",
57
+ ]
58
+
59
+ [project.urls]
60
+ Homepage = "https://github.com/gpotrock/cerberus"
61
+ Documentation = "https://github.com/gpotrock/cerberus#readme"
62
+ Repository = "https://github.com/gpotrock/cerberus.git"
63
+ Issues = "https://github.com/gpotrock/cerberus/issues"
64
+
65
+ [tool.hatch.build.targets.sdist]
66
+ exclude = [
67
+ "/.github",
68
+ "/tests",
69
+ "CLAUDE.md",
70
+ ]
71
+
72
+ [tool.hatch.build.targets.wheel]
73
+ packages = ["src/cerberus_django"]
74
+
75
+ [tool.black]
76
+ line-length = 100
77
+ target-version = ["py39", "py310", "py311", "py312"]
78
+
79
+ [tool.ruff]
80
+ line-length = 100
81
+ target-version = "py39"
82
+
83
+ [tool.ruff.lint]
84
+ select = ["E", "F", "W", "I", "UP", "B", "C4"]
85
+
86
+ [tool.mypy]
87
+ python_version = "3.9"
88
+ warn_return_any = true
89
+ warn_unused_configs = true
90
+ ignore_missing_imports = true
91
+
92
+ [tool.pytest.ini_options]
93
+ DJANGO_SETTINGS_MODULE = "tests.settings"
94
+ python_files = ["test_*.py"]
95
+ asyncio_mode = "auto"
@@ -0,0 +1,13 @@
1
+ """
2
+ Cerberus Django - HTTP request metrics middleware
3
+
4
+ A Django middleware for capturing and streaming HTTP request metrics
5
+ to a backend analytics server via WebSocket.
6
+ """
7
+
8
+ from .middleware import CerberusMiddleware
9
+ from .structs import CoreData
10
+ from .utils import hash_pii
11
+
12
+ __version__ = "0.1.4"
13
+ __all__ = ["CerberusMiddleware", "CoreData", "hash_pii", "__version__"]
@@ -0,0 +1,410 @@
1
+ """
2
+ Cerberus Django Middleware
3
+
4
+ Captures HTTP request metrics and sends them asynchronously to a backend
5
+ analytics server via WebSocket.
6
+
7
+ This middleware is designed to work in both WSGI (synchronous) and ASGI
8
+ (asynchronous) Django deployments without requiring an event loop at import time.
9
+
10
+ Architecture:
11
+ - Middleware (sync): Captures request data and puts it in a thread-safe queue
12
+ - Background thread: Runs its own event loop to process queue and send via WebSocket
13
+ """
14
+
15
+ from .structs import CoreData
16
+ from .utils import fetch_secret_key
17
+ from django.conf import settings
18
+ import asyncio
19
+ import json
20
+ import os
21
+ import logging
22
+ import threading
23
+ import queue as thread_queue
24
+ from datetime import datetime, timezone
25
+ import websockets
26
+
27
+ # Module-level logger, named after this module
28
+ logger = logging.getLogger(__name__)
29
+
30
+ # Debug logging switch, read once at import time: on when CERBERUS_DEBUG is 'true', '1' or 'yes' (case-insensitive)
31
+ DEBUG_ENABLED = os.getenv('CERBERUS_DEBUG', 'false').lower() in ('true', '1', 'yes')
32
+
33
+ # Thread-safe queue for events (no event loop required at import time); no maxsize is given, so it is unbounded
34
+ event_queue = thread_queue.Queue()
35
+
36
+ # Background thread management: ensure_background_thread() creates the thread while holding _thread_lock
37
+ _background_thread = None
38
+ _thread_lock = threading.Lock()
39
+
40
+
41
class AsyncWebSocketClient:
    """WebSocket client for sending events to the backend.

    This client is used within the background thread's event loop,
    so it can safely use asyncio primitives. The asyncio lock is created
    lazily (see ``_get_lock``) so that constructing the client does not
    need an event loop.
    """

    def __init__(self, ws_url, api_key, client_id):
        """Store connection settings; no connection is opened here.

        Args:
            ws_url: WebSocket URL handed to ``websockets.connect``.
            api_key: Sent as ``api_key`` in every payload.
            client_id: Sent as ``client_id`` in every payload.
        """
        self.ws_url = ws_url
        self.api_key = api_key
        self.client_id = client_id
        self.websocket = None
        self._async_lock = None  # Created lazily within event loop context

    async def _get_lock(self):
        """Get or create async lock within the event loop context."""
        if self._async_lock is None:
            self._async_lock = asyncio.Lock()
        return self._async_lock

    async def connect(self):
        """Establish WebSocket connection to the backend.

        Failures are logged rather than raised; ``self.websocket`` is left
        as ``None`` so callers can tell that no connection is available.
        """
        try:
            if DEBUG_ENABLED:
                logger.info("[Cerberus] Connecting to WebSocket: %s", self.ws_url)
            self.websocket = await websockets.connect(self.ws_url)
            if DEBUG_ENABLED:
                logger.info("[Cerberus] WebSocket connected successfully")
        except Exception as e:
            self.websocket = None
            logger.error("[Cerberus] Failed to connect to WebSocket: %s", e)

    def _build_payload(self, event_data):
        """Map an event object onto the dict layout sent to the backend."""
        return {
            'api_key': self.api_key,
            'client_id': self.client_id,
            'token': event_data.token,
            'remote_addr': event_data.source_ip,  # Backend expects 'remote_addr'
            'endpoint': event_data.endpoint,
            'scheme': event_data.scheme,
            'method': event_data.method,
            'timestamp': event_data.timestamp,
            'custom_data': event_data.custom_data,
            # Additional request details
            'headers': event_data.headers,
            'query_params': event_data.query_params,
            'body': event_data.body,
            'user_agent': event_data.user_agent,
        }

    async def send(self, event_data):
        """Send event data to backend via WebSocket.

        The event is serialized first; if that fails the event is dropped and
        the connection is left untouched. Otherwise the client connects if
        needed, sends the JSON text and waits up to 5 seconds for one reply.
        Transport errors are logged and never raised to the caller.

        Args:
            event_data: CoreData object to send
        """
        # Serialize before touching the connection: an event that cannot be
        # JSON-encoded is a data problem, so it must not tear down a healthy
        # WebSocket the way a transport error does.
        try:
            json_data = json.dumps(self._build_payload(event_data))
        except (AttributeError, TypeError, ValueError) as e:
            logger.error("[Cerberus] Could not serialize event, dropping it: %s", e)
            return

        lock = await self._get_lock()
        async with lock:
            # Connect if not already connected
            if self.websocket is None:
                await self.connect()

            if not self.websocket:
                # connect() already logged the failure; the event is dropped.
                return

            try:
                if DEBUG_ENABLED:
                    logger.info("[Cerberus] Sending event to backend: %s...", json_data[:200])

                await self.websocket.send(json_data)

                # Wait for acknowledgment
                response = await asyncio.wait_for(self.websocket.recv(), timeout=5.0)

                if DEBUG_ENABLED:
                    logger.info("[Cerberus] Backend response: %s", response)

            except asyncio.TimeoutError:
                # NOTE(review): the connection is kept open here, so a late
                # acknowledgment may be read as the reply to the next event.
                # Confirm whether the backend protocol tolerates that.
                logger.warning("[Cerberus] Timeout waiting for backend response")
            except websockets.exceptions.ConnectionClosed:
                logger.warning("[Cerberus] WebSocket connection closed, will reconnect on next send")
                self.websocket = None
            except Exception as e:
                logger.error("[Cerberus] Error sending data: %s", e)
                if self.websocket:
                    try:
                        await self.websocket.close()
                    except Exception:
                        # Best-effort close; the reference is discarded below
                        # either way so the next send reconnects.
                        pass
                self.websocket = None
131
+
132
+
133
+ # WebSocket client - initialized in middleware __init__, used by background thread
134
+ WS_CLIENT = None
135
+
136
+
137
def _queue_get_with_timeout():
    """Wait up to one second for the next item on ``event_queue``.

    ``run_in_executor`` needs a zero-argument callable, so the blocking
    flag and the timeout are fixed here instead of being passed by the caller.

    Returns:
        The next item taken from the queue.

    Raises:
        queue.Empty: If no item arrives within the timeout.
    """
    wait_seconds = 1.0
    return event_queue.get(True, wait_seconds)
147
+
148
+
149
async def _process_queue_async():
    """Async coroutine that processes events from the thread-safe queue.

    Runs continuously in the background thread's event loop. Each item is
    fetched through the loop's default executor so the blocking ``get`` never
    stalls the loop, then handed to ``WS_CLIENT.send``. A ``None`` item is the
    shutdown signal and ends the coroutine.

    Every item taken from the queue, including the shutdown signal, is
    acknowledged with ``task_done()`` so that ``event_queue.join()`` can
    return.
    """
    if DEBUG_ENABLED:
        logger.info("[Cerberus] Background queue processor started")

    # We are inside a running coroutine, so ask for the running loop directly.
    loop = asyncio.get_running_loop()

    while True:
        try:
            # Use run_in_executor to get from sync queue without blocking event loop
            data = await loop.run_in_executor(None, _queue_get_with_timeout)
        except thread_queue.Empty:
            # No events available, continue waiting
            continue
        except Exception as e:
            logger.error(f"[Cerberus] Error getting from queue: {e}")
            # Pause briefly so a persistent failure cannot spin the loop
            # and flood the log.
            await asyncio.sleep(1.0)
            continue

        # Check for shutdown signal (None means stop)
        if data is None:
            if DEBUG_ENABLED:
                logger.info("[Cerberus] Received shutdown signal, stopping processor")
            # The sentinel was taken with get(), so it must be acknowledged
            # too; otherwise event_queue.join() would block forever.
            event_queue.task_done()
            break

        try:
            # WS_CLIENT is looked up on every event, so a client assigned
            # after this coroutine started is picked up.
            if WS_CLIENT:
                if DEBUG_ENABLED:
                    logger.info(f"[Cerberus] Processing event for endpoint: {data.endpoint}")
                await WS_CLIENT.send(data)
            else:
                logger.warning("[Cerberus] WebSocket client not initialized, skipping event")
        except Exception as e:
            logger.error(f"[Cerberus] Failed to send event: {e}")
        finally:
            event_queue.task_done()
189
+
190
+
191
def _run_event_loop_in_thread():
    """Thread target: drive the queue processor on a private event loop.

    A fresh loop is created and installed as this thread's current loop,
    used to run ``_process_queue_async`` to completion, and always closed
    afterwards. Exceptions escaping the processor are logged, not re-raised.
    """
    thread_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(thread_loop)

    if DEBUG_ENABLED:
        logger.info("[Cerberus] Background thread started with new event loop")

    try:
        processor = _process_queue_async()
        thread_loop.run_until_complete(processor)
    except Exception as e:
        logger.error(f"[Cerberus] Background event loop error: {e}")
    finally:
        thread_loop.close()
        if DEBUG_ENABLED:
            logger.info("[Cerberus] Background thread event loop closed")
210
+
211
+
212
def ensure_background_thread():
    """Make sure the event-sender thread is running, starting it if needed.

    The check and the start happen while holding ``_thread_lock``, so
    concurrent callers cannot start two threads. The thread is created as a
    daemon, so it does not keep the process alive at exit.
    """
    global _background_thread

    with _thread_lock:
        existing = _background_thread
        if existing is not None and existing.is_alive():
            return

        sender = threading.Thread(
            name="cerberus-event-sender",
            target=_run_event_loop_in_thread,
            daemon=True,  # Auto-shutdown when main process exits
        )
        _background_thread = sender
        sender.start()

        if DEBUG_ENABLED:
            logger.info("[Cerberus] Started background event sender thread")
234
+
235
+
236
def _extract_headers(request):
    """Build a header dict from the request's ``META`` mapping.

    ``META`` keys starting with ``HTTP_`` have that prefix removed; the keys
    ``CONTENT_TYPE`` and ``CONTENT_LENGTH`` are taken as they are. In both
    cases underscores become hyphens and the name is title-cased (for
    example ``HTTP_USER_AGENT`` -> ``User-Agent``). Every other ``META``
    entry is ignored.

    Args:
        request: Object exposing a ``META`` mapping (Django HttpRequest)

    Returns:
        Dict of header name -> value, or None when no header was found
    """
    unprefixed_keys = ('CONTENT_TYPE', 'CONTENT_LENGTH')
    collected = {}

    for meta_key, meta_value in request.META.items():
        if meta_key.startswith('HTTP_'):
            raw_name = meta_key[5:]  # strip the "HTTP_" prefix
        elif meta_key in unprefixed_keys:
            raw_name = meta_key
        else:
            # Server/environment variable, not a request header.
            continue
        collected[raw_name.replace('_', '-').title()] = meta_value

    return collected or None
259
+
260
+
261
def _extract_query_params(request):
    """Collect the query-string parameters of a request.

    Args:
        request: Object exposing a ``GET`` mapping that supports
            ``getlist`` (Django HttpRequest)

    Returns:
        None when ``request.GET`` is empty. Otherwise a dict mapping each
        parameter name to its single value, or to the full list of values
        when the parameter does not have exactly one value.
    """
    query = request.GET
    if not query:
        return None

    def _collapse(found):
        # A lone value is unwrapped; anything else stays a list.
        return found[0] if len(found) == 1 else found

    return {name: _collapse(query.getlist(name)) for name in query}
278
+
279
+
280
def _extract_body(request):
    """Extract and parse a JSON request body from a Django request.

    Only POST, PUT and PATCH requests whose content type contains
    ``application/json`` are considered. Parsing is best-effort: any body
    that cannot be decoded as UTF-8 JSON yields None instead of raising,
    so that capturing metrics never breaks the request being served.

    Args:
        request: Django HttpRequest object

    Returns:
        The parsed JSON value (typically a dict, but any valid JSON value
        such as a list is returned as parsed), or None when the request has
        no usable JSON body.
    """
    if request.method not in ('POST', 'PUT', 'PATCH'):
        return None

    content_type = request.content_type or ''
    if 'application/json' not in content_type:
        return None

    raw_body = request.body
    if not raw_body:
        return None

    try:
        return json.loads(raw_body.decode('utf-8'))
    except (json.JSONDecodeError, UnicodeDecodeError, RecursionError):
        # RecursionError: the body comes from an untrusted client, and a
        # pathologically nested document (e.g. thousands of "[") overflows
        # the decoder's recursion limit. Treat it like any other
        # unparseable body rather than letting it escape.
        return None
305
+
306
+
307
class CerberusMiddleware:
    """Django middleware that records each HTTP request as a Cerberus event.

    Implemented as a synchronous callable middleware: every request is
    turned into a ``CoreData`` record and placed on ``event_queue``; the
    actual transmission is left to the background sender thread.

    Configuration is read from ``CERBERUS_CONFIG`` in Django settings:
        - token: API key for authentication
        - client_id: Client identifier
        - ws_url: WebSocket URL for event_ingest backend
        - backend_url: HTTP URL for fetching secret key (optional)
        - secret_key: HMAC key for PII hashing (optional, auto-fetched if
          backend_url set)
    """

    def __init__(self, get_response):
        """Load configuration, set up the WebSocket client, start the sender.

        Args:
            get_response: The next callable in Django's middleware chain
        """
        global WS_CLIENT

        self.get_response = get_response
        self.config = getattr(settings, 'CERBERUS_CONFIG', {})
        cfg = self.config  # same dict object; writes below land in self.config

        if DEBUG_ENABLED:
            logger.info("[Cerberus] Middleware initializing...")
            logger.info(f"[Cerberus] Config keys: {list(cfg.keys())}")

        # Fetch the secret key remotely only when it is not configured
        # locally and a backend URL is available.
        needs_remote_key = 'backend_url' in cfg and 'secret_key' not in cfg
        if needs_remote_key:
            backend_url = cfg['backend_url']
            if DEBUG_ENABLED:
                logger.info(f"[Cerberus] Fetching secret key from backend: {backend_url}")
            fetched_key = fetch_secret_key(backend_url, cfg.get('token', ''))
            if fetched_key:
                cfg['secret_key'] = fetched_key
                logger.info(f"[Cerberus] Successfully fetched secret key from {backend_url}")
            else:
                logger.warning("[Cerberus] Failed to fetch secret key. PII will not be hashed.")

        # The WebSocket client needs all three settings to be present.
        required_keys = ('ws_url', 'token', 'client_id')
        if all(name in cfg for name in required_keys):
            WS_CLIENT = AsyncWebSocketClient(
                cfg['ws_url'],
                cfg['token'],
                cfg['client_id'],
            )
            if DEBUG_ENABLED:
                logger.info(f"[Cerberus] WebSocket client initialized: {cfg['ws_url']}")
        else:
            logger.warning("[Cerberus] WebSocket client not initialized. Missing ws_url, token, or client_id in CERBERUS_CONFIG")

        # Start background thread for processing events
        ensure_background_thread()

    def __call__(self, request):
        """Handle one request and enqueue its metrics event.

        Request details are captured before the rest of the chain runs; the
        event itself is built after the response is available and handed to
        ``event_queue`` without blocking. When the queue is full the event
        is dropped with a warning.

        Args:
            request: Django HttpRequest object

        Returns:
            The response produced by ``get_response``
        """
        # Per-request scratch dict exposed on the request object (this
        # method does not read it back).
        request.cerberus_metrics = {}

        # Capture request details before handing the request down the chain.
        captured_headers = _extract_headers(request)
        captured_query = _extract_query_params(request)
        captured_body = _extract_body(request)
        agent = request.META.get('HTTP_USER_AGENT')

        response = self.get_response(request)

        # Custom metrics travel in a dict-valued ``response.data`` under the
        # '_cerberus_metrics' key and are removed from it here.
        payload = getattr(response, 'data', None)
        if isinstance(payload, dict) and '_cerberus_metrics' in payload:
            custom_metrics = payload.pop('_cerberus_metrics')
        else:
            custom_metrics = {}

        client_ip = request.META.get('REMOTE_ADDR')

        event = CoreData(
            token=self.config.get('token', ''),
            source_ip=client_ip,
            endpoint=request.path,
            scheme=request.scheme == 'https',
            method=request.method,
            timestamp=datetime.now(timezone.utc).isoformat(),
            custom_data=custom_metrics,
            headers=captured_headers,
            query_params=captured_query,
            body=captured_body,
            user_agent=agent,
        )

        # Non-blocking hand-off to the background sender.
        try:
            event_queue.put_nowait(event)
        except thread_queue.Full:
            logger.warning("[Cerberus] Event queue full, dropping event")
        else:
            if DEBUG_ENABLED:
                logger.info(f"[Cerberus] Queued event: {request.method} {request.path}")

        return response
@@ -0,0 +1,23 @@
1
+ from dataclasses import dataclass, field
2
+ from typing import Dict, Optional
3
+
4
+
5
@dataclass
class CoreData:
    """Data structure for HTTP request metrics.

    One instance describes a single observed HTTP request. The first six
    fields are required; the remaining ones default to None.
    """
    # API token identifying the sending client
    token: str
    # Client address; Optional because the middleware fills it with
    # request.META.get('REMOTE_ADDR'), which yields None when the key is absent
    source_ip: Optional[str]
    # Request path
    endpoint: str
    # Boolean scheme flag; the middleware passes `request.scheme == 'https'`
    scheme: bool
    # HTTP method name
    method: str
    # ISO 8601 format timestamp
    timestamp: str
    # Application-supplied metrics attached to the event
    custom_data: Optional[Dict] = None

    # Additional request details
    headers: Optional[Dict] = None
    query_params: Optional[Dict] = None
    body: Optional[Dict] = None
    user_agent: Optional[str] = None
@@ -0,0 +1,70 @@
1
+ import hmac
2
+ import hashlib
3
+ import requests
4
+ import os
5
+ import logging
6
+ from typing import Optional
7
+
8
# Module-level logger used by the helpers in this file
logger = logging.getLogger(__name__)

# Verbose diagnostics are opt-in through the CERBERUS_DEBUG environment
# variable; "true", "1" and "yes" (case-insensitive) switch them on.
DEBUG_ENABLED = os.environ.get('CERBERUS_DEBUG', 'false').lower() in {'true', '1', 'yes'}
13
+
14
def hash_pii(value, secret_key):
    """
    Pseudonymize a PII value with a keyed HMAC-SHA256 digest.

    The same (value, secret_key) pair always produces the same digest.

    Args:
        value: The PII to hash (e.g., IP address); str values are encoded
            as UTF-8, other values are passed to HMAC unchanged
        secret_key: Secret key for HMAC; str values are encoded as UTF-8

    Returns:
        Hex-encoded HMAC digest string, or None when value is None
    """
    if value is None:
        return None

    # Normalise text inputs to bytes; anything else goes through untouched.
    message = value.encode('utf-8') if isinstance(value, str) else value
    key = secret_key.encode('utf-8') if isinstance(secret_key, str) else secret_key

    mac = hmac.new(key, message, hashlib.sha256)
    return mac.hexdigest()
35
+
36
def fetch_secret_key(backend_url: str, api_key: str, timeout: int = 5) -> Optional[str]:
    """
    Fetch the shared HMAC secret key from the backend server.

    Issues a GET request to ``<backend_url>/api/secret-key`` with the API key
    in the ``X-API-Key`` header and reads ``secret_key`` from the JSON
    response. Network, HTTP-status and response-parsing failures are all
    logged and reported as None; none of them is raised to the caller.

    Args:
        backend_url: Base URL of the backend server (e.g., 'https://cerberus.example.com')
        api_key: Client API key for authentication
        timeout: Request timeout in seconds (default: 5)

    Returns:
        The value of ``secret_key`` in the response, or None if the fetch
        fails, the response is not a JSON object, or the key is absent
    """
    url = f"{backend_url.rstrip('/')}/api/secret-key"
    if DEBUG_ENABLED:
        logger.info(f"[Cerberus] Making HTTP request to fetch secret key: {url}")

    try:
        response = requests.get(
            url,
            headers={'X-API-Key': api_key},
            timeout=timeout
        )

        if DEBUG_ENABLED:
            logger.info(f"[Cerberus] Secret key fetch response: {response.status_code}")

        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError: depending on the installed requests version, a body
        # that is not valid JSON surfaces as a plain ValueError subclass
        # rather than a RequestException.
        logger.error(f"[Cerberus] Failed to fetch secret key from {backend_url}: {e}")
        return None

    if not isinstance(data, dict):
        # Valid JSON but not an object (e.g. a list or string): there is no
        # 'secret_key' member to read.
        logger.error(f"[Cerberus] Failed to fetch secret key from {backend_url}: response is not a JSON object")
        return None

    return data.get('secret_key')