django-health-check 3.23.3__tar.gz → 4.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {django_health_check-3.23.3 → django_health_check-4.0rc1}/PKG-INFO +9 -5
- django_health_check-4.0rc1/health_check/__init__.py +21 -0
- {django_health_check-3.23.3 → django_health_check-4.0rc1}/health_check/_version.py +3 -3
- django_health_check-4.0rc1/health_check/base.py +93 -0
- django_health_check-4.0rc1/health_check/checks.py +329 -0
- django_health_check-4.0rc1/health_check/contrib/celery.py +70 -0
- django_health_check-4.0rc1/health_check/contrib/kafka.py +69 -0
- django_health_check-4.0rc1/health_check/contrib/rabbitmq.py +43 -0
- django_health_check-4.0rc1/health_check/contrib/redis.py +63 -0
- django_health_check-4.0rc1/health_check/contrib/rss.py +113 -0
- {django_health_check-3.23.3 → django_health_check-4.0rc1}/health_check/exceptions.py +6 -9
- django_health_check-4.0rc1/health_check/management/commands/health_check.py +46 -0
- {django_health_check-3.23.3 → django_health_check-4.0rc1}/health_check/templates/health_check/index.html +61 -43
- django_health_check-4.0rc1/health_check/views.py +287 -0
- {django_health_check-3.23.3 → django_health_check-4.0rc1}/pyproject.toml +11 -11
- django_health_check-3.23.3/health_check/__init__.py +0 -30
- django_health_check-3.23.3/health_check/backends.py +0 -101
- django_health_check-3.23.3/health_check/cache/apps.py +0 -14
- django_health_check-3.23.3/health_check/cache/backends.py +0 -50
- django_health_check-3.23.3/health_check/conf.py +0 -8
- django_health_check-3.23.3/health_check/contrib/celery/__init__.py +0 -3
- django_health_check-3.23.3/health_check/contrib/celery/apps.py +0 -31
- django_health_check-3.23.3/health_check/contrib/celery/backends.py +0 -46
- django_health_check-3.23.3/health_check/contrib/celery/tasks.py +0 -6
- django_health_check-3.23.3/health_check/contrib/celery_ping/apps.py +0 -19
- django_health_check-3.23.3/health_check/contrib/celery_ping/backends.py +0 -74
- django_health_check-3.23.3/health_check/contrib/db_heartbeat/__init__.py +0 -0
- django_health_check-3.23.3/health_check/contrib/db_heartbeat/apps.py +0 -19
- django_health_check-3.23.3/health_check/contrib/db_heartbeat/backends.py +0 -44
- django_health_check-3.23.3/health_check/contrib/mail/__init__.py +0 -0
- django_health_check-3.23.3/health_check/contrib/mail/apps.py +0 -19
- django_health_check-3.23.3/health_check/contrib/mail/backends.py +0 -61
- django_health_check-3.23.3/health_check/contrib/migrations/__init__.py +0 -0
- django_health_check-3.23.3/health_check/contrib/migrations/apps.py +0 -19
- django_health_check-3.23.3/health_check/contrib/migrations/backends.py +0 -31
- django_health_check-3.23.3/health_check/contrib/psutil/__init__.py +0 -0
- django_health_check-3.23.3/health_check/contrib/psutil/apps.py +0 -36
- django_health_check-3.23.3/health_check/contrib/psutil/backends.py +0 -63
- django_health_check-3.23.3/health_check/contrib/rabbitmq/__init__.py +0 -3
- django_health_check-3.23.3/health_check/contrib/rabbitmq/apps.py +0 -19
- django_health_check-3.23.3/health_check/contrib/rabbitmq/backends.py +0 -57
- django_health_check-3.23.3/health_check/contrib/redis/__init__.py +0 -3
- django_health_check-3.23.3/health_check/contrib/redis/apps.py +0 -19
- django_health_check-3.23.3/health_check/contrib/redis/backends.py +0 -75
- django_health_check-3.23.3/health_check/contrib/s3boto3_storage/__init__.py +0 -0
- django_health_check-3.23.3/health_check/contrib/s3boto3_storage/apps.py +0 -19
- django_health_check-3.23.3/health_check/contrib/s3boto3_storage/backends.py +0 -32
- django_health_check-3.23.3/health_check/contrib/s3boto_storage/__init__.py +0 -0
- django_health_check-3.23.3/health_check/contrib/s3boto_storage/apps.py +0 -20
- django_health_check-3.23.3/health_check/contrib/s3boto_storage/backends.py +0 -27
- django_health_check-3.23.3/health_check/db/__init__.py +0 -0
- django_health_check-3.23.3/health_check/db/apps.py +0 -20
- django_health_check-3.23.3/health_check/db/backends.py +0 -23
- django_health_check-3.23.3/health_check/db/migrations/0001_initial.py +0 -34
- django_health_check-3.23.3/health_check/db/migrations/0002_alter_testmodel_options.py +0 -32
- django_health_check-3.23.3/health_check/db/migrations/__init__.py +0 -0
- django_health_check-3.23.3/health_check/db/models.py +0 -9
- django_health_check-3.23.3/health_check/deprecation.py +0 -35
- django_health_check-3.23.3/health_check/management/__init__.py +0 -0
- django_health_check-3.23.3/health_check/management/commands/__init__.py +0 -0
- django_health_check-3.23.3/health_check/management/commands/health_check.py +0 -92
- django_health_check-3.23.3/health_check/mixins.py +0 -86
- django_health_check-3.23.3/health_check/plugins.py +0 -25
- django_health_check-3.23.3/health_check/storage/__init__.py +0 -0
- django_health_check-3.23.3/health_check/storage/apps.py +0 -12
- django_health_check-3.23.3/health_check/storage/backends.py +0 -73
- django_health_check-3.23.3/health_check/urls.py +0 -18
- django_health_check-3.23.3/health_check/views.py +0 -186
- {django_health_check-3.23.3 → django_health_check-4.0rc1}/LICENSE +0 -0
- {django_health_check-3.23.3 → django_health_check-4.0rc1}/README.md +0 -0
- {django_health_check-3.23.3/health_check/cache → django_health_check-4.0rc1/health_check/contrib}/__init__.py +0 -0
- {django_health_check-3.23.3/health_check/contrib → django_health_check-4.0rc1/health_check/management}/__init__.py +0 -0
- {django_health_check-3.23.3/health_check/contrib/celery_ping → django_health_check-4.0rc1/health_check/management/commands}/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: django-health-check
|
|
3
|
-
Version:
|
|
3
|
+
Version: 4.0rc1
|
|
4
4
|
Summary: Monitor the health of your Django app and its connected services.
|
|
5
5
|
Keywords: django,postgresql
|
|
6
6
|
Author-email: Kristian Ollegaard <kristian@oellegaard.com>, Johannes Maron <johannes@maron.family>
|
|
@@ -8,7 +8,6 @@ Requires-Python: >=3.10
|
|
|
8
8
|
Description-Content-Type: text/markdown
|
|
9
9
|
Classifier: Development Status :: 5 - Production/Stable
|
|
10
10
|
Classifier: Framework :: Django
|
|
11
|
-
Classifier: Framework :: Django :: 4.2
|
|
12
11
|
Classifier: Framework :: Django :: 5.2
|
|
13
12
|
Classifier: Framework :: Django :: 6.0
|
|
14
13
|
Classifier: Intended Audience :: Developers
|
|
@@ -27,11 +26,14 @@ Classifier: Topic :: System :: Logging
|
|
|
27
26
|
Classifier: Topic :: System :: Monitoring
|
|
28
27
|
Classifier: Topic :: Utilities
|
|
29
28
|
License-File: LICENSE
|
|
30
|
-
Requires-Dist: Django>=
|
|
29
|
+
Requires-Dist: Django>=5.2
|
|
30
|
+
Requires-Dist: dnspython>=2.0.0
|
|
31
31
|
Requires-Dist: psutil
|
|
32
32
|
Requires-Dist: celery>=5.0.0 ; extra == "celery"
|
|
33
|
-
Requires-Dist:
|
|
34
|
-
Requires-Dist:
|
|
33
|
+
Requires-Dist: confluent-kafka>=2.0.0 ; extra == "kafka"
|
|
34
|
+
Requires-Dist: aio-pika>=9.0.0 ; extra == "rabbitmq"
|
|
35
|
+
Requires-Dist: redis>=4.2.0 ; extra == "redis"
|
|
36
|
+
Requires-Dist: httpx>=0.27.0 ; extra == "rss"
|
|
35
37
|
Project-URL: Changelog, https://github.com/codingjoe/django-health-check/releases
|
|
36
38
|
Project-URL: Documentation, https://codingjoe.dev/django-health-check/
|
|
37
39
|
Project-URL: Homepage, https://codingjoe.dev/django-health-check/
|
|
@@ -39,8 +41,10 @@ Project-URL: Issues, https://github.com/codingjoe/django-health-check/issues
|
|
|
39
41
|
Project-URL: Releasenotes, https://github.com/codingjoe/django-health-check/releases/latest
|
|
40
42
|
Project-URL: Source, https://github.com/codingjoe/django-health-check
|
|
41
43
|
Provides-Extra: celery
|
|
44
|
+
Provides-Extra: kafka
|
|
42
45
|
Provides-Extra: rabbitmq
|
|
43
46
|
Provides-Extra: redis
|
|
47
|
+
Provides-Extra: rss
|
|
44
48
|
|
|
45
49
|
<p align="center">
|
|
46
50
|
<picture>
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Monitor the health of your Django app and its connected services."""
|
|
2
|
+
|
|
3
|
+
from . import _version  # noqa
from .base import HealthCheck
from .checks import (
    DNS,
    Cache,
    Database,
    Disk,
    Mail,
    Memory,
    Storage,
)

# Version metadata is re-exported from the generated ``_version`` module.
VERSION = _version.__version_tuple__
__version__ = _version.__version__

# Public API of the package: version metadata, the base class and the
# built-in checks.
__all__ = [
    "__version__",
    "VERSION",
    "HealthCheck",
    "Cache",
    "DNS",
    "Database",
    "Disk",
    "Mail",
    "Memory",
    "Storage",
]
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '
|
|
32
|
-
__version_tuple__ = version_tuple = (
|
|
31
|
+
__version__ = version = '4.0rc1'
|
|
32
|
+
__version_tuple__ = version_tuple = (4, 0, 'rc1')
|
|
33
33
|
|
|
34
|
-
__commit_id__ = commit_id = '
|
|
34
|
+
__commit_id__ = commit_id = 'g9814c8393'
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import abc
|
|
4
|
+
import asyncio
|
|
5
|
+
import dataclasses
|
|
6
|
+
import inspect
|
|
7
|
+
import logging
|
|
8
|
+
import timeit
|
|
9
|
+
|
|
10
|
+
from health_check.exceptions import HealthCheckException
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclasses.dataclass
class HealthCheckResult:
    """
    Outcome of running a single health check.

    Attributes:
        check: The health check instance that produced this result.
        error: The exception describing the failure, or ``None`` if the check passed.
        time_taken: Duration of the run as measured with ``timeit.default_timer``.

    """

    check: HealthCheck
    error: HealthCheckException | None
    time_taken: float
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclasses.dataclass
class HealthCheck(abc.ABC):
    """
    Base class for defining health checks.

    Subclasses should implement the `run` method to perform the actual health check logic.
    The `run` method can be either synchronous or asynchronous.

    Examples:
        >>> import dataclasses
        >>> from health_check.base import HealthCheck
        >>>
        >>> @dataclasses.dataclass
        ... class MyHealthCheck(HealthCheck):
        ...
        ...     async def run(self):
        ...         # Implement health check logic here
        ...         ...

    Subclasses should be [dataclasses][dataclasses.dataclass] or implement their own `__repr__` method
    to provide meaningful representations in health check reports.

    Warning:
        The `__repr__` method is used in health check reports.
        Consider setting `repr=False` for sensitive dataclass fields
        to avoid leaking sensitive information or credentials.

    """

    @abc.abstractmethod
    async def run(self) -> None:
        """
        Run the health check logic and raise human-readable exceptions as needed.

        Failures must be raised as `HealthCheckException` (or a subclass) to indicate
        the health status and provide context. Any other exception is caught by
        `get_result`, logged, and replaced with a generic error message so that
        internal details are not exposed.

        Warning:
            Exception messages must not contain sensitive information.

        Raises:
            ServiceWarning: If the service is at a critical state but still operational.
            ServiceUnavailable: If the service is not operational.
            ServiceReturnedUnexpectedResult: If the check performs a computation that returns an unexpected result.

        """
        ...

    def pretty_status(self) -> str:
        """Return human-readable status string, always 'OK' for the check itself."""
        return "OK"

    async def get_result(self: HealthCheck) -> HealthCheckResult:
        """
        Execute `run` and wrap the outcome in a `HealthCheckResult`.

        Coroutine implementations of `run` are awaited directly; synchronous
        implementations are executed via `asyncio.to_thread`.

        Returns:
            A result holding this check, the raised `HealthCheckException`
            (or ``None`` on success) and the elapsed time.

        """
        start = timeit.default_timer()
        try:
            if inspect.iscoroutinefunction(self.run):
                await self.run()
            else:
                await asyncio.to_thread(self.run)
        except HealthCheckException as e:
            error = e
        except Exception:
            # Only ``Exception`` is handled here: ``asyncio.CancelledError``,
            # ``KeyboardInterrupt`` and ``SystemExit`` derive from
            # ``BaseException`` and must propagate so that task cancellation
            # and interpreter shutdown keep working.
            logger.exception("Unexpected exception during health check")
            error = HealthCheckException("unknown error")
        else:
            error = None
        return HealthCheckResult(
            check=self,
            error=error,
            time_taken=timeit.default_timer() - start,
        )
|
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
"""Health check implementations for Django built-in services."""
|
|
2
|
+
|
|
3
|
+
import dataclasses
|
|
4
|
+
import datetime
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import pathlib
|
|
8
|
+
import smtplib
|
|
9
|
+
import socket
|
|
10
|
+
import uuid
|
|
11
|
+
|
|
12
|
+
import dns.asyncresolver
|
|
13
|
+
import psutil
|
|
14
|
+
from django import db
|
|
15
|
+
from django.conf import settings
|
|
16
|
+
from django.core.cache import CacheKeyWarning, caches
|
|
17
|
+
from django.core.files.base import ContentFile
|
|
18
|
+
from django.core.files.storage import Storage as DjangoStorage
|
|
19
|
+
from django.core.files.storage import storages
|
|
20
|
+
from django.core.mail import get_connection
|
|
21
|
+
from django.core.mail.backends.base import BaseEmailBackend
|
|
22
|
+
from django.db import connections
|
|
23
|
+
from django.db.models import Expression
|
|
24
|
+
|
|
25
|
+
from health_check.base import HealthCheck
|
|
26
|
+
from health_check.exceptions import (
|
|
27
|
+
ServiceReturnedUnexpectedResult,
|
|
28
|
+
ServiceUnavailable,
|
|
29
|
+
ServiceWarning,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
try:
    # Redis exceptions do not derive from builtins such as ConnectionError.
    # Besides connection failures (ConnectionError -> RedisError), the client
    # may also raise on time-outs (TimeoutError -> RedisError) or when the
    # backend is read-only (ReadOnlyError -> ResponseError -> RedisError).
    # The cache check does not need to distinguish these cases, so the common
    # base class RedisError is used.
    from redis.exceptions import RedisError
except ModuleNotFoundError:

    class RedisError(Exception):
        """Stand-in used when Redis is not installed and another cache backend is used."""
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
logger = logging.getLogger(__name__)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclasses.dataclass
class Cache(HealthCheck):
    """
    Verify that a cache backend can store a value and return it again.

    The check may be registered several times, once per cache alias.

    Args:
        alias: The cache alias to test against.
        cache_key: The cache key to use for the test.

    """

    alias: str = "default"
    cache_key: str = dataclasses.field(default="djangohealthcheck_test", repr=False)

    async def run(self):
        """Write a timestamped marker to the cache and read it back."""
        backend = caches[self.alias]
        stamp = datetime.datetime.now().timestamp()
        try:
            await backend.aset(self.cache_key, f"itworks-{stamp}")
            stored = await backend.aget(self.cache_key)
            matches = stored == f"itworks-{stamp}"
            if not matches:
                raise ServiceUnavailable(f"Cache key {self.cache_key} does not match")
        except CacheKeyWarning as exc:
            raise ServiceReturnedUnexpectedResult("Cache key warning") from exc
        except ValueError as exc:
            raise ServiceReturnedUnexpectedResult("ValueError") from exc
        except (ConnectionError, RedisError) as exc:
            raise ServiceReturnedUnexpectedResult("Connection Error") from exc
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class _SelectOne(Expression):
    """Expression that compiles to a constant ``SELECT 1`` probe query."""

    def as_sql(self, compiler, connection):
        """Return the generic SQL string together with an empty parameter list."""
        sql, params = "SELECT 1", []
        return sql, params

    def as_oracle(self, compiler, connection):
        """Return the Oracle-specific variant, which selects from ``DUAL``."""
        sql, params = "SELECT 1 FROM DUAL", []
        return sql, params
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@dataclasses.dataclass
class Database(HealthCheck):
    """
    Probe a database connection with a constant ``SELECT 1`` query.

    The check may be registered several times, once per database alias.
    It neither reads nor writes application data, which keeps the performance
    impact low and works with conservative database user permissions.

    Args:
        alias: The alias of the database connection to check.

    """

    alias: str = "default"

    def run(self):
        """Execute the probe query and verify that it returns ``(1,)``."""
        connection = connections[self.alias]
        try:
            compiler_class = connection.ops.compiler("SQLCompiler")
            compiler = compiler_class(_SelectOne(), connection, None)
            with connection.cursor() as cursor:
                sql, params = compiler.compile(_SelectOne())
                cursor.execute(sql, params)
                row = cursor.fetchone()
        except db.Error as exc:
            # Only the text before the first colon is reported.
            raise ServiceUnavailable(str(exc).partition(":")[0]) from exc
        else:
            if row != (1,):
                raise ServiceUnavailable(
                    "Health Check query did not return the expected result."
                )
        finally:
            connection.close_if_unusable_or_obsolete()
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@dataclasses.dataclass
class DNS(HealthCheck):
    """
    Check DNS resolution by resolving a hostname's A record.

    By default the server's own hostname is resolved using the resolver's
    configured DNS servers; specific nameservers can be supplied instead.

    Args:
        hostname: The hostname to resolve.
        timeout: DNS query timeout.
        nameservers: Nameservers to query, or None to keep the resolver's defaults.

    """

    hostname: str = dataclasses.field(default_factory=socket.gethostname)
    timeout: datetime.timedelta = dataclasses.field(
        default=datetime.timedelta(seconds=5), repr=False
    )
    nameservers: list[str] | None = dataclasses.field(default=None, repr=False)

    async def run(self):
        """Resolve the hostname and translate resolver errors into `ServiceUnavailable`."""
        logger.debug("Attempting to resolve hostname: %s", self.hostname)

        resolver = dns.asyncresolver.Resolver()
        resolver.lifetime = self.timeout.total_seconds()
        if self.nameservers is not None:
            resolver.nameservers = self.nameservers

        try:
            records = await resolver.resolve(self.hostname, "A")
        except dns.resolver.NXDOMAIN as exc:
            raise ServiceUnavailable(
                f"DNS resolution failed: hostname {self.hostname} does not exist"
            ) from exc
        except dns.resolver.NoAnswer as exc:
            raise ServiceUnavailable(
                f"DNS resolution failed: no answer for {self.hostname}"
            ) from exc
        except dns.resolver.Timeout as exc:
            raise ServiceUnavailable(
                f"DNS resolution failed: timeout resolving {self.hostname}"
            ) from exc
        except dns.resolver.NoNameservers as exc:
            raise ServiceUnavailable(
                "DNS resolution failed: no nameservers available"
            ) from exc
        except dns.exception.DNSException as exc:
            raise ServiceUnavailable(f"DNS resolution failed: {exc}") from exc

        # Every handler above raises, so this point is reached on success only.
        resolved = [str(rdata) for rdata in records]
        logger.debug("Successfully resolved %s to %s", self.hostname, resolved)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
@dataclasses.dataclass()
class Disk(HealthCheck):
    """
    Warn about disk usage for a given system path.

    It can be setup multiple times at different system paths,
    e.g. one at your application root and one at your media storage root.

    Args:
        path: Path to check disk usage for.
        max_disk_usage_percent: Maximum disk usage in percent or None to disable the check.

    """

    path: pathlib.Path | str = dataclasses.field(default_factory=os.getcwd)
    max_disk_usage_percent: float | None = dataclasses.field(default=90.0, repr=False)
    hostname: str = dataclasses.field(default_factory=socket.gethostname, init=False)

    def run(self):
        """
        Compare the disk usage of `path` against the configured threshold.

        Raises:
            ServiceWarning: If the usage is at or above `max_disk_usage_percent`.
            ServiceReturnedUnexpectedResult: If reading the disk usage raises a `ValueError`.

        """
        try:
            du = psutil.disk_usage(str(self.path))
            # Compare against None explicitly: a threshold of 0 is a valid
            # value and must not be mistaken for "check disabled".
            if (
                self.max_disk_usage_percent is not None
                and du.percent >= self.max_disk_usage_percent
            ):
                raise ServiceWarning(f"{du.percent}\u202f% disk usage")
        except ValueError as e:
            raise ServiceReturnedUnexpectedResult("ValueError") from e
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
@dataclasses.dataclass
class Mail(HealthCheck):
    """
    Check that mail backend is able to open and close connection.

    Args:
        backend: The email backend to test against, defaults to the `EMAIL_BACKEND` setting.
        timeout: Timeout for connection to mail server, as a `datetime.timedelta`.

    """

    # The setting is looked up when an instance is created rather than when
    # the class is defined, so importing this module does not require
    # configured Django settings and later setting overrides are respected.
    backend: str = dataclasses.field(default_factory=lambda: settings.EMAIL_BACKEND)
    timeout: datetime.timedelta = dataclasses.field(
        default=datetime.timedelta(seconds=15), repr=False
    )

    def run(self) -> None:
        """
        Open a connection to the mail backend and close it again.

        Raises:
            ServiceUnavailable: If the SMTP server reports an error or refuses the connection.

        """
        connection: BaseEmailBackend = get_connection(self.backend, fail_silently=False)
        connection.timeout = self.timeout.total_seconds()
        logger.debug("Trying to open connection to mail backend.")
        try:
            connection.open()
        except smtplib.SMTPException as e:
            raise ServiceUnavailable(
                "Failed to open connection with SMTP server"
            ) from e
        except ConnectionRefusedError as e:
            raise ServiceUnavailable("Connection refused error") from e
        finally:
            connection.close()
        # Only reached when opening the connection did not raise.
        logger.debug(
            "Connection established. Mail backend %r is healthy.", self.backend
        )
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
@dataclasses.dataclass()
class Memory(HealthCheck):
    """
    Warn about system memory utilization.

    Args:
        min_gibibytes_available: Minimum available memory in gibibytes or None to disable the check.
        max_memory_usage_percent: Maximum memory usage in percent or None to disable the check.

    """

    min_gibibytes_available: float | None = dataclasses.field(default=None, repr=False)
    max_memory_usage_percent: float | None = dataclasses.field(default=90.0, repr=False)
    hostname: str = dataclasses.field(default_factory=socket.gethostname, init=False)

    def run(self):
        """
        Compare the current memory statistics against the configured thresholds.

        Raises:
            ServiceWarning: If less than `min_gibibytes_available` is available or
                the usage is at or above `max_memory_usage_percent`.
            ServiceReturnedUnexpectedResult: If reading the statistics raises a `ValueError`.

        """
        try:
            memory = psutil.virtual_memory()
            available_gibi = memory.available / (1024**3)
            total_gibi = memory.total / (1024**3)
            msg = f"RAM {available_gibi:.1f}/{total_gibi:.1f}GiB ({memory.percent}\u202f%)"
            # Thresholds are compared against None explicitly: 0 is a valid
            # value and must not be mistaken for "check disabled".
            if (
                self.min_gibibytes_available is not None
                and available_gibi < self.min_gibibytes_available
            ):
                raise ServiceWarning(msg)
            if (
                self.max_memory_usage_percent is not None
                and memory.percent >= self.max_memory_usage_percent
            ):
                raise ServiceWarning(msg)
        except ValueError as e:
            raise ServiceReturnedUnexpectedResult("ValueError") from e
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
@dataclasses.dataclass
class Storage(HealthCheck):
    """
    Check file storage backends by saving, reading, and deleting a test file.

    It can be setup multiple times for different storage backends if needed.

    Args:
        alias: The alias of the storage backend to check.

    """

    alias: str = "default"

    @property
    def storage(self) -> DjangoStorage:
        """Return the storage backend registered under `alias`."""
        return storages[self.alias]

    def get_file_name(self):
        """Return a unique name for the test file."""
        return f"health_check_storage_test/test-{uuid.uuid4()}.txt"

    def get_file_content(self):
        """Return the timestamped bytes written to the test file."""
        return f"# generated by health_check.Storage at {datetime.datetime.now().timestamp()}".encode()

    def check_save(self, file_name, file_content):
        """
        Save the test file and verify that it exists with the expected content.

        If verification fails after the file was saved, the file is removed
        again (best effort) so that failing checks do not accumulate test files.

        Returns:
            The name under which the storage backend actually saved the file.

        Raises:
            ServiceUnavailable: If the file is missing or its content differs.

        """
        # The backend may alter the name, so continue with the returned one.
        file_name = self.storage.save(file_name, ContentFile(content=file_content))
        try:
            if not self.storage.exists(file_name):
                raise ServiceUnavailable("File does not exist")
            with self.storage.open(file_name) as f:
                if not f.read() == file_content:
                    raise ServiceUnavailable("File content does not match")
        except Exception:
            self._discard(file_name)
            raise
        return file_name

    def _discard(self, file_name):
        """Remove a test file without letting cleanup errors mask the original failure."""
        try:
            self.storage.delete(file_name)
        except Exception:
            logger.debug(
                "Could not remove health check test file %r.", file_name, exc_info=True
            )

    def check_delete(self, file_name):
        """
        Delete the test file and verify that it is gone.

        Raises:
            ServiceUnavailable: If the file still exists after deletion.

        """
        self.storage.delete(file_name)
        if self.storage.exists(file_name):
            raise ServiceUnavailable("File was not deleted")

    def run(self):
        """Write a test file to the storage backend, verify it and delete it again."""
        file_name = self.get_file_name()
        file_content = self.get_file_content()
        file_name = self.check_save(file_name, file_content)
        self.check_delete(file_name)
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Celery health check."""
|
|
2
|
+
|
|
3
|
+
import dataclasses
|
|
4
|
+
import datetime
|
|
5
|
+
import typing
|
|
6
|
+
|
|
7
|
+
import celery
|
|
8
|
+
from celery.app import app_or_default
|
|
9
|
+
|
|
10
|
+
from health_check.base import HealthCheck
|
|
11
|
+
from health_check.exceptions import ServiceUnavailable
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclasses.dataclass
class Ping(HealthCheck):
    """
    Check Celery worker availability using the ping control command.

    Args:
        app: Celery application instance to use for the health check, defaults to the [default Celery app][celery.app.default_app].
        timeout: Timeout duration for the ping command.

    """

    CORRECT_PING_RESPONSE: typing.ClassVar[dict[str, str]] = {"ok": "pong"}
    app: celery.Celery = dataclasses.field(default_factory=app_or_default)
    timeout: datetime.timedelta = dataclasses.field(
        default=datetime.timedelta(seconds=1), repr=False
    )

    def run(self):
        """
        Ping the workers and verify that every configured queue is consumed.

        Raises:
            ServiceUnavailable: If the ping fails, no worker replies, a worker
                replies incorrectly, or a configured queue has no worker.

        """
        try:
            ping_result = self.app.control.ping(timeout=self.timeout.total_seconds())
        except OSError as e:
            raise ServiceUnavailable("IOError") from e
        except NotImplementedError as e:
            raise ServiceUnavailable(
                "NotImplementedError: Make sure CELERY_RESULT_BACKEND is set"
            ) from e

        if not ping_result:
            raise ServiceUnavailable("Celery workers unavailable")
        self.check_active_queues(*self.active_workers(ping_result))

    def active_workers(self, ping_result):
        """
        Yield the name of each worker that answered the ping correctly.

        Raises:
            ServiceUnavailable: If a worker's response differs from `CORRECT_PING_RESPONSE`.

        """
        for result in ping_result:
            # Each entry is read as a single ``{worker: response}`` pair.
            worker, response = next(iter(result.items()))
            if response != self.CORRECT_PING_RESPONSE:
                raise ServiceUnavailable(
                    f"Celery worker {worker} response was incorrect"
                )
            yield worker

    def check_active_queues(self, *active_workers):
        """
        Verify that every configured task queue is consumed by an active worker.

        Nothing is checked when the application does not configure any queues.

        Raises:
            ServiceUnavailable: If a configured queue has no active worker.

        """
        configured = getattr(self.app.conf, "task_queues", None) or getattr(
            self.app.conf, "CELERY_QUEUES", None
        )
        if not configured:
            # No explicit queues configured; iterating ``None`` would raise.
            return
        defined_queues = {queue.name for queue in configured}

        # A missing reply is treated as "no active queues" instead of failing
        # on ``None.values()``.
        replies = self.app.control.inspect(active_workers).active_queues() or {}
        active_queues = {
            queue.get("name") for queues in replies.values() for queue in queues
        }

        # Sorted so that the reported queue is deterministic.
        for queue in sorted(defined_queues - active_queues):
            raise ServiceUnavailable(f"No worker for Celery task queue {queue}")
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Kafka health check."""
|
|
2
|
+
|
|
3
|
+
import dataclasses
|
|
4
|
+
import datetime
|
|
5
|
+
import logging
|
|
6
|
+
|
|
7
|
+
from confluent_kafka.aio import AIOConsumer
|
|
8
|
+
from confluent_kafka.error import KafkaException
|
|
9
|
+
|
|
10
|
+
from health_check.base import HealthCheck
|
|
11
|
+
from health_check.exceptions import ServiceUnavailable
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclasses.dataclass
class Kafka(HealthCheck):
    """
    Check Kafka by connecting to the bootstrap servers and listing topics.

    Args:
        bootstrap_servers: List of Kafka bootstrap servers, e.g., ['localhost:9092'].
        timeout: Timeout duration for the connection check as a datetime.timedelta.

    """

    bootstrap_servers: list[str]
    timeout: datetime.timedelta = dataclasses.field(
        default=datetime.timedelta(seconds=10), repr=False
    )

    async def run(self):
        """List the cluster's topics and fail if none can be retrieved."""
        logger.debug(
            "Connecting to Kafka bootstrap servers %r ...",
            self.bootstrap_servers,
        )

        timeout_seconds = self.timeout.total_seconds()
        timeout_ms = int(timeout_seconds * 1000)
        # Minimal consumer configuration, sufficient for a metadata request.
        config = {
            "bootstrap.servers": ",".join(self.bootstrap_servers),
            "client.id": "health-check",
            "group.id": "health-check",
            "session.timeout.ms": timeout_ms,
            "socket.timeout.ms": timeout_ms,
        }
        consumer = AIOConsumer(config)

        try:
            cluster_metadata = await consumer.list_topics(timeout=timeout_seconds)
            if not cluster_metadata or not cluster_metadata.topics:
                raise ServiceUnavailable("Failed to retrieve Kafka topics.")
        except KafkaException as exc:
            raise ServiceUnavailable("Unable to connect") from exc
        else:
            logger.debug(
                "Connection established. Kafka is healthy. Found %d topics.",
                len(cluster_metadata.topics),
            )
        finally:
            await consumer.close()
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""RabbitMQ health check."""
|
|
2
|
+
|
|
3
|
+
import dataclasses
|
|
4
|
+
import logging
|
|
5
|
+
|
|
6
|
+
import aio_pika
|
|
7
|
+
|
|
8
|
+
from health_check.base import HealthCheck
|
|
9
|
+
from health_check.exceptions import ServiceUnavailable
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclasses.dataclass
class RabbitMQ(HealthCheck):
    """
    Check RabbitMQ service by opening and closing a broker connection.

    Args:
        amqp_url (str): The URL of the RabbitMQ broker to connect to, e.g., 'amqp://guest:guest@localhost:5672//'.

    """

    # The URL may embed credentials, so it is excluded from ``repr``, which is
    # used in health check reports.
    amqp_url: str = dataclasses.field(repr=False)

    async def run(self):
        """
        Connect to the broker and close the connection again.

        Raises:
            ServiceUnavailable: If the connection is refused, authentication
                fails, or another `OSError` occurs.

        """
        # The URL is deliberately not logged because it may contain credentials.
        logger.debug("Attempting to connect to RabbitMQ...")
        try:
            connection = await aio_pika.connect_robust(self.amqp_url)
            await connection.close()
        except ConnectionRefusedError as e:
            raise ServiceUnavailable(
                "Unable to connect to RabbitMQ: Connection was refused."
            ) from e
        except aio_pika.exceptions.ProbableAuthenticationError as e:
            raise ServiceUnavailable(
                "Unable to connect to RabbitMQ: Authentication error."
            ) from e
        except OSError as e:
            raise ServiceUnavailable("IOError") from e
        else:
            logger.debug("Connection established. RabbitMQ is healthy.")
|