django-kafka 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- django_kafka-0.0.1/LICENSE +7 -0
- django_kafka-0.0.1/MANIFEST.in +2 -0
- django_kafka-0.0.1/PKG-INFO +269 -0
- django_kafka-0.0.1/README.md +236 -0
- django_kafka-0.0.1/django_kafka/__init__.py +73 -0
- django_kafka-0.0.1/django_kafka/apps.py +10 -0
- django_kafka-0.0.1/django_kafka/conf.py +32 -0
- django_kafka-0.0.1/django_kafka/consumer.py +95 -0
- django_kafka-0.0.1/django_kafka/error_handlers.py +9 -0
- django_kafka-0.0.1/django_kafka/exceptions.py +2 -0
- django_kafka-0.0.1/django_kafka/management/__init__.py +0 -0
- django_kafka-0.0.1/django_kafka/management/commands/__init__.py +0 -0
- django_kafka-0.0.1/django_kafka/management/commands/kafka_consume.py +24 -0
- django_kafka-0.0.1/django_kafka/models.py +48 -0
- django_kafka-0.0.1/django_kafka/producer.py +53 -0
- django_kafka-0.0.1/django_kafka/registry.py +32 -0
- django_kafka-0.0.1/django_kafka/tests/__init__.py +0 -0
- django_kafka-0.0.1/django_kafka/tests/models.py +31 -0
- django_kafka-0.0.1/django_kafka/tests/test_consumer.py +220 -0
- django_kafka-0.0.1/django_kafka/tests/test_django_kafka_interface.py +60 -0
- django_kafka-0.0.1/django_kafka/tests/test_models.py +99 -0
- django_kafka-0.0.1/django_kafka/tests/test_registry.py +56 -0
- django_kafka-0.0.1/django_kafka/tests/test_settings.py +45 -0
- django_kafka-0.0.1/django_kafka/tests/test_topic.py +260 -0
- django_kafka-0.0.1/django_kafka/topic.py +121 -0
- django_kafka-0.0.1/django_kafka.egg-info/PKG-INFO +269 -0
- django_kafka-0.0.1/django_kafka.egg-info/SOURCES.txt +30 -0
- django_kafka-0.0.1/django_kafka.egg-info/dependency_links.txt +1 -0
- django_kafka-0.0.1/django_kafka.egg-info/requires.txt +5 -0
- django_kafka-0.0.1/django_kafka.egg-info/top_level.txt +1 -0
- django_kafka-0.0.1/pyproject.toml +36 -0
- django_kafka-0.0.1/setup.cfg +4 -0
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
Copyright 2024 RegioHelden GmbH
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
4
|
+
|
|
5
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
6
|
+
|
|
7
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: django-kafka
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Confluent's Kafka Python Client combined with Django
|
|
5
|
+
Author-email: RegioHelden GmbH <opensource@regiohelden.de>
|
|
6
|
+
License: Copyright 2024 RegioHelden GmbH
|
|
7
|
+
|
|
8
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
9
|
+
|
|
10
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
11
|
+
|
|
12
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
13
|
+
|
|
14
|
+
Project-URL: Repository, https://github.com/RegioHelden/django-kafka
|
|
15
|
+
Project-URL: Issues, https://github.com/RegioHelden/django-kafka/issues
|
|
16
|
+
Project-URL: Changelog, https://github.com/RegioHelden/django-kafka/blob/main/CHANGELOG.md
|
|
17
|
+
Keywords: django,kafka
|
|
18
|
+
Classifier: Development Status :: 3 - Alpha
|
|
19
|
+
Classifier: Environment :: Web Environment
|
|
20
|
+
Classifier: Framework :: Django
|
|
21
|
+
Classifier: Intended Audience :: Developers
|
|
22
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
23
|
+
Classifier: Operating System :: OS Independent
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
25
|
+
Requires-Python: >=3.11
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Requires-Dist: django<6.0,>=4.0
|
|
29
|
+
Requires-Dist: confluent-kafka==2.4.0
|
|
30
|
+
Requires-Dist: avro==1.11.3
|
|
31
|
+
Requires-Dist: fastavro==1.9.4
|
|
32
|
+
Requires-Dist: requests==2.32.2
|
|
33
|
+
|
|
34
|
+
# django-kafka
|
|
35
|
+
This library is using [confluent-kafka-python](https://github.com/confluentinc/confluent-kafka-python) which is a wrapper around the [librdkafka](https://github.com/confluentinc/librdkafka) (Apache Kafka C/C++ client library).
|
|
36
|
+
|
|
37
|
+
It helps to integrate kafka with Django.
|
|
38
|
+
|
|
39
|
+
## Quick start
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install django-kafka
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Configure:
|
|
46
|
+
Assuming you have a local Kafka instance set up with no authentication, all you need to do is define the bootstrap servers.
|
|
47
|
+
```python
|
|
48
|
+
# ./settings.py
|
|
49
|
+
|
|
50
|
+
INSTALLED_APPS = [
|
|
51
|
+
# ...
|
|
52
|
+
"django_kafka",
|
|
53
|
+
]
|
|
54
|
+
|
|
55
|
+
DJANGO_KAFKA = {
|
|
56
|
+
"GLOBAL_CONFIG": {
|
|
57
|
+
"bootstrap.servers": "kafka1:9092",
|
|
58
|
+
},
|
|
59
|
+
}
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### Define a Topic:
|
|
63
|
+
|
|
64
|
+
Topics define how to handle incoming messages and how to produce an outgoing message.
|
|
65
|
+
```python
|
|
66
|
+
from confluent_kafka.serialization import MessageField
|
|
67
|
+
from django_kafka.topic import Topic
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class Topic1(Topic):
|
|
71
|
+
name = "topic1"
|
|
72
|
+
|
|
73
|
+
def consume(self, msg):
|
|
74
|
+
key = self.deserialize(msg.key(), MessageField.KEY, msg.headers())
|
|
75
|
+
value = self.deserialize(msg.value(), MessageField.VALUE, msg.headers())
|
|
76
|
+
# ... process values
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Define a Consumer:
|
|
80
|
+
|
|
81
|
+
Consumers define which topics they take care of. Usually you want one consumer per project. If 2 consumers are defined, then they will be started in parallel.
|
|
82
|
+
|
|
83
|
+
Consumers are auto-discovered and are expected to be located in a `consumers.py` module.
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
# ./consumers.py
|
|
87
|
+
|
|
88
|
+
from django_kafka import kafka
|
|
89
|
+
from django_kafka.consumer import Consumer, Topics
|
|
90
|
+
|
|
91
|
+
from my_app.topics import Topic1
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# register your consumer using `DjangoKafka` class API decorator
|
|
95
|
+
@kafka.consumers()
|
|
96
|
+
class MyAppConsumer(Consumer):
|
|
97
|
+
# tell the consumers which topics to process using `django_kafka.consumer.Topics` interface.
|
|
98
|
+
topics = Topics(
|
|
99
|
+
Topic1(),
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
config = {
|
|
103
|
+
"group.id": "my-app-consumer",
|
|
104
|
+
"auto.offset.reset": "latest",
|
|
105
|
+
"enable.auto.offset.store": False,
|
|
106
|
+
}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
### Start the Consumers:
|
|
111
|
+
You can use the Django management command to start the defined consumers.
|
|
112
|
+
```bash
|
|
113
|
+
./manage.py kafka_consume
|
|
114
|
+
```
|
|
115
|
+
Or you can use `DjangoKafka` class API.
|
|
116
|
+
```python
|
|
117
|
+
from django_kafka import kafka
|
|
118
|
+
|
|
119
|
+
kafka.start_consumers()
|
|
120
|
+
```
|
|
121
|
+
Check [Confluent Python Consumer](https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#consumer) for API documentation.
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
### Produce:
|
|
126
|
+
Messages are produced using a Topic instance.
|
|
127
|
+
```python
|
|
128
|
+
from my_app.topics import Topic1
|
|
129
|
+
|
|
130
|
+
# this will send a message to kafka, serializing it using the defined serializer
|
|
131
|
+
Topic1().produce("some message")
|
|
132
|
+
```
|
|
133
|
+
Check [Confluent Python Producer](https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#producer) for API documentation.
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
### Define schema registry:
|
|
137
|
+
|
|
138
|
+
The library is using [Confluent's SchemaRegistryClient](https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#schemaregistryclient). In order to use it define a `SCHEMA_REGISTRY` setting.
|
|
139
|
+
|
|
140
|
+
Find available configs in the [SchemaRegistryClient docs](https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#schemaregistryclient).
|
|
141
|
+
```python
|
|
142
|
+
DJANGO_KAFKA = {
|
|
143
|
+
"SCHEMA_REGISTRY": {
|
|
144
|
+
"url": "http://schema-registry",
|
|
145
|
+
},
|
|
146
|
+
}
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
**Note:** take [django_kafka.topic.AvroTopic](./django_kafka/topic.py) as an example if you want to implement a custom Topic with your schema.
|
|
150
|
+
|
|
151
|
+
## Settings:
|
|
152
|
+
|
|
153
|
+
**Defaults:**
|
|
154
|
+
```python
|
|
155
|
+
DJANGO_KAFKA = {
|
|
156
|
+
"CLIENT_ID": f"{socket.gethostname()}-python",
|
|
157
|
+
"GLOBAL_CONFIG": {},
|
|
158
|
+
"PRODUCER_CONFIG": {},
|
|
159
|
+
"CONSUMER_CONFIG": {},
|
|
160
|
+
"POLLING_FREQUENCY": 1, # seconds
|
|
161
|
+
"SCHEMA_REGISTRY": {},
|
|
162
|
+
"ERROR_HANDLER": "django_kafka.error_handlers.ClientErrorHandler",
|
|
163
|
+
}
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
#### `CLIENT_ID`
|
|
167
|
+
Default: `f"{socket.gethostname()}-python"`
|
|
168
|
+
|
|
169
|
+
An id string to pass to the server when making requests. The purpose of this is to be able to track the source of requests beyond just ip/port by allowing a logical application name to be included in server-side request logging.
|
|
170
|
+
|
|
171
|
+
**Note:** This parameter is included in the config of both the consumer and producer unless `client.id` is overwritten within `PRODUCER_CONFIG` or `CONSUMER_CONFIG`.
|
|
172
|
+
|
|
173
|
+
#### `GLOBAL_CONFIG`
|
|
174
|
+
Default: `{}`
|
|
175
|
+
|
|
176
|
+
Defines configurations applied to both consumer and producer. See [configs marked with `*`](https://github.com/confluentinc/librdkafka/blob/master/CONFIGURATION.md).
|
|
177
|
+
|
|
178
|
+
#### `PRODUCER_CONFIG`
|
|
179
|
+
Default: `{}`
|
|
180
|
+
|
|
181
|
+
Defines configurations of the producer. See [configs marked with `P`](https://github.com/confluentinc/librdkafka/blob/master/CONFIGURATION.md).
|
|
182
|
+
|
|
183
|
+
#### `CONSUMER_CONFIG`
|
|
184
|
+
Default: `{}`
|
|
185
|
+
|
|
186
|
+
Defines configurations of the consumer. See [configs marked with `C`](https://github.com/confluentinc/librdkafka/blob/master/CONFIGURATION.md).
|
|
187
|
+
|
|
188
|
+
#### `POLLING_FREQUENCY`
|
|
189
|
+
Default: 1 # second
|
|
190
|
+
|
|
191
|
+
How often the client polls for events (in seconds).
|
|
192
|
+
|
|
193
|
+
#### `SCHEMA_REGISTRY`
|
|
194
|
+
Default: `{}`
|
|
195
|
+
|
|
196
|
+
Configuration for [confluent_kafka.schema_registry.SchemaRegistryClient](https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#schemaregistryclient).
|
|
197
|
+
|
|
198
|
+
#### `ERROR_HANDLER`
|
|
199
|
+
Default: `django_kafka.error_handlers.ClientErrorHandler`
|
|
200
|
+
|
|
201
|
+
This is an `error_cb` hook (see [Kafka Client Configuration](https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#kafka-client-configuration) for reference).
|
|
202
|
+
It is triggered for client global errors, and in case of a fatal error it raises `DjangoKafkaError`.
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
## Bidirectional data sync with no infinite event loop.
|
|
206
|
+
|
|
207
|
+
**For example, you want to keep a User table in sync in multiple systems.**
|
|
208
|
+
|
|
209
|
+
The idea is to send events from all systems to the same topic, and also consume events from the same topic, marking the record with `kafka_skip=True` at the consumption time.
|
|
210
|
+
- The producer should respect `kafka_skip=True` and not produce new events when it is `True`.
|
|
211
|
+
- Any updates to the User table, which are happening outside the consumer, should set `kafka_skip=False` which will allow the producer to create an event again.
|
|
212
|
+
|
|
213
|
+
This way the chronology is strictly kept and the infinite events loop is avoided.
|
|
214
|
+
|
|
215
|
+
The disadvantage is that each system will still consume its own message.
|
|
216
|
+
|
|
217
|
+
#### There are 2 mixins for django Model and for QuerySet:
|
|
218
|
+
|
|
219
|
+
#### KafkaSkipMixin
|
|
220
|
+
It adds a new `kafka_skip` boolean field, which defaults to `False`, and overrides the `Model.save` method to set `kafka_skip=False`.
|
|
221
|
+
|
|
222
|
+
Usage:
|
|
223
|
+
```python
|
|
224
|
+
from django.contrib.auth.base_user import AbstractBaseUser
|
|
225
|
+
from django.contrib.auth.models import PermissionsMixin
|
|
226
|
+
from django_kafka.models import KafkaSkipMixin
|
|
227
|
+
|
|
228
|
+
class User(KafkaSkipMixin, PermissionsMixin, AbstractBaseUser):
|
|
229
|
+
# ...
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
#### KafkaSkipQueryset
|
|
234
|
+
If you have defined a custom manager on your model then you should inherit it from `KafkaSkipQueryset`. It adds `kafka_skip=False` when using `update` method.
|
|
235
|
+
|
|
236
|
+
**Note:** `kafka_skip=False` is only set when it's not provided to the `update` kwargs. E.g. `User.objects.update(first_name="John", kafka_skip=True)` will not be changed to `kafka_skip=False`.
|
|
237
|
+
|
|
238
|
+
Usage:
|
|
239
|
+
```python
|
|
240
|
+
from django.contrib.auth.base_user import AbstractBaseUser
|
|
241
|
+
from django.contrib.auth.base_user import BaseUserManager
|
|
242
|
+
from django.contrib.auth.models import PermissionsMixin
|
|
243
|
+
from django_kafka.models import KafkaSkipMixin, KafkaSkipQueryset
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
class UserManager(BaseUserManager.from_queryset(KafkaSkipQueryset)):
|
|
247
|
+
# ...
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
class User(KafkaSkipMixin, PermissionsMixin, AbstractBaseUser):
|
|
251
|
+
# ...
|
|
252
|
+
objects = UserManager()
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
## Making a new release
|
|
257
|
+
- [bump-my-version](https://github.com/callowayproject/bump-my-version) is used to manage releases.
|
|
258
|
+
- [Ruff](https://github.com/astral-sh/ruff) linter is used to validate the code style. Make sure your code complies with the defined rules. You may use `ruff check --fix` for that. Ruff is executed by GitHub actions and the workflow will fail if Ruff validation fails.
|
|
259
|
+
|
|
260
|
+
- Add your changes to the [CHANGELOG](CHANGELOG.md), then run
|
|
261
|
+
```bash
|
|
262
|
+
docker compose run --rm app bump-my-version bump <major|minor|patch>
|
|
263
|
+
```
|
|
264
|
+
This will bump the major/minor/patch version respectively and add a tag for the release.
|
|
265
|
+
|
|
266
|
+
- Push, including the new tag, to publish the release to PyPI.
|
|
267
|
+
```bash
|
|
268
|
+
git push origin tag <tag_name>
|
|
269
|
+
```
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
# django-kafka
|
|
2
|
+
This library is using [confluent-kafka-python](https://github.com/confluentinc/confluent-kafka-python) which is a wrapper around the [librdkafka](https://github.com/confluentinc/librdkafka) (Apache Kafka C/C++ client library).
|
|
3
|
+
|
|
4
|
+
It helps to integrate kafka with Django.
|
|
5
|
+
|
|
6
|
+
## Quick start
|
|
7
|
+
|
|
8
|
+
```bash
|
|
9
|
+
pip install django-kafka
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
### Configure:
|
|
13
|
+
Assuming you have a local Kafka instance set up with no authentication, all you need to do is define the bootstrap servers.
|
|
14
|
+
```python
|
|
15
|
+
# ./settings.py
|
|
16
|
+
|
|
17
|
+
INSTALLED_APPS = [
|
|
18
|
+
# ...
|
|
19
|
+
"django_kafka",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
DJANGO_KAFKA = {
|
|
23
|
+
"GLOBAL_CONFIG": {
|
|
24
|
+
"bootstrap.servers": "kafka1:9092",
|
|
25
|
+
},
|
|
26
|
+
}
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
### Define a Topic:
|
|
30
|
+
|
|
31
|
+
Topics define how to handle incoming messages and how to produce an outgoing message.
|
|
32
|
+
```python
|
|
33
|
+
from confluent_kafka.serialization import MessageField
|
|
34
|
+
from django_kafka.topic import Topic
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class Topic1(Topic):
|
|
38
|
+
name = "topic1"
|
|
39
|
+
|
|
40
|
+
def consume(self, msg):
|
|
41
|
+
key = self.deserialize(msg.key(), MessageField.KEY, msg.headers())
|
|
42
|
+
value = self.deserialize(msg.value(), MessageField.VALUE, msg.headers())
|
|
43
|
+
# ... process values
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Define a Consumer:
|
|
47
|
+
|
|
48
|
+
Consumers define which topics they take care of. Usually you want one consumer per project. If 2 consumers are defined, then they will be started in parallel.
|
|
49
|
+
|
|
50
|
+
Consumers are auto-discovered and are expected to be located in a `consumers.py` module.
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
# ./consumers.py
|
|
54
|
+
|
|
55
|
+
from django_kafka import kafka
|
|
56
|
+
from django_kafka.consumer import Consumer, Topics
|
|
57
|
+
|
|
58
|
+
from my_app.topics import Topic1
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# register your consumer using `DjangoKafka` class API decorator
|
|
62
|
+
@kafka.consumers()
|
|
63
|
+
class MyAppConsumer(Consumer):
|
|
64
|
+
# tell the consumers which topics to process using `django_kafka.consumer.Topics` interface.
|
|
65
|
+
topics = Topics(
|
|
66
|
+
Topic1(),
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
config = {
|
|
70
|
+
"group.id": "my-app-consumer",
|
|
71
|
+
"auto.offset.reset": "latest",
|
|
72
|
+
"enable.auto.offset.store": False,
|
|
73
|
+
}
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
### Start the Consumers:
|
|
78
|
+
You can use the Django management command to start the defined consumers.
|
|
79
|
+
```bash
|
|
80
|
+
./manage.py kafka_consume
|
|
81
|
+
```
|
|
82
|
+
Or you can use `DjangoKafka` class API.
|
|
83
|
+
```python
|
|
84
|
+
from django_kafka import kafka
|
|
85
|
+
|
|
86
|
+
kafka.start_consumers()
|
|
87
|
+
```
|
|
88
|
+
Check [Confluent Python Consumer](https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#consumer) for API documentation.
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
### Produce:
|
|
93
|
+
Messages are produced using a Topic instance.
|
|
94
|
+
```python
|
|
95
|
+
from my_app.topics import Topic1
|
|
96
|
+
|
|
97
|
+
# this will send a message to kafka, serializing it using the defined serializer
|
|
98
|
+
Topic1().produce("some message")
|
|
99
|
+
```
|
|
100
|
+
Check [Confluent Python Producer](https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#producer) for API documentation.
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
### Define schema registry:
|
|
104
|
+
|
|
105
|
+
The library is using [Confluent's SchemaRegistryClient](https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#schemaregistryclient). In order to use it define a `SCHEMA_REGISTRY` setting.
|
|
106
|
+
|
|
107
|
+
Find available configs in the [SchemaRegistryClient docs](https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#schemaregistryclient).
|
|
108
|
+
```python
|
|
109
|
+
DJANGO_KAFKA = {
|
|
110
|
+
"SCHEMA_REGISTRY": {
|
|
111
|
+
"url": "http://schema-registry",
|
|
112
|
+
},
|
|
113
|
+
}
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
**Note:** take [django_kafka.topic.AvroTopic](./django_kafka/topic.py) as an example if you want to implement a custom Topic with your schema.
|
|
117
|
+
|
|
118
|
+
## Settings:
|
|
119
|
+
|
|
120
|
+
**Defaults:**
|
|
121
|
+
```python
|
|
122
|
+
DJANGO_KAFKA = {
|
|
123
|
+
"CLIENT_ID": f"{socket.gethostname()}-python",
|
|
124
|
+
"GLOBAL_CONFIG": {},
|
|
125
|
+
"PRODUCER_CONFIG": {},
|
|
126
|
+
"CONSUMER_CONFIG": {},
|
|
127
|
+
"POLLING_FREQUENCY": 1, # seconds
|
|
128
|
+
"SCHEMA_REGISTRY": {},
|
|
129
|
+
"ERROR_HANDLER": "django_kafka.error_handlers.ClientErrorHandler",
|
|
130
|
+
}
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
#### `CLIENT_ID`
|
|
134
|
+
Default: `f"{socket.gethostname()}-python"`
|
|
135
|
+
|
|
136
|
+
An id string to pass to the server when making requests. The purpose of this is to be able to track the source of requests beyond just ip/port by allowing a logical application name to be included in server-side request logging.
|
|
137
|
+
|
|
138
|
+
**Note:** This parameter is included in the config of both the consumer and producer unless `client.id` is overwritten within `PRODUCER_CONFIG` or `CONSUMER_CONFIG`.
|
|
139
|
+
|
|
140
|
+
#### `GLOBAL_CONFIG`
|
|
141
|
+
Default: `{}`
|
|
142
|
+
|
|
143
|
+
Defines configurations applied to both consumer and producer. See [configs marked with `*`](https://github.com/confluentinc/librdkafka/blob/master/CONFIGURATION.md).
|
|
144
|
+
|
|
145
|
+
#### `PRODUCER_CONFIG`
|
|
146
|
+
Default: `{}`
|
|
147
|
+
|
|
148
|
+
Defines configurations of the producer. See [configs marked with `P`](https://github.com/confluentinc/librdkafka/blob/master/CONFIGURATION.md).
|
|
149
|
+
|
|
150
|
+
#### `CONSUMER_CONFIG`
|
|
151
|
+
Default: `{}`
|
|
152
|
+
|
|
153
|
+
Defines configurations of the consumer. See [configs marked with `C`](https://github.com/confluentinc/librdkafka/blob/master/CONFIGURATION.md).
|
|
154
|
+
|
|
155
|
+
#### `POLLING_FREQUENCY`
|
|
156
|
+
Default: 1 # second
|
|
157
|
+
|
|
158
|
+
How often the client polls for events (in seconds).
|
|
159
|
+
|
|
160
|
+
#### `SCHEMA_REGISTRY`
|
|
161
|
+
Default: `{}`
|
|
162
|
+
|
|
163
|
+
Configuration for [confluent_kafka.schema_registry.SchemaRegistryClient](https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#schemaregistryclient).
|
|
164
|
+
|
|
165
|
+
#### `ERROR_HANDLER`
|
|
166
|
+
Default: `django_kafka.error_handlers.ClientErrorHandler`
|
|
167
|
+
|
|
168
|
+
This is an `error_cb` hook (see [Kafka Client Configuration](https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#kafka-client-configuration) for reference).
|
|
169
|
+
It is triggered for client global errors, and in case of a fatal error it raises `DjangoKafkaError`.
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
## Bidirectional data sync with no infinite event loop.
|
|
173
|
+
|
|
174
|
+
**For example, you want to keep a User table in sync in multiple systems.**
|
|
175
|
+
|
|
176
|
+
The idea is to send events from all systems to the same topic, and also consume events from the same topic, marking the record with `kafka_skip=True` at the consumption time.
|
|
177
|
+
- The producer should respect `kafka_skip=True` and not produce new events when it is `True`.
|
|
178
|
+
- Any updates to the User table, which are happening outside the consumer, should set `kafka_skip=False` which will allow the producer to create an event again.
|
|
179
|
+
|
|
180
|
+
This way the chronology is strictly kept and the infinite events loop is avoided.
|
|
181
|
+
|
|
182
|
+
The disadvantage is that each system will still consume its own message.
|
|
183
|
+
|
|
184
|
+
#### There are 2 mixins for django Model and for QuerySet:
|
|
185
|
+
|
|
186
|
+
#### KafkaSkipMixin
|
|
187
|
+
It adds a new `kafka_skip` boolean field, which defaults to `False`, and overrides the `Model.save` method to set `kafka_skip=False`.
|
|
188
|
+
|
|
189
|
+
Usage:
|
|
190
|
+
```python
|
|
191
|
+
from django.contrib.auth.base_user import AbstractBaseUser
|
|
192
|
+
from django.contrib.auth.models import PermissionsMixin
|
|
193
|
+
from django_kafka.models import KafkaSkipMixin
|
|
194
|
+
|
|
195
|
+
class User(KafkaSkipMixin, PermissionsMixin, AbstractBaseUser):
|
|
196
|
+
# ...
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
#### KafkaSkipQueryset
|
|
201
|
+
If you have defined a custom manager on your model then you should inherit it from `KafkaSkipQueryset`. It adds `kafka_skip=False` when using `update` method.
|
|
202
|
+
|
|
203
|
+
**Note:** `kafka_skip=False` is only set when it's not provided to the `update` kwargs. E.g. `User.objects.update(first_name="John", kafka_skip=True)` will not be changed to `kafka_skip=False`.
|
|
204
|
+
|
|
205
|
+
Usage:
|
|
206
|
+
```python
|
|
207
|
+
from django.contrib.auth.base_user import AbstractBaseUser
|
|
208
|
+
from django.contrib.auth.base_user import BaseUserManager
|
|
209
|
+
from django.contrib.auth.models import PermissionsMixin
|
|
210
|
+
from django_kafka.models import KafkaSkipMixin, KafkaSkipQueryset
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
class UserManager(BaseUserManager.from_queryset(KafkaSkipQueryset)):
|
|
214
|
+
# ...
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
class User(KafkaSkipMixin, PermissionsMixin, AbstractBaseUser):
|
|
218
|
+
# ...
|
|
219
|
+
objects = UserManager()
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
## Making a new release
|
|
224
|
+
- [bump-my-version](https://github.com/callowayproject/bump-my-version) is used to manage releases.
|
|
225
|
+
- [Ruff](https://github.com/astral-sh/ruff) linter is used to validate the code style. Make sure your code complies with the defined rules. You may use `ruff check --fix` for that. Ruff is executed by GitHub actions and the workflow will fail if Ruff validation fails.
|
|
226
|
+
|
|
227
|
+
- Add your changes to the [CHANGELOG](CHANGELOG.md), then run
|
|
228
|
+
```bash
|
|
229
|
+
docker compose run --rm app bump-my-version bump <major|minor|patch>
|
|
230
|
+
```
|
|
231
|
+
This will bump the major/minor/patch version respectively and add a tag for the release.
|
|
232
|
+
|
|
233
|
+
- Push, including the new tag, to publish the release to PyPI.
|
|
234
|
+
```bash
|
|
235
|
+
git push origin tag <tag_name>
|
|
236
|
+
```
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from multiprocessing.pool import Pool
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from confluent_kafka.schema_registry import SchemaRegistryClient
|
|
6
|
+
from django.utils.functional import cached_property
|
|
7
|
+
from django.utils.module_loading import autodiscover_modules
|
|
8
|
+
|
|
9
|
+
from django_kafka.conf import settings
|
|
10
|
+
from django_kafka.exceptions import DjangoKafkaError
|
|
11
|
+
from django_kafka.producer import Producer
|
|
12
|
+
from django_kafka.registry import ConsumersRegistry
|
|
13
|
+
|
|
14
|
+
# Module-level logger named after this package.
logger = logging.getLogger(__name__)

# Package version string.
__version__ = "0.0.1"

# Public names exported by `from django_kafka import *`.
__all__ = [
    "autodiscover",
    "DjangoKafka",
    "kafka",
]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def autodiscover() -> None:
    """Ask Django to auto-discover the ``consumers`` module of installed apps.

    Delegates to ``django.utils.module_loading.autodiscover_modules`` with the
    module name ``"consumers"``.
    """
    autodiscover_modules("consumers")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class DjangoKafka:
    """Facade exposing the consumer registry, a producer and a schema client.

    A single module-level instance of this class is created at the bottom of
    this module as ``kafka``.
    """

    # Class-level registry of consumers, shared by every instance.
    # Looked up by key in `start_consumer` and iterated in `start_consumers`.
    consumers = ConsumersRegistry()

    @cached_property
    def producer(self) -> Producer:
        """Return a `Producer`, created on first access and cached afterwards."""
        return Producer()

    @cached_property
    def schema_client(self) -> SchemaRegistryClient:
        """
        Return a `SchemaRegistryClient` built from `settings.SCHEMA_REGISTRY`.

        The client is created on first access and cached afterwards.

        https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#schemaregistryclient

        Raises:
            DjangoKafkaError: when the `SCHEMA_REGISTRY` setting is empty.
        """
        if not settings.SCHEMA_REGISTRY:
            raise DjangoKafkaError(
                "`SCHEMA_REGISTRY` configuration is not defined.",
            )

        return SchemaRegistryClient(settings.SCHEMA_REGISTRY)

    def start_consumer(self, consumer: str):
        """Look up `consumer` in the registry, instantiate it and start it.

        Args:
            consumer: key under which the consumer is stored in `self.consumers`.
        """
        self.consumers[consumer]().start()

    def start_consumers(self, consumers: Optional[list[str]] = None):
        """Start the given consumers in parallel, one worker process each.

        Args:
            consumers: registry keys of the consumers to start. When omitted
                or empty, every entry yielded by iterating `self.consumers`
                is used.
        """
        consumers = consumers or list(self.consumers)
        if not consumers:
            # `Pool(processes=0)` raises ValueError, so bail out explicitly
            # when there is nothing to run.
            logger.warning("No consumers to start.")
            return

        with Pool(processes=len(consumers)) as pool:
            try:
                pool.map(self.start_consumer, consumers)
            except KeyboardInterrupt:
                # Stops the worker processes immediately without completing
                # outstanding work.
                pool.terminate()
                # Wait for the worker processes to exit.
                # Should be called after close() or terminate().
                pool.join()
                logger.debug("KeyboardInterrupt. Pool workers terminated.")
            else:
                # Prevents any more tasks from being submitted to the pool.
                # Once all the tasks have been completed the worker processes will exit.
                pool.close()
                # Wait for the worker processes to exit.
                # Should be called after close() or terminate().
                pool.join()
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# Shared module-level instance; the README imports it via `from django_kafka import kafka`.
kafka = DjangoKafka()
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import socket
|
|
2
|
+
|
|
3
|
+
from django.conf import settings as django_settings
|
|
4
|
+
|
|
5
|
+
# Name of the Django setting that holds this package's configuration dict.
SETTINGS_KEY = "DJANGO_KAFKA"

# Fallback values used for any key missing from the Django setting above.
DEFAULTS = {
    # Derived from the current host name at import time.
    "CLIENT_ID": f"{socket.gethostname()}-python",
    # Dotted path to the error handler.
    "ERROR_HANDLER": "django_kafka.error_handlers.ClientErrorHandler",
    "GLOBAL_CONFIG": {},
    "PRODUCER_CONFIG": {},
    "CONSUMER_CONFIG": {},
    "POLLING_FREQUENCY": 1,  # seconds
    "SCHEMA_REGISTRY": {},
}
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Settings:
    """Attribute-style access to the package configuration.

    A value is taken from the Django setting named by `SETTINGS_KEY` when the
    key is present there, otherwise from `DEFAULTS`.
    """

    @property
    def _settings(self):
        """Return the Django setting named by `SETTINGS_KEY`, or `{}` if unset."""
        return getattr(django_settings, SETTINGS_KEY, {})

    def __getattr__(self, attr):
        """Resolve `attr` from the user settings first, then from `DEFAULTS`.

        Raises:
            AttributeError: when `attr` is found in neither mapping.
        """
        # User-provided values take precedence over the defaults.
        for source in (self._settings, DEFAULTS):
            if attr in source:
                return source[attr]

        raise AttributeError(f"Invalid setting: '{attr}'")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Shared module-level instance; `django_kafka/__init__.py` imports it as `settings`.
settings = Settings()
|