buz-2.13.1rc9-py3-none-any.whl → buz-2.14.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- buz/event/async_event_bus.py +15 -0
- buz/event/event_bus.py +2 -2
- buz/event/infrastructure/buz_kafka/async_buz_kafka_event_bus.py +107 -0
- buz/event/infrastructure/buz_kafka/base_buz_aiokafka_async_consumer.py +5 -10
- buz/event/infrastructure/buz_kafka/buz_aiokafka_async_consumer.py +3 -4
- buz/event/infrastructure/buz_kafka/buz_aiokafka_multi_threaded_consumer.py +2 -4
- buz/event/infrastructure/buz_kafka/buz_kafka_event_bus.py +4 -6
- buz/event/infrastructure/buz_kafka/kafka_event_sync_subscriber_executor.py +2 -2
- buz/event/infrastructure/kombu/kombu_consumer.py +1 -0
- buz/event/infrastructure/kombu/kombu_event_bus.py +6 -7
- buz/event/middleware/async_publish_middleware.py +13 -0
- buz/event/middleware/async_publish_middleware_chain_resolver.py +22 -0
- buz/event/sync/sync_event_bus.py +2 -2
- buz/event/transactional_outbox/outbox_repository.py +5 -1
- buz/event/transactional_outbox/transactional_outbox_event_bus.py +12 -11
- buz/kafka/__init__.py +28 -2
- buz/kafka/domain/exceptions/not_valid_partition_number_exception.py +10 -0
- buz/kafka/domain/models/__init__.py +0 -0
- buz/kafka/domain/models/kafka_supported_compression_type.py +8 -0
- buz/kafka/domain/services/__init__.py +0 -0
- buz/kafka/domain/services/async_kafka_producer.py +21 -0
- buz/kafka/domain/services/kafka_admin_client.py +15 -1
- buz/kafka/domain/services/kafka_producer.py +3 -1
- buz/kafka/infrastructure/aiokafka/aiokafka_consumer.py +12 -9
- buz/kafka/infrastructure/aiokafka/aiokafka_producer.py +98 -0
- buz/kafka/infrastructure/interfaces/__init__.py +0 -0
- buz/kafka/infrastructure/interfaces/async_connection_manager.py +11 -0
- buz/kafka/infrastructure/interfaces/connection_manager.py +11 -0
- buz/kafka/infrastructure/kafka_python/kafka_python_admin_client.py +230 -45
- buz/kafka/infrastructure/kafka_python/kafka_python_admin_test_client.py +3 -3
- buz/kafka/infrastructure/kafka_python/kafka_python_producer.py +51 -22
- {buz-2.13.1rc9.dist-info → buz-2.14.1.dist-info}/METADATA +1 -1
- {buz-2.13.1rc9.dist-info → buz-2.14.1.dist-info}/RECORD +36 -24
- buz/kafka/infrastructure/kafka_python/factories/kafka_python_producer_factory.py +0 -20
- /buz/kafka/{infrastructure/kafka_python/factories → domain/exceptions}/__init__.py +0 -0
- {buz-2.13.1rc9.dist-info → buz-2.14.1.dist-info}/LICENSE +0 -0
- {buz-2.13.1rc9.dist-info → buz-2.14.1.dist-info}/WHEEL +0 -0
buz/kafka/infrastructure/aiokafka/aiokafka_producer.py
@@ -0,0 +1,98 @@
+from __future__ import annotations
+
+from ssl import SSLContext
+from typing import Generic, Optional, TypeVar
+
+from aiokafka import AIOKafkaProducer as NativeAIOKafkaProducer
+from aiokafka.helpers import create_ssl_context
+
+from buz.kafka.domain.models.kafka_connection_config import KafkaConnectionConfig
+from buz.kafka.domain.models.kafka_supported_compression_type import KafkaSupportedCompressionType
+from buz.kafka.domain.models.kafka_supported_security_protocols import KafkaSupportedSecurityProtocols
+from buz.kafka.domain.services.async_kafka_producer import AsyncKafkaProducer
+from buz.kafka.infrastructure.serializers.byte_serializer import ByteSerializer
+from buz.kafka.infrastructure.serializers.kafka_header_serializer import KafkaHeaderSerializer
+
+T = TypeVar("T")
+
+
+class AIOKafkaProducer(AsyncKafkaProducer, Generic[T]):
+    __DEFAULT_REQUEST_TIMEOUT_MS = 5000
+    __kafka_producer: Optional[NativeAIOKafkaProducer] = None
+
+    def __init__(
+        self,
+        *,
+        connection_config: KafkaConnectionConfig,
+        byte_serializer: ByteSerializer[T],
+        compression_type: Optional[KafkaSupportedCompressionType] = None,
+        retry_backoff_ms: int = 100,
+    ) -> None:
+        self.__connection_config = connection_config
+        self.__byte_serializer = byte_serializer
+        self.__header_serializer = KafkaHeaderSerializer()
+        self.__compression_type = compression_type
+        self.__retry_backoff_ms = retry_backoff_ms
+
+    async def _get_aiokafka_producer(self) -> NativeAIOKafkaProducer:
+        if self.__kafka_producer:
+            return self.__kafka_producer
+
+        ssl_context: Optional[SSLContext] = None
+
+        sasl_mechanism = (
+            self.__connection_config.credentials.sasl_mechanism.value
+            if self.__connection_config.credentials.sasl_mechanism
+            else "PLAIN"
+        )
+
+        if self.__connection_config.credentials.security_protocol == KafkaSupportedSecurityProtocols.SASL_SSL:
+            ssl_context = create_ssl_context()
+
+        self.__kafka_producer = NativeAIOKafkaProducer(
+            client_id=self.__connection_config.client_id,
+            bootstrap_servers=",".join(self.__connection_config.bootstrap_servers),
+            sasl_mechanism=sasl_mechanism,
+            ssl_context=ssl_context,
+            sasl_plain_username=self.__connection_config.credentials.user,
+            sasl_plain_password=self.__connection_config.credentials.password,
+            retry_backoff_ms=self.__retry_backoff_ms,
+            request_timeout_ms=self.__DEFAULT_REQUEST_TIMEOUT_MS,
+            compression_type=self.__compression_type.value if self.__compression_type else None,
+        )
+
+        await self.__kafka_producer.start()
+
+        return self.__kafka_producer
+
+    async def connect(self) -> None:
+        await self._get_aiokafka_producer()
+
+    async def disconnect(self) -> None:
+        if self.__kafka_producer is None:
+            return None
+        await self.__kafka_producer.stop()
+        self.__kafka_producer = None
+
+    async def produce(
+        self,
+        *,
+        topic: str,
+        message: T,
+        partition_key: Optional[str] = None,
+        headers: Optional[dict[str, str]] = None,
+    ) -> None:
+        serialized_headers = self.__header_serializer.serialize(headers) if headers is not None else None
+        kafka_producer = await self._get_aiokafka_producer()
+
+        await kafka_producer.send_and_wait(
+            topic=topic,
+            value=self.__byte_serializer.serialize(message),
+            headers=serialized_headers,
+            key=partition_key.encode("utf-8") if partition_key else None,
+        )
+
+    async def close(self) -> None:
+        if self.__kafka_producer is not None:
+            await self.__kafka_producer.stop()
+            self.__kafka_producer = None
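For orientation, here is a minimal usage sketch of the new async producer added above. Only the constructor arguments and the connect/produce/disconnect methods shown in the hunk are taken from the diff; the Utf8Serializer subclass and the externally built KafkaConnectionConfig are assumptions for illustration, and this snippet is not part of the package.

from buz.kafka.infrastructure.aiokafka.aiokafka_producer import AIOKafkaProducer
from buz.kafka.infrastructure.serializers.byte_serializer import ByteSerializer


class Utf8Serializer(ByteSerializer[str]):
    # Assumption: ByteSerializer only requires a serialize() method returning bytes.
    def serialize(self, message: str) -> bytes:
        return message.encode("utf-8")


async def publish_once(connection_config) -> None:
    # connection_config: a KafkaConnectionConfig built elsewhere (its construction is not shown in this diff).
    producer: AIOKafkaProducer[str] = AIOKafkaProducer(
        connection_config=connection_config,
        byte_serializer=Utf8Serializer(),
    )
    await producer.connect()  # lazily creates and starts the underlying aiokafka producer
    try:
        await producer.produce(
            topic="orders",
            message='{"id": 1}',
            partition_key="order-1",
            headers={"source": "example"},
        )
    finally:
        await producer.disconnect()  # stops and clears the cached native producer

# run with: asyncio.run(publish_once(my_connection_config))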
File without changes
buz/kafka/infrastructure/kafka_python/kafka_python_admin_client.py
@@ -8,10 +8,12 @@ from typing import Any, Callable, Optional, Sequence, cast
 from cachetools import TTLCache
 from kafka import KafkaClient, KafkaConsumer
 from kafka.admin import KafkaAdminClient as KafkaPythonLibraryAdminClient, NewTopic
+from kafka.admin.new_partitions import NewPartitions
 from kafka.errors import TopicAlreadyExistsError
 from kafka.structs import TopicPartition, OffsetAndTimestamp
 
 from buz.kafka.domain.exceptions.not_all_partition_assigned_exception import NotAllPartitionAssignedException
+from buz.kafka.domain.exceptions.not_valid_partition_number_exception import NotValidPartitionNumberException
 from buz.kafka.domain.exceptions.topic_already_created_exception import KafkaTopicsAlreadyCreatedException
 from buz.kafka.domain.exceptions.topic_not_found_exception import TopicNotFoundException
 from buz.kafka.domain.models.consumer_initial_offset_position import ConsumerInitialOffsetPosition
@@ -30,18 +32,19 @@ TOPIC_CACHE_KEY = "topics"
 class KafkaPythonAdminClient(KafkaAdminClient):
     __PYTHON_KAFKA_DUPLICATED_TOPIC_ERROR_CODE = 36
 
+    _kafka_admin: Optional[KafkaPythonLibraryAdminClient] = None
+    _kafka_client: Optional[KafkaClient] = None
+
     def __init__(
         self,
         *,
         logger: Logger,
-
+        connection_config: KafkaConnectionConfig,
         cache_ttl_seconds: int = 0,
     ):
         self._logger = logger
-        self.
-        self._config_in_library_format = self.__get_kafka_config_in_library_format(
-        self._kafka_admin = KafkaPythonLibraryAdminClient(**self._config_in_library_format)
-        self._kafka_client = KafkaClient(**self._config_in_library_format)
+        self.__connection_config = connection_config
+        self._config_in_library_format = self.__get_kafka_config_in_library_format(self.__connection_config)
         self.__ttl_cache: TTLCache[str, Any] = TTLCache(maxsize=1, ttl=cache_ttl_seconds)
 
     def __get_kafka_config_in_library_format(self, config: KafkaConnectionConfig) -> dict:
@@ -54,6 +57,28 @@ class KafkaPythonAdminClient(KafkaAdminClient):
             "sasl_plain_password": config.credentials.password,
         }
 
+    def connect(self):
+        self._get_kafka_admin()
+        self._get_kafka_client()
+
+    def disconnect(self):
+        if self._kafka_admin is not None:
+            self._kafka_admin.close()
+            self._kafka_admin = None
+        if self._kafka_client is not None:
+            self._kafka_client.close()
+            self._kafka_client = None
+
+    def _get_kafka_admin(self) -> KafkaPythonLibraryAdminClient:
+        if not self._kafka_admin:
+            self._kafka_admin = KafkaPythonLibraryAdminClient(**self._config_in_library_format)
+        return self._kafka_admin
+
+    def _get_kafka_client(self) -> KafkaClient:
+        if not self._kafka_client:
+            self._kafka_client = KafkaClient(**self._config_in_library_format)
+        return self._kafka_client
+
     def create_topics(
         self,
         *,
@@ -70,7 +95,7 @@ class KafkaPythonAdminClient(KafkaAdminClient):
         ]
 
         try:
-            self.
+            self._get_kafka_admin().create_topics(new_topics=new_topics)
         except TopicAlreadyExistsError as error:
             topic_names = self.__get_list_of_kafka_topics_from_topic_already_exists_error(error)
             raise KafkaTopicsAlreadyCreatedException(topic_names=topic_names)
@@ -94,7 +119,7 @@ class KafkaPythonAdminClient(KafkaAdminClient):
         self,
     ) -> set[str]:
         return self.__resolve_cached_property(
-            TOPIC_CACHE_KEY, lambda: set(self.
+            TOPIC_CACHE_KEY, lambda: set(self._get_kafka_admin().list_topics()) - INTERNAL_KAFKA_TOPICS
         )
 
     def __resolve_cached_property(self, property_key: str, callback: Callable) -> Any:
@@ -110,7 +135,7 @@ class KafkaPythonAdminClient(KafkaAdminClient):
         *,
         topics: set[str],
     ) -> None:
-        self.
+        self._get_kafka_admin().delete_topics(
             topics=topics,
         )
         self.__remove_cache_property(TOPIC_CACHE_KEY)
@@ -123,18 +148,18 @@ class KafkaPythonAdminClient(KafkaAdminClient):
         *,
         subscription_groups: set[str],
     ) -> None:
-        self.
+        self._get_kafka_admin().delete_consumer_groups(
             group_ids=subscription_groups,
         )
 
-    def
+    def get_cluster_consumer_groups(
         self,
     ) -> set[str]:
-        return set(self.
+        return set([consumer_group_tuple[0] for consumer_group_tuple in self._get_kafka_admin().list_consumer_groups()])
 
     def _wait_for_cluster_update(self) -> None:
-        future = self.
-        self.
+        future = self._get_kafka_client().cluster.request_update()
+        self._get_kafka_client().poll(future=future)
 
     def move_offsets_to_datetime(
         self,
@@ -143,6 +168,46 @@ class KafkaPythonAdminClient(KafkaAdminClient):
         topic: str,
         target_datetime: datetime,
     ) -> None:
+        (consumer, topic_partitions) = self.__get_consumer_with_all_partitions_assigned(
+            consumer_group=consumer_group,
+            topic=topic,
+        )
+
+        offsets_for_date = self.__get_first_offset_after_date(
+            consumer=consumer,
+            topic_partitions=topic_partitions,
+            target_datetime=target_datetime,
+        )
+
+        try:
+            end_offsets = consumer.end_offsets(topic_partitions)
+
+            if end_offsets is None or len(end_offsets.keys()) != len(topic_partitions):
+                raise Exception(f'There was an error extracting the end offsets of the topic "{topic}"')
+
+            for topic_partition in topic_partitions:
+                offset_and_timestamp = offsets_for_date.get(topic_partition)
+                if offset_and_timestamp:
+                    self._logger.info(f'moving "{topic_partition}" to the offset "{offset_and_timestamp.offset}"')
+                    consumer.seek(topic_partition, offset_and_timestamp.offset)
+                else:
+                    self._logger.info(
+                        f'moving "{topic_partition}" to the end of the topic because there are no messages later than "{target_datetime}"'
+                    )
+                    consumer.seek(topic_partition, end_offsets[topic_partition])
+
+            consumer.commit()
+        except Exception as exception:
+            consumer.close()
+            raise exception
+
+        consumer.close()
+
+    def __get_consumer_with_all_partitions_assigned(
+        self,
+        consumer_group: str,
+        topic: str,
+    ) -> tuple[KafkaConsumer, Sequence[TopicPartition]]:
         consumer = KafkaConsumer(
             group_id=consumer_group,
             enable_auto_commit=False,
@@ -152,44 +217,29 @@ class KafkaPythonAdminClient(KafkaAdminClient):
             **self._config_in_library_format,
         )
 
-
-
-        if partitions is None:
-            raise TopicNotFoundException(topic)
-
-        topic_partitions = [TopicPartition(topic, p) for p in partitions]
-        consumer.subscribe(topics=[topic])
-
-        self.__force_partition_assignment(consumer)
+        try:
+            partitions = self.get_number_of_partitions(topic)
 
-
-        if len(consumer.assignment()) != len(topic_partitions):
-            raise NotAllPartitionAssignedException(topic)
+            topic_partitions = [TopicPartition(topic=topic, partition=partition) for partition in range(partitions)]
 
-
-            consumer=consumer,
-            topic_partitions=topic_partitions,
-            target_datetime=target_datetime,
-        )
+            consumer.subscribe(topic)
 
-
+            self.__force_partition_assignment(consumer)
 
-
-
+            # We need all the partitions in order to update the offsets
+            if len(consumer.assignment()) != len(topic_partitions):
+                raise NotAllPartitionAssignedException(topic)
 
-
-
-
-
-
-            else:
-                self._logger.info(
-                    f'moving "{topic_partition}" to the end of the topic because there are no messages later than "{target_datetime}"'
-                )
-                consumer.seek(topic_partition, end_offsets[topic_partition])
+            # This could produce a race condition, but it is a limitation of kafka admin (we are not able to check if all the partition are assigned using the manual assignment)
+            # https://github.com/dpkp/kafka-python/blob/master/kafka/consumer/group.py#L430
+            consumer.unsubscribe()
+            consumer.assign(topic_partitions)
+            self.__force_partition_assignment(consumer)
 
-
-
+            return (consumer, topic_partitions)
+        except Exception as exception:
+            consumer.close()
+            raise exception
 
     def __get_first_offset_after_date(
         self,
@@ -212,3 +262,138 @@ class KafkaPythonAdminClient(KafkaAdminClient):
     # We are not to commit the new offset, but we need to execute a polling in order to start the partition assignment
     def __force_partition_assignment(self, consumer: KafkaConsumer) -> None:
         consumer.poll(max_records=1, timeout_ms=0)
+
+    def increase_topic_partitions_and_set_offset_of_related_consumer_groups_to_the_beginning_of_the_new_ones(
+        self,
+        *,
+        topic: str,
+        new_number_of_partitions: int,
+    ) -> None:
+        self._logger.info(
+            f'Increasing topic "{topic}" partitions: Verifying the new number of partitions "{new_number_of_partitions}"'
+        )
+
+        previous_partitions_number = self.get_number_of_partitions(topic)
+        topic_partitions = [
+            TopicPartition(topic=topic, partition=partition) for partition in range(previous_partitions_number)
+        ]
+
+        if previous_partitions_number >= new_number_of_partitions:
+            raise NotValidPartitionNumberException(
+                partition_number=new_number_of_partitions,
+                min_partition_number=len(topic_partitions),
+            )
+
+        self._logger.info(f'Increasing topic "{topic}" partitions: Extracting related consumer groups')
+        related_consumer_groups = self.__get_consumer_groups_related_to_a_topic(topic_partitions)
+
+        self._logger.info(
+            f'Increasing topic "{topic}" partitions: The following consumer groups will be updated:"{related_consumer_groups}"'
+        )
+
+        consumers_to_update: list[KafkaConsumer] = []
+        new_partitions_consumer: Optional[KafkaConsumer] = None
+
+        try:
+            for consumer_group in related_consumer_groups:
+                self._logger.info(
+                    f'Increasing topic "{topic}" partitions: Requesting the assignment of the partitions of the group "{consumer_group}"'
+                )
+                (consumer_with_all_partitions, _) = self.__get_consumer_with_all_partitions_assigned(
+                    consumer_group=consumer_group,
+                    topic=topic,
+                )
+                consumers_to_update.append(consumer_with_all_partitions)
+
+            self._logger.info(
+                f'Increasing topic "{topic}" partitions: Incrementing the partition to "{new_number_of_partitions}"'
+            )
+
+            self._get_kafka_admin().create_partitions(
+                {
+                    topic: NewPartitions(total_count=new_number_of_partitions),
+                }
+            )
+
+            new_partitions = [
+                TopicPartition(
+                    topic=topic,
+                    partition=partition_index,
+                )
+                for partition_index in range(previous_partitions_number, new_number_of_partitions)
+            ]
+
+            for consumer_group in related_consumer_groups:
+                self._logger.info(
+                    f'Increasing topic "{topic}" partitions: Moving the offset of the consumer group "{consumer_group}" to the beginning of the new partitions'
+                )
+                # We need to create a new consumer because kafka-python has a limitation that does not allow to assign specific partitions to a consumer subscribed to an entire topic
+                new_partitions_consumer = KafkaConsumer(
+                    group_id=consumer_group,
+                    enable_auto_commit=False,
+                    auto_offset_reset=KafkaPythonConsumerInitialOffsetPositionTranslator.to_kafka_supported_format(
+                        ConsumerInitialOffsetPosition.BEGINNING
+                    ),
+                    **self._config_in_library_format,
+                )
+                new_partitions_consumer.assign(new_partitions)
+                for new_partition in new_partitions:
+                    new_partitions_consumer.seek(new_partition, 0)
+                new_partitions_consumer.commit()
+                new_partitions_consumer.close()
+
+            self._logger.info(f'Increasing topic "{topic}" partitions: Process complete')
+
+        except Exception as exception:
+            for consumer_with_all_partitions in consumers_to_update:
+                consumer_with_all_partitions.close()
+
+            if new_partitions_consumer is not None:
+                new_partitions_consumer.close()
+
+            self._logger.error(f'Increasing topic "{topic}" partitions: unexpected error {exception}')
+            raise exception
+
+        return
+
+    def get_number_of_partitions(self, topic: str) -> int:
+        consumer = KafkaConsumer(
+            enable_auto_commit=False,
+            auto_offset_reset=KafkaPythonConsumerInitialOffsetPositionTranslator.to_kafka_supported_format(
+                ConsumerInitialOffsetPosition.BEGINNING
+            ),
+            **self._config_in_library_format,
+        )
+
+        try:
+            partitions = consumer.partitions_for_topic(topic)
+            if partitions is None:
+                raise TopicNotFoundException(topic_name=topic)
+
+            return len(partitions)
+        except Exception as exception:
+            consumer.close()
+            raise exception
+
+    # The purpose of this function is to get all the consumer groups that are consuming from the topic
+    # It is a heavy tasks because we need to get the offset of all the partitions of the topic
+    def __get_consumer_groups_related_to_a_topic(self, topic_partitions: Sequence[TopicPartition]) -> set[str]:
+        cluster_consumer_groups = self.get_cluster_consumer_groups()
+
+        related_consumer_groups: set[str] = set()
+
+        for consumer_group in cluster_consumer_groups:
+            partitions_offsets = list(
+                self._get_kafka_admin()
+                .list_consumer_group_offsets(consumer_group, partitions=topic_partitions)
+                .values()
+            )
+
+            partitions_with_valid_offsets = [partition for partition in partitions_offsets if partition.offset != -1]
+
+            if len(partitions_with_valid_offsets) == 0:
+                continue
+
+            related_consumer_groups.add(consumer_group)
+
+        return related_consumer_groups
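For context, a minimal sketch of how the reworked admin client might be driven: the connection is now lazy (connect()/disconnect() plus the _get_kafka_admin()/_get_kafka_client() accessors), and the new partition-increase helper also rewinds related consumer groups. The KafkaConnectionConfig instance is assumed to be built elsewhere; the constructor arguments and method names come from the hunks above. Illustrative only, not part of the package.

import logging

from buz.kafka.infrastructure.kafka_python.kafka_python_admin_client import KafkaPythonAdminClient


def grow_topic(connection_config, topic: str, target_partitions: int) -> None:
    # connection_config: a KafkaConnectionConfig built elsewhere (its construction is not shown in this diff).
    admin = KafkaPythonAdminClient(
        logger=logging.getLogger("kafka-admin"),
        connection_config=connection_config,
    )
    try:
        admin.connect()  # optional: the admin/client objects are also created lazily on first use
        if admin.get_number_of_partitions(topic) < target_partitions:
            # Creates the extra partitions and moves every related consumer group
            # to the beginning of the new partitions.
            admin.increase_topic_partitions_and_set_offset_of_related_consumer_groups_to_the_beginning_of_the_new_ones(
                topic=topic,
                new_number_of_partitions=target_partitions,
            )
    finally:
        admin.disconnect()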
buz/kafka/infrastructure/kafka_python/kafka_python_admin_test_client.py
@@ -27,10 +27,10 @@ class KafkaPythonAdminTestClient(KafkaPythonAdminClient, KafkaAdminTestClient):
         self,
         *,
         logger: Logger,
-
+        connection_config: KafkaConnectionConfig,
     ):
         super().__init__(
-
+            connection_config=connection_config,
             logger=logger,
         )
 
@@ -88,5 +88,5 @@ class KafkaPythonAdminTestClient(KafkaPythonAdminClient, KafkaAdminTestClient):
         self,
     ) -> None:
         self.delete_topics(topics=self.get_topics())
-        self.delete_subscription_groups(subscription_groups=self.
+        self.delete_subscription_groups(subscription_groups=self.get_cluster_consumer_groups())
         self._wait_for_cluster_update()
buz/kafka/infrastructure/kafka_python/kafka_python_producer.py
@@ -1,10 +1,12 @@
 from __future__ import annotations
 
-from typing import Generic, Optional, TypeVar
+from typing import Generic, Optional, TypeVar, cast
 
 from kafka import KafkaProducer as KafkaPythonLibraryProducer
+from kafka.producer.future import FutureRecordMetadata
 
 from buz.kafka.domain.models.kafka_connection_config import KafkaConnectionConfig
+from buz.kafka.domain.models.kafka_supported_compression_type import KafkaSupportedCompressionType
 from buz.kafka.domain.services.kafka_producer import KafkaProducer
 from buz.kafka.infrastructure.serializers.byte_serializer import ByteSerializer
 from buz.kafka.infrastructure.serializers.kafka_header_serializer import KafkaHeaderSerializer
@@ -13,33 +15,55 @@ T = TypeVar("T")
 
 
 class KafkaPythonProducer(KafkaProducer, Generic[T]):
+    __kafka_producer: Optional[KafkaPythonLibraryProducer] = None
+    __SEND_TIMEOUT_SECONDS = 5
+
     def __init__(
         self,
        *,
-
+        connection_config: KafkaConnectionConfig,
         byte_serializer: ByteSerializer[T],
         retries: int = 0,
         retry_backoff_ms: int = 100,
+        compression_type: Optional[KafkaSupportedCompressionType] = None,
     ) -> None:
-        self.
+        self.__connection_config = connection_config
         self.__byte_serializer = byte_serializer
         self.__header_serializer = KafkaHeaderSerializer()
+        self.__retries = retries
+        self.__retry_backoff_ms = retry_backoff_ms
+        self.__compression_type = compression_type
 
-
+    def _get_kafka_producer(self) -> KafkaPythonLibraryProducer:
+        if self.__kafka_producer is None:
+            sasl_mechanism = (
+                self.__connection_config.credentials.sasl_mechanism.value
+                if self.__connection_config.credentials.sasl_mechanism
+                else None
+            )
+            compression_type = self.__compression_type.value if self.__compression_type else None
 
-
-
+            self.__kafka_producer = KafkaPythonLibraryProducer(
+                client_id=self.__connection_config.client_id,
+                bootstrap_servers=self.__connection_config.bootstrap_servers,
+                security_protocol=self.__connection_config.credentials.security_protocol.value,
+                sasl_mechanism=sasl_mechanism,
+                sasl_plain_username=self.__connection_config.credentials.user,
+                sasl_plain_password=self.__connection_config.credentials.password,
+                retries=self.__retries,
+                retry_backoff_ms=self.__retry_backoff_ms,
+                compression_type=compression_type,
+            )
 
-        self.__kafka_producer
-
-
-
-
-
-
-
-
-        )
+        return self.__kafka_producer
+
+    def connect(self):
+        self._get_kafka_producer()
+
+    def disconnect(self) -> None:
+        if self.__kafka_producer is not None:
+            self.__kafka_producer.close()
+            self.__kafka_producer = None
 
     def produce(
         self,
@@ -50,12 +74,17 @@ class KafkaPythonProducer(KafkaProducer, Generic[T]):
         headers: Optional[dict[str, str]] = None,
     ) -> None:
         serialized_headers = self.__header_serializer.serialize(headers) if headers is not None else None
+        kafka_producer = self._get_kafka_producer()
 
-
-
-
-
-
+        message_future = cast(
+            FutureRecordMetadata,
+            kafka_producer.send(
+                topic=topic,
+                value=self.__byte_serializer.serialize(message),
+                headers=serialized_headers,
+                key=partition_key,
+            ),
         )
+
         # We are forcing a flush because the task related with the send is asynchronous, and we want that the event to be sent after call produce
-        self.
+        message_future.get(self.__SEND_TIMEOUT_SECONDS)