lcdp-kafka-utils 1.5.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ Metadata-Version: 2.4
2
+ Name: lcdp-kafka-utils
3
+ Version: 1.5.8
4
+ Summary: Kafka Utils
5
+ Author: Le Comptoir Des Pharmacies
6
+ Author-email: webmaster@lecomptoirdespharmacies.fr
7
+ Requires-Python: >=3.8
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.8
10
+ Classifier: Programming Language :: Python :: 3.9
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Programming Language :: Python :: 3.14
16
+ Requires-Dist: confluent_kafka (==2.5.0)
17
+ Requires-Dist: fastavro (==1.9.4)
File without changes
@@ -0,0 +1,62 @@
1
+ from confluent_kafka.cimpl import Consumer as _ConsumerImpl
2
+ from confluent_kafka.error import (ConsumeError,
3
+ KeyDeserializationError,
4
+ ValueDeserializationError)
5
+ from confluent_kafka.serialization import (SerializationContext,
6
+ MessageField)
7
+
8
+
9
class BatchDeserializingConsumer(_ConsumerImpl):
    """Consumer that reads messages in batches and deserializes each one.

    The ``key.deserializer`` and ``value.deserializer`` entries are taken out
    of the configuration and applied to every message returned by
    ``consume``; the remaining configuration is handed to the base consumer.
    """

    def __init__(self, conf):
        """Create the consumer.

        Args:
            conf (dict): Consumer configuration. May contain the optional
                ``key.deserializer`` and ``value.deserializer`` callables,
                each invoked as ``deserializer(data, ctx)``. The dict passed
                in is not modified.
        """
        # Work on a copy so the caller's dict keeps its deserializer entries.
        conf_copy = conf.copy()
        self._key_deserializer = conf_copy.pop('key.deserializer', None)
        self._value_deserializer = conf_copy.pop('value.deserializer', None)
        super().__init__(conf_copy)

    def consume(self, num_messages=1, timeout=-1):
        """Fetch a batch of messages and deserialize their keys and values.

        Records that carry an error or fail to deserialize are logged and
        left out of the result; they never raise out of this method.

        Args:
            num_messages (int): Forwarded to the base ``consume``.
            timeout (float): Forwarded to the base ``consume``.

        Returns:
            list: The successfully deserialized messages, in batch order.
        """
        records = super().consume(num_messages, timeout)
        deserialized_records = []

        for record in records:
            if record is None:
                continue

            try:
                deserialized_records.append(self.deserialize_record(record))
            except Exception:
                # Local import: this module has no module-level logger.
                import logging
                # Log with traceback and the record coordinates instead of
                # printing the opaque message object to stdout.
                logging.getLogger(__name__).exception(
                    "Error when deserializing record from %s [%s] at offset %s",
                    record.topic(), record.partition(), record.offset())

        return deserialized_records

    def deserialize_record(self, record):
        """Deserialize the key and value of ``record`` in place.

        Args:
            record: A message returned by the base consumer.

        Returns:
            The same ``record`` with its key and value replaced by the
            deserialized objects.

        Raises:
            ConsumeError: If ``record.error()`` is set.
            ValueDeserializationError: If the value deserializer fails.
            KeyDeserializationError: If the key deserializer fails.
        """
        if record.error() is not None:
            raise ConsumeError(record.error(), kafka_message=record)

        ctx = SerializationContext(record.topic(), MessageField.VALUE, record.headers())
        value = record.value()
        if self._value_deserializer is not None:
            try:
                value = self._value_deserializer(value, ctx)
            except Exception as se:
                raise ValueDeserializationError(exception=se, kafka_message=record) from se

        key = record.key()
        # The same context object is reused for the key; only the field changes.
        ctx.field = MessageField.KEY
        if self._key_deserializer is not None:
            try:
                key = self._key_deserializer(key, ctx)
            except Exception as se:
                raise KeyDeserializationError(exception=se, kafka_message=record) from se

        record.set_key(key)
        record.set_value(value)

        return record
@@ -0,0 +1,160 @@
1
+ import logging
2
+ import threading
3
+ import time
4
+
5
+ from confluent_kafka.serializing_producer import SerializingProducer
6
+ from .config import check_configuration, build_consumer_config, build_producer_config
7
+ from .batch_deserializing_consumer import BatchDeserializingConsumer
8
+
9
+ from confluent_kafka import KafkaException
10
+
11
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Timeout passed to consumer.consume() for each batch read.
CONSUMER_POLL_TIMEOUT = 10.0
# Pause, in seconds, before the listener restarts after a failure.
DURATION_BEFORE_RESTART = 5.0
# Number of records requested from consumer.consume() per batch.
N_BATCH_RECORDS = 1000
16
+
17
+
18
def delivery_report(err, msg):
    """Log the outcome of a message delivery.

    Failures are logged at error level, successes at debug level.

    Args:
        err (KafkaError): The error that occurred, or None on success.
        msg (Message): The message that was produced or failed.

    Note:
        In the delivery report callback, Message.key() and Message.value()
        are in the binary format encoded by any configured serializers, not
        the objects originally passed to produce(). To receive the original
        key and value objects, use a bound callback or a lambda that passes
        them along.
    """
    if err is None:
        log.debug('Record {} successfully produced to {} [{}] at offset {}'.format(
            msg.key(), msg.topic(), msg.partition(), msg.offset()))
    else:
        log.error("Delivery failed for record {}: {}".format(msg.key(), err))
37
+
38
+
39
class CdcDtoTransformer(object):
    """Base class for a worker that turns CDC records into DTO records.

    A background thread reads batches from ``topic_in``, converts each
    record's key and ``after`` payload with ``build_key`` / ``build_value``,
    and produces the result to ``topic_out``. Subclasses must implement
    ``build_key``, ``build_value``, ``get_key_schema``, ``get_value_schema``,
    ``get_name`` and ``get_version``.
    """

    # Set to True by shutdown(). Kept as a class-level default so the
    # attribute exists even if a subclass overrides __init__ without calling it.
    _stop_requested = False

    def __init__(self, topic_in, topic_out):
        """Store the source and destination topic names.

        Args:
            topic_in: Topic the CDC records are read from.
            topic_out: Topic the DTO records are produced to.
        """
        self.topic_in = topic_in
        self.topic_out = topic_out
        self.running = False

    def run(self):
        """Validate the setup and start the listener in a daemon thread.

        Raises:
            Exception: If the Kafka configuration is incomplete or one of
                the topics is empty.
        """
        if not check_configuration():
            raise Exception("Missing environment variable to run Kafka")

        if not self.topic_in or not self.topic_out:
            raise Exception("Empty topic in/out for CDC to DTO transformer")

        # A fresh run cancels any earlier shutdown request.
        self._stop_requested = False

        # RUNNING CONSUMER FOR READING MESSAGE FROM THE KAFKA TOPIC
        t = threading.Thread(target=self.start_listener, name="Start kafka listener")
        t.daemon = True
        t.start()

    def start_listener(self):
        """Process events until shutdown() is called, restarting after failures."""
        # Checking the stop flag here is what lets shutdown() actually end
        # the thread instead of the loop restarting processing at once.
        while not self._stop_requested:
            try:
                self.__process_events()
            except Exception as ex:
                # %s placeholder: logging uses %-style arguments, so a "{}"
                # placeholder would make the record fail to format.
                log.exception("Exception occured in Kafka Events Processing Thread: %s", ex)
                # pause a bit before restart
                log.info("Going to restart in : {} seconds".format(DURATION_BEFORE_RESTART))
                time.sleep(DURATION_BEFORE_RESTART)

    def __process_events(self):
        """Consume batches, transform each record and produce it to ``topic_out``.

        Runs until ``running`` is cleared or a shutdown is requested. The
        consumer is closed and the producer flushed on the way out, whatever
        the reason for leaving.
        """
        # Bound before the try block so the cleanup below cannot hit an
        # unbound name (and hide the real error) when creation fails.
        consumer = None
        producer = None
        try:
            self.running = True
            consumer = self.create_consumer()
            producer = self.create_producer()

            while self.running and not self._stop_requested:
                try:
                    # read up to N_BATCH_RECORDS messages at the same time
                    records = consumer.consume(N_BATCH_RECORDS, CONSUMER_POLL_TIMEOUT)
                except KafkaException as ex:
                    # Ignore exceptions like TOPIC_NOT_FOUND, PARTITION_NOT_FOUND
                    log.error("CdcDtoTransformer exception: %s", ex)
                    continue

                for record in records:

                    if record is None:
                        # There is no new record in the topic, do nothing
                        continue

                    if record.error():
                        log.error("Error reading message : %s", record.error())
                        continue

                    if record.key() is None:
                        log.error("MUST FIX - Strange null key processed with value : {}".format(record.value()))
                        continue

                    if record.value() is None:
                        # Debezium by default generates a tombstone record to enable Kafka compaction
                        # after a delete record was generated. This record is usually filtered out to
                        # avoid duplicates, as a delete record is converted to a tombstone record too.
                        continue

                    # 'after' can be null if the resource was deleted, or on update with
                    # capture.mode <> change_streams_update_full (default since 1.8.0.Alpha1).
                    # In this situation, just publish an event in the topic with a null value.
                    cdc_value = record.value().get('after', None)
                    cdc_key = record.key()

                    dto_value = self.build_value(cdc_value)
                    dto_key = self.build_key(cdc_key)

                    producer.produce(topic=self.topic_out,
                                     key=dto_key.dict() if dto_key else None,
                                     value=dto_value.dict() if dto_value else None,
                                     on_delivery=delivery_report)

                # Flush all messages and commit after producing the batch
                producer.flush()
                consumer.commit(asynchronous=True)
        finally:
            try:
                if consumer is not None:
                    log.info("closing consumer")
                    consumer.close()
            finally:
                # Explicit None check: a producer's truthiness may reflect its
                # queue length rather than its existence. The nested finally
                # keeps the flush even if closing the consumer raises.
                if producer is not None:
                    log.info("Flushing records...")
                    producer.flush()

    def build_key(self, msg):
        """Build the DTO key from a CDC key; must be implemented by subclasses."""
        raise NotImplementedError("Please Implement this method")

    def build_value(self, msg):
        """Build the DTO value from a CDC ``after`` payload; must be implemented by subclasses."""
        raise NotImplementedError("Please Implement this method")

    def get_key_schema(self):
        """Return the key schema given to the producer config; must be implemented by subclasses."""
        raise NotImplementedError("Please Implement this method")

    def get_value_schema(self):
        """Return the value schema given to the producer config; must be implemented by subclasses."""
        raise NotImplementedError("Please Implement this method")

    def get_name(self):
        """Return the name given to the consumer config; must be implemented by subclasses."""
        raise NotImplementedError("Please Implement this method")

    def get_version(self):
        """Return the version given to the consumer config; must be implemented by subclasses."""
        raise NotImplementedError("Please Implement this method")

    def create_producer(self):
        """Create the serializing producer from the subclass-provided schemas."""
        config = build_producer_config(self.get_key_schema(), self.get_value_schema())
        return SerializingProducer(config)

    def create_consumer(self):
        """Create the batch consumer and subscribe it to ``topic_in``."""
        config = build_consumer_config(self.get_name(), self.get_version())
        batch_consumer = BatchDeserializingConsumer(config)
        batch_consumer.subscribe([self.topic_in])
        return batch_consumer

    def shutdown(self):
        """Ask the listener thread to stop after the current batch."""
        self._stop_requested = True
        self.running = False
@@ -0,0 +1,89 @@
1
+ import os
2
+ from confluent_kafka.schema_registry import SchemaRegistryClient
3
+ from confluent_kafka.schema_registry.avro import AvroDeserializer, AvroSerializer
4
+
5
# Connection settings, read once from the environment at import time.
# Each value is None when the corresponding variable is not set.
SERVICE_NAME = os.environ.get('SERVICE_NAME')
HERMES_BROKER = os.environ.get('HERMES_BROKER')
HERMES_USERNAME = os.environ.get('HERMES_USERNAME')
HERMES_PASSWORD = os.environ.get('HERMES_PASSWORD')
HERMES_SCHEMA_REGISTRY_URL = os.environ.get('HERMES_SCHEMA_REGISTRY_URL')
10
+
11
+
12
def check_configuration():
    """Tell whether the broker and schema registry settings are both present.

    Returns:
        A truthy value (the schema registry URL) when both HERMES_BROKER and
        HERMES_SCHEMA_REGISTRY_URL are set; otherwise the first falsy one of
        the two.
    """
    if not HERMES_BROKER:
        return HERMES_BROKER
    return HERMES_SCHEMA_REGISTRY_URL
14
+
15
+
16
def build_schema_registry_config():
    """Build the schema registry client configuration.

    Returns:
        dict: Contains ``url``; when HERMES_USERNAME is set it also contains
        ``basic.auth.user.info`` in ``username:password`` form.
    """
    registry_conf = {'url': HERMES_SCHEMA_REGISTRY_URL}

    if HERMES_USERNAME:
        registry_conf['basic.auth.user.info'] = "{}:{}".format(HERMES_USERNAME, HERMES_PASSWORD)

    return registry_conf
30
+
31
+
32
def build_base_config():
    """Build the settings shared by the consumer and producer configurations.

    Returns:
        dict: Contains ``bootstrap.servers``; when HERMES_USERNAME is set it
        also contains the SASL_SSL / SCRAM-SHA-512 credential entries.
    """
    client_conf = {'bootstrap.servers': HERMES_BROKER}

    if HERMES_USERNAME:
        client_conf.update({
            'security.protocol': 'SASL_SSL',
            'sasl.mechanism': 'SCRAM-SHA-512',
            'sasl.username': HERMES_USERNAME,
            'sasl.password': HERMES_PASSWORD,
        })

    return client_conf
49
+
50
+
51
def build_consumer_config(name, version):
    """Build the configuration for an Avro-deserializing consumer.

    Args:
        name: Second component of the consumer group id.
        version: Third component of the consumer group id.

    Returns:
        dict: The base configuration plus ``group.id``, offset and commit
        settings, and one Avro deserializer used for both key and value.
    """
    registry = SchemaRegistryClient(build_schema_registry_config())
    deserializer = AvroDeserializer(registry)

    # Group id is "<application>.<name>.<version>"; the application part is
    # the username when one is configured, otherwise the service name.
    owner = HERMES_USERNAME or SERVICE_NAME
    group_id = "{}.{}.{}".format(owner, name, version)

    consumer_conf = build_base_config()
    consumer_conf.update({
        'group.id': group_id,
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': 'false',
        'key.deserializer': deserializer,
        'value.deserializer': deserializer,
    })
    return consumer_conf
71
+
72
+
73
def build_producer_config(key_schema_str, value_schema_str):
    """Build the configuration for an Avro-serializing producer.

    Args:
        key_schema_str: Schema string given to the key serializer.
        value_schema_str: Schema string given to the value serializer.

    Returns:
        dict: The base configuration plus ``key.serializer`` and
        ``value.serializer``, both created with ``auto.register.schemas``
        enabled.
    """
    registry = SchemaRegistryClient(build_schema_registry_config())
    serializer_conf = {"auto.register.schemas": True}

    serializers = {
        'key.serializer': AvroSerializer(schema_str=key_schema_str,
                                         schema_registry_client=registry,
                                         conf=serializer_conf),
        'value.serializer': AvroSerializer(schema_str=value_schema_str,
                                           schema_registry_client=registry,
                                           conf=serializer_conf),
    }

    producer_conf = build_base_config()
    producer_conf.update(serializers)
    return producer_conf
@@ -0,0 +1,98 @@
1
+ import logging
2
+ import threading
3
+ import time
4
+
5
+ from confluent_kafka import KafkaException
6
+ from .config import check_configuration, build_consumer_config
7
+ from .batch_deserializing_consumer import BatchDeserializingConsumer
8
+
9
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Timeout passed to consumer.consume() for each batch read.
CONSUMER_POLL_TIMEOUT = 10.0
# Pause, in seconds, before the listener restarts after a failure.
DURATION_BEFORE_RESTART = 5.0
# Number of records requested from consumer.consume() per batch.
N_BATCH_RECORDS = 1000
14
+
15
+
16
class DtoActionExecute(object):
    """Base class for a worker that runs an action on batches of DTO records.

    A background thread reads batches from ``topic_in`` and hands each
    non-empty batch to ``action_events``. Subclasses must implement
    ``action_events``, ``get_name`` and ``get_version``.
    """

    # Set to True by shutdown(). Kept as a class-level default so the
    # attribute exists even if a subclass overrides __init__ without calling it.
    _stop_requested = False

    def __init__(self, topic_in):
        """Store the source topic name.

        Args:
            topic_in: Topic the DTO records are read from.
        """
        self.topic_in = topic_in
        self.running = False

    def run(self):
        """Validate the setup and start the listener in a daemon thread.

        Raises:
            Exception: If the Kafka configuration is incomplete or the
                topic is empty.
        """
        if not check_configuration():
            raise Exception("Missing environment variable to run Kafka")

        if not self.topic_in:
            raise Exception("Empty topic in for DTO")

        # A fresh run cancels any earlier shutdown request.
        self._stop_requested = False

        # RUNNING CONSUMER FOR READING MESSAGE FROM THE KAFKA TOPIC
        t = threading.Thread(target=self.start_listener, name="Start kafka listener")
        t.daemon = True
        t.start()

    def start_listener(self):
        """Process events until shutdown() is called, restarting after failures."""
        # Checking the stop flag here is what lets shutdown() actually end
        # the thread instead of the loop restarting processing at once.
        while not self._stop_requested:
            try:
                self.__process_events()
            except Exception as ex:
                # %s placeholder: logging uses %-style arguments, so a "{}"
                # placeholder would make the record fail to format.
                log.exception("Exception occured in Kafka Events Processing Thread: %s", ex)
                # pause a bit before restart
                log.info("Going to restart in : {} seconds".format(DURATION_BEFORE_RESTART))
                time.sleep(DURATION_BEFORE_RESTART)

    def __process_events(self):
        """Consume batches and pass each non-empty one to ``action_events``.

        Runs until ``running`` is cleared or a shutdown is requested. The
        consumer is closed on the way out, whatever the reason for leaving.
        """
        # Bound before the try block so the cleanup below cannot hit an
        # unbound name (and hide the real error) when creation fails.
        consumer = None
        try:
            self.running = True
            consumer = self.create_consumer()

            while self.running and not self._stop_requested:
                try:
                    # read up to N_BATCH_RECORDS messages at the same time
                    records = consumer.consume(N_BATCH_RECORDS, CONSUMER_POLL_TIMEOUT)
                except KafkaException as ex:
                    # Ignore exceptions like TOPIC_NOT_FOUND, PARTITION_NOT_FOUND
                    log.error("DtoActionExecute exception: %s", ex)
                    continue

                if records:
                    self.action_events(records)

                consumer.commit(asynchronous=True)

        finally:
            if consumer is not None:
                log.info("closing consumer")
                consumer.close()

    def action_events(self, records):
        """Handle one batch of records; must be implemented by subclasses."""
        raise NotImplementedError("Please Implement this method")

    def get_name(self):
        """Return the name given to the consumer config; must be implemented by subclasses."""
        raise NotImplementedError("Please Implement this method")

    def get_version(self):
        """Return the version given to the consumer config; must be implemented by subclasses."""
        raise NotImplementedError("Please Implement this method")

    def create_consumer(self):
        """Create the batch consumer and subscribe it to ``topic_in``."""
        config = build_consumer_config(self.get_name(), self.get_version())
        batch_consumer = BatchDeserializingConsumer(config)
        batch_consumer.subscribe([self.topic_in])
        return batch_consumer

    def shutdown(self):
        """Ask the listener thread to stop after the current batch."""
        self._stop_requested = True
        self.running = False

    @staticmethod
    def dedupe(records):
        """Keep only the last record for each key, preserving relative order.

        Args:
            records: Sequence of objects exposing a ``key()`` method.

        Returns:
            list: For every distinct key, the last record carrying it, in
            the order those records appear in ``records``.
        """
        # Keys are compared by equality in a list rather than hashed in a
        # set, so unhashable keys (e.g. dicts) are supported.
        seen_keys = []
        kept = []
        # Walk backwards so the first hit for a key is its latest record.
        for record in reversed(records):
            key = record.key()
            if key not in seen_keys:
                seen_keys.append(key)
                kept.append(record)

        kept.reverse()
        return kept
@@ -0,0 +1,24 @@
1
+ [tool.poetry]
2
+ name = "lcdp-kafka-utils"
3
+ # https://github.com/python-poetry/poetry/issues/1208
4
+ version = "1.5.8"
5
+ description = "Kafka Utils"
6
+ authors = ["Le Comptoir Des Pharmacies <webmaster@lecomptoirdespharmacies.fr>"]
7
+
8
+ [tool.poetry-dynamic-versioning]
9
+ enable = false
10
+ vcs = "git"
11
+
12
+ [tool.poetry.requires-plugins]
13
+ poetry-dynamic-versioning = { version = ">=1.0.0,<2.0.0", extras = ["plugin"] }
14
+
15
+ [tool.poetry.dependencies]
16
+ python = ">=3.8"
17
+ confluent_kafka = "2.5.0"
18
+ fastavro = "1.9.4"
19
+
20
+ [tool.poetry.dev-dependencies]
21
+
22
+ [build-system]
23
+ requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning>=1.0.0,<2.0.0"]
24
+ build-backend = "poetry_dynamic_versioning.backend"