lcdp-kafka-utils 1.5.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lcdp_kafka_utils-1.5.8/PKG-INFO +17 -0
- lcdp_kafka_utils-1.5.8/lcdp_kafka_utils/__init__.py +0 -0
- lcdp_kafka_utils-1.5.8/lcdp_kafka_utils/batch_deserializing_consumer.py +62 -0
- lcdp_kafka_utils-1.5.8/lcdp_kafka_utils/cdc_dto_transformer.py +160 -0
- lcdp_kafka_utils-1.5.8/lcdp_kafka_utils/config.py +89 -0
- lcdp_kafka_utils-1.5.8/lcdp_kafka_utils/dto_action_execute.py +98 -0
- lcdp_kafka_utils-1.5.8/pyproject.toml +24 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: lcdp-kafka-utils
|
|
3
|
+
Version: 1.5.8
|
|
4
|
+
Summary: Kafka Utils
|
|
5
|
+
Author: Le Comptoir Des Pharmacies
|
|
6
|
+
Author-email: webmaster@lecomptoirdespharmacies.fr
|
|
7
|
+
Requires-Python: >=3.8
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
16
|
+
Requires-Dist: confluent_kafka (==2.5.0)
|
|
17
|
+
Requires-Dist: fastavro (==1.9.4)
|
|
File without changes
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from confluent_kafka.cimpl import Consumer as _ConsumerImpl
|
|
2
|
+
from confluent_kafka.error import (ConsumeError,
|
|
3
|
+
KeyDeserializationError,
|
|
4
|
+
ValueDeserializationError)
|
|
5
|
+
from confluent_kafka.serialization import (SerializationContext,
|
|
6
|
+
MessageField)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class BatchDeserializingConsumer(_ConsumerImpl):
    """Kafka consumer whose ``consume`` returns records with deserialized key and value.

    The optional ``key.deserializer`` and ``value.deserializer`` entries are
    taken out of a copy of the configuration before the remaining settings
    are handed to the underlying consumer; the caller's dict is not modified.
    """

    def __init__(self, conf):
        """Create the consumer.

        Args:
            conf (dict): Consumer configuration. May contain
                ``key.deserializer`` and ``value.deserializer`` callables,
                each invoked as ``deserializer(data, ctx)``.
        """
        conf_copy = conf.copy()
        self._key_deserializer = conf_copy.pop('key.deserializer', None)
        self._value_deserializer = conf_copy.pop('value.deserializer', None)
        super().__init__(conf_copy)

    def consume(self, num_messages=1, timeout=-1):
        """Consume a batch of messages and deserialize each of them.

        Args:
            num_messages (int): Forwarded to the underlying ``consume``.
            timeout (float): Forwarded to the underlying ``consume``.

        Returns:
            list: The records that were deserialized successfully. ``None``
            entries are skipped; records whose processing raised are logged
            (with traceback) and left out of the result.
        """
        # Function-scope import: this module has no module-level logger.
        import logging
        logger = logging.getLogger(__name__)

        records = super().consume(num_messages, timeout)
        deserialized_records = []

        for record in records:
            if record is None:
                continue
            try:
                deserialized_records.append(self.deserialize_record(record))
            except Exception:
                # Best effort: one bad record must not discard the whole batch.
                logger.exception("Error when deserializing record: %s", record)

        return deserialized_records

    def deserialize_record(self, record):
        """Deserialize the key and value of ``record`` in place.

        Args:
            record: A consumed message exposing ``error()``, ``topic()``,
                ``headers()``, ``key()``, ``value()``, ``set_key()`` and
                ``set_value()``.

        Returns:
            The same ``record`` object, updated with the deserialized key
            and value.

        Raises:
            ConsumeError: If the record carries an error.
            ValueDeserializationError: If the value deserializer raised.
            KeyDeserializationError: If the key deserializer raised.
        """
        if record.error() is not None:
            raise ConsumeError(record.error(), kafka_message=record)

        ctx = SerializationContext(record.topic(), MessageField.VALUE, record.headers())
        value = record.value()
        if self._value_deserializer is not None:
            try:
                value = self._value_deserializer(value, ctx)
            except Exception as se:
                raise ValueDeserializationError(exception=se, kafka_message=record) from se

        key = record.key()
        # The same context object is reused for the key, only the field changes.
        ctx.field = MessageField.KEY
        if self._key_deserializer is not None:
            try:
                key = self._key_deserializer(key, ctx)
            except Exception as se:
                raise KeyDeserializationError(exception=se, kafka_message=record) from se

        record.set_key(key)
        record.set_value(value)

        return record
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import threading
|
|
3
|
+
import time
|
|
4
|
+
|
|
5
|
+
from confluent_kafka.serializing_producer import SerializingProducer
|
|
6
|
+
from .config import check_configuration, build_consumer_config, build_producer_config
|
|
7
|
+
from .batch_deserializing_consumer import BatchDeserializingConsumer
|
|
8
|
+
|
|
9
|
+
from confluent_kafka import KafkaException
|
|
10
|
+
|
|
11
|
+
log = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
CONSUMER_POLL_TIMEOUT = 10.0
|
|
14
|
+
DURATION_BEFORE_RESTART = 5.0
|
|
15
|
+
N_BATCH_RECORDS = 1000
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def delivery_report(err, msg):
    """
    Reports the failure or success of a message delivery.

    Failures are logged at ERROR level, successes at DEBUG level. Log
    arguments are passed lazily so the success message is only formatted
    when DEBUG logging is actually enabled.

    Args:
        err (KafkaError): The error that occurred, or None on success.
        msg (Message): The message that was produced or failed.
    Note:
        In the delivery report callback the Message.key() and Message.value()
        will be the binary format as encoded by any configured Serializers and
        not the same object that was passed to produce().
        If you wish to pass the original object(s) for key and value to delivery
        report callback we recommend a bound callback or lambda where you pass
        the objects along.
    """
    if err is not None:
        log.error("Delivery failed for record %s: %s", msg.key(), err)
        return
    log.debug('Record %s successfully produced to %s [%s] at offset %s',
              msg.key(), msg.topic(), msg.partition(), msg.offset())
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class CdcDtoTransformer(object):
    """Base class that republishes CDC records read from ``topic_in`` as DTO records on ``topic_out``.

    Subclasses must implement:

    * ``build_key`` / ``build_value``: convert the CDC key and the CDC
      ``after`` payload into objects exposing ``dict()`` (or a falsy value).
    * ``get_key_schema`` / ``get_value_schema``: passed to
      ``build_producer_config``.
    * ``get_name`` / ``get_version``: passed to ``build_consumer_config``.
    """

    def __init__(self, topic_in, topic_out):
        """Store the source and destination topics; the listener starts stopped."""
        self.topic_in = topic_in
        self.topic_out = topic_out
        self.running = False

    def run(self):
        """Validate the setup and start the listener in a daemon thread.

        Raises:
            Exception: If ``check_configuration()`` is falsy or if one of the
                topics is empty.
        """
        if not check_configuration():
            raise Exception("Missing environment variable to run Kafka")

        if not self.topic_in or not self.topic_out:
            raise Exception("Empty topic in/out for CDC to DTO transformer")

        # RUNNING CONSUMER FOR READING MESSAGE FROM THE KAFKA TOPIC
        t = threading.Thread(target=self.start_listener, name="Start kafka listener")
        t.daemon = True
        t.start()

    def start_listener(self):
        """Process events until ``shutdown()`` is called, restarting after failures.

        Any exception raised by the processing loop is logged with its
        traceback, then processing restarts after ``DURATION_BEFORE_RESTART``
        seconds. The loop ends once ``self.running`` is False.
        """
        # The flag is raised once here (not on every restart) so that a
        # shutdown() request is never overwritten by a restart.
        self.running = True
        while self.running:
            try:
                self.__process_events()
            except Exception as ex:
                log.exception("Exception occured in Kafka Events Processing Thread: %s", ex)
                # pause a bit before restart
                log.info("Going to restart in : %s seconds", DURATION_BEFORE_RESTART)
                time.sleep(DURATION_BEFORE_RESTART)

    def __process_events(self):
        """Consume batches from ``topic_in`` and publish the DTOs while ``self.running`` is set.

        After each batch the producer is flushed and the consumer offsets are
        committed asynchronously. The consumer is closed and the producer
        flushed on exit, whatever the reason.
        """
        # Bound before the try block so the cleanup below never trips on an
        # unbound name (which would hide the real error) when creation fails.
        consumer = None
        producer = None
        try:
            consumer = self.create_consumer()
            producer = self.create_producer()

            while self.running:
                try:
                    # read N_BATCH_RECORDS messages at the same time
                    records = consumer.consume(N_BATCH_RECORDS, CONSUMER_POLL_TIMEOUT)
                except KafkaException as ex:
                    # Ignore the exception like TOPIC_NOT_FOUND, PARTITION_NOT_FOUND
                    log.error("CdcDtoTransformer exception: %s", ex)
                    continue

                for record in records:
                    self._transform_record(record, producer)

                # Flush all message and commit after produce batch records
                producer.flush()
                consumer.commit(asynchronous=True)
        finally:
            try:
                if consumer is not None:
                    log.info("closing consumer")
                    consumer.close()
            finally:
                # Still flush pending records if closing the consumer failed.
                if producer is not None:
                    log.info("Flushing records...")
                    producer.flush()

    def _transform_record(self, record, producer):
        """Convert one CDC record to a DTO and hand it to ``producer``; skip unusable records."""
        if record is None:
            # There is no new record in the topic, do nothing
            return

        if record.error():
            log.error("Error reading message : %s", record.error())
            return

        if record.key() is None:
            log.error("MUST FIX - Strange null key processed with value : %s", record.value())
            return

        if record.value() is None:
            # Debezium by default generates a tombstone record to enable Kafka compaction after a delete record was generated.
            # This record is usually filtered out to avoid duplicates as a delete record is converted to a tombstone record, too
            return

        # 'after' can be null if resource deleted or in case of update with capture.mode <> change_streams_update_full (default since 1.8.0.Alpha1)
        # In this situation, just publish an event in the topic with null value
        cdc_value = record.value().get('after', None)
        cdc_key = record.key()

        dto_value = self.build_value(cdc_value)
        dto_key = self.build_key(cdc_key)

        producer.produce(topic=self.topic_out,
                         key=dto_key.dict() if dto_key else None,
                         value=dto_value.dict() if dto_value else None,
                         on_delivery=delivery_report)

    def build_key(self, msg):
        """Build the DTO key from the CDC key. Must be implemented by subclasses."""
        raise NotImplementedError("Please Implement this method")

    def build_value(self, msg):
        """Build the DTO value from the CDC ``after`` payload (may be None). Must be implemented by subclasses."""
        raise NotImplementedError("Please Implement this method")

    def get_key_schema(self):
        """Return the key schema given to ``build_producer_config``. Must be implemented by subclasses."""
        raise NotImplementedError("Please Implement this method")

    def get_value_schema(self):
        """Return the value schema given to ``build_producer_config``. Must be implemented by subclasses."""
        raise NotImplementedError("Please Implement this method")

    def get_name(self):
        """Return the name given to ``build_consumer_config``. Must be implemented by subclasses."""
        raise NotImplementedError("Please Implement this method")

    def get_version(self):
        """Return the version given to ``build_consumer_config``. Must be implemented by subclasses."""
        raise NotImplementedError("Please Implement this method")

    def create_producer(self):
        """Create the ``SerializingProducer`` used to publish DTO records."""
        config = build_producer_config(self.get_key_schema(), self.get_value_schema())
        batch_producer = SerializingProducer(config)
        return batch_producer

    def create_consumer(self):
        """Create a ``BatchDeserializingConsumer`` subscribed to ``topic_in``."""
        config = build_consumer_config(self.get_name(), self.get_version())
        batch_consumer = BatchDeserializingConsumer(config)
        batch_consumer.subscribe([self.topic_in])
        return batch_consumer

    def shutdown(self):
        """Ask the listener to stop; it exits once the current batch or restart pause is over."""
        self.running = False
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from confluent_kafka.schema_registry import SchemaRegistryClient
|
|
3
|
+
from confluent_kafka.schema_registry.avro import AvroDeserializer, AvroSerializer
|
|
4
|
+
|
|
5
|
+
SERVICE_NAME = os.getenv('SERVICE_NAME')
|
|
6
|
+
HERMES_BROKER = os.getenv('HERMES_BROKER')
|
|
7
|
+
HERMES_USERNAME = os.getenv('HERMES_USERNAME')
|
|
8
|
+
HERMES_PASSWORD = os.getenv('HERMES_PASSWORD')
|
|
9
|
+
HERMES_SCHEMA_REGISTRY_URL = os.getenv('HERMES_SCHEMA_REGISTRY_URL')
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def check_configuration():
    """Tell whether the mandatory Kafka settings are available.

    Returns:
        ``HERMES_BROKER`` when it is falsy, otherwise
        ``HERMES_SCHEMA_REGISTRY_URL``; the result is truthy only when both
        values are set.
    """
    if not HERMES_BROKER:
        return HERMES_BROKER
    return HERMES_SCHEMA_REGISTRY_URL
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def build_schema_registry_config():
    """Build the Schema Registry client settings.

    Returns:
        dict: Always holds ``url``; ``basic.auth.user.info`` (formatted as
        ``username:password``) is added when ``HERMES_USERNAME`` is set.
    """
    settings = {'url': HERMES_SCHEMA_REGISTRY_URL}

    if not HERMES_USERNAME:
        return settings

    credentials = "{}:{}".format(HERMES_USERNAME, HERMES_PASSWORD)
    settings['basic.auth.user.info'] = credentials
    return settings
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def build_base_config():
    """Build the settings shared by the consumer and producer configurations.

    Returns:
        dict: Always holds ``bootstrap.servers``; the SASL_SSL /
        SCRAM-SHA-512 authentication entries are added when
        ``HERMES_USERNAME`` is set.
    """
    settings = {'bootstrap.servers': HERMES_BROKER}

    if HERMES_USERNAME:
        settings.update({
            'security.protocol': 'SASL_SSL',
            'sasl.mechanism': 'SCRAM-SHA-512',
            'sasl.username': HERMES_USERNAME,
            'sasl.password': HERMES_PASSWORD,
        })

    return settings
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def build_consumer_config(name, version):
    """Build the configuration dict for a consumer with Avro deserializers.

    Args:
        name: Second component of the consumer group id.
        version: Third component of the consumer group id.

    Returns:
        dict: The base settings plus ``group.id``, ``auto.offset.reset``,
        ``enable.auto.commit`` and one Avro deserializer shared by
        ``key.deserializer`` and ``value.deserializer``.
    """
    registry = SchemaRegistryClient(build_schema_registry_config())
    deserializer = AvroDeserializer(registry)

    # Compute GROUP_ID: the username takes precedence over the service name.
    owner = HERMES_USERNAME or SERVICE_NAME
    group_id = "{}.{}.{}".format(owner, name, version)

    settings = build_base_config()
    settings['group.id'] = group_id
    settings['auto.offset.reset'] = 'earliest'
    settings['enable.auto.commit'] = 'false'
    settings['key.deserializer'] = deserializer
    settings['value.deserializer'] = deserializer
    return settings
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def build_producer_config(key_schema_str, value_schema_str):
    """Build the configuration dict for a producer with Avro serializers.

    Args:
        key_schema_str: Schema handed to the key ``AvroSerializer``.
        value_schema_str: Schema handed to the value ``AvroSerializer``.

    Returns:
        dict: The base settings plus ``key.serializer`` and
        ``value.serializer``; both serializers are created with
        ``auto.register.schemas`` enabled.
    """
    registry = SchemaRegistryClient(build_schema_registry_config())
    serializer_conf = {"auto.register.schemas": True}

    def make_serializer(schema_str):
        # Both serializers share the registry client and the configuration.
        return AvroSerializer(schema_str=schema_str,
                              schema_registry_client=registry,
                              conf=serializer_conf)

    key_serializer = make_serializer(key_schema_str)
    value_serializer = make_serializer(value_schema_str)

    settings = build_base_config()
    settings['key.serializer'] = key_serializer
    settings['value.serializer'] = value_serializer
    return settings
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import threading
|
|
3
|
+
import time
|
|
4
|
+
|
|
5
|
+
from confluent_kafka import KafkaException
|
|
6
|
+
from .config import check_configuration, build_consumer_config
|
|
7
|
+
from .batch_deserializing_consumer import BatchDeserializingConsumer
|
|
8
|
+
|
|
9
|
+
log = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
CONSUMER_POLL_TIMEOUT = 10.0
|
|
12
|
+
DURATION_BEFORE_RESTART = 5.0
|
|
13
|
+
N_BATCH_RECORDS = 1000
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class DtoActionExecute(object):
    """Base class that consumes DTO records from ``topic_in`` in batches and acts on them.

    Subclasses must implement ``action_events`` (what to do with a batch)
    and ``get_name`` / ``get_version`` (passed to ``build_consumer_config``).
    """

    def __init__(self, topic_in):
        """Store the source topic; the listener starts stopped."""
        self.topic_in = topic_in
        self.running = False

    def run(self):
        """Validate the setup and start the listener in a daemon thread.

        Raises:
            Exception: If ``check_configuration()`` is falsy or if the topic
                is empty.
        """
        if not check_configuration():
            raise Exception("Missing environment variable to run Kafka")

        if not self.topic_in:
            raise Exception("Empty topic in for DTO")

        # RUNNING CONSUMER FOR READING MESSAGE FROM THE KAFKA TOPIC
        t = threading.Thread(target=self.start_listener, name="Start kafka listener")
        t.daemon = True
        t.start()

    def start_listener(self):
        """Process events until ``shutdown()`` is called, restarting after failures.

        Any exception raised by the processing loop is logged with its
        traceback, then processing restarts after ``DURATION_BEFORE_RESTART``
        seconds. The loop ends once ``self.running`` is False.
        """
        # The flag is raised once here (not on every restart) so that a
        # shutdown() request is never overwritten by a restart.
        self.running = True
        while self.running:
            try:
                self.__process_events()
            except Exception as ex:
                log.exception("Exception occured in Kafka Events Processing Thread: %s", ex)
                # pause a bit before restart
                log.info("Going to restart in : %s seconds", DURATION_BEFORE_RESTART)
                time.sleep(DURATION_BEFORE_RESTART)

    def __process_events(self):
        """Consume batches from ``topic_in`` and pass them to ``action_events`` while ``self.running`` is set.

        Offsets are committed asynchronously after each iteration. The
        consumer is closed on exit, whatever the reason.
        """
        # Bound before the try block so the cleanup below never trips on an
        # unbound name (which would hide the real error) when creation fails.
        consumer = None
        try:
            consumer = self.create_consumer()

            while self.running:
                try:
                    # read N_BATCH_RECORDS messages at the same time
                    records = consumer.consume(N_BATCH_RECORDS, CONSUMER_POLL_TIMEOUT)
                except KafkaException as ex:
                    # Ignore the exception like TOPIC_NOT_FOUND, PARTITION_NOT_FOUND
                    log.error("DtoActionExecute exception: %s", ex)
                    continue

                if records:
                    self.action_events(records)

                consumer.commit(asynchronous=True)
        finally:
            if consumer is not None:
                log.info("closing consumer")
                consumer.close()

    def action_events(self, records):
        """Handle one non-empty batch of records. Must be implemented by subclasses."""
        raise NotImplementedError("Please Implement this method")

    def get_name(self):
        """Return the name given to ``build_consumer_config``. Must be implemented by subclasses."""
        raise NotImplementedError("Please Implement this method")

    def get_version(self):
        """Return the version given to ``build_consumer_config``. Must be implemented by subclasses."""
        raise NotImplementedError("Please Implement this method")

    def create_consumer(self):
        """Create a ``BatchDeserializingConsumer`` subscribed to ``topic_in``."""
        config = build_consumer_config(self.get_name(), self.get_version())
        batch_consumer = BatchDeserializingConsumer(config)
        batch_consumer.subscribe([self.topic_in])
        return batch_consumer

    def shutdown(self):
        """Ask the listener to stop; it exits once the current batch or restart pause is over."""
        self.running = False

    @staticmethod
    def dedupe(records):
        """Keep only the last record for each key, preserving the original order.

        Keys are compared with ``==`` (list membership), so unhashable keys
        such as dicts are supported.

        Args:
            records: A sequence of objects exposing ``key()``.

        Returns:
            list: A new list holding, for every distinct key, the record that
            appears last in ``records``.
        """
        seen_keys = []
        latest_first = []
        # Walk backwards so the first record met for a key is its latest one.
        for record in reversed(records):
            key = record.key()
            if key not in seen_keys:
                seen_keys.append(key)
                latest_first.append(record)

        latest_first.reverse()
        return latest_first
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "lcdp-kafka-utils"
|
|
3
|
+
# https://github.com/python-poetry/poetry/issues/1208
|
|
4
|
+
version = "1.5.8"
|
|
5
|
+
description = "Kafka Utils"
|
|
6
|
+
authors = ["Le Comptoir Des Pharmacies <webmaster@lecomptoirdespharmacies.fr>"]
|
|
7
|
+
|
|
8
|
+
[tool.poetry-dynamic-versioning]
|
|
9
|
+
enable = false
|
|
10
|
+
vcs = "git"
|
|
11
|
+
|
|
12
|
+
[tool.poetry.requires-plugins]
|
|
13
|
+
poetry-dynamic-versioning = { version = ">=1.0.0,<2.0.0", extras = ["plugin"] }
|
|
14
|
+
|
|
15
|
+
[tool.poetry.dependencies]
|
|
16
|
+
python = ">=3.8"
|
|
17
|
+
confluent_kafka = "2.5.0"
|
|
18
|
+
fastavro = "1.9.4"
|
|
19
|
+
|
|
20
|
+
[tool.poetry.dev-dependencies]
|
|
21
|
+
|
|
22
|
+
[build-system]
|
|
23
|
+
requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning>=1.0.0,<2.0.0"]
|
|
24
|
+
build-backend = "poetry_dynamic_versioning.backend"
|