dgkafka 1.0.0a0__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their public registries.
- dgkafka/__init__.py +5 -0
- dgkafka/avro_consumer.py +70 -0
- dgkafka/avro_producer.py +114 -0
- dgkafka/consumer.py +103 -0
- dgkafka/errors.py +6 -0
- dgkafka/json_consumer.py +25 -0
- dgkafka/producer.py +147 -0
- dgkafka-1.0.0a0.dist-info/METADATA +217 -0
- dgkafka-1.0.0a0.dist-info/RECORD +12 -0
- dgkafka-1.0.0a0.dist-info/WHEEL +5 -0
- dgkafka-1.0.0a0.dist-info/licenses/LICENSE +21 -0
- dgkafka-1.0.0a0.dist-info/top_level.txt +1 -0
dgkafka/__init__.py
ADDED
dgkafka/avro_consumer.py
ADDED
@@ -0,0 +1,70 @@
from typing import Any, Iterator

from dgkafka.consumer import KafkaConsumer
from dglog import Logger
from confluent_kafka import Message
from confluent_kafka.avro import AvroConsumer
from confluent_kafka.avro.serializer import SerializerError
from confluent_kafka.avro.cached_schema_registry_client import CachedSchemaRegistryClient


class AvroKafkaConsumer(KafkaConsumer):
    """Kafka consumer with Avro schema support using Schema Registry."""

    def __init__(self, schema_registry_url: str, logger_: Logger | None = None, **configs: Any) -> None:
        """
        Initialize Avro consumer.

        Args:
            schema_registry_url: URL of Schema Registry
            logger_: Optional logger instance
            configs: Kafka consumer configuration
        """
        self.schema_registry_url = schema_registry_url
        self.schema_registry_client = CachedSchemaRegistryClient({'url': schema_registry_url})
        super().__init__(logger_=logger_, **configs)

    def _init_consumer(self, **configs: Any) -> None:
        """Initialize AvroConsumer instance."""
        try:
            # AvroConsumer requires schema registry in config
            configs['schema.registry.url'] = self.schema_registry_url
            self.consumer = AvroConsumer(configs)
            self.logger.info("[*] Avro consumer initialized successfully")
        except Exception as ex:
            self.logger.error(f"[x] Failed to initialize Avro consumer: {ex}")
            raise

    def consume(self, num_messages: int = 1, timeout: float = 10.0, **kwargs: Any) -> Iterator[dict[str, Any] | Message]:
        """
        Consume Avro-encoded messages.

        Args:
            num_messages: Maximum number of messages to consume
            timeout: Poll timeout in seconds
            kwargs: Additional arguments

        Yields:
            Deserialized Avro messages as dictionaries or Message objects on error
        """
        consumer = self._ensure_consumer()

        for _ in range(num_messages):
            try:
                if (msg := self._consume(consumer, timeout)) is None:
                    continue
                yield msg.value()
            except SerializerError as e:
                self.logger.error(f"[x] Avro deserialization failed: {e}")
                yield msg  # Return raw message on deserialization error
            except Exception as ex:
                self.logger.error(f"[!] Unexpected error: {ex}")
                continue

    def get_schema(self, subject: str, version: int = 1) -> dict[str, Any]:
        """Get Avro schema from Schema Registry."""
        return self.schema_registry_client.get_schema(subject, version)

    def get_latest_schema(self, subject: str) -> dict[str, Any]:
        """Get latest version of schema for given subject."""
        return self.schema_registry_client.get_latest_schema(subject)[1]
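A minimal usage sketch for `AvroKafkaConsumer` (illustrative only, not part of the wheel). The broker address, registry URL, topic, and group id are placeholders; the Kafka settings are passed as keyword arguments, which the constructor collects into `**configs`:

```python
from dgkafka.avro_consumer import AvroKafkaConsumer

# Placeholder endpoints and names for illustration.
with AvroKafkaConsumer(
    schema_registry_url='http://localhost:8081',
    **{'bootstrap.servers': 'localhost:9092',
       'group.id': 'avro_group',
       'auto.offset.reset': 'earliest'}
) as consumer:
    consumer.subscribe(['avro_topic'])
    for record in consumer.consume(num_messages=10, timeout=5.0):
        print(record)  # dict on success, raw Message on deserialization error
```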
dgkafka/avro_producer.py
ADDED
@@ -0,0 +1,114 @@
from typing import Optional, Union, Dict, Any
from confluent_kafka.avro import AvroProducer
from confluent_kafka.avro.serializer import SerializerError
from confluent_kafka.avro.cached_schema_registry_client import CachedSchemaRegistryClient

from dglog import Logger

from dgkafka.producer import KafkaProducer


class AvroKafkaProducer(KafkaProducer):
    """Kafka producer with Avro schema support using Schema Registry."""

    def __init__(
        self,
        schema_registry_url: str,
        default_key_schema: Optional[Dict[str, Any]] = None,
        default_value_schema: Optional[Dict[str, Any]] = None,
        logger_: Optional[Logger] = None,
        **configs: Any
    ) -> None:
        """
        Initialize Avro producer.

        Args:
            schema_registry_url: URL of Schema Registry
            default_key_schema: Default Avro schema for message keys
            default_value_schema: Default Avro schema for message values
            logger_: Optional logger instance
            configs: Kafka producer configuration
        """
        self.schema_registry_url = schema_registry_url
        self.default_key_schema = default_key_schema
        self.default_value_schema = default_value_schema
        self.schema_registry_client = CachedSchemaRegistryClient({'url': schema_registry_url})
        super().__init__(logger_=logger_, **configs)

    def _init_producer(self, **configs: Any) -> None:
        """Initialize AvroProducer instance."""
        try:
            # AvroProducer requires schema registry in config
            configs['schema.registry.url'] = self.schema_registry_url
            self.producer = AvroProducer(
                config=configs,
                default_key_schema=self.default_key_schema,
                default_value_schema=self.default_value_schema
            )
            self.logger.info("Avro producer initialized successfully")
        except Exception as ex:
            self.logger.error(f"Failed to initialize Avro producer: {ex}")
            raise

    def produce(
        self,
        topic: str,
        value: Union[Dict[str, Any], Any],
        key: Optional[Union[Dict[str, Any], str]] = None,
        value_schema: Optional[Dict[str, Any]] = None,
        key_schema: Optional[Dict[str, Any]] = None,
        partition: Optional[int] = None,
        headers: Optional[Dict[str, bytes]] = None,
        flush: bool = True
    ) -> None:
        """
        Produce Avro-encoded message to Kafka.

        Args:
            topic: Target topic name
            value: Message value (must match Avro schema)
            key: Message key (optional)
            value_schema: Avro schema for message value (optional)
            key_schema: Avro schema for message key (optional)
            partition: Specific partition (optional)
            headers: Message headers (optional)
            flush: Immediately flush after producing (default: True)
        """
        producer = self._ensure_producer()
        producer.poll(0)

        # Prepare headers
        headers_list = None
        if headers:
            headers_list = [(k, v if isinstance(v, bytes) else str(v).encode('utf-8'))
                            for k, v in headers.items()]

        try:
            producer.produce(
                topic=topic,
                value=value,
                value_schema=value_schema,
                key=key,
                key_schema=key_schema,
                partition=partition,
                on_delivery=self.delivery_report,
                headers=headers_list
            )

            if flush:
                producer.flush()

        except SerializerError as ex:
            self.logger.error(f"Avro serialization failed: {ex}")
            raise
        except Exception as ex:
            self.logger.error(f"Failed to produce Avro message: {ex}")
            raise

    def get_schema(self, subject: str, version: int = 1) -> Dict[str, Any]:
        """Get Avro schema from Schema Registry."""
        return self.schema_registry_client.get_schema(subject, version)

    def get_latest_schema(self, subject: str) -> Dict[str, Any]:
        """Get latest version of schema for given subject."""
        return self.schema_registry_client.get_latest_schema(subject)[1]
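A hedged usage sketch for `AvroKafkaProducer`, mirroring the README example in the METADATA below; the schema, endpoints, and topic are placeholders:

```python
from dgkafka.avro_producer import AvroKafkaProducer

# Placeholder schema and endpoints for illustration only.
user_schema = {
    "type": "record",
    "name": "User",
    "fields": [{"name": "name", "type": "string"}, {"name": "age", "type": "int"}],
}

with AvroKafkaProducer(
    schema_registry_url='http://localhost:8081',
    default_value_schema=user_schema,
    **{'bootstrap.servers': 'localhost:9092'}
) as producer:
    # Value must match the registered Avro schema.
    producer.produce('avro_topic', {'name': 'Alice', 'age': 30})
```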
dgkafka/consumer.py
ADDED
@@ -0,0 +1,103 @@
from typing import Literal, Iterator, Any
from dgkafka.errors import ConsumerNotSetError
from dglog import Logger
from confluent_kafka import Consumer, KafkaError, KafkaException, Message, TopicPartition
from confluent_kafka import OFFSET_STORED, OFFSET_BEGINNING, OFFSET_END

OffsetType = Literal[OFFSET_STORED, OFFSET_BEGINNING, OFFSET_END] | int


class KafkaConsumer:
    def __init__(self, logger_: Logger | None = None, **configs: Any) -> None:
        self.consumer: Consumer | None = None
        self.logger = logger_ if logger_ else Logger()
        self.logger.auto_configure()
        self._init_consumer(**configs)

    def _init_consumer(self, **configs: Any) -> None:
        """Internal method to initialize consumer"""
        try:
            self.consumer = Consumer(configs)
        except KafkaException as ex:
            self.logger.error(f"[x] Failed to initialize Kafka consumer: {ex}")
            raise

    def close(self) -> None:
        """Safely close the consumer"""
        if self.consumer is not None:
            try:
                self.consumer.close()
                self.logger.info("[*] Kafka consumer closed successfully")
            except KafkaException as ex:
                self.logger.error(f"[x] Error closing consumer: {ex}")
                raise
            finally:
                self.consumer = None

    def __enter__(self):
        """Context manager entry point"""
        if self.consumer is None:
            self._init_consumer()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit point"""
        self.close()

    def _ensure_consumer(self) -> Consumer:
        """Ensure consumer is initialized"""
        if self.consumer is None:
            raise ConsumerNotSetError('[!] Consumer not initialized!')
        return self.consumer

    def subscribe(self, topics: str | list[str], partition: int | None = None,
                  offset: OffsetType = OFFSET_STORED) -> None:
        """Subscribe to topics"""
        consumer = self._ensure_consumer()

        if partition is not None and offset != OFFSET_STORED:
            topic_list = [topics] if isinstance(topics, str) else topics
            for topic in topic_list:
                self._assign_topic_partition(topic, partition, offset)
        else:
            topics_list = [topics] if isinstance(topics, str) else topics
            consumer.subscribe(topics_list)
            self.logger.info(f"[*] Subscribed to topics: {topics_list}")

    def _assign_topic_partition(self, topic: str, partition: int, offset: OffsetType) -> None:
        """Assign to specific partition"""
        consumer = self._ensure_consumer()
        topic_partition = TopicPartition(topic, partition, offset)
        consumer.assign([topic_partition])
        consumer.seek(topic_partition)
        self.logger.info(f"[*] Assigned to topic '{topic}' partition {partition} with offset {offset}")

    def consume(self, num_messages: int = 1, timeout: float = 10.0, decode_utf8: bool = False) -> Iterator[Message | str]:
        """Consume messages"""
        consumer = self._ensure_consumer()

        for _ in range(num_messages):
            if (msg := self._consume(consumer, timeout)) is None:
                continue
            yield msg.value().decode('utf-8') if decode_utf8 else msg

    def _consume(self, consumer: Consumer, timeout: float) -> Message | None:
        msg = consumer.poll(timeout)
        if msg is None:
            return
        if msg.error():
            self.logger.error(f"Consumer error: {msg.error()}")
            return
        self.logger.info(f"[<] Received message from {msg.topic()} [partition {msg.partition()}, offset {msg.offset()}]")
        self.logger.debug(f"[*] Message content: {msg.value()}")
        return msg

    def commit(self, message: Message | None = None, offsets: list[TopicPartition] | None = None,
               asynchronous: bool = True) -> list[TopicPartition] | None:
        """Commit offsets to Kafka."""
        consumer = self._ensure_consumer()
        if message:
            return consumer.commit(message=message, asynchronous=asynchronous)
        elif offsets:
            return consumer.commit(offsets=offsets, asynchronous=asynchronous)
        return consumer.commit(asynchronous=asynchronous)
dgkafka/errors.py
ADDED
dgkafka/json_consumer.py
ADDED
@@ -0,0 +1,25 @@
from confluent_kafka import DeserializingConsumer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.json_schema import JSONDeserializer
from confluent_kafka.serialization import StringDeserializer
from dglog import Logger
from dgkafka.consumer import KafkaConsumer


class JsonKafkaConsumer(KafkaConsumer):
    def __init__(self, logger_: Logger | None = None, **configs):
        self.consumer: DeserializingConsumer | None = None
        self.schema_registry = {'url': configs.pop('schema.registry.url')}
        self.schema_client = SchemaRegistryClient(self.schema_registry)
        self.deserializer = JSONDeserializer(schema_str=None, schema_registry_client=self.schema_client)

        super(JsonKafkaConsumer, self).__init__(logger_, **configs)

    def _init_consumer(self, **configs):
        consumer_conf = {
            **configs,
            'key.deserializer': StringDeserializer('utf_8'),
            'value.deserializer': self.deserializer
        }
        self.consumer = DeserializingConsumer(consumer_conf)
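A tentative usage sketch for `JsonKafkaConsumer`; it assumes a `schema.registry.url` key is supplied in the configuration (the constructor pops it before passing the rest to the consumer), and all endpoints are placeholders:

```python
from dgkafka.json_consumer import JsonKafkaConsumer

# Placeholder registry, broker, and group id.
with JsonKafkaConsumer(**{'schema.registry.url': 'http://localhost:8081',
                          'bootstrap.servers': 'localhost:9092',
                          'group.id': 'json_group'}) as consumer:
    consumer.subscribe(['json_topic'])
    for msg in consumer.consume():
        print(msg.value())  # JSON payload deserialized to a Python object
```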
dgkafka/producer.py
ADDED
@@ -0,0 +1,147 @@
import uuid
from typing import Optional, Any
from dglog import Logger
from confluent_kafka import Producer, Message
from dgkafka.errors import ProducerNotSetError


class KafkaProducer:
    def __init__(self, logger_: Logger | None = None, **configs: Any) -> None:
        """Initialize Kafka producer.

        Args:
            logger_: Optional logger instance
            configs: Kafka producer configuration
        """
        self.producer: Producer | None = None
        self.logger = logger_ if logger_ else Logger()
        self.logger.auto_configure()
        self._init_producer(**configs)

    def _init_producer(self, **configs: Any) -> None:
        """Internal method to initialize producer."""
        try:
            self.producer = Producer(configs)
            self.logger.info("[*] Kafka producer initialized successfully")
        except Exception as ex:
            self.logger.error(f"[x] Failed to initialize producer: {ex}")
            raise

    def close(self) -> None:
        """Close the producer connection."""
        if self.producer is not None:
            try:
                self.producer.flush()
                self.logger.info("[*] Producer closed successfully")
            except Exception as ex:
                self.logger.error(f"[x] Error closing producer: {ex}")
                raise
            finally:
                self.producer = None

    def __enter__(self):
        """Context manager entry point."""
        if self.producer is None:
            self._init_producer()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit point."""
        self.close()

    def _ensure_producer(self) -> Producer:
        """Ensure producer is initialized."""
        if self.producer is None:
            raise ProducerNotSetError('[!] Producer not initialized!')
        return self.producer

    def delivery_report(self, err: Optional[Any], msg: Message) -> None:
        """Delivery callback for produced messages.

        Args:
            err: Error object if delivery failed
            msg: Delivered message object
        """
        if err is not None:
            self.logger.error(f"[x] Message delivery failed: {err}")
            self.logger.debug(f"[~] Failed message details: {msg}")
        else:
            self.logger.info(f"[>] Message delivered to {msg.topic()} [partition {msg.partition()}, offset {msg.offset()}]")

    def produce(
        self,
        topic: str,
        message: str | bytes | dict[str, Any],
        key: str | None = None,
        partition: int | None = None,
        headers: dict[str, bytes] | None = None,
        flush: bool = True
    ) -> None:
        """Produce a message to Kafka.

        Args:
            topic: Target topic name
            message: Message content (str, bytes or dict)
            key: Message key (optional)
            partition: Specific partition (optional)
            headers: Message headers (optional)
            flush: Immediately flush after producing (default: True)
        """
        producer = self._ensure_producer()
        producer.poll(0)

        # Generate key if not provided
        key = key if key is not None else str(uuid.uuid4())
        key_bytes = key.encode('utf-8')

        # Prepare message value
        if isinstance(message, str):
            value = message.encode('utf-8')
        elif isinstance(message, bytes):
            value = message
        else:  # Assume dict-like object
            try:
                import json
                value = json.dumps(message).encode('utf-8')
            except Exception as ex:
                self.logger.error(f"[x] Failed to serialize message: {ex}")
                raise

        # Prepare message headers
        headers_list = None
        if headers:
            headers_list = [(k, v if isinstance(v, bytes) else str(v).encode('utf-8'))
                            for k, v in headers.items()]

        # Produce message
        try:
            producer.produce(
                topic=topic,
                value=value,
                key=key_bytes,
                partition=partition,
                on_delivery=self.delivery_report,
                headers=headers_list
            )

            if flush:
                producer.flush()

        except Exception as ex:
            self.logger.error(f"[x] Failed to produce message: {ex}")
            raise

    def flush(self, timeout: float = 10.0) -> None:
        """Wait for all messages to be delivered.

        Args:
            timeout: Maximum time to wait (seconds)
        """
        producer = self._ensure_producer()
        try:
            remaining = producer.flush(timeout)
            if remaining > 0:
                self.logger.warning(f"[!] {remaining} messages remain undelivered after flush timeout")
        except Exception as ex:
            self.logger.error(f"[x] Flush failed: {ex}")
            raise
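A minimal sketch of the base `KafkaProducer`, showing the dict-to-JSON and header-encoding paths implemented above; the broker and topic names are placeholders:

```python
from dgkafka.producer import KafkaProducer

# Placeholder broker; dict payloads are JSON-encoded, str header values are UTF-8 encoded.
with KafkaProducer(**{'bootstrap.servers': 'localhost:9092'}) as producer:
    producer.produce(
        topic='events',
        message={'event': 'user_update', 'id': 42},
        headers={'correlation_id': '12345'},
        flush=True,
    )
```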
dgkafka-1.0.0a0.dist-info/METADATA
ADDED
@@ -0,0 +1,217 @@
Metadata-Version: 2.4
Name: dgkafka
Version: 1.0.0a0
Summary: Kafka clients
Home-page: https://gitlab.com/gng-group/dgkafka.git
Author: Malanris
Author-email: Roman Rasputin <admin@roro.su>
License: MIT License
Project-URL: Homepage, https://gitlab.com/gng-group/dgkafka
Project-URL: BugTracker, https://gitlab.com/gng-group/dgkafka/issues
Keywords: logging,logger,rotation,timed,advanced
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Classifier: Operating System :: OS Independent
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: System :: Logging
Requires-Python: >=3.7
Description-Content-Type: text/markdown
License-File: LICENSE
Dynamic: author
Dynamic: home-page
Dynamic: license-file

# dgkafka

Python package for working with Apache Kafka supporting multiple data formats.

## Installation

```bash
pip install dgkafka
```

For Avro support (requires additional dependencies):

```bash
pip install dgkafka[avro]
```

## Features

- Producers and consumers for different data formats:
  - Raw messages (bytes/strings)
  - JSON
  - Avro (with Schema Registry integration)
- Robust error handling
- Comprehensive operation logging
- Context manager support
- Flexible configuration

## Quick Start

### Basic Producer/Consumer

```python
from dgkafka import KafkaProducer, KafkaConsumer

# Producer
with KafkaProducer(bootstrap_servers='localhost:9092') as producer:
    producer.produce('test_topic', 'Hello, Kafka!')

# Consumer
with KafkaConsumer(bootstrap_servers='localhost:9092', group_id='test_group') as consumer:
    consumer.subscribe(['test_topic'])
    for msg in consumer.consume():
        print(msg.value())
```

### JSON Support

```python
from dgkafka import JsonKafkaProducer, JsonKafkaConsumer

# Producer
with JsonKafkaProducer(bootstrap_servers='localhost:9092') as producer:
    producer.produce('json_topic', {'key': 'value'})

# Consumer
with JsonKafkaConsumer(bootstrap_servers='localhost:9092', group_id='json_group') as consumer:
    consumer.subscribe(['json_topic'])
    for msg in consumer.consume():
        print(msg.value())  # Automatically deserialized JSON
```

### Avro Support

```python
from dgkafka import AvroKafkaProducer, AvroKafkaConsumer

# Producer
value_schema = {
    "type": "record",
    "name": "User",
    "fields": [
        {"name": "name", "type": "string"},
        {"name": "age", "type": "int"}
    ]
}

with AvroKafkaProducer(
    schema_registry_url='http://localhost:8081',
    bootstrap_servers='localhost:9092',
    default_value_schema=value_schema
) as producer:
    producer.produce('avro_topic', {'name': 'Alice', 'age': 30})

# Consumer
with AvroKafkaConsumer(
    schema_registry_url='http://localhost:8081',
    bootstrap_servers='localhost:9092',
    group_id='avro_group'
) as consumer:
    consumer.subscribe(['avro_topic'])
    for msg in consumer.consume():
        print(msg.value())  # Automatically deserialized Avro object
```

## Classes

### Base Classes

- `KafkaProducer` - base message producer
- `KafkaConsumer` - base message consumer

### Specialized Classes

- `JsonKafkaProducer` - JSON message producer (inherits from `KafkaProducer`)
- `JsonKafkaConsumer` - JSON message consumer (inherits from `KafkaConsumer`)
- `AvroKafkaProducer` - Avro message producer (inherits from `KafkaProducer`)
- `AvroKafkaConsumer` - Avro message consumer (inherits from `KafkaConsumer`)

## Configuration

All classes accept standard Kafka configuration parameters:

```python
config = {
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'my_group',
    'auto.offset.reset': 'earliest'
}
```

Avro classes require an additional parameter:
- `schema_registry_url` - Schema Registry URL

## Logging

All classes use `dglog.Logger` for logging. You can provide a custom logger:

```python
from dglog import Logger

logger = Logger()
producer = KafkaProducer(logger_=logger, ...)
```

## Best Practices

1. Always use context managers (`with`) for proper resource cleanup
2. Implement error handling and retry logic for production use
3. Pre-register Avro schemas in Schema Registry
4. Configure appropriate `acks` and `retries` parameters for producers
5. Monitor consumer lag and producer throughput

## Advanced Usage

### Custom Serialization

```python
# Custom Avro serializer
class CustomAvroProducer(AvroKafkaProducer):
    def _serialize_value(self, value):
        # Custom serialization logic
        return super()._serialize_value(value)
```

### Message Headers

```python
# Adding headers to messages
headers = {
    'correlation_id': '12345',
    'message_type': 'user_update'
}

producer.produce(
    topic='events',
    value=message_data,
    headers=headers
)
```

### Error Handling

```python
from confluent_kafka import KafkaException

try:
    with AvroKafkaProducer(...) as producer:
        producer.produce(...)
except KafkaException as e:
    print(f"Kafka error occurred: {e}")
```

## Performance Tips

1. Batch messages when possible (`batch.num.messages` config)
2. Adjust `linger.ms` for better batching
3. Use `compression.type` (lz4, snappy, or gzip)
4. Tune `fetch.max.bytes` and `max.partition.fetch.bytes` for consumers

## License

MIT
dgkafka-1.0.0a0.dist-info/RECORD
ADDED
@@ -0,0 +1,12 @@
dgkafka/__init__.py,sha256=2hPW2KtgErppTV4_5NeKUVmLiic4evF1eRoqmXCLpwU,201
dgkafka/avro_consumer.py,sha256=5nWQwY6ynX_h8KGSdZ2LCL9BlXcyHlRqjwnFP66zccE,2891
dgkafka/avro_producer.py,sha256=Vn9-t30U_pj81clSnadXCPYoXZJ_O_EuRZKa2yYlmrU,4345
dgkafka/consumer.py,sha256=pVqXpAR7pA8JyYmg49PdOct0_fou5RmQmrGTtP8cd7o,4341
dgkafka/errors.py,sha256=PaH46tXameS--hrrUXKhQkZlBHvMSMPmjhVeRkmFvV0,95
dgkafka/json_consumer.py,sha256=7Gzn7C9WpyCTPDV6eFDugAx5gC9vdV-HrTh3Nv--zIE,1152
dgkafka/producer.py,sha256=-901NGP0szc6KQ1VOKtoq0zScIME9Bg6VzgRgBBbYFM,4997
dgkafka-1.0.0a0.dist-info/licenses/LICENSE,sha256=pAZXnNE2dxxwXFIduGyn1gpvPefJtUYOYZOi3yeGG94,1068
dgkafka-1.0.0a0.dist-info/METADATA,sha256=vbR-EAldSdreDK5W9rDLISmiSHBLjWOgLx9cfjT6a1o,5340
dgkafka-1.0.0a0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
dgkafka-1.0.0a0.dist-info/top_level.txt,sha256=GyNrxOh7IPdL0t2SxH8DWxg3fUma-ezQ1Kz4zIr2B7U,8
dgkafka-1.0.0a0.dist-info/RECORD,,
dgkafka-1.0.0a0.dist-info/licenses/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) [year] [fullname]

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
dgkafka-1.0.0a0.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
dgkafka