dgkafka 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dgkafka/__init__.py ADDED
@@ -0,0 +1,11 @@
+ from .consumer import KafkaConsumer
+ from .producer import KafkaProducer
+ try:
+     from .avro_consumer import AvroKafkaConsumer
+     from .avro_producer import AvroKafkaProducer
+ except ImportError:
+     pass
+ try:
+     from .json_consumer import JsonKafkaConsumer
+ except ImportError:
+     pass
dgkafka/avro_consumer.py ADDED
@@ -0,0 +1,74 @@
+ from typing import Any, Iterator
+
+ from dgkafka.consumer import KafkaConsumer
+
+ from confluent_kafka import Message
+ from confluent_kafka.avro import AvroConsumer
+ from confluent_kafka.avro.serializer import SerializerError
+ from confluent_kafka.avro.cached_schema_registry_client import CachedSchemaRegistryClient
+
+ import logging
+ import dglog
+
+
+ class AvroKafkaConsumer(KafkaConsumer):
+     """Kafka consumer with Avro schema support using Schema Registry."""
+
+     def __init__(self, logger_: logging.Logger | dglog.Logger | None = None, **configs: Any) -> None:
+         """
+         Initialize the Avro consumer.
+
+         Args:
+             logger_: Optional logger instance
+             configs: Kafka consumer configuration; must include 'schema.registry.url'
+         """
+         self.schema_registry_url = configs.get('schema.registry.url')
+         assert self.schema_registry_url is not None, "schema.registry.url is required"
+
+         self.schema_registry_client = CachedSchemaRegistryClient(url=self.schema_registry_url)
+         super().__init__(logger_=logger_, **configs)
+
+     def _init_consumer(self, **configs: Any) -> None:
+         """Initialize AvroConsumer instance."""
+         try:
+             self.consumer = AvroConsumer(configs)
+             self.logger.info("[*] Avro consumer initialized successfully")
+         except Exception as ex:
+             self.logger.error(f"[x] Failed to initialize avro consumer: {ex}")
+             raise
+
+     def consume(self, num_messages: int = 1, timeout: float = 1.0, decode_: bool = False,
+                 **kwargs: Any) -> Iterator[str | bytes | Message | None]:
+         """
+         Consume Avro-encoded messages.
+
+         Args:
+             num_messages: Maximum number of messages to consume
+             timeout: Poll timeout in seconds
+             decode_: Yield the deserialized value instead of the Message object
+             kwargs: Additional arguments
+
+         Yields:
+             Deserialized Avro values (when decode_ is True) or Message objects;
+             the raw message (or None) is yielded when deserialization fails
+         """
+         consumer = self._ensure_consumer()
+
+         for _ in range(num_messages):
+             msg = None
+             try:
+                 # AvroConsumer deserializes inside poll(), so SerializerError can surface here
+                 msg = self._consume(consumer, timeout)
+                 if msg is None:
+                     continue
+                 yield msg.value() if decode_ else msg
+             except SerializerError as e:
+                 self.logger.error(f"[x] Avro deserialization failed: {e}")
+                 yield msg  # Return the raw message (or None) on deserialization error
+             except Exception as ex:
+                 self.logger.error(f"[!] Unexpected error: {ex}")
+                 continue
+
+     def get_schema(self, subject: str, version: int = 1) -> dict[str, Any]:
+         """Get Avro schema from Schema Registry."""
+         return self.schema_registry_client.get_schema(subject, version)
+
+     def get_latest_schema(self, subject: str) -> dict[str, Any]:
+         """Get latest version of schema for given subject."""
+         return self.schema_registry_client.get_latest_schema(subject)[1]
dgkafka/avro_producer.py ADDED
@@ -0,0 +1,138 @@
+ from typing import Optional, Union, Dict, Any
+ from confluent_kafka.avro import AvroProducer
+ from confluent_kafka.avro.serializer import SerializerError
+ from confluent_kafka.avro.cached_schema_registry_client import CachedSchemaRegistryClient
+
+ import dglog
+ import logging
+
+ from dgkafka.producer import KafkaProducer
+
+
+ class AvroKafkaProducer(KafkaProducer):
+     """Kafka producer with Avro schema support using Schema Registry."""
+
+     def __init__(
+             self,
+             default_key_schema: str | None = None,
+             default_value_schema: str | None = None,
+             logger_: logging.Logger | dglog.Logger | None = None,
+             **configs: Any
+     ) -> None:
+         """
+         Initialize the Avro producer.
+
+         Args:
+             default_key_schema: Default Avro schema for message keys
+             default_value_schema: Default Avro schema for message values
+             logger_: Optional logger instance
+             configs: Kafka producer configuration; must include 'schema.registry.url'
+         """
+         self.schema_registry_url = configs.get('schema.registry.url')
+         assert self.schema_registry_url is not None, "schema.registry.url is required"
+
+         self.default_key_schema = default_key_schema
+         self.default_value_schema = default_value_schema
+         self.schema_registry_client = CachedSchemaRegistryClient(url=self.schema_registry_url)
+         super().__init__(logger_=logger_, **configs)
+
+     def _init_producer(self, **configs: Any) -> None:
+         """Initialize AvroProducer instance."""
+         try:
+             self.producer = AvroProducer(
+                 config=configs,
+                 default_key_schema=self.default_key_schema,
+                 default_value_schema=self.default_value_schema
+             )
+             self.logger.info("[*] Avro producer initialized successfully")
+         except Exception as ex:
+             self.logger.error(f"[x] Failed to initialize avro producer: {ex}")
+             raise
+
+     def produce(
+             self,
+             topic: str,
+             value: dict[str, Any] | Any,
+             key: dict[str, Any] | str | None = None,
+             value_schema: dict[str, Any] | None = None,
+             key_schema: dict[str, Any] | None = None,
+             partition: int | None = None,
+             headers: dict[str, bytes] | None = None,
+             flush: bool = True
+     ) -> bool:
+         """
+         Produce an Avro-encoded message to Kafka.
+
+         Args:
+             topic: Target topic name
+             value: Message value (must match Avro schema)
+             key: Message key (optional)
+             value_schema: Avro schema for message value (optional)
+             key_schema: Avro schema for message key (optional)
+             partition: Specific partition (optional)
+             headers: Message headers (optional)
+             flush: Immediately flush after producing (default: True)
+
+         Returns:
+             True if the message was delivered (or enqueued when flush=False), False otherwise
+         """
+         producer = self._ensure_producer()
+         producer.poll(0)
+
+         self._delivery_status['success'] = None
+
+         # Prepare headers
+         headers_list = None
+         if headers:
+             headers_list = [(k, v if isinstance(v, bytes) else str(v).encode('utf-8'))
+                             for k, v in headers.items()]
+
+         try:
+             if not partition:
+                 producer.produce(
+                     topic=topic,
+                     value=value,
+                     value_schema=value_schema,
+                     key=key,
+                     key_schema=key_schema,
+                     on_delivery=self.delivery_report,
+                     headers=headers_list
+                 )
+             else:
+                 producer.produce(
+                     topic=topic,
+                     value=value,
+                     value_schema=value_schema,
+                     key=key,
+                     key_schema=key_schema,
+                     partition=partition,
+                     on_delivery=self.delivery_report,
+                     headers=headers_list
+                 )
+
+             if flush:
+                 remaining = producer.flush(1.0)  # flush timeout in seconds
+                 if remaining > 0:
+                     self.logger.warning(f"[!] {remaining} messages remain undelivered after flush timeout")
+                     return False
+
+             # With flush=True the delivery callback should have run by this point
+             if flush and self._delivery_status['success'] is not None:
+                 return self._delivery_status['success']
+
+             # With flush=False delivery cannot be guaranteed yet; return True
+             # (no error has occurred so far)
+             return True
+
+         except SerializerError as ex:
+             self.logger.error(f"[x] Avro serialization failed: {ex}")
+             return False
+         except Exception as ex:
+             self.logger.error(f"[x] Failed to produce Avro message: {ex}")
+             return False
+
+     def get_schema(self, subject: str, version: int = 1) -> Dict[str, Any]:
+         """Get Avro schema from Schema Registry."""
+         return self.schema_registry_client.get_schema(subject, version)
+
+     def get_latest_schema(self, subject: str) -> Dict[str, Any]:
+         """Get latest version of schema for given subject."""
+         return self.schema_registry_client.get_latest_schema(subject)[1]
dgkafka/config.py ADDED
@@ -0,0 +1,84 @@
+ from typing import Dict, Any, Optional, Literal
+ from pydantic import BaseModel, Field, ConfigDict
+ from enum import Enum
+
+
+ class SecurityProtocol(str, Enum):
+     PLAINTEXT = "PLAINTEXT"
+     SSL = "SSL"
+     SASL_PLAINTEXT = "SASL_PLAINTEXT"
+     SASL_SSL = "SASL_SSL"
+
+
+ class KafkaConfig(BaseModel):
+     """Base configuration for all Kafka clients"""
+     bootstrap_servers: str = Field(..., alias="bootstrap.servers")
+     security_protocol: Literal["PLAINTEXT", "SSL", "SASL_PLAINTEXT", "SASL_SSL"] = Field(
+         default="SSL", alias="security.protocol")
+     ssl_ca_location: Optional[str] = Field(default=None, alias="ssl.ca.location")
+     ssl_certificate_location: Optional[str] = Field(default=None, alias="ssl.certificate.location")
+     ssl_key_location: Optional[str] = Field(default=None, alias="ssl.key.location")
+     ssl_endpoint_identification_algorithm: Optional[str] = Field(
+         default=None, alias="ssl.endpoint.identification.algorithm")
+
+     model_config = ConfigDict(
+         populate_by_name=True,
+         extra="forbid",
+         protected_namespaces=()
+     )
+
+     def get(self) -> Dict[str, Any]:
+         """Get config in a format suitable for confluent_kafka"""
+         return self.model_dump(by_alias=True, exclude_none=True)
+
+     @classmethod
+     def set(cls, config_dict: Dict[str, Any]) -> "KafkaConfig":
+         """Create config from dictionary"""
+         return cls(**config_dict)
+
+
+ class ConsumerConfig(KafkaConfig):
+     """Base consumer configuration"""
+     group_id: str = Field(..., alias="group.id")
+     enable_auto_commit: bool = Field(default=False, alias="enable.auto.commit")
+     auto_offset_reset: Literal["earliest", "latest"] = Field(
+         default="earliest", alias="auto.offset.reset")
+     session_timeout_ms: int = Field(default=10000, alias="session.timeout.ms")
+     max_poll_interval_ms: int = Field(default=300000, alias="max.poll.interval.ms")
+
+
+ class ProducerConfig(KafkaConfig):
+     """Base producer configuration"""
+     acks: Literal["all", "0", "1"] = Field(default="all")
+     retries: int = Field(default=0)
+     compression_type: str = Field(default="none", alias="compression.type")
+     batch_size: int = Field(default=16384, alias="batch.size")
+     linger_ms: int = Field(default=0, alias="linger.ms")
+
+
+ class AvroConfigMixin(BaseModel):
+     """Schema Registry settings shared by the Avro consumer and producer configs"""
+     schema_registry_url: str = Field(..., alias="schema.registry.url")
+     schema_registry_ssl_ca_location: Optional[str] = Field(
+         default=None, alias="schema.registry.ssl.ca.location")
+     schema_registry_ssl_certificate_location: Optional[str] = Field(
+         default=None, alias="schema.registry.ssl.certificate.location")
+     schema_registry_ssl_key_location: Optional[str] = Field(
+         default=None, alias="schema.registry.ssl.key.location")
+
+
+ class AvroConsumerConfig(ConsumerConfig, AvroConfigMixin):
+     """Avro consumer configuration with Schema Registry support"""
+
+     @classmethod
+     def set(cls, config_dict: Dict[str, Any]) -> "AvroConsumerConfig":
+         """Create from dictionary with Schema Registry validation"""
+         if "schema.registry.url" not in config_dict:
+             raise ValueError("schema.registry.url is required for AvroConsumer")
+         return cls(**config_dict)
+
+
+ class AvroProducerConfig(ProducerConfig, AvroConfigMixin):
+     """Avro producer configuration with Schema Registry support"""
+     max_schemas_per_subject: int = Field(default=1000, alias="max.schemas.per.subject")
+
+     @classmethod
+     def set(cls, config_dict: Dict[str, Any]) -> "AvroProducerConfig":
+         """Create from dictionary with Schema Registry validation"""
+         if "schema.registry.url" not in config_dict:
+             raise ValueError("schema.registry.url is required for AvroProducer")
+         return cls(**config_dict)
dgkafka/consumer.py ADDED
@@ -0,0 +1,197 @@
+ from typing import Literal, Iterator, Any
+ from dgkafka.errors import ConsumerNotSetError
+
+ from confluent_kafka import Consumer, KafkaException, Message, TopicPartition
+ from confluent_kafka import OFFSET_STORED, OFFSET_BEGINNING, OFFSET_END
+
+ import logging
+ import dglog
+
+ OffsetType = Literal[OFFSET_STORED, OFFSET_BEGINNING, OFFSET_END] | int
+
+
+ class KafkaConsumer:
+     def __init__(self, logger_: logging.Logger | dglog.Logger | None = None, **configs: Any) -> None:
+         self.consumer: Consumer | None = None
+         self.logger = logger_ if logger_ else dglog.Logger()
+         if isinstance(self.logger, dglog.Logger):
+             self.logger.auto_configure()
+         self._configs = configs  # keep configs so the consumer can be re-created in __enter__
+         self._init_consumer(**configs)
+
+     def _init_consumer(self, **configs: Any) -> None:
+         """Internal method to initialize the consumer"""
+         try:
+             self.consumer = Consumer(configs)
+             self.logger.info("[*] Consumer initialized successfully")
+         except KafkaException as ex:
+             self.logger.error(f"[x] Failed to initialize consumer: {ex}")
+             raise
+
+     def close(self) -> None:
+         """Safely close the consumer"""
+         if self.consumer is not None:
+             try:
+                 self.consumer.close()
+                 self.logger.info("[*] Consumer closed successfully")
+             except KafkaException as ex:
+                 self.logger.error(f"[x] Error closing consumer: {ex}")
+                 raise
+             finally:
+                 self.consumer = None
+
+     def __enter__(self):
+         """Context manager entry point"""
+         if self.consumer is None:
+             self._init_consumer(**self._configs)
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         """Context manager exit point"""
+         self.close()
+
+     def _ensure_consumer(self) -> Consumer:
+         """Ensure the consumer is initialized"""
+         if self.consumer is None:
+             raise ConsumerNotSetError('[!] Consumer not initialized!')
+         return self.consumer
+
+     def subscribe(self, topics: str | list[str], partition: int | None = None,
+                   offset: OffsetType = OFFSET_STORED) -> None:
+         """Subscribe to topics, or assign a specific partition/offset when requested"""
+         consumer = self._ensure_consumer()
+
+         if partition is not None and offset != OFFSET_STORED:
+             topic_list = [topics] if isinstance(topics, str) else topics
+             for topic in topic_list:
+                 self._assign_topic_partition(topic, partition, offset)
+         else:
+             topics_list = [topics] if isinstance(topics, str) else topics
+             consumer.subscribe(topics_list, on_assign=self.on_assign, on_revoke=self.on_revoke)
+
+     def on_assign(self, consumer, partitions):
+         self.kafka_status = "UP"
+         for topic in {p.topic for p in partitions}:
+             new = {p.partition for p in partitions if p.topic == topic}
+             self.logger.debug(f"[@] on_assign {topic} {new if new else '{}'}")
+             old = {p.partition for p in consumer.assignment() if p.topic == topic}
+             old.update(new)
+             self.logger.info(f"[*] Assigned {topic} {old if old else '{}'}")
+
+     def on_revoke(self, consumer, partitions):
+         for topic in {p.topic for p in partitions}:
+             new = {p.partition for p in partitions if p.topic == topic}
+             self.logger.debug(f"[@] on_revoke {topic} {new if new else '{}'}")
+             old = {p.partition for p in consumer.assignment() if p.topic == topic}
+             old.difference_update(new)
+             self.logger.info(f"[*] Remaining assignment {topic} {old if old else '{}'}")
+
+     def _assign_topic_partition(self, topic: str, partition: int, offset: OffsetType) -> None:
+         """Assign the consumer to a specific partition"""
+         consumer = self._ensure_consumer()
+         topic_partition = TopicPartition(topic, partition, offset)
+         consumer.assign([topic_partition])
+         consumer.seek(topic_partition)
+         self.logger.info(f"[*] Assigned to topic '{topic}' partition {partition} with offset {offset}")
+
+     def consume(self, num_messages: int = 1, timeout: float = 1.0, decode_: bool = False) -> Iterator[Message | str]:
+         """Consume messages"""
+         consumer = self._ensure_consumer()
+
+         for _ in range(num_messages):
+             if (msg := self._consume(consumer, timeout)) is None:
+                 continue
+             yield msg.value().decode('utf-8') if decode_ else msg
+
+     def _consume(self, consumer: Consumer, timeout: float) -> Message | None:
+         msg = consumer.poll(timeout)
+         if msg is None:
+             return None
+         if msg.error():
+             self.logger.error(f"[x] Consumer error: {msg.error()}")
+             return None
+         self.logger.info(f"[<] Received message from {msg.topic()} [partition {msg.partition()}, offset {msg.offset()}]")
+         self.logger.debug(f"[*] Message content: {msg.value()}")
+         return msg
+
+     def commit(self, message: Message | None = None, offsets: list[TopicPartition] | None = None,
+                asynchronous: bool = True) -> list[TopicPartition] | None:
+         """Commit offsets to Kafka."""
+         consumer = self._ensure_consumer()
+         if message:
+             return consumer.commit(message=message, asynchronous=asynchronous)
+         elif offsets:
+             return consumer.commit(offsets=offsets, asynchronous=asynchronous)
+         return consumer.commit(asynchronous=asynchronous)
+
+     def get_subscription_info(self) -> dict:
+         """Get current subscription and assignment information.
+
+         Returns:
+             dict: Dictionary with subscription and assignment details
+                 {
+                     'subscribed_topics': list[str] | None,
+                     'assignments': list[dict] | None,
+                     'current_offsets': list[dict] | None
+                 }
+         """
+         consumer = self._ensure_consumer()
+
+         try:
+             # Current partition assignments
+             assignments = consumer.assignment()
+
+             # Current read positions (offsets)
+             current_offsets = []
+             if assignments:
+                 current_offsets = [consumer.position(tp) for tp in assignments]
+
+             # Derive the subscribed topics from the assignments
+             subscribed_topics = list({tp.topic for tp in assignments}) if assignments else None
+
+             # Build assignment details
+             assignments_info = []
+             for tp in assignments:
+                 assignments_info.append({
+                     'topic': tp.topic,
+                     'partition': tp.partition,
+                     'offset': tp.offset
+                 })
+
+             # Build current position details
+             offsets_info = []
+             for tp in current_offsets:
+                 offsets_info.append({
+                     'topic': tp.topic,
+                     'partition': tp.partition,
+                     'offset': tp.offset
+                 })
+
+             return {
+                 'subscribed_topics': subscribed_topics,
+                 'assignments': assignments_info if assignments_info else None,
+                 'current_offsets': offsets_info if offsets_info else None
+             }
+
+         except KafkaException as ex:
+             self.logger.error(f"[x] Failed to get subscription info: {ex}")
+             raise
+
+     def log_subscription_info(self) -> None:
+         """Log current subscription and assignment information."""
+         info = self.get_subscription_info()
+
+         if info['subscribed_topics']:
+             self.logger.info(f"[*] Subscribed topics: {', '.join(info['subscribed_topics'])}")
+         else:
+             self.logger.info("[!] Not subscribed to any topics")
+
+         if info['assignments']:
+             self.logger.info("[*] Current partition assignments:")
+             for assignment in info['assignments']:
+                 self.logger.info(f"  - {assignment['topic']} [partition {assignment['partition']}]")
+
+         if info['current_offsets']:
+             self.logger.info("[*] Current read positions:")
+             for offset in info['current_offsets']:
+                 self.logger.info(
+                     f"  - {offset['topic']} [partition {offset['partition']}]: position {offset['offset']}")
dgkafka/errors.py ADDED
@@ -0,0 +1,6 @@
+ class ProducerNotSetError(Exception):
+     pass
+
+
+ class ConsumerNotSetError(Exception):
+     pass
dgkafka/json_consumer.py ADDED
@@ -0,0 +1,25 @@
+ from confluent_kafka import DeserializingConsumer
+ from confluent_kafka.schema_registry import SchemaRegistryClient
+ from confluent_kafka.schema_registry.json_schema import JSONDeserializer
+ from confluent_kafka.serialization import StringDeserializer
+ from dglog import Logger
+ from dgkafka.consumer import KafkaConsumer
+
+
+ class JsonKafkaConsumer(KafkaConsumer):
+     def __init__(self, logger_: Logger | None = None, **configs):
+         self.consumer: DeserializingConsumer | None = None
+         self.schema_registry = {'url': configs.pop('schema.registry.url')}
+         self.schema_client = SchemaRegistryClient(self.schema_registry)
+         self.deserializer = JSONDeserializer(schema_str=None, schema_registry_client=self.schema_client)
+
+         super().__init__(logger_=logger_, **configs)
+
+     def _init_consumer(self, **configs):
+         """Initialize DeserializingConsumer; overrides the base method called from __init__."""
+         consumer_conf = {
+             **configs,
+             'key.deserializer': StringDeserializer('utf_8'),
+             'value.deserializer': self.deserializer
+         }
+         self.consumer = DeserializingConsumer(consumer_conf)
dgkafka/producer.py ADDED
@@ -0,0 +1,180 @@
+ import uuid
+ from typing import Optional, Any
+
+ from datetime import datetime, date
+
+ from confluent_kafka import Producer, Message
+ from dgkafka.errors import ProducerNotSetError
+
+ import logging
+ import dglog
+
+
+ class KafkaProducer:
+     def __init__(self, logger_: logging.Logger | dglog.Logger | None = None, **configs: Any) -> None:
+         """Initialize Kafka producer.
+
+         Args:
+             logger_: Optional logger instance
+             configs: Kafka producer configuration
+         """
+         self.producer: Producer | None = None
+         self.logger = logger_ if logger_ else dglog.Logger()
+
+         self._delivery_status = {'success': None}
+
+         if isinstance(self.logger, dglog.Logger):
+             self.logger.auto_configure()
+         self._configs = configs  # keep configs so the producer can be re-created in __enter__
+         self._init_producer(**configs)
+
+     def _init_producer(self, **configs: Any) -> None:
+         """Internal method to initialize the producer."""
+         try:
+             self.producer = Producer(configs)
+             self.logger.info("[*] Producer initialized successfully")
+         except Exception as ex:
+             self.logger.error(f"[x] Failed to initialize producer: {ex}")
+             raise
+
+     def close(self) -> None:
+         """Close the producer connection."""
+         if self.producer is not None:
+             try:
+                 self.producer.flush()
+                 self.logger.info("[*] Producer closed successfully")
+             except Exception as ex:
+                 self.logger.error(f"[x] Error closing producer: {ex}")
+                 raise
+             finally:
+                 self.producer = None
+
+     def __enter__(self):
+         """Context manager entry point."""
+         if self.producer is None:
+             self._init_producer(**self._configs)
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         """Context manager exit point."""
+         self.close()
+
+     def _ensure_producer(self) -> Producer:
+         """Ensure the producer is initialized."""
+         if self.producer is None:
+             raise ProducerNotSetError('[!] Producer not initialized!')
+         return self.producer
+
+     def delivery_report(self, err: Optional[Any], msg: Message) -> None:
+         """Delivery callback for produced messages.
+
+         Args:
+             err: Error object if delivery failed
+             msg: Delivered message object
+         """
+         if err is not None:
+             self.logger.error(f"[x] Message delivery failed: {err}")
+             self.logger.debug(f"[~] Failed message details: {msg}")
+             self._delivery_status['success'] = False
+         else:
+             self.logger.info(
+                 f"[>] Message delivered to {msg.topic()} [partition {msg.partition()}, offset {msg.offset()}]")
+             self._delivery_status['success'] = True
+
+     def produce(
+             self,
+             topic: str,
+             message: str | bytes | dict[str, Any],
+             key: str | None = None,
+             partition: int | None = None,
+             headers: dict[str, bytes] | None = None,
+             flush: bool = True
+     ) -> bool:
+         """Produce a message to Kafka.
+
+         Args:
+             topic: Target topic name
+             message: Message content (str, bytes or dict)
+             key: Message key (optional)
+             partition: Specific partition (optional)
+             headers: Message headers (optional)
+             flush: Immediately flush after producing (default: True)
+
+         Returns:
+             True if the message was delivered (or enqueued when flush=False), False otherwise
+         """
+         producer = self._ensure_producer()
+         producer.poll(0)
+
+         self._delivery_status['success'] = None
+
+         # Generate key if not provided
+         key = key if key is not None else str(uuid.uuid4())
+         key_bytes = key.encode('utf-8')
+
+         # Prepare message value
+         if isinstance(message, str):
+             value = message.encode('utf-8')
+         elif isinstance(message, bytes):
+             value = message
+         else:  # Assume dict-like object
+             try:
+                 import json
+                 dthandler = lambda obj: obj.isoformat() if isinstance(obj, (datetime, date)) else None
+                 value = json.dumps(message, ensure_ascii=False, default=dthandler, indent=4).encode('utf-8')
+             except Exception as ex:
+                 self.logger.error(f"[x] Failed to serialize message: {ex}")
+                 return False
+
+         # Prepare message headers
+         headers_list = None
+         if headers:
+             headers_list = [(k, v if isinstance(v, bytes) else str(v).encode('utf-8'))
+                             for k, v in headers.items()]
+
+         # Produce message
+         try:
+             if not partition:
+                 producer.produce(
+                     topic=topic,
+                     value=value,
+                     key=key_bytes,
+                     on_delivery=self.delivery_report,
+                     headers=headers_list
+                 )
+             else:
+                 producer.produce(
+                     topic=topic,
+                     value=value,
+                     key=key_bytes,
+                     partition=partition,
+                     on_delivery=self.delivery_report,
+                     headers=headers_list
+                 )
+             if flush:
+                 remaining = producer.flush(1.0)  # timeout 1 second
+                 if remaining > 0:
+                     self.logger.warning(f"[!] {remaining} messages remain undelivered")
+                     return False
+
+             # With flush=True the delivery callback should have run by this point
+             if flush and self._delivery_status['success'] is not None:
+                 return self._delivery_status['success']
+
+             # With flush=False delivery cannot be guaranteed yet; return True
+             # (no error has occurred so far)
+             return True
+         except Exception as ex:
+             self.logger.error(f"[x] Failed to produce message: {ex}")
+             return False
+
+     def flush(self, timeout: float = 10.0) -> None:
+         """Wait for all messages to be delivered.
+
+         Args:
+             timeout: Maximum time to wait (seconds)
+         """
+         producer = self._ensure_producer()
+         try:
+             remaining = producer.flush(timeout)
+             if remaining > 0:
+                 self.logger.warning(f"[!] {remaining} messages remain undelivered after flush timeout")
+         except Exception as ex:
+             self.logger.error(f"[x] Flush failed: {ex}")
+             raise
dgkafka-1.0.0.dist-info/METADATA ADDED
@@ -0,0 +1,239 @@
+ Metadata-Version: 2.4
+ Name: dgkafka
+ Version: 1.0.0
+ Summary: Kafka clients
+ Home-page: https://gitlab.com/gng-group/dgkafka.git
+ Author: Malanris
+ Author-email: Roman Rasputin <admin@roro.su>
+ License: MIT License
+ Project-URL: Homepage, https://gitlab.com/gng-group/dgkafka
+ Project-URL: BugTracker, https://gitlab.com/gng-group/dgkafka/issues
+ Keywords: kafka,client,confluent,avro,fastapi,logging
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Operating System :: OS Independent
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Classifier: Topic :: System :: Logging
+ Classifier: Topic :: System :: Distributed Computing
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: confluent-kafka>=2.1.1
+ Requires-Dist: dglog>=1.0.0
+ Requires-Dist: pydantic
+ Provides-Extra: avro
+ Requires-Dist: requests; extra == "avro"
+ Requires-Dist: fastavro<2; extra == "avro"
+ Requires-Dist: avro<2,>=1.11.1; extra == "avro"
+ Requires-Dist: attrs; extra == "avro"
+ Requires-Dist: cachetools; extra == "avro"
+ Requires-Dist: httpx>=0.26; extra == "avro"
+ Requires-Dist: authlib; extra == "avro"
+ Provides-Extra: json
+ Requires-Dist: pyrsistent; extra == "json"
+ Requires-Dist: jsonschema; extra == "json"
+ Dynamic: author
+ Dynamic: home-page
+ Dynamic: license-file
+
+ # dgkafka
+
+ Python package for working with Apache Kafka supporting multiple data formats.
+
+ ## Installation
+
+ ```bash
+ pip install dgkafka
+ ```
+
+ For Avro support (requires additional dependencies):
+
+ ```bash
+ pip install dgkafka[avro]
+ ```
+
+ For JSON support (requires additional dependencies):
+
+ ```bash
+ pip install dgkafka[json]
+ ```
+
+ ## Features
+
+ - Producers and consumers for different data formats:
+   - Raw messages (bytes/strings)
+   - JSON
+   - Avro (with Schema Registry integration)
+ - Robust error handling
+ - Comprehensive operation logging
+ - Context manager support
+ - Flexible configuration
+
+ ## Quick Start
+
+ ### Basic Producer/Consumer
+
+ ```python
+ from dgkafka import KafkaProducer, KafkaConsumer
+
+ # Producer (configuration keys are passed straight to confluent_kafka, so use the dotted names)
+ with KafkaProducer(**{'bootstrap.servers': 'localhost:9092'}) as producer:
+     producer.produce('test_topic', 'Hello, Kafka!')
+
+ # Consumer
+ with KafkaConsumer(**{'bootstrap.servers': 'localhost:9092', 'group.id': 'test_group'}) as consumer:
+     consumer.subscribe(['test_topic'])
+     for msg in consumer.consume():
+         print(msg.value())
+ ```
+
+ ### JSON Support
+
+ ```python
+ from dgkafka import KafkaProducer, JsonKafkaConsumer
+
+ # Producer: the base KafkaProducer serializes dict payloads to JSON automatically
+ with KafkaProducer(**{'bootstrap.servers': 'localhost:9092'}) as producer:
+     producer.produce('json_topic', {'key': 'value'})
+
+ # Consumer: JsonKafkaConsumer deserializes values via Schema Registry
+ with JsonKafkaConsumer(**{'bootstrap.servers': 'localhost:9092',
+                           'group.id': 'json_group',
+                           'schema.registry.url': 'http://localhost:8081'}) as consumer:
+     consumer.subscribe(['json_topic'])
+     for msg in consumer.consume():
+         print(msg.value())  # Automatically deserialized JSON
+ ```
+
+ ### Avro Support
+
+ ```python
+ from confluent_kafka import avro
+ from dgkafka import AvroKafkaProducer, AvroKafkaConsumer
+
+ # Producer
+ value_schema = avro.loads("""
+ {
+     "type": "record",
+     "name": "User",
+     "fields": [
+         {"name": "name", "type": "string"},
+         {"name": "age", "type": "int"}
+     ]
+ }
+ """)
+
+ with AvroKafkaProducer(
+         default_value_schema=value_schema,
+         **{'schema.registry.url': 'http://localhost:8081',
+            'bootstrap.servers': 'localhost:9092'}
+ ) as producer:
+     producer.produce('avro_topic', {'name': 'Alice', 'age': 30})
+
+ # Consumer
+ with AvroKafkaConsumer(
+         **{'schema.registry.url': 'http://localhost:8081',
+            'bootstrap.servers': 'localhost:9092',
+            'group.id': 'avro_group'}
+ ) as consumer:
+     consumer.subscribe(['avro_topic'])
+     for msg in consumer.consume():
+         print(msg.value())  # Automatically deserialized Avro object
+ ```
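+
+ Both Avro clients also expose the Schema Registry helpers `get_schema()` and `get_latest_schema()`. A minimal sketch of looking up the latest registered schema (it assumes the subject `avro_topic-value` already exists in the registry; the subject name is illustrative):
+
+ ```python
+ with AvroKafkaConsumer(
+         **{'schema.registry.url': 'http://localhost:8081',
+            'bootstrap.servers': 'localhost:9092',
+            'group.id': 'schema_inspect'}
+ ) as consumer:
+     # Returns the latest schema registered for the subject
+     latest_schema = consumer.get_latest_schema('avro_topic-value')
+     print(latest_schema)
+ ```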
+
+ ## Classes
+
+ ### Base Classes
+
+ - `KafkaProducer` - base message producer (serializes str, bytes and dict payloads)
+ - `KafkaConsumer` - base message consumer (offset commit and subscription helpers; see the example after the class list)
+
+ ### Specialized Classes
+
+ - `JsonKafkaConsumer` - JSON message consumer with Schema Registry deserialization (inherits from `KafkaConsumer`)
+ - `AvroKafkaProducer` - Avro message producer (inherits from `KafkaProducer`)
+ - `AvroKafkaConsumer` - Avro message consumer (inherits from `KafkaConsumer`)
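+
+ The base `KafkaConsumer` additionally provides `commit()`, `get_subscription_info()` and `log_subscription_info()`. A minimal sketch of manual offset commits (assumes `enable.auto.commit` is disabled; `process()` stands in for your own handler):
+
+ ```python
+ with KafkaConsumer(**{'bootstrap.servers': 'localhost:9092',
+                       'group.id': 'manual_commit_group',
+                       'enable.auto.commit': False}) as consumer:
+     consumer.subscribe(['test_topic'])
+     for msg in consumer.consume(num_messages=10):
+         process(msg)                      # your handler (illustrative)
+         consumer.commit(message=msg)      # commit the offset of the processed message
+     consumer.log_subscription_info()      # log current assignments and read positions
+ ```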
+
+ ## Configuration
+
+ All classes accept standard Kafka configuration parameters (the dotted `confluent_kafka` names), passed as keyword arguments:
+
+ ```python
+ config = {
+     'bootstrap.servers': 'localhost:9092',
+     'group.id': 'my_group',
+     'auto.offset.reset': 'earliest'
+ }
+ consumer = KafkaConsumer(**config)
+ ```
+
+ Avro classes additionally require the Schema Registry URL in the same configuration dict:
+
+ - `schema.registry.url` - Schema Registry URL
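+
+ The package also ships typed config models in `dgkafka.config` (`KafkaConfig`, `ConsumerConfig`, `ProducerConfig`, `AvroConsumerConfig`, `AvroProducerConfig`). A minimal sketch of building a consumer config and handing it to a client (values are illustrative):
+
+ ```python
+ from dgkafka import KafkaConsumer
+ from dgkafka.config import ConsumerConfig
+
+ config = ConsumerConfig.set({
+     'bootstrap.servers': 'localhost:9092',
+     'group.id': 'my_group',
+     'security.protocol': 'PLAINTEXT',
+     'auto.offset.reset': 'earliest',
+ })
+
+ # .get() returns a dict with the dotted confluent_kafka keys
+ consumer = KafkaConsumer(**config.get())
+ ```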
+
+ ## Logging
+
+ All classes use `dglog.Logger` for logging by default. You can provide a custom logger:
+
+ ```python
+ from dglog import Logger
+
+ logger = Logger()
+ producer = KafkaProducer(logger_=logger, ...)
+ ```
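+
+ A standard `logging.Logger` also works (the `logger_` parameter is typed as `logging.Logger | dglog.Logger | None`), for example:
+
+ ```python
+ import logging
+
+ logging.basicConfig(level=logging.INFO)
+ producer = KafkaProducer(logger_=logging.getLogger("dgkafka"),
+                          **{'bootstrap.servers': 'localhost:9092'})
+ ```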
+
+ ## Best Practices
+
+ 1. Always use context managers (`with`) for proper resource cleanup
+ 2. Implement error handling and retry logic for production use
+ 3. Pre-register Avro schemas in Schema Registry
+ 4. Configure appropriate `acks` and `retries` parameters for producers (see the sketch after this list)
+ 5. Monitor consumer lag and producer throughput
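+
+ A minimal sketch of a delivery-oriented producer setup (the values shown are illustrative, not recommendations):
+
+ ```python
+ from dgkafka import KafkaProducer
+
+ with KafkaProducer(**{'bootstrap.servers': 'localhost:9092',
+                       'acks': 'all',     # wait for all in-sync replicas
+                       'retries': 5}) as producer:
+     delivered = producer.produce('test_topic', 'payload', flush=True)
+     if not delivered:
+         # produce() returns False when delivery could not be confirmed
+         print('delivery failed')
+ ```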
+
+ ## Advanced Usage
+
+ ### Custom Serialization
+
+ ```python
+ # Customize Avro production by overriding produce()
+ class CustomAvroProducer(AvroKafkaProducer):
+     def produce(self, topic, value, **kwargs):
+         # Custom preprocessing logic before Avro serialization
+         return super().produce(topic, value, **kwargs)
+ ```
+
+ ### Message Headers
+
+ ```python
+ # Adding headers to messages
+ headers = {
+     'correlation_id': '12345',
+     'message_type': 'user_update'
+ }
+
+ producer.produce(
+     topic='events',
+     message=message_data,   # use value=... with the Avro producer
+     headers=headers
+ )
+ ```
+
+ ### Error Handling
+
+ ```python
+ from confluent_kafka import KafkaException
+
+ try:
+     with AvroKafkaProducer(...) as producer:
+         producer.produce(...)
+ except KafkaException as e:
+     print(f"Kafka error occurred: {e}")
+ ```
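+
+ The package also raises its own `ProducerNotSetError` / `ConsumerNotSetError` (from `dgkafka.errors`) when a client is used after `close()`. A small sketch:
+
+ ```python
+ from dgkafka.errors import ConsumerNotSetError
+
+ consumer = KafkaConsumer(**{'bootstrap.servers': 'localhost:9092', 'group.id': 'test_group'})
+ consumer.close()
+ try:
+     next(consumer.consume())
+ except ConsumerNotSetError:
+     print("consumer is closed")
+ ```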
+
+ ## Performance Tips
+
+ 1. Batch messages when possible (`batch.num.messages` config)
+ 2. Adjust `linger.ms` for better batching
+ 3. Use `compression.type` (lz4, snappy, or gzip)
+ 4. Tune `fetch.max.bytes` and `max.partition.fetch.bytes` for consumers (see the sketch after this list)
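+
+ A sketch of a throughput-oriented configuration (illustrative values; tune against your own workload):
+
+ ```python
+ producer_tuning = {
+     'bootstrap.servers': 'localhost:9092',
+     'linger.ms': 50,              # give the client time to fill batches
+     'batch.size': 131072,         # larger batches, fewer requests
+     'compression.type': 'lz4',
+ }
+
+ consumer_tuning = {
+     'bootstrap.servers': 'localhost:9092',
+     'group.id': 'bulk_reader',
+     'fetch.max.bytes': 52428800,            # 50 MB per fetch
+     'max.partition.fetch.bytes': 1048576,   # 1 MB per partition
+ }
+ ```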
+
+ ## License
+
+ MIT
dgkafka-1.0.0.dist-info/RECORD ADDED
@@ -0,0 +1,13 @@
+ dgkafka/__init__.py,sha256=fnqVZROyHXipdmhqZaa9XUjvQe795JJKFakwTndAiIw,286
+ dgkafka/avro_consumer.py,sha256=fI-VYQdw3bQJoDN4jS0F5Aa78wlxmjg5lmnTumw9FDs,2917
+ dgkafka/avro_producer.py,sha256=UL0RBaEKl_lbNF1UI-4w8R3AIEGHYI-Vna3iVZIlJqo,5421
+ dgkafka/config.py,sha256=TmfB0IOauSPr4B8InOZdoebAEGeabs7HZUXuozAFFlk,3828
+ dgkafka/consumer.py,sha256=OfONH61xj1sD4Z0YIQngfJOOwkx8Ko9waNYCKoXtJL4,8371
+ dgkafka/errors.py,sha256=PaH46tXameS--hrrUXKhQkZlBHvMSMPmjhVeRkmFvV0,95
+ dgkafka/json_consumer.py,sha256=7Gzn7C9WpyCTPDV6eFDugAx5gC9vdV-HrTh3Nv--zIE,1152
+ dgkafka/producer.py,sha256=WHVhw5HyJINxvUg_j_9iiQ0N7I3jc4niFXpZfkDHUOQ,6519
+ dgkafka-1.0.0.dist-info/licenses/LICENSE,sha256=pAZXnNE2dxxwXFIduGyn1gpvPefJtUYOYZOi3yeGG94,1068
+ dgkafka-1.0.0.dist-info/METADATA,sha256=QU2tM_MML3lTLbAjuBPpUgOgVOsvQMC8Fw_2AYy-FZ4,6059
+ dgkafka-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ dgkafka-1.0.0.dist-info/top_level.txt,sha256=GyNrxOh7IPdL0t2SxH8DWxg3fUma-ezQ1Kz4zIr2B7U,8
+ dgkafka-1.0.0.dist-info/RECORD,,
dgkafka-1.0.0.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (80.9.0)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
dgkafka-1.0.0.dist-info/licenses/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) [year] [fullname]
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
dgkafka-1.0.0.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
+ dgkafka