dgkafka 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dgkafka-1.0.0/LICENSE +21 -0
- dgkafka-1.0.0/PKG-INFO +239 -0
- dgkafka-1.0.0/README.md +198 -0
- dgkafka-1.0.0/dgkafka/__init__.py +11 -0
- dgkafka-1.0.0/dgkafka/avro_consumer.py +74 -0
- dgkafka-1.0.0/dgkafka/avro_producer.py +138 -0
- dgkafka-1.0.0/dgkafka/config.py +84 -0
- dgkafka-1.0.0/dgkafka/consumer.py +197 -0
- dgkafka-1.0.0/dgkafka/errors.py +6 -0
- dgkafka-1.0.0/dgkafka/json_consumer.py +25 -0
- dgkafka-1.0.0/dgkafka/producer.py +180 -0
- dgkafka-1.0.0/dgkafka.egg-info/PKG-INFO +239 -0
- dgkafka-1.0.0/dgkafka.egg-info/SOURCES.txt +18 -0
- dgkafka-1.0.0/dgkafka.egg-info/dependency_links.txt +1 -0
- dgkafka-1.0.0/dgkafka.egg-info/not-zip-safe +1 -0
- dgkafka-1.0.0/dgkafka.egg-info/requires.txt +16 -0
- dgkafka-1.0.0/dgkafka.egg-info/top_level.txt +1 -0
- dgkafka-1.0.0/pyproject.toml +50 -0
- dgkafka-1.0.0/setup.cfg +4 -0
- dgkafka-1.0.0/setup.py +32 -0
dgkafka-1.0.0/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) [year] [fullname]

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
dgkafka-1.0.0/PKG-INFO
ADDED
@@ -0,0 +1,239 @@
Metadata-Version: 2.4
Name: dgkafka
Version: 1.0.0
Summary: Kafka clients
Home-page: https://gitlab.com/gng-group/dgkafka.git
Author: Malanris
Author-email: Roman Rasputin <admin@roro.su>
License: MIT License
Project-URL: Homepage, https://gitlab.com/gng-group/dgkafka
Project-URL: BugTracker, https://gitlab.com/gng-group/dgkafka/issues
Keywords: kafka,client,confluent,avro,fastapi,logging
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Operating System :: OS Independent
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: System :: Logging
Classifier: Topic :: System :: Distributed Computing
Requires-Python: >=3.10
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: confluent-kafka>=2.1.1
Requires-Dist: dglog>=1.0.0
Requires-Dist: pydantic
Provides-Extra: avro
Requires-Dist: requests; extra == "avro"
Requires-Dist: fastavro<2; extra == "avro"
Requires-Dist: avro<2,>=1.11.1; extra == "avro"
Requires-Dist: attrs; extra == "avro"
Requires-Dist: cachetools; extra == "avro"
Requires-Dist: httpx>=0.26; extra == "avro"
Requires-Dist: authlib; extra == "avro"
Provides-Extra: json
Requires-Dist: pyrsistent; extra == "json"
Requires-Dist: jsonschema; extra == "json"
Dynamic: author
Dynamic: home-page
Dynamic: license-file

# dgkafka

Python package for working with Apache Kafka supporting multiple data formats.

## Installation

```bash
pip install dgkafka
```

For Avro support (requires additional dependencies):

```bash
pip install dgkafka[avro]
```

For JSON support (requires additional dependencies):

```bash
pip install dgkafka[json]
```

## Features

- Producers and consumers for different data formats:
  - Raw messages (bytes/strings)
  - JSON
  - Avro (with Schema Registry integration)
- Robust error handling
- Comprehensive operation logging
- Context manager support
- Flexible configuration

## Quick Start

### Basic Producer/Consumer

```python
from dgkafka import KafkaProducer, KafkaConsumer

# Producer
with KafkaProducer(bootstrap_servers='localhost:9092') as producer:
    producer.produce('test_topic', 'Hello, Kafka!')

# Consumer
with KafkaConsumer(bootstrap_servers='localhost:9092', group_id='test_group') as consumer:
    consumer.subscribe(['test_topic'])
    for msg in consumer.consume():
        print(msg.value())
```

### JSON Support

```python
from dgkafka import JsonKafkaProducer, JsonKafkaConsumer

# Producer
with JsonKafkaProducer(bootstrap_servers='localhost:9092') as producer:
    producer.produce('json_topic', {'key': 'value'})

# Consumer
with JsonKafkaConsumer(bootstrap_servers='localhost:9092', group_id='json_group') as consumer:
    consumer.subscribe(['json_topic'])
    for msg in consumer.consume():
        print(msg.value())  # Automatically deserialized JSON
```

### Avro Support

```python
from dgkafka import AvroKafkaProducer, AvroKafkaConsumer

# Producer
value_schema = {
    "type": "record",
    "name": "User",
    "fields": [
        {"name": "name", "type": "string"},
        {"name": "age", "type": "int"}
    ]
}

with AvroKafkaProducer(
    schema_registry_url='http://localhost:8081',
    bootstrap_servers='localhost:9092',
    default_value_schema=value_schema
) as producer:
    producer.produce('avro_topic', {'name': 'Alice', 'age': 30})

# Consumer
with AvroKafkaConsumer(
    schema_registry_url='http://localhost:8081',
    bootstrap_servers='localhost:9092',
    group_id='avro_group'
) as consumer:
    consumer.subscribe(['avro_topic'])
    for msg in consumer.consume():
        print(msg.value())  # Automatically deserialized Avro object
```

## Classes

### Base Classes

- `KafkaProducer` - base message producer
- `KafkaConsumer` - base message consumer

### Specialized Classes

- `JsonKafkaProducer` - JSON message producer (inherits from `KafkaProducer`)
- `JsonKafkaConsumer` - JSON message consumer (inherits from `KafkaConsumer`)
- `AvroKafkaProducer` - Avro message producer (inherits from `KafkaProducer`)
- `AvroKafkaConsumer` - Avro message consumer (inherits from `KafkaConsumer`)

## Configuration

All classes accept standard Kafka configuration parameters:

```python
config = {
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'my_group',
    'auto.offset.reset': 'earliest'
}
```

Avro classes require an additional parameter:

- `schema_registry_url` - Schema Registry URL

## Logging

All classes use `dglog.Logger` for logging. You can provide a custom logger:

```python
from dglog import Logger

logger = Logger()
producer = KafkaProducer(logger_=logger, ...)
```

## Best Practices

1. Always use context managers (`with`) for proper resource cleanup
2. Implement error handling and retry logic for production use
3. Pre-register Avro schemas in Schema Registry
4. Configure appropriate `acks` and `retries` parameters for producers (see the config sketch after this list)
5. Monitor consumer lag and producer throughput
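
The reliability settings in item 4 are standard librdkafka producer options. A minimal sketch, assuming the producer accepts these dotted configuration keys unpacked as keyword arguments (the same dotted-key form shown under Configuration); the broker address and values are illustrative:

```python
from dgkafka import KafkaProducer

# Delivery-guarantee options (standard librdkafka keys); values are illustrative.
reliable_config = {
    'bootstrap.servers': 'localhost:9092',
    'acks': 'all',               # wait for all in-sync replicas to acknowledge
    'retries': 5,                # retry transient produce failures
    'enable.idempotence': True,  # keep retries from introducing duplicates
}

with KafkaProducer(**reliable_config) as producer:
    producer.produce('test_topic', 'Hello, Kafka!')
```
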
## Advanced Usage

### Custom Serialization

```python
# Custom Avro serializer
class CustomAvroProducer(AvroKafkaProducer):
    def _serialize_value(self, value):
        # Custom serialization logic
        return super()._serialize_value(value)
```

### Message Headers

```python
# Adding headers to messages
headers = {
    'correlation_id': '12345',
    'message_type': 'user_update'
}

producer.produce(
    topic='events',
    value=message_data,
    headers=headers
)
```

### Error Handling

```python
from confluent_kafka import KafkaException

try:
    with AvroKafkaProducer(...) as producer:
        producer.produce(...)
except KafkaException as e:
    print(f"Kafka error occurred: {e}")
```

## Performance Tips

1. Batch messages when possible (`batch.num.messages` config; see the sketch after this list)
2. Adjust `linger.ms` for better batching
3. Use `compression.type` (lz4, snappy, or gzip)
4. Tune `fetch.max.bytes` and `max.partition.fetch.bytes` for consumers
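
All of these are standard librdkafka options. A minimal sketch with illustrative starting values (tune them against your own message sizes and latency targets), again assuming the constructors accept dotted keys unpacked as keyword arguments:

```python
from dgkafka import KafkaProducer, KafkaConsumer

# Producer-side batching and compression (librdkafka keys; values are illustrative).
producer_tuning = {
    'bootstrap.servers': 'localhost:9092',
    'batch.num.messages': 10000,  # messages per batch
    'linger.ms': 50,              # wait up to 50 ms to fill a batch
    'compression.type': 'lz4',    # lz4, snappy, or gzip
}

# Consumer-side fetch sizing.
consumer_tuning = {
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'tuned_group',
    'fetch.max.bytes': 52428800,           # 50 MiB per fetch request
    'max.partition.fetch.bytes': 1048576,  # 1 MiB per partition
}

producer = KafkaProducer(**producer_tuning)
consumer = KafkaConsumer(**consumer_tuning)
```
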
## License

MIT
dgkafka-1.0.0/README.md
ADDED
@@ -0,0 +1,198 @@
# dgkafka

Python package for working with Apache Kafka supporting multiple data formats.

## Installation

```bash
pip install dgkafka
```

For Avro support (requires additional dependencies):

```bash
pip install dgkafka[avro]
```

For JSON support (requires additional dependencies):

```bash
pip install dgkafka[json]
```

## Features

- Producers and consumers for different data formats:
  - Raw messages (bytes/strings)
  - JSON
  - Avro (with Schema Registry integration)
- Robust error handling
- Comprehensive operation logging
- Context manager support
- Flexible configuration

## Quick Start

### Basic Producer/Consumer

```python
from dgkafka import KafkaProducer, KafkaConsumer

# Producer
with KafkaProducer(bootstrap_servers='localhost:9092') as producer:
    producer.produce('test_topic', 'Hello, Kafka!')

# Consumer
with KafkaConsumer(bootstrap_servers='localhost:9092', group_id='test_group') as consumer:
    consumer.subscribe(['test_topic'])
    for msg in consumer.consume():
        print(msg.value())
```

### JSON Support

```python
from dgkafka import JsonKafkaProducer, JsonKafkaConsumer

# Producer
with JsonKafkaProducer(bootstrap_servers='localhost:9092') as producer:
    producer.produce('json_topic', {'key': 'value'})

# Consumer
with JsonKafkaConsumer(bootstrap_servers='localhost:9092', group_id='json_group') as consumer:
    consumer.subscribe(['json_topic'])
    for msg in consumer.consume():
        print(msg.value())  # Automatically deserialized JSON
```

### Avro Support

```python
from dgkafka import AvroKafkaProducer, AvroKafkaConsumer

# Producer
value_schema = {
    "type": "record",
    "name": "User",
    "fields": [
        {"name": "name", "type": "string"},
        {"name": "age", "type": "int"}
    ]
}

with AvroKafkaProducer(
    schema_registry_url='http://localhost:8081',
    bootstrap_servers='localhost:9092',
    default_value_schema=value_schema
) as producer:
    producer.produce('avro_topic', {'name': 'Alice', 'age': 30})

# Consumer
with AvroKafkaConsumer(
    schema_registry_url='http://localhost:8081',
    bootstrap_servers='localhost:9092',
    group_id='avro_group'
) as consumer:
    consumer.subscribe(['avro_topic'])
    for msg in consumer.consume():
        print(msg.value())  # Automatically deserialized Avro object
```

## Classes

### Base Classes

- `KafkaProducer` - base message producer
- `KafkaConsumer` - base message consumer

### Specialized Classes

- `JsonKafkaProducer` - JSON message producer (inherits from `KafkaProducer`)
- `JsonKafkaConsumer` - JSON message consumer (inherits from `KafkaConsumer`)
- `AvroKafkaProducer` - Avro message producer (inherits from `KafkaProducer`)
- `AvroKafkaConsumer` - Avro message consumer (inherits from `KafkaConsumer`)

## Configuration

All classes accept standard Kafka configuration parameters:

```python
config = {
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'my_group',
    'auto.offset.reset': 'earliest'
}
```

Avro classes require an additional parameter:

- `schema_registry_url` - Schema Registry URL
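
A minimal sketch of wiring such a config dict into a consumer; it assumes the constructors accept these dotted keys unpacked as keyword arguments (which matches how the bundled classes read them from `**configs`), and the `num_messages`/`timeout` arguments mirror the signature of the bundled Avro consumer:

```python
from dgkafka import KafkaConsumer

config = {
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'my_group',
    'auto.offset.reset': 'earliest'
}

# Dotted librdkafka keys are passed straight through **configs to the underlying client.
with KafkaConsumer(**config) as consumer:
    consumer.subscribe(['test_topic'])
    for msg in consumer.consume(num_messages=10, timeout=1.0):
        if msg is not None:
            print(msg.value())
```
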
## Logging

All classes use `dglog.Logger` for logging. You can provide a custom logger:

```python
from dglog import Logger

logger = Logger()
producer = KafkaProducer(logger_=logger, ...)
```
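
The `logger_` type hints in the bundled sources also accept a standard `logging.Logger`, so a stdlib logger can be dropped in. A minimal sketch (the logger name and config values are illustrative):

```python
import logging

from dgkafka import KafkaProducer

logging.basicConfig(level=logging.INFO)
std_logger = logging.getLogger("dgkafka.example")  # name is illustrative

# logging.Logger is accepted alongside dglog.Logger per the type hints.
producer = KafkaProducer(logger_=std_logger, **{'bootstrap.servers': 'localhost:9092'})
```
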
## Best Practices

1. Always use context managers (`with`) for proper resource cleanup
2. Implement error handling and retry logic for production use
3. Pre-register Avro schemas in Schema Registry
4. Configure appropriate `acks` and `retries` parameters for producers
5. Monitor consumer lag and producer throughput

## Advanced Usage

### Custom Serialization

```python
# Custom Avro serializer
class CustomAvroProducer(AvroKafkaProducer):
    def _serialize_value(self, value):
        # Custom serialization logic
        return super()._serialize_value(value)
```

### Message Headers

```python
# Adding headers to messages
headers = {
    'correlation_id': '12345',
    'message_type': 'user_update'
}

producer.produce(
    topic='events',
    value=message_data,
    headers=headers
)
```
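
On the consumer side, headers come back through the underlying `confluent_kafka.Message.headers()` call as a list of `(key, bytes)` tuples. A minimal sketch (topic, group, and header names follow the example above):

```python
from dgkafka import KafkaConsumer

with KafkaConsumer(bootstrap_servers='localhost:9092', group_id='events_group') as consumer:
    consumer.subscribe(['events'])
    for msg in consumer.consume():
        if msg is None:
            continue
        # headers() returns a list of (key, bytes) tuples, or None when no headers were set
        received = dict(msg.headers() or [])
        correlation_id = received.get('correlation_id', b'').decode('utf-8')
        print(correlation_id, msg.value())
```
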
### Error Handling

```python
from confluent_kafka import KafkaException

try:
    with AvroKafkaProducer(...) as producer:
        producer.produce(...)
except KafkaException as e:
    print(f"Kafka error occurred: {e}")
```
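
A hedged sketch of the retry logic recommended under Best Practices; the wrapper function, attempt count, and backoff values are illustrative choices, not part of dgkafka:

```python
import time

from confluent_kafka import KafkaException
from dgkafka import KafkaProducer

def produce_with_retry(producer, topic, value, attempts=3, backoff_s=1.0):
    """Illustrative retry wrapper; tune attempts and backoff for your workload."""
    for attempt in range(1, attempts + 1):
        try:
            producer.produce(topic, value)
            return True
        except KafkaException as e:
            print(f"Kafka error on attempt {attempt}: {e}")
            time.sleep(backoff_s * attempt)  # simple linear backoff
    return False

with KafkaProducer(bootstrap_servers='localhost:9092') as producer:
    produce_with_retry(producer, 'test_topic', 'Hello, Kafka!')
```
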
## Performance Tips

1. Batch messages when possible (`batch.num.messages` config)
2. Adjust `linger.ms` for better batching
3. Use `compression.type` (lz4, snappy, or gzip)
4. Tune `fetch.max.bytes` and `max.partition.fetch.bytes` for consumers

## License

MIT
dgkafka-1.0.0/dgkafka/__init__.py
ADDED
@@ -0,0 +1,11 @@
from .consumer import KafkaConsumer
from .producer import KafkaProducer
try:
    from .avro_consumer import AvroKafkaConsumer
    from .avro_producer import AvroKafkaProducer
except ImportError:
    pass
try:
    from .json_consumer import JsonKafkaConsumer
except ImportError:
    pass
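
Because the optional imports above swallow `ImportError`, the Avro and JSON classes are simply absent when their extras are not installed. A small illustrative runtime check (the `HAS_AVRO` flag is not part of the package):

```python
# Illustrative feature check: the names only exist when the corresponding extra is installed.
try:
    from dgkafka import AvroKafkaConsumer, AvroKafkaProducer
    HAS_AVRO = True
except ImportError:  # install with: pip install dgkafka[avro]
    HAS_AVRO = False

if HAS_AVRO:
    print("Avro support is available")
```
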
dgkafka-1.0.0/dgkafka/avro_consumer.py
ADDED
@@ -0,0 +1,74 @@
from typing import Any, Iterator

from dgkafka.consumer import KafkaConsumer

from confluent_kafka import Message
from confluent_kafka.avro import AvroConsumer
from confluent_kafka.avro.serializer import SerializerError
from confluent_kafka.avro.cached_schema_registry_client import CachedSchemaRegistryClient

import logging
import dglog


class AvroKafkaConsumer(KafkaConsumer):
    """Kafka consumer with Avro schema support using Schema Registry."""

    def __init__(self, logger_: logging.Logger | dglog.Logger | None = None, **configs: Any) -> None:
        """
        Initialize Avro consumer.

        Args:
            logger_: Optional logger instance
            configs: Kafka consumer configuration; must include 'schema.registry.url'
        """
        self.schema_registry_url = configs.get('schema.registry.url')
        assert self.schema_registry_url is not None, "schema.registry.url is required"

        self.schema_registry_client = CachedSchemaRegistryClient(url=self.schema_registry_url)
        super().__init__(logger_=logger_, **configs)

    def _init_consumer(self, **configs: Any) -> None:
        """Initialize AvroConsumer instance."""
        try:
            self.consumer = AvroConsumer(configs)
            self.logger.info("[*] Avro consumer initialized successfully")
        except Exception as ex:
            self.logger.error(f"[x] Failed to initialize avro consumer: {ex}")
            raise

    def consume(self, num_messages: int = 1, timeout: float = 1.0, decode_: bool = False,
                **kwargs: Any) -> Iterator[str | bytes | Message | None]:
        """
        Consume Avro-encoded messages.

        Args:
            num_messages: Maximum number of messages to consume
            timeout: Poll timeout in seconds
            decode_: Yield the deserialized value instead of the raw Message
            kwargs: Additional arguments

        Yields:
            Deserialized Avro messages as dictionaries, or Message objects on error
        """
        consumer = self._ensure_consumer()

        for _ in range(num_messages):
            msg = self._consume(consumer, timeout)
            try:
                if msg is None:
                    continue
                yield msg.value() if decode_ else msg
            except SerializerError as e:
                self.logger.error(f"[x] Avro deserialization failed: {e}")
                yield msg  # Return raw message on deserialization error
            except Exception as ex:
                self.logger.error(f"[!] Unexpected error: {ex}")
                continue

    def get_schema(self, subject: str, version: int = 1) -> dict[str, Any]:
        """Get Avro schema from Schema Registry."""
        return self.schema_registry_client.get_schema(subject, version)

    def get_latest_schema(self, subject: str) -> dict[str, Any]:
        """Get latest version of schema for given subject."""
        return self.schema_registry_client.get_latest_schema(subject)[1]
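
A hedged usage sketch for the consumer above; the broker, registry URL, topic, and group names are assumptions, and the dotted keys mirror what `__init__` reads from `**configs`:

```python
from dgkafka import AvroKafkaConsumer

# Illustrative configuration; addresses and names are not defaults shipped with the package.
consumer = AvroKafkaConsumer(**{
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'avro_group',
    'schema.registry.url': 'http://localhost:8081',
})
consumer.subscribe(['avro_topic'])

# decode_=True yields the deserialized value; decode_=False yields the raw Message.
for record in consumer.consume(num_messages=10, timeout=1.0, decode_=True):
    if record is not None:
        print(record)
```
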
dgkafka-1.0.0/dgkafka/avro_producer.py
ADDED
@@ -0,0 +1,138 @@
from typing import Optional, Union, Dict, Any
from confluent_kafka.avro import AvroProducer
from confluent_kafka.avro.serializer import SerializerError
from confluent_kafka.avro.cached_schema_registry_client import CachedSchemaRegistryClient

import dglog
import logging

from dgkafka.producer import KafkaProducer


class AvroKafkaProducer(KafkaProducer):
    """Kafka producer with Avro schema support using Schema Registry."""

    def __init__(
        self,
        default_key_schema: str | None = None,
        default_value_schema: str | None = None,
        logger_: logging.Logger | dglog.Logger | None = None,
        **configs: Any
    ) -> None:
        """
        Initialize Avro producer.

        Args:
            default_key_schema: Default Avro schema for message keys
            default_value_schema: Default Avro schema for message values
            logger_: Optional logger instance
            configs: Kafka producer configuration; must include 'schema.registry.url'
        """
        self.schema_registry_url = configs.get('schema.registry.url')
        assert self.schema_registry_url is not None, "schema.registry.url is required"

        self.default_key_schema = default_key_schema
        self.default_value_schema = default_value_schema
        self.schema_registry_client = CachedSchemaRegistryClient(url=self.schema_registry_url)
        super().__init__(logger_=logger_, **configs)

    def _init_producer(self, **configs: Any) -> None:
        """Initialize AvroProducer instance."""
        try:
            self.producer = AvroProducer(
                config=configs,
                default_key_schema=self.default_key_schema,
                default_value_schema=self.default_value_schema
            )
            self.logger.info("[*] Avro producer initialized successfully")
        except Exception as ex:
            self.logger.error(f"[x] Failed to initialize avro producer: {ex}")
            raise

    def produce(
        self,
        topic: str,
        value: dict[str, Any] | Any,
        key: dict[str, Any] | str | None = None,
        value_schema: dict[str, Any] | None = None,
        key_schema: dict[str, Any] | None = None,
        partition: int | None = None,
        headers: dict[str, bytes] | None = None,
        flush: bool = True
    ) -> bool:
        """
        Produce Avro-encoded message to Kafka.

        Args:
            topic: Target topic name
            value: Message value (must match Avro schema)
            key: Message key (optional)
            value_schema: Avro schema for message value (optional)
            key_schema: Avro schema for message key (optional)
            partition: Specific partition (optional)
            headers: Message headers (optional)
            flush: Immediately flush after producing (default: True)

        Returns:
            True if the message was delivered (or enqueued when flush=False), False otherwise.
        """
        producer = self._ensure_producer()
        producer.poll(0)

        self._delivery_status['success'] = None

        # Prepare headers
        headers_list = None
        if headers:
            headers_list = [(k, v if isinstance(v, bytes) else str(v).encode('utf-8'))
                            for k, v in headers.items()]

        try:
            if not partition:
                producer.produce(
                    topic=topic,
                    value=value,
                    value_schema=value_schema,
                    key=key,
                    key_schema=key_schema,
                    on_delivery=self.delivery_report,
                    headers=headers_list
                )
            else:
                producer.produce(
                    topic=topic,
                    value=value,
                    value_schema=value_schema,
                    key=key,
                    key_schema=key_schema,
                    partition=partition,
                    on_delivery=self.delivery_report,
                    headers=headers_list
                )

            if flush:
                remaining = producer.flush(10.0)  # wait up to 10 s for outstanding deliveries
                if remaining > 0:
                    self.logger.warning(f"[!] {remaining} messages remain undelivered after flush timeout")
                    return False

            # If flush=True, the delivery callback should have set the status by this point
            if flush and self._delivery_status['success'] is not None:
                return self._delivery_status['success']

            # If flush=False we cannot confirm delivery yet; return True
            # (technically no error has occurred so far)
            return True

        except SerializerError as ex:
            self.logger.error(f"[x] Avro serialization failed: {ex}")
            return False
        except Exception as ex:
            self.logger.error(f"[x] Failed to produce Avro message: {ex}")
            return False

    def get_schema(self, subject: str, version: int = 1) -> Dict[str, Any]:
        """Get Avro schema from Schema Registry."""
        return self.schema_registry_client.get_schema(subject, version)

    def get_latest_schema(self, subject: str) -> Dict[str, Any]:
        """Get latest version of schema for given subject."""
        return self.schema_registry_client.get_latest_schema(subject)[1]
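
A hedged usage sketch for the producer above, passing a per-call `value_schema` instead of relying on `default_value_schema`; the broker and registry addresses are assumptions, and the schema-as-dict form follows the README example:

```python
from dgkafka import AvroKafkaProducer

user_schema = {
    "type": "record",
    "name": "User",
    "fields": [{"name": "name", "type": "string"}, {"name": "age", "type": "int"}],
}

# Illustrative configuration; dotted keys mirror what __init__ reads from **configs.
producer = AvroKafkaProducer(**{
    'bootstrap.servers': 'localhost:9092',
    'schema.registry.url': 'http://localhost:8081',
})

# produce() returns True on confirmed delivery (flush=True) and False on failure.
ok = producer.produce('avro_topic', {'name': 'Alice', 'age': 30},
                      value_schema=user_schema, flush=True)
print('delivered' if ok else 'delivery failed')
```
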