cledar-sdk 2.0.2__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cledar/__init__.py +1 -0
- cledar/kafka/README.md +239 -0
- cledar/kafka/__init__.py +42 -0
- cledar/kafka/clients/base.py +117 -0
- cledar/kafka/clients/consumer.py +138 -0
- cledar/kafka/clients/producer.py +97 -0
- cledar/kafka/config/schemas.py +262 -0
- cledar/kafka/exceptions.py +17 -0
- cledar/kafka/handlers/dead_letter.py +88 -0
- cledar/kafka/handlers/parser.py +83 -0
- cledar/kafka/logger.py +5 -0
- cledar/kafka/models/input.py +17 -0
- cledar/kafka/models/message.py +14 -0
- cledar/kafka/models/output.py +12 -0
- cledar/kafka/tests/.env.test.kafka +3 -0
- cledar/kafka/tests/README.md +216 -0
- cledar/kafka/tests/conftest.py +104 -0
- cledar/kafka/tests/integration/__init__.py +1 -0
- cledar/kafka/tests/integration/conftest.py +78 -0
- cledar/kafka/tests/integration/helpers.py +47 -0
- cledar/kafka/tests/integration/test_consumer_integration.py +375 -0
- cledar/kafka/tests/integration/test_integration.py +394 -0
- cledar/kafka/tests/integration/test_producer_consumer_interaction.py +388 -0
- cledar/kafka/tests/integration/test_producer_integration.py +217 -0
- cledar/kafka/tests/unit/__init__.py +1 -0
- cledar/kafka/tests/unit/test_base_kafka_client.py +391 -0
- cledar/kafka/tests/unit/test_config_validation.py +609 -0
- cledar/kafka/tests/unit/test_dead_letter_handler.py +443 -0
- cledar/kafka/tests/unit/test_error_handling.py +674 -0
- cledar/kafka/tests/unit/test_input_parser.py +310 -0
- cledar/kafka/tests/unit/test_input_parser_comprehensive.py +489 -0
- cledar/kafka/tests/unit/test_utils.py +25 -0
- cledar/kafka/tests/unit/test_utils_comprehensive.py +408 -0
- cledar/kafka/utils/callbacks.py +28 -0
- cledar/kafka/utils/messages.py +39 -0
- cledar/kafka/utils/topics.py +15 -0
- cledar/kserve/README.md +352 -0
- cledar/kserve/__init__.py +5 -0
- cledar/kserve/tests/__init__.py +0 -0
- cledar/kserve/tests/test_utils.py +64 -0
- cledar/kserve/utils.py +30 -0
- cledar/logging/README.md +53 -0
- cledar/logging/__init__.py +5 -0
- cledar/logging/tests/test_universal_plaintext_formatter.py +249 -0
- cledar/logging/universal_plaintext_formatter.py +99 -0
- cledar/monitoring/README.md +71 -0
- cledar/monitoring/__init__.py +5 -0
- cledar/monitoring/monitoring_server.py +156 -0
- cledar/monitoring/tests/integration/test_monitoring_server_int.py +162 -0
- cledar/monitoring/tests/test_monitoring_server.py +59 -0
- cledar/nonce/README.md +99 -0
- cledar/nonce/__init__.py +5 -0
- cledar/nonce/nonce_service.py +62 -0
- cledar/nonce/tests/__init__.py +0 -0
- cledar/nonce/tests/test_nonce_service.py +136 -0
- cledar/redis/README.md +536 -0
- cledar/redis/__init__.py +17 -0
- cledar/redis/async_example.py +112 -0
- cledar/redis/example.py +67 -0
- cledar/redis/exceptions.py +25 -0
- cledar/redis/logger.py +5 -0
- cledar/redis/model.py +14 -0
- cledar/redis/redis.py +764 -0
- cledar/redis/redis_config_store.py +333 -0
- cledar/redis/tests/test_async_integration_redis.py +158 -0
- cledar/redis/tests/test_async_redis_service.py +380 -0
- cledar/redis/tests/test_integration_redis.py +119 -0
- cledar/redis/tests/test_redis_service.py +319 -0
- cledar/storage/README.md +529 -0
- cledar/storage/__init__.py +6 -0
- cledar/storage/constants.py +5 -0
- cledar/storage/exceptions.py +79 -0
- cledar/storage/models.py +41 -0
- cledar/storage/object_storage.py +1274 -0
- cledar/storage/tests/conftest.py +18 -0
- cledar/storage/tests/test_abfs.py +164 -0
- cledar/storage/tests/test_integration_filesystem.py +359 -0
- cledar/storage/tests/test_integration_s3.py +453 -0
- cledar/storage/tests/test_local.py +384 -0
- cledar/storage/tests/test_s3.py +521 -0
- {cledar_sdk-2.0.2.dist-info → cledar_sdk-2.1.0.dist-info}/METADATA +1 -1
- cledar_sdk-2.1.0.dist-info/RECORD +84 -0
- cledar_sdk-2.0.2.dist-info/RECORD +0 -4
- {cledar_sdk-2.0.2.dist-info → cledar_sdk-2.1.0.dist-info}/WHEEL +0 -0
- {cledar_sdk-2.0.2.dist-info → cledar_sdk-2.1.0.dist-info}/licenses/LICENSE +0 -0
cledar/__init__.py
ADDED
@@ -0,0 +1 @@
+"""Cledar Python SDK for data platform services and integrations."""
cledar/kafka/README.md
ADDED
@@ -0,0 +1,239 @@
+# Kafka Service
+
+## Purpose
+
+The `cledar.kafka` package provides typed, testable wrappers around Kafka producer and consumer clients (Confluent Kafka), together with configuration schemas, message models, parsing and dead-letter handling utilities. It is designed for clarity, reliability, and easy testing (unit and integration).
+
+### Key Features
+
+- **Typed Producer/Consumer**: Simple OO wrappers for Confluent Kafka
+- **Pydantic Configs**: Validated, frozen dataclasses for producer/consumer configuration
+- **Dead Letter Handling**: Helper to route failed messages to DLQ topics
+- **Message Models**: Structured input/output models
+- **Parsing Utilities**: Safe message parsing to typed payloads
+- **Testability**: Comprehensive unit tests and Docker-based integration tests using testcontainers
+
+## Installation
+
+This package is part of the Cledar SDK. Install it using:
+
+```bash
+# Install with uv (recommended)
+uv sync --all-groups
+
+# Or with pip
+pip install -e .
+```
+
+## Usage Examples
+
+### Producer: send messages
+
+```python
+import time
+from cledar.kafka.clients.producer import KafkaProducer
+from cledar.kafka.config.schemas import KafkaProducerConfig
+
+producer = KafkaProducer(
+    KafkaProducerConfig(
+        kafka_servers="localhost:9092",  # or ["host1:9092", "host2:9092"]
+        kafka_group_id="example-producer",
+        kafka_topic_prefix="my-prefix.",  # optional
+        kafka_block_buffer_time_sec=1,
+    )
+)
+
+producer.connect()
+
+producer.send(
+    topic="example-topic",  # final Kafka topic will include prefix
+    key="msg-1",
+    value='{"id":"1","message":"hello","timestamp": %f}' % time.time(),
+)
+
+# Optionally check connection status
+assert producer.is_alive()
+
+producer.shutdown()
+```
+
+### Consumer: subscribe and consume
+
+```python
+from cledar.kafka.clients.consumer import KafkaConsumer
+from cledar.kafka.config.schemas import KafkaConsumerConfig
+
+consumer = KafkaConsumer(
+    KafkaConsumerConfig(
+        kafka_servers="localhost:9092",
+        kafka_group_id="example-consumer",
+        kafka_offset="earliest",
+        kafka_topic_prefix="my-prefix.",  # optional
+        kafka_block_consumer_time_sec=1,
+    )
+)
+
+consumer.connect()
+consumer.subscribe(["example-topic"])  # subscribes to prefixed topic
+
+msg = consumer.consume_next()  # returns KafkaMessage | None
+if msg is not None:
+    print(msg.topic, msg.key, msg.value)
+
+assert consumer.is_alive()
+consumer.shutdown()
+```
+
+### Dead Letter Handling
+
+```python
+from cledar.kafka.handlers.dead_letter import DeadLetterHandler
+from cledar.kafka.models.output import FailedMessageData
+
+# Assume you already have a connected producer and a consumed message
+handler = DeadLetterHandler(producer, dlq_topic="errors-topic")
+
+failure_details = [
+    FailedMessageData(
+        raised_at="2024-01-01T00:00:00Z",
+        exception_message="Processing failed",
+        exception_trace="Traceback...",
+        failure_reason="validation_error",
+    )
+]
+
+handler.handle(message, failure_details)
+```
+
+### Parsing to Typed Payloads
+
+```python
+from pydantic import BaseModel
+from cledar.kafka.handlers.parser import InputParser
+
+class Payload(BaseModel):
+    id: str
+    message: str
+
+parser = InputParser(Payload)
+parsed = parser.parse_message(message)  # -> ParsedMessage[Payload]
+print(parsed.payload.id, parsed.payload.message)
+```
+
+## Project Structure
+
+```
+cledar/kafka/
+├── clients/
+│   ├── base.py          # BaseKafkaClient (shared logic)
+│   ├── consumer.py      # KafkaConsumer wrapper
+│   └── producer.py      # KafkaProducer wrapper
+├── config/
+│   └── schemas.py       # Pydantic frozen dataclass configs
+├── handlers/
+│   ├── dead_letter.py   # DeadLetterHandler
+│   └── parser.py        # InputParser and related utilities
+├── models/
+│   ├── input.py         # Input model definitions
+│   ├── message.py       # KafkaMessage, etc.
+│   └── output.py        # FailedMessageData, etc.
+├── utils/
+│   ├── callbacks.py     # Delivery callbacks
+│   ├── messages.py      # Message utilities (e.g., extract_id_from_value)
+│   └── topics.py        # Topic utilities/helpers
+├── logger.py            # Module logger
+└── tests/
+    ├── README.md        # Tests documentation (how to run)
+    ├── conftest.py      # Test-wide teardown (thread cleanup)
+    ├── unit/            # Unit tests (176)
+    └── integration/     # Integration tests (41) with helpers & shared fixtures
+```
+
+## Running Linters
+
+Common commands from repo root:
+
+```bash
+# Format (ruff)
+uv run ruff format .
+
+# Type-check (mypy)
+uv run mypy kafka/
+
+# Optional: pylint
+uv run pylint kafka/
+```
+
+## Running Tests
+
+See `kafka/tests/README.md` for full details. Quick start:
+
+```bash
+# Unit tests
+PYTHONPATH=. uv run pytest kafka/tests/unit/ -v
+
+# Integration tests (requires Docker running)
+PYTHONPATH=. uv run pytest kafka/tests/integration/ -v
+```
+
+- Integration tests use `testcontainers` with Kafka image `confluentinc/cp-kafka:7.4.0`.
+- Shared fixtures live in `kafka/tests/integration/conftest.py`.
+- Helpers (e.g., `consume_until`) live in `kafka/tests/integration/helpers.py`.
+- Test-wide teardown in `kafka/tests/conftest.py` ensures background threads do not block process exit.
+
+## API Overview
+
+### Configs (pydantic dataclasses)
+
+```python
+from cledar.kafka.config.schemas import KafkaProducerConfig, KafkaConsumerConfig
+```
+
+- Validated, frozen configs; construct with required `kafka_servers` and `kafka_group_id`.
+- Optional fields include `kafka_topic_prefix`, timeouts, and intervals.
+
+### Producer
+
+```python
+from cledar.kafka.clients.producer import KafkaProducer
+```
+
+- `connect()` / `shutdown()`
+- `send(topic: str, value: str, key: str | None = None, headers: list[tuple[str, bytes]] | None = None)`
+- `check_connection()` / `is_alive()`
+
+### Consumer
+
+```python
+from cledar.kafka.clients.consumer import KafkaConsumer
+```
+
+- `connect()` / `shutdown()`
+- `subscribe(topics: list[str])`
+- `consume_next() -> KafkaMessage | None`
+- `commit(message: KafkaMessage) -> None`
+- `check_connection()` / `is_alive()`
+
+### Errors
+
+```python
+from kafka.exceptions import (
+    KafkaConnectionError,
+    KafkaProducerNotConnectedError,
+    KafkaConsumerNotConnectedError,
+)
+```
+
+## Notes
+
+- Always run tests with `PYTHONPATH=.` from the repository root to ensure imports resolve.
+- Integration tests require Docker and will pull testcontainers images on first run.
+- Topics are automatically prefixed with `kafka_topic_prefix` if set in configs.
+
+## License
+
+See the main repository LICENSE file.
+
+## Support
+
+For issues, questions, or contributions, please refer to the repository contribution guidelines.
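
The README above documents the producer, consumer, parser, and dead-letter pieces separately. The following sketch (not part of the package) shows one way they could be wired into a single processing loop, reusing only names that appear in the README; the group id, topic names, and broad exception handling are illustrative assumptions, and optional config fields are left at their defaults.

```python
# Illustrative glue code based on the README examples above; not shipped in the wheel.
import traceback
from datetime import datetime, timezone

from pydantic import BaseModel

from cledar.kafka.clients.consumer import KafkaConsumer
from cledar.kafka.clients.producer import KafkaProducer
from cledar.kafka.config.schemas import KafkaConsumerConfig, KafkaProducerConfig
from cledar.kafka.handlers.dead_letter import DeadLetterHandler
from cledar.kafka.handlers.parser import InputParser
from cledar.kafka.models.output import FailedMessageData


class Payload(BaseModel):
    id: str
    message: str


consumer = KafkaConsumer(
    KafkaConsumerConfig(kafka_servers="localhost:9092", kafka_group_id="example-worker")
)
producer = KafkaProducer(
    KafkaProducerConfig(kafka_servers="localhost:9092", kafka_group_id="example-worker")
)
consumer.connect()
producer.connect()
consumer.subscribe(["example-topic"])

parser = InputParser(Payload)
dead_letters = DeadLetterHandler(producer, dlq_topic="errors-topic")

try:
    while True:
        msg = consumer.consume_next()
        if msg is None:
            continue  # poll timed out with nothing to read
        try:
            parsed = parser.parse_message(msg)  # -> ParsedMessage[Payload]
            print(parsed.payload.id, parsed.payload.message)
        except Exception as exc:  # assumed policy: route failures to the DLQ and keep going
            dead_letters.handle(
                msg,
                [
                    FailedMessageData(
                        raised_at=datetime.now(timezone.utc).isoformat(),
                        exception_message=str(exc),
                        exception_trace=traceback.format_exc(),
                        failure_reason="processing_error",
                    )
                ],
            )
        consumer.commit(msg)
finally:
    consumer.shutdown()
    producer.shutdown()
```
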
cledar/kafka/__init__.py
ADDED
@@ -0,0 +1,42 @@
+"""Kafka client and utilities for Cledar SDK."""
+
+from .clients.base import BaseKafkaClient
+from .clients.consumer import KafkaConsumer
+from .clients.producer import KafkaProducer
+from .config.schemas import (
+    KafkaConsumerConfig,
+    KafkaProducerConfig,
+    KafkaSaslMechanism,
+    KafkaSecurityProtocol,
+)
+from .exceptions import (
+    KafkaConnectionError,
+    KafkaConsumerError,
+    KafkaConsumerNotConnectedError,
+    KafkaProducerNotConnectedError,
+)
+from .handlers.dead_letter import DeadLetterHandler
+from .handlers.parser import IncorrectMessageValueError, InputParser
+from .models.input import InputKafkaMessage
+from .models.message import KafkaMessage
+from .models.output import FailedMessageData
+
+__all__ = [
+    "KafkaConsumer",
+    "KafkaProducer",
+    "BaseKafkaClient",
+    "DeadLetterHandler",
+    "InputParser",
+    "IncorrectMessageValueError",
+    "InputKafkaMessage",
+    "FailedMessageData",
+    "KafkaMessage",
+    "KafkaProducerConfig",
+    "KafkaConsumerConfig",
+    "KafkaSecurityProtocol",
+    "KafkaSaslMechanism",
+    "KafkaConnectionError",
+    "KafkaConsumerNotConnectedError",
+    "KafkaProducerNotConnectedError",
+    "KafkaConsumerError",
+]
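
Given the re-exports and `__all__` list above, the public names appear importable from the package root as well as from their submodules. A minimal sketch (illustrative group id; optional config fields left at their defaults):

```python
# Root-level imports mirroring the __all__ list above (illustrative).
from cledar.kafka import (
    KafkaConsumer,
    KafkaConsumerConfig,
    KafkaProducer,
    KafkaProducerConfig,
)

producer = KafkaProducer(
    KafkaProducerConfig(kafka_servers="localhost:9092", kafka_group_id="example-svc")
)
consumer = KafkaConsumer(
    KafkaConsumerConfig(kafka_servers="localhost:9092", kafka_group_id="example-svc")
)
```
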
cledar/kafka/clients/base.py
ADDED
@@ -0,0 +1,117 @@
+"""Base Kafka client module."""
+
+import threading
+
+from confluent_kafka import Consumer, KafkaException, Producer
+from pydantic import ConfigDict
+from pydantic.dataclasses import dataclass
+
+from ..config.schemas import KafkaConsumerConfig, KafkaProducerConfig
+from ..exceptions import (
+    KafkaConnectionError,
+    KafkaConsumerNotConnectedError,
+    KafkaProducerNotConnectedError,
+)
+from ..logger import logger
+
+
+@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
+class BaseKafkaClient:
+    """Base class for Kafka clients.
+
+    This class provides common functionality for both producers and consumers,
+    such as connection monitoring and shutdown handling.
+    """
+
+    config: KafkaProducerConfig | KafkaConsumerConfig
+    client: Producer | Consumer | None = None
+    connection_check_thread: threading.Thread | None = None
+
+    def __post_init__(self) -> None:
+        """Initialize the client with instance-level events."""
+        # Create instance-level stop event
+        self._stop_event = threading.Event()
+        logger.info(
+            f"Initializing {self.__class__.__name__}.", extra={"config": self.config}
+        )
+
+    def start_connection_check_thread(self) -> None:
+        """Start a background thread to monitor the Kafka connection."""
+        if self.connection_check_thread is None:
+            self.connection_check_thread = threading.Thread(
+                target=self._monitor_connection
+            )
+            self.connection_check_thread.start()
+            logger.info(
+                f"Started {self.__class__.__name__} connection check thread.",
+                extra={"interval": self.config.kafka_connection_check_interval_sec},
+            )
+
+    def _monitor_connection(self) -> None:
+        while not self._stop_event.wait(
+            self.config.kafka_connection_check_interval_sec
+        ):
+            try:
+                self.check_connection()
+                logger.info(f"{self.__class__.__name__} connection status: Connected.")
+            except KafkaConnectionError:
+                logger.exception(f"{self.__class__.__name__} connection check failed.")
+
+    def is_alive(self) -> bool:
+        """Check if the client is currently connected and alive.
+
+        Returns:
+            bool: True if connected, False otherwise.
+
+        """
+        try:
+            self.check_connection()
+            return True
+        except (
+            KafkaProducerNotConnectedError,
+            KafkaConsumerNotConnectedError,
+            KafkaConnectionError,
+        ):
+            return False
+
+    def check_connection(self) -> None:
+        """Check the connection to Kafka servers.
+
+        When the broker is not available (or the address is wrong),
+        the 'connection refused' error is not caught.
+        Ref: https://github.com/confluentinc/confluent-kafka-python/issues/941
+        The below is far-from-perfect workaround handling that.
+
+        """
+        if self.client is None:
+            logger.error(
+                f"{self.__class__.__name__} is not connected. Call 'connect' first.",
+            )
+            raise (
+                KafkaProducerNotConnectedError
+                if isinstance(self.config, KafkaProducerConfig)
+                else KafkaConsumerNotConnectedError
+            )
+        try:
+            self.client.list_topics(
+                timeout=self.config.kafka_connection_check_timeout_sec
+            )
+        except KafkaException as exception:
+            logger.exception("Failed to connect to Kafka servers.")
+            raise KafkaConnectionError from exception
+
+    def shutdown(self) -> None:
+        """Shutdown the client and stop connection monitoring."""
+        logger.info("Closing %s...", self.__class__.__name__)
+        self._stop_event.set()
+        if self.connection_check_thread is not None:
+            self.connection_check_thread.join()
+            logger.info("Stopped connection check thread.")
+        if isinstance(self.client, Producer):
+            self.client.flush(-1)
+            logger.info("%s flushed.", self.__class__.__name__)
+        elif isinstance(self.client, Consumer):
+            self.client.close()
+        # Clear the client reference to indicate shutdown
+        self.client = None
+        logger.info("%s closed.", self.__class__.__name__)
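
`BaseKafkaClient` above exposes two health-check entry points: `check_connection()` raises (`KafkaConnectionError` or one of the `*NotConnectedError` variants) and is what the background monitor thread calls, while `is_alive()` catches those exceptions and returns a bool. A minimal sketch of the difference, using the producer as an example (illustrative broker address and group id):

```python
# Illustrative only; uses the health-check methods defined on BaseKafkaClient above.
from cledar.kafka import KafkaConnectionError, KafkaProducer, KafkaProducerConfig

producer = KafkaProducer(
    KafkaProducerConfig(kafka_servers="localhost:9092", kafka_group_id="health-demo")
)
producer.connect()

# Boolean form: suited to readiness/liveness style checks.
if not producer.is_alive():
    print("Kafka unreachable")

# Exception form: same underlying list_topics() probe, but raises on failure.
try:
    producer.check_connection()
except KafkaConnectionError:
    print("Kafka unreachable")

producer.shutdown()
```
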
cledar/kafka/clients/consumer.py
ADDED
@@ -0,0 +1,138 @@
+"""Kafka consumer client module."""
+
+from confluent_kafka import Consumer, KafkaException
+from pydantic import ConfigDict
+from pydantic.dataclasses import dataclass
+
+from ..config.schemas import KafkaConsumerConfig
+from ..exceptions import (
+    KafkaConsumerError,
+    KafkaConsumerNotConnectedError,
+)
+from ..logger import logger
+from ..models.message import KafkaMessage
+from ..utils.messages import consumer_not_connected_msg, extract_id_from_value
+from ..utils.topics import build_topic
+from .base import BaseKafkaClient
+
+
+@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
+class KafkaConsumer(BaseKafkaClient):
+    """Kafka consumer client.
+
+    This class provides methods to connect to Kafka, subscribe to topics,
+    and consume messages.
+    """
+
+    config: KafkaConsumerConfig
+    client: Consumer | None = None
+
+    def connect(self) -> None:
+        """Connect to Kafka servers and start connection monitoring."""
+        self.client = Consumer(self.config.to_kafka_config())
+        self.check_connection()
+        logger.info(
+            "Connected KafkaConsumer to Kafka servers.",
+            extra={"kafka_servers": self.config.kafka_servers},
+        )
+        self.start_connection_check_thread()
+
+    def subscribe(self, topics: list[str]) -> None:
+        """Subscribe to a list of topics.
+
+        Args:
+            topics: A list of topic names to subscribe to.
+
+        """
+        if self.client is None:
+            logger.error(
+                consumer_not_connected_msg,
+                extra={"topics": topics},
+            )
+            raise KafkaConsumerNotConnectedError
+
+        topics = [
+            build_topic(topic_name=topic, prefix=self.config.kafka_topic_prefix)
+            for topic in topics
+        ]
+
+        try:
+            logger.info(
+                "Subscribing to topics.",
+                extra={"topics": topics},
+            )
+            self.client.subscribe(topics)
+
+        except KafkaException as exception:
+            logger.exception(
+                "Failed to subscribe to topics.",
+                extra={"topics": topics},
+            )
+            raise exception
+
+    def consume_next(self) -> KafkaMessage | None:
+        """Consume the next message from subscribed topics.
+
+        Returns:
+            KafkaMessage | None: The consumed message or None if no message is
+            available.
+
+        """
+        if self.client is None:
+            logger.error(consumer_not_connected_msg)
+            raise KafkaConsumerNotConnectedError
+
+        try:
+            msg = self.client.poll(self.config.kafka_block_consumer_time_sec)
+
+            if msg is None:
+                return None
+
+            if msg.error():
+                logger.error(
+                    "Consumer error.",
+                    extra={"error": msg.error()},
+                )
+                raise KafkaConsumerError(msg.error())
+
+            logger.debug(
+                "Received message.",
+                extra={
+                    "topic": msg.topic(),
+                    "msg_id": extract_id_from_value(msg.value().decode("utf-8")),
+                    "key": msg.key(),
+                },
+            )
+            return KafkaMessage(
+                topic=msg.topic(),
+                value=msg.value().decode("utf-8") if msg.value() else None,
+                key=msg.key().decode("utf-8") if msg.key() else None,
+                offset=msg.offset(),
+                partition=msg.partition(),
+            )
+
+        except KafkaException as exception:
+            logger.exception("Failed to consume message.")
+            raise exception
+
+    def commit(self, message: KafkaMessage) -> None:
+        """Commit offsets for the current message.
+
+        Args:
+            message: The message for which to commit offsets.
+
+        """
+        if self.client is None:
+            logger.error(consumer_not_connected_msg)
+            raise KafkaConsumerNotConnectedError
+
+        try:
+            self.client.commit(asynchronous=True)
+            logger.debug(
+                "Commit requested.",
+                extra={"offset": message.offset, "partition": message.partition},
+            )
+
+        except KafkaException as exception:
+            logger.exception("Failed to commit offsets.")
+            raise exception
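
A small drain-loop sketch for the `KafkaConsumer` shown above (illustrative topic and group names, not part of the package): `consume_next()` returns `None` when the poll times out, raises `KafkaConsumerError` when the broker reports a message error, and `commit()` requests an asynchronous offset commit.

```python
# Illustrative drain loop built on the KafkaConsumer methods shown above.
from cledar.kafka import KafkaConsumer, KafkaConsumerConfig, KafkaConsumerError

consumer = KafkaConsumer(
    KafkaConsumerConfig(
        kafka_servers="localhost:9092",
        kafka_group_id="drain-demo",
        kafka_offset="earliest",
    )
)
consumer.connect()
consumer.subscribe(["example-topic"])

empty_polls = 0
while empty_polls < 5:  # stop after a few consecutive empty polls
    try:
        msg = consumer.consume_next()  # blocks up to kafka_block_consumer_time_sec
    except KafkaConsumerError:
        continue  # broker-reported message error; already logged by the client
    if msg is None:
        empty_polls += 1
        continue
    empty_polls = 0
    print(msg.topic, msg.partition, msg.offset, msg.key, msg.value)
    consumer.commit(msg)  # asynchronous commit of the consumer's current offsets

consumer.shutdown()
```
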
cledar/kafka/clients/producer.py
ADDED
@@ -0,0 +1,97 @@
+"""Kafka producer client module."""
+
+from confluent_kafka import KafkaException, Producer
+from pydantic import ConfigDict
+from pydantic.dataclasses import dataclass
+
+from ..config.schemas import KafkaProducerConfig
+from ..exceptions import KafkaProducerNotConnectedError
+from ..logger import logger
+from ..utils.callbacks import delivery_callback
+from ..utils.messages import extract_id_from_value
+from ..utils.topics import build_topic
+from .base import BaseKafkaClient
+
+
+@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
+class KafkaProducer(BaseKafkaClient):
+    """Kafka producer client.
+
+    This class provides methods to connect to Kafka and send messages.
+    """
+
+    config: KafkaProducerConfig
+    client: Producer | None = None
+
+    def connect(self) -> None:
+        """Connect to Kafka servers and start connection monitoring."""
+        self.client = Producer(self.config.to_kafka_config())
+        self.check_connection()
+        logger.info(
+            "Connected Producer to Kafka servers.",
+            extra={"kafka_servers": self.config.kafka_servers},
+        )
+        self.start_connection_check_thread()
+
+    def send(
+        self,
+        topic: str,
+        value: str | None,
+        key: str | None,
+        headers: list[tuple[str, bytes]] | None = None,
+    ) -> None:
+        """Send a message to a Kafka topic.
+
+        Args:
+            topic: The name of the topic to send the message to.
+            value: The message value.
+            key: The message key.
+            headers: Optional list of message headers.
+
+        """
+        if self.client is None:
+            logger.error(
+                "KafkaProducer is not connected. Call 'connect' first.",
+                extra={
+                    "topic": topic,
+                    "msg_id": extract_id_from_value(value),
+                    "key": key,
+                },
+            )
+            raise KafkaProducerNotConnectedError
+
+        topic = build_topic(topic_name=topic, prefix=self.config.kafka_topic_prefix)
+
+        try:
+            logger.debug(
+                "Sending message to topic.",
+                extra={
+                    "topic": topic,
+                    "msg_id": extract_id_from_value(value),
+                    "key": key,
+                    "headers": headers,
+                },
+            )
+            self.client.produce(
+                topic=topic,
+                value=value,
+                key=key,
+                headers=headers,
+                callback=delivery_callback,
+            )
+            self.client.poll(0)
+
+        except BufferError:
+            logger.warning("Buffer full, waiting for free space on the queue")
+            self.client.poll(self.config.kafka_block_buffer_time_sec)
+            self.client.produce(
+                topic=topic,
+                value=value,
+                key=key,
+                headers=headers,
+                callback=delivery_callback,
+            )
+
+        except KafkaException as exception:
+            logger.exception("Failed to send message.")
+            raise exception
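
`KafkaProducer.send()` above applies the topic prefix, attaches `delivery_callback`, and on `BufferError` waits `kafka_block_buffer_time_sec` before retrying the produce once. A minimal usage sketch (illustrative values; the JSON payload shape follows the README example):

```python
# Illustrative producer usage for the send() method shown above; not part of the package.
import json
import time

from cledar.kafka import KafkaProducer, KafkaProducerConfig

producer = KafkaProducer(
    KafkaProducerConfig(
        kafka_servers="localhost:9092",
        kafka_group_id="example-producer",
        kafka_block_buffer_time_sec=1,  # wait applied when the local buffer is full
    )
)
producer.connect()

producer.send(
    topic="example-topic",  # prefixed with kafka_topic_prefix if configured
    value=json.dumps({"id": "42", "message": "hello", "timestamp": time.time()}),
    key="msg-42",
    headers=[("content-type", b"application/json")],
)

# shutdown() stops the monitor thread and flushes anything still queued.
producer.shutdown()
```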