investify-utils 2.0.0a4__tar.gz → 2.0.0a5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: investify-utils
-Version: 2.0.0a4
+Version: 2.0.0a5
 Summary: Shared utilities for Investify services
 Author-Email: Investify <dev@investify.vn>
 License: MIT
@@ -20,6 +20,8 @@ Provides-Extra: postgres-async
 Requires-Dist: pandas>=2.0; extra == "postgres-async"
 Requires-Dist: sqlalchemy>=2.0; extra == "postgres-async"
 Requires-Dist: asyncpg>=0.29; extra == "postgres-async"
+Provides-Extra: kafka
+Requires-Dist: confluent-kafka>=2.0; extra == "kafka"
 Provides-Extra: s3
 Requires-Dist: boto3>=1.34; extra == "s3"
 Provides-Extra: dev
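
The new extra keeps confluent-kafka opt-in: it is only installed via pip install "investify-utils[kafka]". A minimal sketch of guarding a missing extra at runtime (the ImportError fallback is illustrative, not part of the package):

    # Because the kafka subpackage resolves its exports lazily (see the
    # __init__.py below), the ImportError surfaces on attribute access,
    # not on package import.
    try:
        from investify_utils.kafka import AvroProducer  # needs investify-utils[kafka]
    except ImportError:
        AvroProducer = None  # kafka extra not installed; disable Kafka features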
investify_utils/kafka/__init__.py (new file)
@@ -0,0 +1,43 @@
+"""
+Kafka Avro producer and consumer clients.
+
+Sync (for Celery workers, scripts):
+    from investify_utils.kafka import AvroProducer, AvroConsumer
+
+Async (for LangGraph, FastAPI):
+    from investify_utils.kafka import AsyncAvroProducer, AsyncAvroConsumer
+"""
+
+
+def __getattr__(name: str):
+    """Lazy import to avoid loading confluent-kafka if not needed."""
+    if name == "AvroProducer":
+        from investify_utils.kafka.sync_producer import AvroProducer
+
+        return AvroProducer
+    if name == "AvroConsumer":
+        from investify_utils.kafka.sync_consumer import AvroConsumer
+
+        return AvroConsumer
+    if name == "OffsetTracker":
+        from investify_utils.kafka.sync_consumer import OffsetTracker
+
+        return OffsetTracker
+    if name == "AsyncAvroProducer":
+        from investify_utils.kafka.async_producer import AsyncAvroProducer
+
+        return AsyncAvroProducer
+    if name == "AsyncAvroConsumer":
+        from investify_utils.kafka.async_consumer import AsyncAvroConsumer
+
+        return AsyncAvroConsumer
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+
+__all__ = [
+    "AvroProducer",
+    "AvroConsumer",
+    "OffsetTracker",
+    "AsyncAvroProducer",
+    "AsyncAvroConsumer",
+]
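
The module-level __getattr__ is the PEP 562 lazy-import pattern: importing investify_utils.kafka stays cheap, and confluent-kafka is only loaded when one of the exported names is first accessed. A minimal sketch of the observable behavior (assuming confluent-kafka is installed but not yet imported by anything else in the process):

    import sys

    import investify_utils.kafka  # does not load confluent-kafka yet

    assert "confluent_kafka" not in sys.modules

    # Attribute access triggers __getattr__, which imports the real module.
    from investify_utils.kafka import AvroConsumer

    assert "confluent_kafka" in sys.modules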
investify_utils/kafka/async_consumer.py (new file)
@@ -0,0 +1,210 @@
+"""
+Asynchronous Avro consumer using confluent-kafka with asyncio.
+
+Features:
+    - Non-blocking poll with async/await
+    - Suitable for FastAPI websocket streaming
+    - Background message fetching with queue
+
+Usage:
+    from investify_utils.kafka import AsyncAvroConsumer
+
+    consumer = AsyncAvroConsumer(
+        topic="my-topic",
+        subject="my-topic-value",
+        schema_registry_url="http://localhost:8081",
+        bootstrap_servers="localhost:9092",
+        group_id="my-consumer-group",
+    )
+
+    # In FastAPI websocket
+    @app.websocket("/ws")
+    async def websocket_endpoint(websocket: WebSocket):
+        await websocket.accept()
+        async for key, value in consumer:
+            await websocket.send_json(value)
+"""
+
+import asyncio
+import logging
+from typing import AsyncIterator
+
+from confluent_kafka import DeserializingConsumer, TopicPartition
+from confluent_kafka.schema_registry import SchemaRegistryClient
+from confluent_kafka.schema_registry.avro import AvroDeserializer
+from confluent_kafka.serialization import StringDeserializer
+
+logger = logging.getLogger(__name__)
+
+
+class AsyncAvroConsumer:
+    """
+    Asynchronous Avro consumer for async frameworks.
+
+    Uses a background thread for polling (confluent-kafka is not async-native)
+    and an asyncio queue to bridge to async code.
+
+    Args:
+        topic: Kafka topic name or list of topics
+        subject: Schema Registry subject name
+        schema_registry_url: Schema Registry URL
+        bootstrap_servers: Kafka bootstrap servers
+        group_id: Consumer group ID
+        seek_to_end: Start from latest offset (default: False)
+        queue_size: Max messages to buffer (default: 100)
+        **kwargs: Additional Kafka consumer config
+    """
+
+    def __init__(
+        self,
+        topic: str | list[str],
+        subject: str,
+        schema_registry_url: str,
+        bootstrap_servers: str,
+        group_id: str,
+        seek_to_end: bool = False,
+        queue_size: int = 100,
+        **kwargs,
+    ):
+        self._schema_registry_url = schema_registry_url
+        self._bootstrap_servers = bootstrap_servers
+        self._subject = subject
+        self._group_id = group_id
+        self._topic = topic
+        self._seek_to_end = seek_to_end
+        self._queue_size = queue_size
+        self._kwargs = kwargs
+
+        self._consumer: DeserializingConsumer | None = None
+        self._queue: asyncio.Queue | None = None
+        self._poll_task: asyncio.Task | None = None
+        self._running = False
+
+    def _init_consumer(self) -> DeserializingConsumer:
+        """Initialize the consumer."""
+        schema_registry_client = SchemaRegistryClient({"url": self._schema_registry_url})
+        registered_schema = schema_registry_client.get_latest_version(self._subject)
+        schema_str = registered_schema.schema.schema_str
+
+        avro_deserializer = AvroDeserializer(schema_registry_client, schema_str)
+
+        consumer_config = {
+            "bootstrap.servers": self._bootstrap_servers,
+            "group.id": self._group_id,
+            "key.deserializer": StringDeserializer("utf_8"),
+            "value.deserializer": avro_deserializer,
+            **self._kwargs,
+        }
+        consumer = DeserializingConsumer(consumer_config)
+
+        topic_list = self._topic if isinstance(self._topic, list) else [self._topic]
+
+        if self._seek_to_end:
+
+            def seek_to_end_assign(c, partitions):
+                for p in partitions:
+                    high_offset = c.get_watermark_offsets(p)[1]
+                    p.offset = high_offset
+                c.assign(partitions)
+
+            consumer.subscribe(topic_list, on_assign=seek_to_end_assign)
+        else:
+            consumer.subscribe(topic_list)
+
+        return consumer
+
+    async def start(self) -> None:
+        """Start consuming messages in background."""
+        if self._running:
+            return
+
+        self._consumer = self._init_consumer()
+        self._queue = asyncio.Queue(maxsize=self._queue_size)
+        self._running = True
+        self._poll_task = asyncio.create_task(self._poll_loop())
+
+    async def _poll_loop(self) -> None:
+        """Background task for polling messages."""
+        loop = asyncio.get_running_loop()
+
+        while self._running:
+            # Run blocking poll in thread executor
+            msg = await loop.run_in_executor(None, self._consumer.poll, 0.1)
+
+            if msg is None:
+                continue
+
+            if msg.error():
+                logger.error(f"Consumer error: {msg.error()}")
+                continue
+
+            # Put message in queue (blocks if full)
+            await self._queue.put((msg.key(), msg.value(), msg))
+
+    async def poll(self, timeout: float = 1.0) -> tuple[str | None, dict | None, object] | None:
+        """
+        Poll for a single message.
+
+        Args:
+            timeout: Maximum time to wait in seconds
+
+        Returns:
+            Tuple of (key, value, raw_msg) or None if timeout
+        """
+        if not self._running:
+            await self.start()
+
+        try:
+            return await asyncio.wait_for(self._queue.get(), timeout=timeout)
+        except asyncio.TimeoutError:
+            return None
+
+    async def __aiter__(self) -> AsyncIterator[tuple[str | None, dict | None]]:
+        """
+        Async iterator for consuming messages.
+
+        Yields:
+            Tuple of (key, value) for each message
+        """
+        if not self._running:
+            await self.start()
+
+        while self._running:
+            try:
+                key, value, _ = await asyncio.wait_for(self._queue.get(), timeout=1.0)
+                yield key, value
+            except asyncio.TimeoutError:
+                continue
+
+    def commit(self) -> None:
+        """Commit current offsets."""
+        if self._consumer:
+            self._consumer.commit()
+
+    def commit_offsets(self, offsets: list[TopicPartition]) -> None:
+        """Commit specific offsets."""
+        if self._consumer:
+            self._consumer.commit(offsets=offsets)
+
+    async def close(self) -> None:
+        """Stop consuming and close the consumer."""
+        self._running = False
+
+        if self._poll_task:
+            self._poll_task.cancel()
+            try:
+                await self._poll_task
+            except asyncio.CancelledError:
+                pass
+            self._poll_task = None
+
+        if self._consumer:
+            self._consumer.close()
+            self._consumer = None
+
+    async def __aenter__(self):
+        await self.start()
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        await self.close()
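
Beyond the websocket loop shown in the docstring, the class also implements __aenter__/__aexit__, so startup and shutdown can be handled by an async context manager. A minimal sketch (topic, subject, and endpoint URLs are placeholders):

    import asyncio

    from investify_utils.kafka import AsyncAvroConsumer


    async def main() -> None:
        async with AsyncAvroConsumer(
            topic="my-topic",
            subject="my-topic-value",
            schema_registry_url="http://localhost:8081",
            bootstrap_servers="localhost:9092",
            group_id="my-consumer-group",
        ) as consumer:
            item = await consumer.poll(timeout=5.0)  # (key, value, raw_msg) or None
            if item is not None:
                key, value, raw_msg = item
                print(key, value)
                consumer.commit()  # commit only after successful processing


    asyncio.run(main())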
investify_utils/kafka/async_producer.py (new file)
@@ -0,0 +1,152 @@
+"""
+Asynchronous Avro producer using confluent-kafka with asyncio.
+
+Features:
+    - Non-blocking produce with async/await
+    - Background asyncio task for polling
+    - Suitable for async frameworks (LangGraph, FastAPI)
+
+Usage:
+    from investify_utils.kafka import AsyncAvroProducer
+
+    producer = AsyncAvroProducer(
+        topic="my-topic",
+        subject="my-topic-value",
+        schema_registry_url="http://localhost:8081",
+        bootstrap_servers="localhost:9092",
+    )
+
+    # In async context
+    await producer.produce(key="key1", value={"field": "value"})
+    producer.close()
+"""
+
+import asyncio
+import logging
+from collections.abc import Callable
+
+from confluent_kafka import KafkaException, Message, SerializingProducer
+from confluent_kafka.schema_registry import SchemaRegistryClient, record_subject_name_strategy
+from confluent_kafka.schema_registry.avro import AvroSerializer
+from confluent_kafka.serialization import StringSerializer
+
+logger = logging.getLogger(__name__)
+
+
+class AsyncAvroProducer:
+    """
+    Asynchronous Avro producer for async frameworks.
+
+    Args:
+        topic: Kafka topic name
+        subject: Schema Registry subject name
+        schema_registry_url: Schema Registry URL
+        bootstrap_servers: Kafka bootstrap servers
+        **kwargs: Additional Kafka producer config
+    """
+
+    def __init__(
+        self,
+        topic: str,
+        subject: str,
+        schema_registry_url: str,
+        bootstrap_servers: str,
+        **kwargs,
+    ):
+        self.topic = topic
+        self._schema_registry_url = schema_registry_url
+        self._bootstrap_servers = bootstrap_servers
+        self._subject = subject
+        self._kwargs = kwargs
+        self._producer: SerializingProducer | None = None
+        self._poll_task: asyncio.Task | None = None
+
+    @property
+    def producer(self) -> SerializingProducer:
+        """Lazy producer initialization."""
+        if self._producer is None:
+            schema_registry_client = SchemaRegistryClient({"url": self._schema_registry_url})
+            registered_schema = schema_registry_client.get_latest_version(self._subject)
+            schema_str = registered_schema.schema.schema_str
+
+            avro_serializer = AvroSerializer(
+                schema_registry_client,
+                schema_str,
+                conf={
+                    "auto.register.schemas": False,
+                    "subject.name.strategy": record_subject_name_strategy,
+                },
+            )
+
+            producer_config = {
+                "bootstrap.servers": self._bootstrap_servers,
+                "key.serializer": StringSerializer("utf_8"),
+                "value.serializer": avro_serializer,
+                **self._kwargs,
+            }
+            self._producer = SerializingProducer(producer_config)
+
+            # Start background polling task
+            self._poll_task = asyncio.create_task(self._poll_loop())
+
+        return self._producer
+
+    async def _poll_loop(self) -> None:
+        """Background task for polling delivery callbacks."""
+        while True:
+            self._producer.poll(0)  # non-blocking; serves queued delivery callbacks
+            await asyncio.sleep(0.1)
+
+    async def produce(
+        self,
+        value: dict,
+        key: str | None = None,
+        on_delivery: Callable | None = None,
+    ) -> Message:
+        """
+        Produce a message and wait for delivery confirmation.
+
+        Args:
+            value: Message value (dict matching Avro schema)
+            key: Optional message key
+            on_delivery: Optional callback(err, msg) for delivery confirmation
+
+        Returns:
+            The delivered message; raises KafkaException on delivery failure
+        """
+        loop = asyncio.get_running_loop()
+        result = loop.create_future()
+
+        def ack(err, msg):
+            if err:
+                loop.call_soon_threadsafe(result.set_exception, KafkaException(err))
+            else:
+                loop.call_soon_threadsafe(result.set_result, msg)
+            if on_delivery:
+                loop.call_soon_threadsafe(on_delivery, err, msg)
+
+        self.producer.produce(self.topic, key=key, value=value, on_delivery=ack)
+        return await result
+
+    def flush(self, timeout: float = 10.0) -> int:
+        """
+        Wait for all messages to be delivered.
+
+        Args:
+            timeout: Maximum time to wait in seconds
+
+        Returns:
+            Number of messages still in queue
+        """
+        if self._producer:
+            return self._producer.flush(timeout)
+        return 0
+
+    def close(self) -> None:
+        """Cancel polling task and flush pending messages."""
+        if self._poll_task:
+            self._poll_task.cancel()
+            self._poll_task = None
+        if self._producer:
+            self._producer.flush()
+            self._producer = None
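
Because produce() awaits the delivery report (the future is resolved inside the ack callback), a failed delivery surfaces as a KafkaException at the await site rather than being lost in a callback. A minimal sketch (endpoints are placeholders):

    import asyncio

    from confluent_kafka import KafkaException

    from investify_utils.kafka import AsyncAvroProducer


    async def main() -> None:
        producer = AsyncAvroProducer(
            topic="my-topic",
            subject="my-topic-value",
            schema_registry_url="http://localhost:8081",
            bootstrap_servers="localhost:9092",
        )
        try:
            msg = await producer.produce(value={"field": "value"}, key="key1")
            print(f"delivered to partition {msg.partition()} at offset {msg.offset()}")
        except KafkaException as exc:
            print(f"delivery failed: {exc}")
        finally:
            producer.close()


    asyncio.run(main())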
investify_utils/kafka/sync_consumer.py (new file)
@@ -0,0 +1,201 @@
+"""
+Synchronous Avro consumer using confluent-kafka.
+
+Features:
+    - Avro deserialization with Schema Registry
+    - Offset tracking for reliable commits
+    - Seek to end option for real-time streaming
+
+Usage:
+    from investify_utils.kafka import AvroConsumer
+
+    consumer = AvroConsumer(
+        topic="my-topic",
+        subject="my-topic-value",
+        schema_registry_url="http://localhost:8081",
+        bootstrap_servers="localhost:9092",
+        group_id="my-consumer-group",
+    )
+
+    try:
+        while True:
+            msg = consumer.poll(timeout=1.0)
+            if msg is None:
+                continue
+            print(msg.key(), msg.value())
+            consumer.commit()
+    finally:
+        consumer.close()
+"""
+
+import logging
+from collections import defaultdict
+
+from confluent_kafka import DeserializingConsumer, TopicPartition
+from confluent_kafka.schema_registry import SchemaRegistryClient
+from confluent_kafka.schema_registry.avro import AvroDeserializer
+from confluent_kafka.serialization import StringDeserializer
+
+logger = logging.getLogger(__name__)
+
+
+class OffsetTracker:
+    """
+    Track message offsets for reliable commits with out-of-order processing.
+
+    Only commits offsets where all previous offsets have been marked done,
+    preventing data loss when processing messages in parallel.
+    """
+
+    def __init__(self):
+        self._tracker = defaultdict(lambda: defaultdict(lambda: defaultdict(bool)))
+
+    def log(self, msg, done: bool = False) -> None:
+        """
+        Log a message offset.
+
+        Args:
+            msg: Kafka message
+            done: Whether processing is complete
+        """
+        topic = msg.topic()
+        partition = msg.partition()
+        offset = msg.offset()
+        self._tracker[topic][partition][offset] = done
+
+    def prepare_commit_offsets(self) -> list[TopicPartition]:
+        """
+        Get offsets safe to commit.
+
+        Returns:
+            List of TopicPartition with offsets to commit
+        """
+        new_tracker = defaultdict(lambda: defaultdict(lambda: defaultdict(bool)))
+        to_commit_offsets = []
+
+        for topic, partitions in self._tracker.items():
+            for partition, offsets in partitions.items():
+                sorted_offsets = sorted(offsets.items())
+                to_commit_offset = None
+                update_to_commit_offset = True
+
+                for offset, done in sorted_offsets:
+                    update_to_commit_offset = update_to_commit_offset and done
+                    if update_to_commit_offset:
+                        to_commit_offset = offset + 1
+                    else:
+                        new_tracker[topic][partition][offset] = done
+
+                if to_commit_offset is not None:
+                    to_commit_offsets.append(TopicPartition(topic, partition, to_commit_offset))
+
+        self._tracker = new_tracker
+        return to_commit_offsets
+
+
+class AvroConsumer:
+    """
+    Synchronous Avro consumer.
+
+    Args:
+        topic: Kafka topic name or list of topics
+        subject: Schema Registry subject name
+        schema_registry_url: Schema Registry URL
+        bootstrap_servers: Kafka bootstrap servers
+        group_id: Consumer group ID
+        seek_to_end: Start from latest offset (default: False)
+        **kwargs: Additional Kafka consumer config
+    """
+
+    def __init__(
+        self,
+        topic: str | list[str],
+        subject: str,
+        schema_registry_url: str,
+        bootstrap_servers: str,
+        group_id: str,
+        seek_to_end: bool = False,
+        **kwargs,
+    ):
+        self._schema_registry_url = schema_registry_url
+        self._bootstrap_servers = bootstrap_servers
+        self._subject = subject
+        self._group_id = group_id
+        self._topic = topic
+        self._seek_to_end = seek_to_end
+        self._kwargs = kwargs
+        self._consumer: DeserializingConsumer | None = None
+
+    @property
+    def consumer(self) -> DeserializingConsumer:
+        """Lazy consumer initialization."""
+        if self._consumer is None:
+            schema_registry_client = SchemaRegistryClient({"url": self._schema_registry_url})
+            registered_schema = schema_registry_client.get_latest_version(self._subject)
+            schema_str = registered_schema.schema.schema_str
+
+            avro_deserializer = AvroDeserializer(schema_registry_client, schema_str)
+
+            consumer_config = {
+                "bootstrap.servers": self._bootstrap_servers,
+                "group.id": self._group_id,
+                "key.deserializer": StringDeserializer("utf_8"),
+                "value.deserializer": avro_deserializer,
+                **self._kwargs,
+            }
+            self._consumer = DeserializingConsumer(consumer_config)
+
+            topic_list = self._topic if isinstance(self._topic, list) else [self._topic]
+
+            if self._seek_to_end:
+
+                def seek_to_end_assign(consumer, partitions):
+                    for p in partitions:
+                        high_offset = consumer.get_watermark_offsets(p)[1]
+                        p.offset = high_offset
+                    consumer.assign(partitions)
+
+                self._consumer.subscribe(topic_list, on_assign=seek_to_end_assign)
+            else:
+                self._consumer.subscribe(topic_list)
+
+        return self._consumer
+
+    def poll(self, timeout: float = 1.0):
+        """
+        Poll for a message.
+
+        Args:
+            timeout: Maximum time to wait in seconds
+
+        Returns:
+            Message object or None if no message available
+        """
+        msg = self.consumer.poll(timeout)
+        if msg is None:
+            return None
+
+        if msg.error():
+            logger.error(f"Consumer error: {msg.error()}")
+            return None
+
+        return msg
+
+    def commit(self) -> None:
+        """Commit current offsets."""
+        self.consumer.commit()
+
+    def commit_offsets(self, offsets: list[TopicPartition]) -> None:
+        """
+        Commit specific offsets.
+
+        Args:
+            offsets: List of TopicPartition with offsets to commit
+        """
+        self.consumer.commit(offsets=offsets)
+
+    def close(self) -> None:
+        """Close the consumer."""
+        if self._consumer:
+            self._consumer.close()
+            self._consumer = None
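
OffsetTracker targets the case where messages are processed out of order (for example, fanned out to a worker pool) but commits must stay contiguous: prepare_commit_offsets() only advances a partition past the prefix of offsets that are all marked done, so an unfinished message is never committed over. A hedged sketch of the intended flow (the process() handler is hypothetical; only the AvroConsumer/OffsetTracker calls come from this module):

    from investify_utils.kafka import AvroConsumer, OffsetTracker

    consumer = AvroConsumer(
        topic="my-topic",
        subject="my-topic-value",
        schema_registry_url="http://localhost:8081",
        bootstrap_servers="localhost:9092",
        group_id="my-consumer-group",
    )
    tracker = OffsetTracker()

    try:
        while True:
            msg = consumer.poll(timeout=1.0)
            if msg is not None:
                tracker.log(msg)               # mark in-flight
                process(msg.value())           # hypothetical handler
                tracker.log(msg, done=True)    # mark processed
            offsets = tracker.prepare_commit_offsets()
            if offsets:
                consumer.commit_offsets(offsets)  # commits only the contiguous done prefix
    finally:
        consumer.close()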
investify_utils/kafka/sync_producer.py (new file)
@@ -0,0 +1,141 @@
+"""
+Synchronous Avro producer using confluent-kafka.
+
+Features:
+    - Avro serialization with Schema Registry
+    - Background polling thread for delivery callbacks
+    - Thread-safe produce operations
+
+Usage:
+    from investify_utils.kafka import AvroProducer
+
+    producer = AvroProducer(
+        topic="my-topic",
+        subject="my-topic-value",
+        schema_registry_url="http://localhost:8081",
+        bootstrap_servers="localhost:9092",
+    )
+
+    producer.produce(key="key1", value={"field": "value"})
+    producer.flush()
+    producer.close()
+"""
+
+import logging
+import threading
+from collections.abc import Callable
+
+from confluent_kafka import SerializingProducer
+from confluent_kafka.schema_registry import SchemaRegistryClient, record_subject_name_strategy
+from confluent_kafka.schema_registry.avro import AvroSerializer
+from confluent_kafka.serialization import StringSerializer
+
+logger = logging.getLogger(__name__)
+
+
+class AvroProducer:
+    """
+    Synchronous Avro producer with background polling.
+
+    Args:
+        topic: Kafka topic name
+        subject: Schema Registry subject name (usually "{topic}-value")
+        schema_registry_url: Schema Registry URL
+        bootstrap_servers: Kafka bootstrap servers
+        **kwargs: Additional Kafka producer config
+    """
+
+    def __init__(
+        self,
+        topic: str,
+        subject: str,
+        schema_registry_url: str,
+        bootstrap_servers: str,
+        **kwargs,
+    ):
+        self.topic = topic
+        self._schema_registry_url = schema_registry_url
+        self._bootstrap_servers = bootstrap_servers
+        self._subject = subject
+        self._kwargs = kwargs
+        self._producer: SerializingProducer | None = None
+        self._shutdown_event: threading.Event | None = None
+        self._poll_thread: threading.Thread | None = None
+
+    @property
+    def producer(self) -> SerializingProducer:
+        """Lazy producer initialization."""
+        if self._producer is None:
+            schema_registry_client = SchemaRegistryClient({"url": self._schema_registry_url})
+            registered_schema = schema_registry_client.get_latest_version(self._subject)
+            schema_str = registered_schema.schema.schema_str
+
+            avro_serializer = AvroSerializer(
+                schema_registry_client,
+                schema_str,
+                conf={
+                    "auto.register.schemas": False,
+                    "subject.name.strategy": record_subject_name_strategy,
+                },
+            )
+
+            producer_config = {
+                "bootstrap.servers": self._bootstrap_servers,
+                "key.serializer": StringSerializer("utf_8"),
+                "value.serializer": avro_serializer,
+                **self._kwargs,
+            }
+            self._producer = SerializingProducer(producer_config)
+
+            # Start background polling thread
+            self._shutdown_event = threading.Event()
+            self._poll_thread = threading.Thread(target=self._poll_loop, daemon=True)
+            self._poll_thread.start()
+
+        return self._producer
+
+    def _poll_loop(self) -> None:
+        """Background thread for polling delivery callbacks."""
+        while not self._shutdown_event.is_set():
+            self._producer.poll(0.1)
+            self._shutdown_event.wait(0.1)
+
+    def produce(
+        self,
+        value: dict,
+        key: str | None = None,
+        on_delivery: Callable | None = None,
+    ) -> None:
+        """
+        Produce a message to Kafka.
+
+        Args:
+            value: Message value (dict matching Avro schema)
+            key: Optional message key
+            on_delivery: Optional callback(err, msg) for delivery confirmation
+        """
+        try:
+            self.producer.produce(self.topic, key=key, value=value, on_delivery=on_delivery)
+        except Exception as e:
+            logger.error(f"Failed to produce message: {e!r}")
+            raise
+
+    def flush(self, timeout: float = 10.0) -> int:
+        """
+        Wait for all messages to be delivered.
+
+        Args:
+            timeout: Maximum time to wait in seconds
+
+        Returns:
+            Number of messages still in queue (0 if all delivered)
+        """
+        return self.producer.flush(timeout)
+
+    def close(self) -> None:
+        """Flush pending messages and stop background polling."""
+        if self._shutdown_event:
+            self._shutdown_event.set()
+        if self._producer:
+            self._producer.flush()
+            self._producer = None
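
Delivery reports are served by the background poll thread, so an on_delivery callback fires without the caller ever invoking poll() directly. A minimal sketch (URLs and topic are placeholders):

    import logging

    from investify_utils.kafka import AvroProducer

    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger("producer-demo")


    def on_delivery(err, msg):
        # Invoked from the poll thread once the broker acks (or the send fails).
        if err is not None:
            log.error("delivery failed: %s", err)
        else:
            log.info("delivered to %s [%d] @ %d", msg.topic(), msg.partition(), msg.offset())


    producer = AvroProducer(
        topic="my-topic",
        subject="my-topic-value",
        schema_registry_url="http://localhost:8081",
        bootstrap_servers="localhost:9092",
    )
    producer.produce(value={"field": "value"}, key="key1", on_delivery=on_delivery)
    producer.flush()  # block until outstanding messages are acknowledged
    producer.close()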
pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
 
 [project]
 name = "investify-utils"
-version = "2.0.0a4"
+version = "2.0.0a5"
 description = "Shared utilities for Investify services"
 readme = "README.md"
 requires-python = ">=3.12"
@@ -40,6 +40,9 @@ postgres-async = [
     "sqlalchemy>=2.0",
     "asyncpg>=0.29",
 ]
+kafka = [
+    "confluent-kafka>=2.0",
+]
 s3 = [
     "boto3>=1.34",
 ]