investify-utils 2.0.0a8__py3-none-any.whl → 2.0.0a9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of investify-utils might be problematic. Click here for more details.
- investify_utils/kafka/__init__.py +2 -7
- {investify_utils-2.0.0a8.dist-info → investify_utils-2.0.0a9.dist-info}/METADATA +1 -1
- {investify_utils-2.0.0a8.dist-info → investify_utils-2.0.0a9.dist-info}/RECORD +5 -6
- investify_utils/kafka/async_consumer.py +0 -210
- {investify_utils-2.0.0a8.dist-info → investify_utils-2.0.0a9.dist-info}/WHEEL +0 -0
- {investify_utils-2.0.0a8.dist-info → investify_utils-2.0.0a9.dist-info}/entry_points.txt +0 -0
|
@@ -4,8 +4,8 @@ Kafka Avro producer and consumer clients.
|
|
|
4
4
|
Sync (for Celery workers, scripts):
|
|
5
5
|
from investify_utils.kafka import AvroProducer, AvroConsumer
|
|
6
6
|
|
|
7
|
-
Async (for LangGraph, FastAPI):
|
|
8
|
-
from investify_utils.kafka import AsyncAvroProducer
|
|
7
|
+
Async producer (for LangGraph, FastAPI):
|
|
8
|
+
from investify_utils.kafka import AsyncAvroProducer
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
11
|
|
|
@@ -27,10 +27,6 @@ def __getattr__(name: str):
|
|
|
27
27
|
from investify_utils.kafka.async_producer import AsyncAvroProducer
|
|
28
28
|
|
|
29
29
|
return AsyncAvroProducer
|
|
30
|
-
if name == "AsyncAvroConsumer":
|
|
31
|
-
from investify_utils.kafka.async_consumer import AsyncAvroConsumer
|
|
32
|
-
|
|
33
|
-
return AsyncAvroConsumer
|
|
34
30
|
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
|
35
31
|
|
|
36
32
|
|
|
@@ -39,5 +35,4 @@ __all__ = [
|
|
|
39
35
|
"AvroConsumer",
|
|
40
36
|
"OffsetTracker",
|
|
41
37
|
"AsyncAvroProducer",
|
|
42
|
-
"AsyncAvroConsumer",
|
|
43
38
|
]
|
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
investify_utils-2.0.
|
|
2
|
-
investify_utils-2.0.
|
|
3
|
-
investify_utils-2.0.
|
|
1
|
+
investify_utils-2.0.0a9.dist-info/METADATA,sha256=aJ5o01E-gKq6EmJwHkyDzCjowlBU4gmUN3hxlPWxK5k,3675
|
|
2
|
+
investify_utils-2.0.0a9.dist-info/WHEEL,sha256=tsUv_t7BDeJeRHaSrczbGeuK-TtDpGsWi_JfpzD255I,90
|
|
3
|
+
investify_utils-2.0.0a9.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
|
|
4
4
|
investify_utils/__init__.py,sha256=sES5ddhmaCRi79i6oax6MhB7H5nJg5QjuG6VV1VI2mc,918
|
|
5
5
|
investify_utils/helpers.py,sha256=1l7nv-P8m-vHQGhjTAJMi-pkvQb8OPzGDIn1KQ499dE,4246
|
|
6
|
-
investify_utils/kafka/__init__.py,sha256=
|
|
7
|
-
investify_utils/kafka/async_consumer.py,sha256=yivm8dgCACfFFbG9uxA7fsoRrWfCEtPHCyZi2qkD0Qk,6631
|
|
6
|
+
investify_utils/kafka/__init__.py,sha256=WR4SMKiPCjj1sO9sfSTWEXp_6ltUVJ3-YDF_RmZH2bw,1062
|
|
8
7
|
investify_utils/kafka/async_producer.py,sha256=HtQ5SaL5ShJf7RetO5zItfg9EBFcPj1y18i_V0p-vFg,4752
|
|
9
8
|
investify_utils/kafka/sync_consumer.py,sha256=NJu9tQ5MrqL7-0Cvtt9Gmq-Qro_O4VVFP85qIKMf_ZM,6305
|
|
10
9
|
investify_utils/kafka/sync_producer.py,sha256=9EyhKZNCgmBgZw50gwSfSRUTnfPnCq520Mh9MHnGVlI,4498
|
|
@@ -14,4 +13,4 @@ investify_utils/postgres/async_client.py,sha256=M3F7-AsBJ43WWhfknnvTK9BeiYAyO0R6
|
|
|
14
13
|
investify_utils/postgres/sync_client.py,sha256=1mozgrNGUUKCR2ETAr9G9dzvW8uG_TmSqcbA63tRpM8,6507
|
|
15
14
|
investify_utils/s3/__init__.py,sha256=0YX-efJTP38Q5XMCyr7u-fXMjCJXkAR7dG817quTns8,399
|
|
16
15
|
investify_utils/s3/sync_client.py,sha256=fj6ejhAu06BUBRe2pnceKaNGhbPM79Xf47geL0DB-i0,6771
|
|
17
|
-
investify_utils-2.0.
|
|
16
|
+
investify_utils-2.0.0a9.dist-info/RECORD,,
|
|
@@ -1,210 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Asynchronous Avro consumer using confluent-kafka with asyncio.
|
|
3
|
-
|
|
4
|
-
Features:
|
|
5
|
-
- Non-blocking poll with async/await
|
|
6
|
-
- Suitable for FastAPI websocket streaming
|
|
7
|
-
- Background message fetching with queue
|
|
8
|
-
|
|
9
|
-
Usage:
|
|
10
|
-
from investify_utils.kafka import AsyncAvroConsumer
|
|
11
|
-
|
|
12
|
-
consumer = AsyncAvroConsumer(
|
|
13
|
-
topic="my-topic",
|
|
14
|
-
subject="my-topic-value",
|
|
15
|
-
schema_registry_url="http://localhost:8081",
|
|
16
|
-
bootstrap_servers="localhost:9092",
|
|
17
|
-
group_id="my-consumer-group",
|
|
18
|
-
)
|
|
19
|
-
|
|
20
|
-
# In FastAPI websocket
|
|
21
|
-
@app.websocket("/ws")
|
|
22
|
-
async def websocket_endpoint(websocket: WebSocket):
|
|
23
|
-
await websocket.accept()
|
|
24
|
-
async for key, value in consumer:
|
|
25
|
-
await websocket.send_json(value)
|
|
26
|
-
"""
|
|
27
|
-
|
|
28
|
-
import asyncio
|
|
29
|
-
import logging
|
|
30
|
-
from typing import AsyncIterator
|
|
31
|
-
|
|
32
|
-
from confluent_kafka import DeserializingConsumer, TopicPartition
|
|
33
|
-
from confluent_kafka.schema_registry import SchemaRegistryClient
|
|
34
|
-
from confluent_kafka.schema_registry.avro import AvroDeserializer
|
|
35
|
-
from confluent_kafka.serialization import StringDeserializer
|
|
36
|
-
|
|
37
|
-
logger = logging.getLogger(__name__)
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
class AsyncAvroConsumer:
|
|
41
|
-
"""
|
|
42
|
-
Asynchronous Avro consumer for async frameworks.
|
|
43
|
-
|
|
44
|
-
Uses a background thread for polling (confluent-kafka is not async-native)
|
|
45
|
-
and an asyncio queue to bridge to async code.
|
|
46
|
-
|
|
47
|
-
Args:
|
|
48
|
-
topic: Kafka topic name or list of topics
|
|
49
|
-
subject: Schema Registry subject name
|
|
50
|
-
schema_registry_url: Schema Registry URL
|
|
51
|
-
bootstrap_servers: Kafka bootstrap servers
|
|
52
|
-
group_id: Consumer group ID
|
|
53
|
-
seek_to_end: Start from latest offset (default: False)
|
|
54
|
-
queue_size: Max messages to buffer (default: 100)
|
|
55
|
-
**kwargs: Additional Kafka consumer config
|
|
56
|
-
"""
|
|
57
|
-
|
|
58
|
-
def __init__(
|
|
59
|
-
self,
|
|
60
|
-
topic: str | list[str],
|
|
61
|
-
subject: str,
|
|
62
|
-
schema_registry_url: str,
|
|
63
|
-
bootstrap_servers: str,
|
|
64
|
-
group_id: str,
|
|
65
|
-
seek_to_end: bool = False,
|
|
66
|
-
queue_size: int = 100,
|
|
67
|
-
**kwargs,
|
|
68
|
-
):
|
|
69
|
-
self._schema_registry_url = schema_registry_url
|
|
70
|
-
self._bootstrap_servers = bootstrap_servers
|
|
71
|
-
self._subject = subject
|
|
72
|
-
self._group_id = group_id
|
|
73
|
-
self._topic = topic
|
|
74
|
-
self._seek_to_end = seek_to_end
|
|
75
|
-
self._queue_size = queue_size
|
|
76
|
-
self._kwargs = kwargs
|
|
77
|
-
|
|
78
|
-
self._consumer: DeserializingConsumer | None = None
|
|
79
|
-
self._queue: asyncio.Queue | None = None
|
|
80
|
-
self._poll_task: asyncio.Task | None = None
|
|
81
|
-
self._running = False
|
|
82
|
-
|
|
83
|
-
def _init_consumer(self) -> DeserializingConsumer:
|
|
84
|
-
"""Initialize the consumer."""
|
|
85
|
-
schema_registry_client = SchemaRegistryClient({"url": self._schema_registry_url})
|
|
86
|
-
registered_schema = schema_registry_client.get_latest_version(self._subject)
|
|
87
|
-
schema_str = registered_schema.schema.schema_str
|
|
88
|
-
|
|
89
|
-
avro_deserializer = AvroDeserializer(schema_registry_client, schema_str)
|
|
90
|
-
|
|
91
|
-
consumer_config = {
|
|
92
|
-
"bootstrap.servers": self._bootstrap_servers,
|
|
93
|
-
"group.id": self._group_id,
|
|
94
|
-
"key.deserializer": StringDeserializer("utf_8"),
|
|
95
|
-
"value.deserializer": avro_deserializer,
|
|
96
|
-
**self._kwargs,
|
|
97
|
-
}
|
|
98
|
-
consumer = DeserializingConsumer(consumer_config)
|
|
99
|
-
|
|
100
|
-
topic_list = self._topic if isinstance(self._topic, list) else [self._topic]
|
|
101
|
-
|
|
102
|
-
if self._seek_to_end:
|
|
103
|
-
|
|
104
|
-
def seek_to_end_assign(c, partitions):
|
|
105
|
-
for p in partitions:
|
|
106
|
-
high_offset = c.get_watermark_offsets(p)[1]
|
|
107
|
-
p.offset = high_offset
|
|
108
|
-
c.assign(partitions)
|
|
109
|
-
|
|
110
|
-
consumer.subscribe(topic_list, on_assign=seek_to_end_assign)
|
|
111
|
-
else:
|
|
112
|
-
consumer.subscribe(topic_list)
|
|
113
|
-
|
|
114
|
-
return consumer
|
|
115
|
-
|
|
116
|
-
async def start(self) -> None:
|
|
117
|
-
"""Start consuming messages in background."""
|
|
118
|
-
if self._running:
|
|
119
|
-
return
|
|
120
|
-
|
|
121
|
-
self._consumer = self._init_consumer()
|
|
122
|
-
self._queue = asyncio.Queue(maxsize=self._queue_size)
|
|
123
|
-
self._running = True
|
|
124
|
-
self._poll_task = asyncio.create_task(self._poll_loop())
|
|
125
|
-
|
|
126
|
-
async def _poll_loop(self) -> None:
|
|
127
|
-
"""Background task for polling messages."""
|
|
128
|
-
loop = asyncio.get_running_loop()
|
|
129
|
-
|
|
130
|
-
while self._running:
|
|
131
|
-
# Run blocking poll in thread executor
|
|
132
|
-
msg = await loop.run_in_executor(None, self._consumer.poll, 0.1)
|
|
133
|
-
|
|
134
|
-
if msg is None:
|
|
135
|
-
continue
|
|
136
|
-
|
|
137
|
-
if msg.error():
|
|
138
|
-
logger.error(f"Consumer error: {msg.error()}")
|
|
139
|
-
continue
|
|
140
|
-
|
|
141
|
-
# Put message in queue (blocks if full)
|
|
142
|
-
await self._queue.put((msg.key(), msg.value(), msg))
|
|
143
|
-
|
|
144
|
-
async def poll(self, timeout: float = 1.0) -> tuple[str | None, dict | None, object] | None:
|
|
145
|
-
"""
|
|
146
|
-
Poll for a single message.
|
|
147
|
-
|
|
148
|
-
Args:
|
|
149
|
-
timeout: Maximum time to wait in seconds
|
|
150
|
-
|
|
151
|
-
Returns:
|
|
152
|
-
Tuple of (key, value, raw_msg) or None if timeout
|
|
153
|
-
"""
|
|
154
|
-
if not self._running:
|
|
155
|
-
await self.start()
|
|
156
|
-
|
|
157
|
-
try:
|
|
158
|
-
return await asyncio.wait_for(self._queue.get(), timeout=timeout)
|
|
159
|
-
except asyncio.TimeoutError:
|
|
160
|
-
return None
|
|
161
|
-
|
|
162
|
-
async def __aiter__(self) -> AsyncIterator[tuple[str | None, dict | None]]:
|
|
163
|
-
"""
|
|
164
|
-
Async iterator for consuming messages.
|
|
165
|
-
|
|
166
|
-
Yields:
|
|
167
|
-
Tuple of (key, value) for each message
|
|
168
|
-
"""
|
|
169
|
-
if not self._running:
|
|
170
|
-
await self.start()
|
|
171
|
-
|
|
172
|
-
while self._running:
|
|
173
|
-
try:
|
|
174
|
-
key, value, _ = await asyncio.wait_for(self._queue.get(), timeout=1.0)
|
|
175
|
-
yield key, value
|
|
176
|
-
except asyncio.TimeoutError:
|
|
177
|
-
continue
|
|
178
|
-
|
|
179
|
-
def commit(self) -> None:
|
|
180
|
-
"""Commit current offsets."""
|
|
181
|
-
if self._consumer:
|
|
182
|
-
self._consumer.commit()
|
|
183
|
-
|
|
184
|
-
def commit_offsets(self, offsets: list[TopicPartition]) -> None:
|
|
185
|
-
"""Commit specific offsets."""
|
|
186
|
-
if self._consumer:
|
|
187
|
-
self._consumer.commit(offsets=offsets)
|
|
188
|
-
|
|
189
|
-
async def close(self) -> None:
|
|
190
|
-
"""Stop consuming and close the consumer."""
|
|
191
|
-
self._running = False
|
|
192
|
-
|
|
193
|
-
if self._poll_task:
|
|
194
|
-
self._poll_task.cancel()
|
|
195
|
-
try:
|
|
196
|
-
await self._poll_task
|
|
197
|
-
except asyncio.CancelledError:
|
|
198
|
-
pass
|
|
199
|
-
self._poll_task = None
|
|
200
|
-
|
|
201
|
-
if self._consumer:
|
|
202
|
-
self._consumer.close()
|
|
203
|
-
self._consumer = None
|
|
204
|
-
|
|
205
|
-
async def __aenter__(self):
|
|
206
|
-
await self.start()
|
|
207
|
-
return self
|
|
208
|
-
|
|
209
|
-
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
|
210
|
-
await self.close()
|
|
File without changes
|
|
File without changes
|