investify-utils 2.0.0a10__tar.gz → 2.0.0a12__tar.gz

This diff shows the contents of the two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.

Potentially problematic release: this version of investify-utils might be problematic.
Files changed (18)
  1. {investify_utils-2.0.0a10 → investify_utils-2.0.0a12}/PKG-INFO +3 -1
  2. {investify_utils-2.0.0a10 → investify_utils-2.0.0a12}/investify_utils/__init__.py +4 -3
  3. {investify_utils-2.0.0a10 → investify_utils-2.0.0a12}/investify_utils/kafka/__init__.py +15 -2
  4. investify_utils-2.0.0a12/investify_utils/kafka/async_consumer.py +210 -0
  5. investify_utils-2.0.0a12/investify_utils/kafka/async_producer.py +152 -0
  6. investify_utils-2.0.0a12/investify_utils/s3/__init__.py +25 -0
  7. investify_utils-2.0.0a12/investify_utils/s3/async_client.py +228 -0
  8. {investify_utils-2.0.0a10 → investify_utils-2.0.0a12}/investify_utils/s3/sync_client.py +7 -6
  9. {investify_utils-2.0.0a10 → investify_utils-2.0.0a12}/pyproject.toml +4 -1
  10. investify_utils-2.0.0a10/investify_utils/s3/__init__.py +0 -18
  11. {investify_utils-2.0.0a10 → investify_utils-2.0.0a12}/README.md +0 -0
  12. {investify_utils-2.0.0a10 → investify_utils-2.0.0a12}/investify_utils/helpers.py +0 -0
  13. {investify_utils-2.0.0a10 → investify_utils-2.0.0a12}/investify_utils/kafka/sync_consumer.py +0 -0
  14. {investify_utils-2.0.0a10 → investify_utils-2.0.0a12}/investify_utils/kafka/sync_producer.py +0 -0
  15. {investify_utils-2.0.0a10 → investify_utils-2.0.0a12}/investify_utils/logging.py +0 -0
  16. {investify_utils-2.0.0a10 → investify_utils-2.0.0a12}/investify_utils/postgres/__init__.py +0 -0
  17. {investify_utils-2.0.0a10 → investify_utils-2.0.0a12}/investify_utils/postgres/async_client.py +0 -0
  18. {investify_utils-2.0.0a10 → investify_utils-2.0.0a12}/investify_utils/postgres/sync_client.py +0 -0
{investify_utils-2.0.0a10 → investify_utils-2.0.0a12}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: investify-utils
- Version: 2.0.0a10
+ Version: 2.0.0a12
  Summary: Shared utilities for Investify services
  Author-Email: Investify <dev@investify.vn>
  License: MIT
@@ -24,6 +24,8 @@ Provides-Extra: kafka
  Requires-Dist: confluent-kafka[avro,schemaregistry]>=2.0; extra == "kafka"
  Provides-Extra: s3
  Requires-Dist: boto3>=1.34; extra == "s3"
+ Provides-Extra: s3-async
+ Requires-Dist: aioboto3>=15.0; extra == "s3-async"
  Provides-Extra: helpers
  Requires-Dist: pandas>=2.0; extra == "helpers"
  Requires-Dist: numpy>=2.0; extra == "helpers"
{investify_utils-2.0.0a10 → investify_utils-2.0.0a12}/investify_utils/__init__.py

@@ -5,7 +5,8 @@ Install with optional dependencies:
  pip install investify-utils[postgres] # Sync PostgreSQL client
  pip install investify-utils[postgres-async] # Async PostgreSQL client
  pip install investify-utils[kafka] # Kafka Avro producer/consumer
- pip install investify-utils[s3] # S3 client
+ pip install investify-utils[s3] # Sync S3 client
+ pip install investify-utils[s3-async] # Async S3 client
  pip install investify-utils[helpers] # Timestamp/SQL utilities

  Usage:
@@ -19,10 +20,10 @@ Usage:
  from investify_utils.kafka import AvroProducer, AvroConsumer

  # S3
- from investify_utils.s3 import S3Client
+ from investify_utils.s3 import S3Client, AsyncS3Client

  # Helpers
  from investify_utils.helpers import convert_to_pd_timestamp, create_sql_in_filter
  """

- __version__ = "2.0.0a8"
+ __version__ = "2.0.0a12"
{investify_utils-2.0.0a10 → investify_utils-2.0.0a12}/investify_utils/kafka/__init__.py

@@ -1,8 +1,11 @@
  """
- Kafka Avro producer and consumer clients (sync only).
+ Kafka Avro producer and consumer clients.

- Usage:
+ Sync (for Celery workers, scripts):
  from investify_utils.kafka import AvroProducer, AvroConsumer
+
+ Async (for LangGraph, FastAPI):
+ from investify_utils.kafka import AsyncAvroProducer, AsyncAvroConsumer
  """


@@ -20,6 +23,14 @@ def __getattr__(name: str):
  from investify_utils.kafka.sync_consumer import OffsetTracker

  return OffsetTracker
+ if name == "AsyncAvroProducer":
+ from investify_utils.kafka.async_producer import AsyncAvroProducer
+
+ return AsyncAvroProducer
+ if name == "AsyncAvroConsumer":
+ from investify_utils.kafka.async_consumer import AsyncAvroConsumer
+
+ return AsyncAvroConsumer
  raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


@@ -27,4 +38,6 @@ __all__ = [
  "AvroProducer",
  "AvroConsumer",
  "OffsetTracker",
+ "AsyncAvroProducer",
+ "AsyncAvroConsumer",
  ]
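The `__getattr__` hook extended in this hunk is the module-level lazy-import mechanism from PEP 562; `investify_utils/s3/__init__.py` (further down in this diff) uses the same pattern. A minimal standalone sketch with hypothetical module and class names, to show how the deferral works:

```python
# lazy_pkg/__init__.py -- hypothetical package illustrating the PEP 562 pattern
# used by investify_utils.kafka and investify_utils.s3: the optional dependency
# is imported only when the attribute is first accessed.


def __getattr__(name: str):
    if name == "HeavyClient":
        # Deferred import: `import lazy_pkg` stays cheap, and the optional
        # third-party dependency is only loaded if HeavyClient is requested.
        from lazy_pkg.heavy import HeavyClient

        return HeavyClient
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


__all__ = ["HeavyClient"]
```

With this in place, `from lazy_pkg import HeavyClient` behaves like a normal import, while a plain `import lazy_pkg` never touches the heavy dependency.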
investify_utils-2.0.0a12/investify_utils/kafka/async_consumer.py

@@ -0,0 +1,210 @@
+ """
+ Asynchronous Avro consumer using confluent-kafka with asyncio.
+
+ Features:
+ - Non-blocking poll with async/await
+ - Suitable for FastAPI websocket streaming
+ - Background message fetching with queue
+
+ Usage:
+ from investify_utils.kafka import AsyncAvroConsumer
+
+ consumer = AsyncAvroConsumer(
+ topic="my-topic",
+ subject="my-topic-value",
+ schema_registry_url="http://localhost:8081",
+ bootstrap_servers="localhost:9092",
+ group_id="my-consumer-group",
+ )
+
+ # In FastAPI websocket
+ @app.websocket("/ws")
+ async def websocket_endpoint(websocket: WebSocket):
+ await websocket.accept()
+ async for key, value in consumer:
+ await websocket.send_json(value)
+ """
+
+ import asyncio
+ import logging
+ from typing import AsyncIterator
+
+ from confluent_kafka import DeserializingConsumer, TopicPartition
+ from confluent_kafka.schema_registry import SchemaRegistryClient
+ from confluent_kafka.schema_registry.avro import AvroDeserializer
+ from confluent_kafka.serialization import StringDeserializer
+
+ logger = logging.getLogger(__name__)
+
+
+ class AsyncAvroConsumer:
+ """
+ Asynchronous Avro consumer for async frameworks.
+
+ Uses a background thread for polling (confluent-kafka is not async-native)
+ and an asyncio queue to bridge to async code.
+
+ Args:
+ topic: Kafka topic name or list of topics
+ subject: Schema Registry subject name
+ schema_registry_url: Schema Registry URL
+ bootstrap_servers: Kafka bootstrap servers
+ group_id: Consumer group ID
+ seek_to_end: Start from latest offset (default: False)
+ queue_size: Max messages to buffer (default: 100)
+ **kwargs: Additional Kafka consumer config
+ """
+
+ def __init__(
+ self,
+ topic: str | list[str],
+ subject: str,
+ schema_registry_url: str,
+ bootstrap_servers: str,
+ group_id: str,
+ seek_to_end: bool = False,
+ queue_size: int = 100,
+ **kwargs,
+ ):
+ self._schema_registry_url = schema_registry_url
+ self._bootstrap_servers = bootstrap_servers
+ self._subject = subject
+ self._group_id = group_id
+ self._topic = topic
+ self._seek_to_end = seek_to_end
+ self._queue_size = queue_size
+ self._kwargs = kwargs
+
+ self._consumer: DeserializingConsumer | None = None
+ self._queue: asyncio.Queue | None = None
+ self._poll_task: asyncio.Task | None = None
+ self._running = False
+
+ def _init_consumer(self) -> DeserializingConsumer:
+ """Initialize the consumer."""
+ schema_registry_client = SchemaRegistryClient({"url": self._schema_registry_url})
+ registered_schema = schema_registry_client.get_latest_version(self._subject)
+ schema_str = registered_schema.schema.schema_str
+
+ avro_deserializer = AvroDeserializer(schema_registry_client, schema_str)
+
+ consumer_config = {
+ "bootstrap.servers": self._bootstrap_servers,
+ "group.id": self._group_id,
+ "key.deserializer": StringDeserializer("utf_8"),
+ "value.deserializer": avro_deserializer,
+ **self._kwargs,
+ }
+ consumer = DeserializingConsumer(consumer_config)
+
+ topic_list = self._topic if isinstance(self._topic, list) else [self._topic]
+
+ if self._seek_to_end:
+
+ def seek_to_end_assign(c, partitions):
+ for p in partitions:
+ high_offset = c.get_watermark_offsets(p)[1]
+ p.offset = high_offset
+ c.assign(partitions)
+
+ consumer.subscribe(topic_list, on_assign=seek_to_end_assign)
+ else:
+ consumer.subscribe(topic_list)
+
+ return consumer
+
+ async def start(self) -> None:
+ """Start consuming messages in background."""
+ if self._running:
+ return
+
+ self._consumer = self._init_consumer()
+ self._queue = asyncio.Queue(maxsize=self._queue_size)
+ self._running = True
+ self._poll_task = asyncio.create_task(self._poll_loop())
+
+ async def _poll_loop(self) -> None:
+ """Background task for polling messages."""
+ loop = asyncio.get_running_loop()
+
+ while self._running:
+ # Run blocking poll in thread executor
+ msg = await loop.run_in_executor(None, self._consumer.poll, 0.1)
+
+ if msg is None:
+ continue
+
+ if msg.error():
+ logger.error(f"Consumer error: {msg.error()}")
+ continue
+
+ # Put message in queue (blocks if full)
+ await self._queue.put((msg.key(), msg.value(), msg))
+
+ async def poll(self, timeout: float = 1.0) -> tuple[str | None, dict | None, object] | None:
+ """
+ Poll for a single message.
+
+ Args:
+ timeout: Maximum time to wait in seconds
+
+ Returns:
+ Tuple of (key, value, raw_msg) or None if timeout
+ """
+ if not self._running:
+ await self.start()
+
+ try:
+ return await asyncio.wait_for(self._queue.get(), timeout=timeout)
+ except asyncio.TimeoutError:
+ return None
+
+ async def __aiter__(self) -> AsyncIterator[tuple[str | None, dict | None]]:
+ """
+ Async iterator for consuming messages.
+
+ Yields:
+ Tuple of (key, value) for each message
+ """
+ if not self._running:
+ await self.start()
+
+ while self._running:
+ try:
+ key, value, _ = await asyncio.wait_for(self._queue.get(), timeout=1.0)
+ yield key, value
+ except asyncio.TimeoutError:
+ continue
+
+ def commit(self) -> None:
+ """Commit current offsets."""
+ if self._consumer:
+ self._consumer.commit()
+
+ def commit_offsets(self, offsets: list[TopicPartition]) -> None:
+ """Commit specific offsets."""
+ if self._consumer:
+ self._consumer.commit(offsets=offsets)
+
+ async def close(self) -> None:
+ """Stop consuming and close the consumer."""
+ self._running = False
+
+ if self._poll_task:
+ self._poll_task.cancel()
+ try:
+ await self._poll_task
+ except asyncio.CancelledError:
+ pass
+ self._poll_task = None
+
+ if self._consumer:
+ self._consumer.close()
+ self._consumer = None
+
+ async def __aenter__(self):
+ await self.start()
+ return self
+
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
+ await self.close()
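For orientation, a minimal driver for the class added above, using only the methods visible in this hunk (`start`/`close` via the async context manager, `poll`, `commit`). Broker, registry, topic, and group names are placeholders, and the `kafka` extra (confluent-kafka) is assumed to be installed:

```python
# consume_example.py -- sketch of a plain-asyncio loop over AsyncAvroConsumer.
import asyncio

from investify_utils.kafka import AsyncAvroConsumer


async def main() -> None:
    consumer = AsyncAvroConsumer(
        topic="my-topic",                             # placeholder values
        subject="my-topic-value",
        schema_registry_url="http://localhost:8081",
        bootstrap_servers="localhost:9092",
        group_id="my-consumer-group",
    )
    # __aenter__/__aexit__ call start() and close() around the block.
    async with consumer:
        while True:
            item = await consumer.poll(timeout=1.0)   # (key, value, raw_msg) or None
            if item is None:
                continue
            key, value, _raw = item
            print(key, value)
            consumer.commit()


if __name__ == "__main__":
    asyncio.run(main())
```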
investify_utils-2.0.0a12/investify_utils/kafka/async_producer.py

@@ -0,0 +1,152 @@
+ """
+ Asynchronous Avro producer using confluent-kafka with asyncio.
+
+ Features:
+ - Non-blocking produce with async/await
+ - Background asyncio task for polling
+ - Suitable for async frameworks (LangGraph, FastAPI)
+
+ Usage:
+ from investify_utils.kafka import AsyncAvroProducer
+
+ producer = AsyncAvroProducer(
+ topic="my-topic",
+ subject="my-topic-value",
+ schema_registry_url="http://localhost:8081",
+ bootstrap_servers="localhost:9092",
+ )
+
+ # In async context
+ await producer.produce(key="key1", value={"field": "value"})
+ producer.close()
+ """
+
+ import asyncio
+ import logging
+ from typing import Callable
+
+ from confluent_kafka import KafkaException, SerializingProducer
+ from confluent_kafka.schema_registry import SchemaRegistryClient, record_subject_name_strategy
+ from confluent_kafka.schema_registry.avro import AvroSerializer
+ from confluent_kafka.serialization import StringSerializer
+
+ logger = logging.getLogger(__name__)
+
+
+ class AsyncAvroProducer:
+ """
+ Asynchronous Avro producer for async frameworks.
+
+ Args:
+ topic: Kafka topic name
+ subject: Schema Registry subject name
+ schema_registry_url: Schema Registry URL
+ bootstrap_servers: Kafka bootstrap servers
+ **kwargs: Additional Kafka producer config
+ """
+
+ def __init__(
+ self,
+ topic: str,
+ subject: str,
+ schema_registry_url: str,
+ bootstrap_servers: str,
+ **kwargs,
+ ):
+ self.topic = topic
+ self._schema_registry_url = schema_registry_url
+ self._bootstrap_servers = bootstrap_servers
+ self._subject = subject
+ self._kwargs = kwargs
+ self._producer: SerializingProducer | None = None
+ self._poll_task: asyncio.Task | None = None
+
+ @property
+ def producer(self) -> SerializingProducer:
+ """Lazy producer initialization."""
+ if self._producer is None:
+ schema_registry_client = SchemaRegistryClient({"url": self._schema_registry_url})
+ registered_schema = schema_registry_client.get_latest_version(self._subject)
+ schema_str = registered_schema.schema.schema_str
+
+ avro_serializer = AvroSerializer(
+ schema_registry_client,
+ schema_str,
+ conf={
+ "auto.register.schemas": False,
+ "subject.name.strategy": record_subject_name_strategy,
+ },
+ )
+
+ producer_config = {
+ "bootstrap.servers": self._bootstrap_servers,
+ "key.serializer": StringSerializer("utf_8"),
+ "value.serializer": avro_serializer,
+ **self._kwargs,
+ }
+ self._producer = SerializingProducer(producer_config)
+
+ # Start background polling task
+ self._poll_task = asyncio.create_task(self._poll_loop())
+
+ return self._producer
+
+ async def _poll_loop(self) -> None:
+ """Background task for polling delivery callbacks."""
+ while True:
+ self._producer.poll(0.1)
+ await asyncio.sleep(0.1)
+
+ async def produce(
+ self,
+ value: dict,
+ key: str | None = None,
+ on_delivery: Callable | None = None,
+ ) -> asyncio.Future:
+ """
+ Produce a message asynchronously.
+
+ Args:
+ value: Message value (dict matching Avro schema)
+ key: Optional message key
+ on_delivery: Optional callback(err, msg) for delivery confirmation
+
+ Returns:
+ Future that resolves to the delivered message
+ """
+ loop = asyncio.get_running_loop()
+ result = loop.create_future()
+
+ def ack(err, msg):
+ if err:
+ loop.call_soon_threadsafe(result.set_exception, KafkaException(err))
+ else:
+ loop.call_soon_threadsafe(result.set_result, msg)
+ if on_delivery:
+ loop.call_soon_threadsafe(on_delivery, err, msg)
+
+ self.producer.produce(self.topic, key=key, value=value, on_delivery=ack)
+ return await result
+
+ def flush(self, timeout: float = 10.0) -> int:
+ """
+ Wait for all messages to be delivered.
+
+ Args:
+ timeout: Maximum time to wait in seconds
+
+ Returns:
+ Number of messages still in queue
+ """
+ if self._producer:
+ return self._producer.flush(timeout)
+ return 0
+
+ def close(self) -> None:
+ """Cancel polling task and flush pending messages."""
+ if self._poll_task:
+ self._poll_task.cancel()
+ self._poll_task = None
+ if self._producer:
+ self._producer.flush()
+ self._producer = None
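Similarly, a short sketch of awaiting a delivery report from the producer added above: `produce()` resolves its future from the delivery callback, so the caller receives the acknowledged message or a `KafkaException`. Connection details and the payload are placeholders, and the value dict must match the registered Avro schema:

```python
# produce_example.py -- sketch of producing one Avro message and awaiting delivery.
import asyncio

from investify_utils.kafka import AsyncAvroProducer


async def main() -> None:
    producer = AsyncAvroProducer(
        topic="my-topic",                             # placeholder values
        subject="my-topic-value",
        schema_registry_url="http://localhost:8081",
        bootstrap_servers="localhost:9092",
    )
    try:
        # Resolves once the broker acknowledges delivery (or raises on failure).
        msg = await producer.produce(key="key1", value={"field": "value"})
        print("delivered to", msg.topic(), "partition", msg.partition())
    finally:
        producer.close()  # cancels the background poll task and flushes
```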
investify_utils-2.0.0a12/investify_utils/s3/__init__.py

@@ -0,0 +1,25 @@
+ """
+ S3-compatible object storage client.
+
+ Sync client (boto3):
+ from investify_utils.s3 import S3Client
+
+ Async client (aioboto3):
+ from investify_utils.s3 import AsyncS3Client
+ """
+
+
+ def __getattr__(name: str):
+ """Lazy import to avoid loading boto3/aioboto3 if not needed."""
+ if name == "S3Client":
+ from investify_utils.s3.sync_client import S3Client
+
+ return S3Client
+ if name == "AsyncS3Client":
+ from investify_utils.s3.async_client import AsyncS3Client
+
+ return AsyncS3Client
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+
+ __all__ = ["S3Client", "AsyncS3Client"]
investify_utils-2.0.0a12/investify_utils/s3/async_client.py

@@ -0,0 +1,228 @@
+ """
+ Async S3-compatible object storage client using aioboto3.
+
+ Features:
+ - Works with AWS S3, Ceph RGW, MinIO, and other S3-compatible services
+ - Lazy session initialization
+ - Same API as sync S3Client, all methods async
+
+ Usage:
+ from investify_utils.s3 import AsyncS3Client
+
+ client = AsyncS3Client(
+ endpoint_url="https://s3.example.com",
+ access_key="access_key",
+ secret_key="secret_key",
+ )
+
+ # Get object as bytes
+ data = await client.get_object("my-bucket", "remote.pdf")
+
+ # Put object from bytes/string
+ await client.put_object("my-bucket", "file.txt", b"content", content_type="text/plain")
+ """
+
+ import os
+ from typing import IO
+
+ import aioboto3
+ from botocore.config import Config
+ from botocore.exceptions import ClientError
+
+
+ class AsyncS3Client:
+ """
+ Async S3-compatible object storage client.
+
+ Args:
+ endpoint_url: S3 endpoint URL (e.g., https://s3.amazonaws.com)
+ access_key: AWS access key ID
+ secret_key: AWS secret access key
+ region: AWS region (default: None)
+ """
+
+ def __init__(
+ self,
+ endpoint_url: str,
+ access_key: str,
+ secret_key: str,
+ region: str | None = None,
+ ):
+ self._endpoint_url = endpoint_url
+ self._access_key = access_key
+ self._secret_key = secret_key
+ self._region = region
+ self._session = None
+
+ @property
+ def session(self) -> aioboto3.Session:
+ """Lazy session initialization."""
+ if self._session is None:
+ self._session = aioboto3.Session(
+ aws_access_key_id=self._access_key,
+ aws_secret_access_key=self._secret_key,
+ region_name=self._region,
+ )
+ return self._session
+
+ def _client_ctx(self):
+ """Create an async context manager for the S3 client."""
+ return self.session.client(
+ "s3",
+ endpoint_url=self._endpoint_url,
+ config=Config(signature_version="s3v4"),
+ )
+
+ async def list_buckets(self) -> list[str]:
+ """List all buckets."""
+ async with self._client_ctx() as client:
+ response = await client.list_buckets()
+ return [bucket["Name"] for bucket in response["Buckets"]]
+
+ async def list_objects(
+ self,
+ bucket: str,
+ prefix: str = "",
+ max_keys: int | None = None,
+ ) -> list[dict]:
+ """
+ List objects in a bucket with optional prefix filter.
+
+ Args:
+ bucket: Bucket name
+ prefix: Filter objects by prefix (e.g., "folder/")
+ max_keys: Maximum number of objects to return (None = all)
+
+ Returns:
+ List of object metadata dicts with keys: Key, Size, LastModified
+ """
+ objects = []
+ async with self._client_ctx() as client:
+ paginator = client.get_paginator("list_objects_v2")
+ async for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
+ for obj in page.get("Contents", []):
+ objects.append(
+ {
+ "Key": obj["Key"],
+ "Size": obj["Size"],
+ "LastModified": obj["LastModified"],
+ }
+ )
+ if max_keys and len(objects) >= max_keys:
+ return objects
+ return objects
+
+ async def upload_file(self, file_path: str, bucket: str, key: str | None = None) -> None:
+ """
+ Upload a local file to S3.
+
+ Args:
+ file_path: Local file path
+ bucket: Bucket name
+ key: Object key (default: basename of file_path)
+ """
+ if key is None:
+ key = os.path.basename(file_path)
+ async with self._client_ctx() as client:
+ await client.upload_file(file_path, bucket, key)
+
+ async def download_file(self, bucket: str, key: str, file_path: str) -> None:
+ """
+ Download an object to a local file.
+
+ Args:
+ bucket: Bucket name
+ key: Object key
+ file_path: Local file path to save to
+ """
+ async with self._client_ctx() as client:
+ await client.download_file(bucket, key, file_path)
+
+ async def get_object(self, bucket: str, key: str) -> bytes:
+ """
+ Get object content as bytes.
+
+ Args:
+ bucket: Bucket name
+ key: Object key
+
+ Returns:
+ Object content as bytes
+ """
+ async with self._client_ctx() as client:
+ response = await client.get_object(Bucket=bucket, Key=key)
+ return await response["Body"].read()
+
+ async def put_object(
+ self,
+ bucket: str,
+ key: str,
+ data: str | bytes | IO[bytes],
+ content_type: str | None = None,
+ content_disposition: str | None = None,
+ ) -> None:
+ """
+ Upload data directly to S3.
+
+ Args:
+ bucket: Bucket name
+ key: Object key
+ data: Content as string, bytes, or file-like object
+ content_type: MIME type (e.g., "application/pdf")
+ content_disposition: Content-Disposition header value
+ """
+ params: dict = {"Bucket": bucket, "Key": key, "Body": data}
+ if content_type:
+ params["ContentType"] = content_type
+ if content_disposition:
+ params["ContentDisposition"] = content_disposition
+ async with self._client_ctx() as client:
+ await client.put_object(**params)
+
+ async def delete_object(self, bucket: str, key: str) -> None:
+ """Delete a single object."""
+ async with self._client_ctx() as client:
+ await client.delete_object(Bucket=bucket, Key=key)
+
+ async def delete_prefix(self, bucket: str, prefix: str) -> int:
+ """
+ Delete all objects with a given prefix.
+
+ Args:
+ bucket: Bucket name
+ prefix: Prefix to match (e.g., "folder/" deletes all in folder)
+
+ Returns:
+ Number of objects deleted
+ """
+ deleted_count = 0
+ async with self._client_ctx() as client:
+ paginator = client.get_paginator("list_objects_v2")
+ async for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
+ contents = page.get("Contents", [])
+ if not contents:
+ continue
+ delete_keys = [{"Key": obj["Key"]} for obj in contents]
+ await client.delete_objects(Bucket=bucket, Delete={"Objects": delete_keys})
+ deleted_count += len(delete_keys)
+ return deleted_count
+
+ async def exists(self, bucket: str, key: str) -> bool:
+ """
+ Check if an object exists.
+
+ Args:
+ bucket: Bucket name
+ key: Object key
+
+ Returns:
+ True if object exists, False otherwise
+ """
+ try:
+ async with self._client_ctx() as client:
+ await client.head_object(Bucket=bucket, Key=key)
+ return True
+ except ClientError as e:
+ if e.response["Error"]["Code"] == "404":
+ return False
+ raise
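And a round-trip sketch for the async S3 client added above, exercising `put_object`, `exists`, `get_object`, and `delete_prefix`. Endpoint, credentials, and bucket/key names are placeholders; the `s3-async` extra (aioboto3) is assumed to be installed:

```python
# s3_async_example.py -- sketch of a put/get/delete round trip with AsyncS3Client.
import asyncio

from investify_utils.s3 import AsyncS3Client


async def main() -> None:
    client = AsyncS3Client(
        endpoint_url="https://s3.example.com",        # placeholder values
        access_key="access_key",
        secret_key="secret_key",
    )

    await client.put_object(
        "my-bucket", "reports/hello.txt", b"content", content_type="text/plain"
    )
    if await client.exists("my-bucket", "reports/hello.txt"):
        data = await client.get_object("my-bucket", "reports/hello.txt")
        print(len(data), "bytes")

    # delete_prefix returns the number of objects removed under the prefix.
    deleted = await client.delete_prefix("my-bucket", "reports/")
    print("deleted", deleted)


if __name__ == "__main__":
    asyncio.run(main())
```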
{investify_utils-2.0.0a10 → investify_utils-2.0.0a12}/investify_utils/s3/sync_client.py

@@ -103,11 +103,13 @@ class S3Client:

  for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
  for obj in page.get("Contents", []):
- objects.append({
- "Key": obj["Key"],
- "Size": obj["Size"],
- "LastModified": obj["LastModified"],
- })
+ objects.append(
+ {
+ "Key": obj["Key"],
+ "Size": obj["Size"],
+ "LastModified": obj["LastModified"],
+ }
+ )
  if max_keys and len(objects) >= max_keys:
  return objects

@@ -223,4 +225,3 @@ class S3Client:
  if e.response["Error"]["Code"] == "404":
  return False
  raise
-
{investify_utils-2.0.0a10 → investify_utils-2.0.0a12}/pyproject.toml

@@ -6,7 +6,7 @@ build-backend = "pdm.backend"

  [project]
  name = "investify-utils"
- version = "2.0.0a10"
+ version = "2.0.0a12"
  description = "Shared utilities for Investify services"
  readme = "README.md"
  requires-python = ">=3.12"
@@ -46,6 +46,9 @@ kafka = [
  s3 = [
  "boto3>=1.34",
  ]
+ s3-async = [
+ "aioboto3>=15.0",
+ ]
  helpers = [
  "pandas>=2.0",
  "numpy>=2.0",
investify_utils-2.0.0a10/investify_utils/s3/__init__.py

@@ -1,18 +0,0 @@
- """
- S3-compatible object storage client.
-
- Usage:
- from investify_utils.s3 import S3Client
- """
-
-
- def __getattr__(name: str):
- """Lazy import to avoid loading boto3 if not needed."""
- if name == "S3Client":
- from investify_utils.s3.sync_client import S3Client
-
- return S3Client
- raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
-
-
- __all__ = ["S3Client"]