cledar-sdk 2.0.2__py3-none-any.whl → 2.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cledar/__init__.py +0 -0
- cledar/kafka/README.md +239 -0
- cledar/kafka/__init__.py +40 -0
- cledar/kafka/clients/base.py +98 -0
- cledar/kafka/clients/consumer.py +110 -0
- cledar/kafka/clients/producer.py +80 -0
- cledar/kafka/config/schemas.py +178 -0
- cledar/kafka/exceptions.py +22 -0
- cledar/kafka/handlers/dead_letter.py +82 -0
- cledar/kafka/handlers/parser.py +49 -0
- cledar/kafka/logger.py +3 -0
- cledar/kafka/models/input.py +13 -0
- cledar/kafka/models/message.py +10 -0
- cledar/kafka/models/output.py +8 -0
- cledar/kafka/tests/.env.test.kafka +3 -0
- cledar/kafka/tests/README.md +216 -0
- cledar/kafka/tests/conftest.py +104 -0
- cledar/kafka/tests/integration/__init__.py +1 -0
- cledar/kafka/tests/integration/conftest.py +78 -0
- cledar/kafka/tests/integration/helpers.py +47 -0
- cledar/kafka/tests/integration/test_consumer_integration.py +375 -0
- cledar/kafka/tests/integration/test_integration.py +394 -0
- cledar/kafka/tests/integration/test_producer_consumer_interaction.py +388 -0
- cledar/kafka/tests/integration/test_producer_integration.py +217 -0
- cledar/kafka/tests/unit/__init__.py +1 -0
- cledar/kafka/tests/unit/test_base_kafka_client.py +391 -0
- cledar/kafka/tests/unit/test_config_validation.py +609 -0
- cledar/kafka/tests/unit/test_dead_letter_handler.py +443 -0
- cledar/kafka/tests/unit/test_error_handling.py +674 -0
- cledar/kafka/tests/unit/test_input_parser.py +310 -0
- cledar/kafka/tests/unit/test_input_parser_comprehensive.py +489 -0
- cledar/kafka/tests/unit/test_utils.py +25 -0
- cledar/kafka/tests/unit/test_utils_comprehensive.py +408 -0
- cledar/kafka/utils/callbacks.py +19 -0
- cledar/kafka/utils/messages.py +28 -0
- cledar/kafka/utils/topics.py +2 -0
- cledar/kserve/README.md +352 -0
- cledar/kserve/__init__.py +3 -0
- cledar/kserve/tests/__init__.py +0 -0
- cledar/kserve/tests/test_utils.py +64 -0
- cledar/kserve/utils.py +27 -0
- cledar/logging/README.md +53 -0
- cledar/logging/__init__.py +3 -0
- cledar/logging/tests/test_universal_plaintext_formatter.py +249 -0
- cledar/logging/universal_plaintext_formatter.py +94 -0
- cledar/monitoring/README.md +71 -0
- cledar/monitoring/__init__.py +3 -0
- cledar/monitoring/monitoring_server.py +112 -0
- cledar/monitoring/tests/integration/test_monitoring_server_int.py +162 -0
- cledar/monitoring/tests/test_monitoring_server.py +59 -0
- cledar/nonce/README.md +99 -0
- cledar/nonce/__init__.py +3 -0
- cledar/nonce/nonce_service.py +36 -0
- cledar/nonce/tests/__init__.py +0 -0
- cledar/nonce/tests/test_nonce_service.py +136 -0
- cledar/redis/README.md +536 -0
- cledar/redis/__init__.py +15 -0
- cledar/redis/async_example.py +111 -0
- cledar/redis/example.py +37 -0
- cledar/redis/exceptions.py +22 -0
- cledar/redis/logger.py +3 -0
- cledar/redis/model.py +10 -0
- cledar/redis/redis.py +525 -0
- cledar/redis/redis_config_store.py +252 -0
- cledar/redis/tests/test_async_integration_redis.py +158 -0
- cledar/redis/tests/test_async_redis_service.py +380 -0
- cledar/redis/tests/test_integration_redis.py +119 -0
- cledar/redis/tests/test_redis_service.py +319 -0
- cledar/storage/README.md +529 -0
- cledar/storage/__init__.py +4 -0
- cledar/storage/constants.py +3 -0
- cledar/storage/exceptions.py +50 -0
- cledar/storage/models.py +19 -0
- cledar/storage/object_storage.py +955 -0
- cledar/storage/tests/conftest.py +18 -0
- cledar/storage/tests/test_abfs.py +164 -0
- cledar/storage/tests/test_integration_filesystem.py +359 -0
- cledar/storage/tests/test_integration_s3.py +453 -0
- cledar/storage/tests/test_local.py +384 -0
- cledar/storage/tests/test_s3.py +521 -0
- {cledar_sdk-2.0.2.dist-info → cledar_sdk-2.0.3.dist-info}/METADATA +1 -1
- cledar_sdk-2.0.3.dist-info/RECORD +84 -0
- cledar_sdk-2.0.2.dist-info/RECORD +0 -4
- {cledar_sdk-2.0.2.dist-info → cledar_sdk-2.0.3.dist-info}/WHEEL +0 -0
- {cledar_sdk-2.0.2.dist-info → cledar_sdk-2.0.3.dist-info}/licenses/LICENSE +0 -0
cledar/storage/README.md
ADDED
|
@@ -0,0 +1,529 @@
|
|
|
1
|
+
# Storage Service
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
|
|
5
|
+
The `cledar.storage` package provides a unified interface for interacting with S3-compatible object storage (like AWS S3, MinIO), Azure Blob Storage via ABFS/ABFSS (adlfs), and local filesystem storage. It abstracts away the complexity of managing files across different storage backends, providing a consistent API for common operations.
|
|
6
|
+
|
|
7
|
+
### Key Features
|
|
8
|
+
|
|
9
|
+
- **Unified API**: Single interface for S3, Azure ABFS, and local filesystem operations
|
|
10
|
+
- **S3 Compatible**: Works with AWS S3, MinIO, and other S3-compatible storage systems
|
|
11
|
+
- **Azure ABFS Support**: Works with Azure Blob Storage using `abfs://` or `abfss://` URIs (via `adlfs`)
|
|
12
|
+
- **Comprehensive Operations**: Upload, download, list, copy, move, and delete files
|
|
13
|
+
- **Metadata Support**: Get file size, info, and check existence
|
|
14
|
+
- **Buffer Support**: Upload/download directly from/to memory buffers
|
|
15
|
+
- **Retry Logic**: Built-in retry mechanisms for network operations
|
|
16
|
+
- **Type Safety**: Fully typed with Python type hints
|
|
17
|
+
- **Well Tested**: Extensive unit tests and integration tests
|
|
18
|
+
|
|
19
|
+
### Use Cases
|
|
20
|
+
|
|
21
|
+
- Storing and retrieving application data from S3
|
|
22
|
+
- Managing media files (images, videos, audio)
|
|
23
|
+
- Handling temporary file storage
|
|
24
|
+
- Cross-platform file operations (local and cloud)
|
|
25
|
+
- Backup and archival systems
|
|
26
|
+
|
|
27
|
+
## Installation
|
|
28
|
+
|
|
29
|
+
This package is part of the Cledar SDK. Install it using:
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
# Install with uv (recommended)
|
|
33
|
+
uv sync --all-groups
|
|
34
|
+
|
|
35
|
+
# Or with pip
|
|
36
|
+
pip install -e .
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Usage Example
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
from cledar.storage import ObjectStorageService, ObjectStorageServiceConfig
|
|
43
|
+
import io
|
|
44
|
+
|
|
45
|
+
# Configure the service
|
|
46
|
+
config = ObjectStorageServiceConfig(
|
|
47
|
+
s3_endpoint_url="https://s3.amazonaws.com",
|
|
48
|
+
s3_access_key="your-access-key",
|
|
49
|
+
s3_secret_key="your-secret-key",
|
|
50
|
+
s3_max_concurrency=10,
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
# Create service instance
|
|
54
|
+
service = ObjectStorageService(config)
|
|
55
|
+
|
|
56
|
+
# Upload a file from buffer
|
|
57
|
+
buffer = io.BytesIO(b"Hello, World!")
|
|
58
|
+
service.upload_buffer(buffer=buffer, bucket="my-bucket", key="hello.txt")
|
|
59
|
+
|
|
60
|
+
# Read a file
|
|
61
|
+
content = service.read_file(bucket="my-bucket", key="hello.txt")
|
|
62
|
+
print(content) # b"Hello, World!"
|
|
63
|
+
|
|
64
|
+
# List objects
|
|
65
|
+
files = service.list_objects(bucket="my-bucket", prefix="folder/", recursive=True)
|
|
66
|
+
print(files) # ['folder/file1.txt', 'folder/file2.txt', ...]
|
|
67
|
+
|
|
68
|
+
# Check if file exists
|
|
69
|
+
exists = service.file_exists(bucket="my-bucket", key="hello.txt")
|
|
70
|
+
print(exists) # True
|
|
71
|
+
|
|
72
|
+
# Get file metadata
|
|
73
|
+
size = service.get_file_size(bucket="my-bucket", key="hello.txt")
|
|
74
|
+
info = service.get_file_info(bucket="my-bucket", key="hello.txt")
|
|
75
|
+
|
|
76
|
+
# Copy file
|
|
77
|
+
service.copy_file(
|
|
78
|
+
source_bucket="my-bucket",
|
|
79
|
+
source_key="hello.txt",
|
|
80
|
+
dest_bucket="my-bucket",
|
|
81
|
+
dest_key="hello-copy.txt"
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
# Delete file
|
|
85
|
+
service.delete_file(bucket="my-bucket", key="hello.txt")
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Development
|
|
89
|
+
|
|
90
|
+
### Project Structure
|
|
91
|
+
|
|
92
|
+
```
|
|
93
|
+
cledar/storage/
|
|
94
|
+
├── __init__.py # Package initialization
|
|
95
|
+
├── exceptions.py # Custom exceptions
|
|
96
|
+
├── object_storage.py # Main service implementation
|
|
97
|
+
├── tests/
|
|
98
|
+
│ ├── conftest.py # Pytest fixtures
|
|
99
|
+
│ ├── test_s3.py # Unit tests for S3 operations
|
|
100
|
+
│ ├── test_local.py # Unit tests for local operations
|
|
101
|
+
│ └── test_integration_s3.py # Integration tests with real MinIO
|
|
102
|
+
└── README.md # This file
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Running Linters
|
|
106
|
+
|
|
107
|
+
The project is configured for multiple linters (see `pyproject.toml` for configuration).
|
|
108
|
+
|
|
109
|
+
### Available Linter Configurations
|
|
110
|
+
|
|
111
|
+
The project includes configurations for:
|
|
112
|
+
- **Pylint**: Python code analysis (`[tool.pylint]` tables in `pyproject.toml`)
|
|
113
|
+
- **Mypy**: Static type checking (`[tool.mypy]` table in `pyproject.toml`)
|
|
114
|
+
- **Black**: Code formatting (`[tool.black]` table in `pyproject.toml`)
|
|
115
|
+
|
|
116
|
+
### Installing Linters
|
|
117
|
+
|
|
118
|
+
Linters are not included in the dev dependencies by default. Install them separately:
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
# Install all linters
|
|
122
|
+
pip install pylint mypy black
|
|
123
|
+
|
|
124
|
+
# Or with uv
|
|
125
|
+
uv pip install pylint mypy black
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Running Linters
|
|
129
|
+
|
|
130
|
+
Once installed, run them from the SDK root directory:
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
# From the SDK root directory
|
|
134
|
+
cd /path/to/cledar-python-sdk
|
|
135
|
+
|
|
136
|
+
# Run pylint on storage
|
|
137
|
+
pylint cledar/storage/
|
|
138
|
+
|
|
139
|
+
# Run mypy type checking (strict mode configured)
|
|
140
|
+
mypy cledar/storage/
|
|
141
|
+
|
|
142
|
+
# Check code formatting with black
|
|
143
|
+
black --check cledar/storage/
|
|
144
|
+
|
|
145
|
+
# Auto-format code
|
|
146
|
+
black cledar/storage/
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Run All Linters
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
# Run all linters in sequence
|
|
153
|
+
pylint cledar/storage/ && \
|
|
154
|
+
mypy cledar/storage/ && \
|
|
155
|
+
black --check cledar/storage/
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### IDE Integration
|
|
159
|
+
|
|
160
|
+
Most IDEs support these linters natively:
|
|
161
|
+
- **VSCode**: Install Python extension, linters auto-detected via `pyproject.toml`
|
|
162
|
+
- **PyCharm**: Enable in Settings → Tools → Python Integrated Tools
|
|
163
|
+
- **Cursor**: Same as VSCode
|
|
164
|
+
|
|
165
|
+
## Running Unit Tests
|
|
166
|
+
|
|
167
|
+
Unit tests use mocks to test the code in isolation without requiring external dependencies.
|
|
168
|
+
|
|
169
|
+
### Run All Unit Tests
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
# From the SDK root directory
|
|
173
|
+
cd /path/to/cledar-python-sdk
|
|
174
|
+
|
|
175
|
+
# Set PYTHONPATH and run unit tests
|
|
176
|
+
PYTHONPATH=$PWD uv run pytest cledar/storage/tests/test_s3.py cledar/storage/tests/test_local.py -v
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
### Run Specific Test File
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
# Run S3 unit tests
|
|
183
|
+
PYTHONPATH=$PWD uv run pytest cledar/storage/tests/test_s3.py -v
|
|
184
|
+
|
|
185
|
+
# Run local filesystem unit tests
|
|
186
|
+
PYTHONPATH=$PWD uv run pytest cledar/storage/tests/test_local.py -v
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
### Run Specific Test
|
|
190
|
+
|
|
191
|
+
```bash
|
|
192
|
+
# Run a specific test by name
|
|
193
|
+
PYTHONPATH=$PWD uv run pytest cledar/storage/tests/test_s3.py::test_upload_file_filesystem_with_bucket_key_should_use_s3 -v
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### Run with Coverage
|
|
197
|
+
|
|
198
|
+
```bash
|
|
199
|
+
# Generate coverage report
|
|
200
|
+
PYTHONPATH=$PWD uv run pytest cledar/storage/tests/test_s3.py cledar/storage/tests/test_local.py \
|
|
201
|
+
--cov=cledar.storage \
|
|
202
|
+
--cov-report=html \
|
|
203
|
+
--cov-report=term
|
|
204
|
+
|
|
205
|
+
# View HTML report
|
|
206
|
+
open htmlcov/index.html
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
### Unit Test Details
|
|
210
|
+
|
|
211
|
+
- **Test Framework**: pytest
|
|
212
|
+
- **Mocking**: unittest.mock
|
|
213
|
+
- **Fixtures**: Defined in `conftest.py`
|
|
214
|
+
- **Test Count**: 54 unit tests
|
|
215
|
+
- **Execution Time**: ~0.1 seconds (fast, no external dependencies)
|
|
216
|
+
|
|
217
|
+
#### What Unit Tests Cover:
|
|
218
|
+
|
|
219
|
+
- ✅ S3 operations (upload, download, list, delete, copy, move)
|
|
220
|
+
- ✅ Local filesystem operations
|
|
221
|
+
- ✅ Error handling and exceptions
|
|
222
|
+
- ✅ Parameter validation
|
|
223
|
+
- ✅ Retry mechanisms
|
|
224
|
+
- ✅ Buffer operations
|
|
225
|
+
- ✅ Metadata operations
|
|
226
|
+
|
|
227
|
+
## Running Integration Tests
|
|
228
|
+
|
|
229
|
+
Integration tests use [testcontainers](https://testcontainers-python.readthedocs.io/) to spin up a real MinIO container and test against actual S3-compatible storage.
|
|
230
|
+
|
|
231
|
+
### Prerequisites
|
|
232
|
+
|
|
233
|
+
**Required**:
|
|
234
|
+
- Docker installed and running on your machine
|
|
235
|
+
- Network access to pull Docker images
|
|
236
|
+
|
|
237
|
+
**Optional**:
|
|
238
|
+
- Docker Desktop (macOS/Windows) or Docker Engine (Linux)
|
|
239
|
+
|
|
240
|
+
### Run All Integration Tests
|
|
241
|
+
|
|
242
|
+
```bash
|
|
243
|
+
# From the SDK root directory
|
|
244
|
+
cd /path/to/cledar-python-sdk
|
|
245
|
+
|
|
246
|
+
# Set PYTHONPATH and run integration tests
|
|
247
|
+
PYTHONPATH=$PWD uv run pytest cledar/storage/tests/test_integration_s3.py -v
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
### Run Specific Integration Test Class
|
|
251
|
+
|
|
252
|
+
```bash
|
|
253
|
+
# Run only basic operations tests
|
|
254
|
+
PYTHONPATH=$PWD uv run pytest cledar/storage/tests/test_integration_s3.py::TestIntegrationBasicOperations -v
|
|
255
|
+
|
|
256
|
+
# Run only file operations tests
|
|
257
|
+
PYTHONPATH=$PWD uv run pytest cledar/storage/tests/test_integration_s3.py::TestIntegrationFileOperations -v
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
### Run with Detailed Output
|
|
261
|
+
|
|
262
|
+
```bash
|
|
263
|
+
# Show container startup logs
|
|
264
|
+
PYTHONPATH=$PWD uv run pytest cledar/storage/tests/test_integration_s3.py -v -s --log-cli-level=INFO
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
### Integration Test Details
|
|
268
|
+
|
|
269
|
+
- **Test Framework**: pytest + testcontainers
|
|
270
|
+
- **Container**: MinIO (S3-compatible storage)
|
|
271
|
+
- **Image**: `minio/minio:RELEASE.2022-12-02T19-19-22Z`
|
|
272
|
+
- **Test Count**: 21 integration tests
|
|
273
|
+
- **Execution Time**: ~3 seconds (includes container startup)
|
|
274
|
+
|
|
275
|
+
#### How Integration Tests Work
|
|
276
|
+
|
|
277
|
+
1. **Container Startup** (Module Scope):
|
|
278
|
+
```python
|
|
279
|
+
@pytest.fixture(scope="module")
|
|
280
|
+
def minio_container():
|
|
281
|
+
"""Start a MinIO container for testing."""
|
|
282
|
+
with MinioContainer(
|
|
283
|
+
access_key="minioadmin",
|
|
284
|
+
secret_key="minioadmin",
|
|
285
|
+
) as minio:
|
|
286
|
+
yield minio
|
|
287
|
+
```
|
|
288
|
+
- Container starts once for all tests in the module
|
|
289
|
+
- Automatically pulls MinIO Docker image if not present
|
|
290
|
+
- Exposes MinIO on a random available port
|
|
291
|
+
|
|
292
|
+
2. **Service Configuration** (Module Scope):
|
|
293
|
+
```python
|
|
294
|
+
@pytest.fixture(scope="module")
|
|
295
|
+
def object_storage_service(minio_container):
|
|
296
|
+
"""Create an ObjectStorageService connected to MinIO."""
|
|
297
|
+
host = minio_container.get_container_host_ip()
|
|
298
|
+
port = minio_container.get_exposed_port(minio_container.port)
|
|
299
|
+
endpoint_url = f"http://{host}:{port}"
|
|
300
|
+
|
|
301
|
+
config = ObjectStorageServiceConfig(
|
|
302
|
+
s3_endpoint_url=endpoint_url,
|
|
303
|
+
s3_access_key=minio_container.access_key,
|
|
304
|
+
s3_secret_key=minio_container.secret_key,
|
|
305
|
+
s3_max_concurrency=10,
|
|
306
|
+
)
|
|
307
|
+
return ObjectStorageService(config)
|
|
308
|
+
```
|
|
309
|
+
- Creates service instance connected to MinIO container
|
|
310
|
+
- Shared across all tests in the module
|
|
311
|
+
|
|
312
|
+
3. **Test Isolation** (Function Scope):
|
|
313
|
+
```python
|
|
314
|
+
@pytest.fixture
|
|
315
|
+
def test_bucket(object_storage_service):
|
|
316
|
+
"""Create a unique test bucket for each test."""
|
|
317
|
+
bucket_name = f"test-bucket-{fake.uuid4()}"
|
|
318
|
+
object_storage_service.client.mkdir(f"s3://{bucket_name}")
|
|
319
|
+
yield bucket_name
|
|
320
|
+
# Cleanup: delete all objects and bucket
|
|
321
|
+
try:
|
|
322
|
+
objects = object_storage_service.list_objects(
|
|
323
|
+
bucket=bucket_name, recursive=True
|
|
324
|
+
)
|
|
325
|
+
for obj in objects:
|
|
326
|
+
object_storage_service.delete_file(bucket=bucket_name, key=obj)
|
|
327
|
+
object_storage_service.client.rmdir(f"s3://{bucket_name}")
|
|
328
|
+
except Exception:
|
|
329
|
+
pass
|
|
330
|
+
```
|
|
331
|
+
- Each test gets a unique bucket (UUID-based name)
|
|
332
|
+
- Bucket and all its contents are cleaned up after each test
|
|
333
|
+
- Ensures complete test isolation
|
|
334
|
+
|
|
335
|
+
4. **Automatic Cleanup**:
|
|
336
|
+
- testcontainers automatically stops and removes containers after tests
|
|
337
|
+
- No manual cleanup required
|
|
338
|
+
- Containers are cleaned up even if tests fail
|
|
339
|
+
|
|
340
|
+
#### What Integration Tests Cover:
|
|
341
|
+
|
|
342
|
+
- ✅ **Basic Operations** (4 tests): Connection health, bucket existence, error handling
|
|
343
|
+
- ✅ **Buffer Operations** (2 tests): Upload/read buffers, parameter validation
|
|
344
|
+
- ✅ **File Operations** (2 tests): Upload/download files, retry mechanisms
|
|
345
|
+
- ✅ **List Operations** (3 tests): Recursive/non-recursive listing, prefix filtering
|
|
346
|
+
- ✅ **File Management** (4 tests): Existence checks, deletion, metadata retrieval
|
|
347
|
+
- ✅ **Copy/Move Operations** (2 tests): File copying and moving
|
|
348
|
+
- ✅ **Error Handling** (3 tests): Nonexistent file operations, invalid parameters
|
|
349
|
+
- ✅ **Large Files** (1 test): 10MB file upload/download
|
|
350
|
+
|
|
351
|
+
### Run All Tests (Unit + Integration)
|
|
352
|
+
|
|
353
|
+
```bash
|
|
354
|
+
# Run everything
|
|
355
|
+
PYTHONPATH=$PWD uv run pytest cledar/storage/tests/ -v
|
|
356
|
+
|
|
357
|
+
# Run with coverage
|
|
358
|
+
PYTHONPATH=$PWD uv run pytest cledar/storage/tests/ \
|
|
359
|
+
--cov=cledar.storage \
|
|
360
|
+
--cov-report=html \
|
|
361
|
+
--cov-report=term \
|
|
362
|
+
-v
|
|
363
|
+
```
|
|
364
|
+
|
|
365
|
+
**Total Test Count**: 75 tests (54 unit + 21 integration)
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
#### Slow First Run
|
|
369
|
+
First execution is slower due to:
|
|
370
|
+
- Pulling testcontainers/ryuk image
|
|
371
|
+
- Pulling MinIO image
|
|
372
|
+
- Container initialization
|
|
373
|
+
|
|
374
|
+
Subsequent runs are much faster (~2.5s).
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
## CI/CD Integration
|
|
378
|
+
|
|
379
|
+
### GitLab CI Example
|
|
380
|
+
|
|
381
|
+
```yaml
|
|
382
|
+
test-unit:
|
|
383
|
+
stage: test
|
|
384
|
+
image: python:3.12
|
|
385
|
+
script:
|
|
386
|
+
- pip install uv
|
|
387
|
+
- uv sync --all-groups
|
|
388
|
+
- PYTHONPATH=$PWD uv run pytest cledar/storage/tests/test_s3.py cledar/storage/tests/test_local.py -v
|
|
389
|
+
|
|
390
|
+
test-integration:
|
|
391
|
+
stage: test
|
|
392
|
+
image: python:3.12
|
|
393
|
+
services:
|
|
394
|
+
- docker:dind # Docker-in-Docker for testcontainers
|
|
395
|
+
variables:
|
|
396
|
+
DOCKER_HOST: tcp://docker:2375
|
|
397
|
+
DOCKER_TLS_CERTDIR: ""
|
|
398
|
+
script:
|
|
399
|
+
- pip install uv
|
|
400
|
+
- uv sync --all-groups
|
|
401
|
+
- PYTHONPATH=$PWD uv run pytest cledar/storage/tests/test_integration_s3.py -v
|
|
402
|
+
```
|
|
403
|
+
|
|
404
|
+
### GitHub Actions Example
|
|
405
|
+
|
|
406
|
+
```yaml
|
|
407
|
+
name: Tests
|
|
408
|
+
on: [push, pull_request]
|
|
409
|
+
|
|
410
|
+
jobs:
|
|
411
|
+
unit-tests:
|
|
412
|
+
runs-on: ubuntu-latest
|
|
413
|
+
steps:
|
|
414
|
+
- uses: actions/checkout@v3
|
|
415
|
+
- uses: actions/setup-python@v4
|
|
416
|
+
with:
|
|
417
|
+
python-version: '3.12'
|
|
418
|
+
- name: Install dependencies
|
|
419
|
+
run: |
|
|
420
|
+
pip install uv
|
|
421
|
+
uv sync --all-groups
|
|
422
|
+
- name: Run unit tests
|
|
423
|
+
run: PYTHONPATH=$PWD uv run pytest cledar/storage/tests/test_s3.py cledar/storage/tests/test_local.py -v
|
|
424
|
+
|
|
425
|
+
integration-tests:
|
|
426
|
+
runs-on: ubuntu-latest
|
|
427
|
+
steps:
|
|
428
|
+
- uses: actions/checkout@v3
|
|
429
|
+
- uses: actions/setup-python@v4
|
|
430
|
+
with:
|
|
431
|
+
python-version: '3.12'
|
|
432
|
+
- name: Install dependencies
|
|
433
|
+
run: |
|
|
434
|
+
pip install uv
|
|
435
|
+
uv sync --all-groups
|
|
436
|
+
- name: Run integration tests
|
|
437
|
+
run: PYTHONPATH=$PWD uv run pytest cledar/storage/tests/test_integration_s3.py -v
|
|
438
|
+
```
|
|
439
|
+
|
|
440
|
+
## API Reference
|
|
441
|
+
|
|
442
|
+
### ObjectStorageServiceConfig
|
|
443
|
+
|
|
444
|
+
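Configuration model for the storage service, implemented as a pydantic `BaseModel`. Every field is optional and defaults to `None`.
|
|
445
|
+
|
|
|
446
|
+
```python
|
|
447
|
+
# Pydantic model; the `| None = None` defaults are omitted below for brevity
|
|
448
|
+
class ObjectStorageServiceConfig(BaseModel):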
|
|
449
|
+
s3_endpoint_url: str # S3 endpoint URL
|
|
450
|
+
s3_access_key: str # Access key
|
|
451
|
+
s3_secret_key: str # Secret key
|
|
452
|
+
s3_max_concurrency: int # Max concurrent connections
|
|
453
|
+
```
|
|
454
|
+
|
|
455
|
+
### ObjectStorageService
|
|
456
|
+
|
|
457
|
+
Main service class providing storage operations.
|
|
458
|
+
|
|
459
|
+
#### Methods
|
|
460
|
+
|
|
461
|
+
- `is_alive() -> bool` - Check if service can connect to storage
|
|
462
|
+
- `has_bucket(bucket: str, throw: bool = False) -> bool` - Check if bucket exists
|
|
463
|
+
- `upload_buffer(buffer, bucket, key)` - Upload from memory buffer
|
|
464
|
+
- `upload_file(file_path, bucket, key)` - Upload from file
|
|
465
|
+
- `read_file(bucket, key, max_tries=3) -> bytes` - Read file contents
|
|
466
|
+
- `download_file(dest_path, bucket, key, max_tries=3)` - Download to file
|
|
467
|
+
- `list_objects(bucket, prefix="", recursive=True) -> list[str]` - List objects
|
|
468
|
+
- `delete_file(bucket, key)` - Delete a file
|
|
469
|
+
- `file_exists(bucket, key) -> bool` - Check if file exists
|
|
470
|
+
- `get_file_size(bucket, key) -> int` - Get file size in bytes
|
|
471
|
+
- `get_file_info(bucket, key) -> dict` - Get file metadata
|
|
472
|
+
- `copy_file(source_bucket, source_key, dest_bucket, dest_key)` - Copy file
|
|
473
|
+
- `move_file(source_bucket, source_key, dest_bucket, dest_key)` - Move file
|
|
474
|
+
|
|
475
|
+
All methods support S3 operations (using `bucket` and `key`), Azure ABFS operations (using `path` starting with `abfs://` or `abfss://`), and local filesystem operations (using `path` or `destination_path`). Mixed-backend copy/move (e.g., S3 to ABFS) is supported via streamed transfer.
|
|
476
|
+
|
|
477
|
+
### Azure Configuration
|
|
478
|
+
|
|
479
|
+
Install the optional dependency:
|
|
480
|
+
|
|
481
|
+
```bash
|
|
482
|
+
uv sync --all-groups # or pip install adlfs
|
|
483
|
+
```
|
|
484
|
+
|
|
485
|
+
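Provide credentials through `ObjectStorageServiceConfig` (any that apply). Note: the model in `cledar/storage/models.py` declares only `azure_account_name` and `azure_account_key`; confirm that the OAuth fields shown below (`azure_tenant_id`, `azure_client_id`, `azure_client_secret`) are supported by your installed version before relying on them: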
|
|
486
|
+
|
|
487
|
+
```python
|
|
488
|
+
config = ObjectStorageServiceConfig(
|
|
489
|
+
s3_endpoint_url="https://s3.amazonaws.com",
|
|
490
|
+
s3_access_key="...",
|
|
491
|
+
s3_secret_key="...",
|
|
492
|
+
s3_max_concurrency=10,
|
|
493
|
+
# Azure optional settings
|
|
494
|
+
azure_account_name="youraccount", # optional
|
|
495
|
+
azure_account_key="<account key>", # or OAuth below
|
|
496
|
+
azure_tenant_id="<tenant id>", # optional OAuth
|
|
497
|
+
azure_client_id="<client id>", # optional OAuth
|
|
498
|
+
azure_client_secret="<client secret>", # optional OAuth
|
|
499
|
+
)
|
|
500
|
+
|
|
501
|
+
service = ObjectStorageService(config)
|
|
502
|
+
content = service.read_file(path="abfs://container/path/to/file.txt")
|
|
503
|
+
```
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
### Running Pre-commit Checks
|
|
507
|
+
|
|
508
|
+
```bash
|
|
509
|
+
# Format code
|
|
510
|
+
uv run black cledar/storage/
|
|
511
|
+
|
|
512
|
+
# Check types
|
|
513
|
+
uv run mypy cledar/storage/
|
|
514
|
+
|
|
515
|
+
# Run linter
|
|
516
|
+
uv run pylint cledar/storage/
|
|
517
|
+
|
|
518
|
+
# Run all tests
|
|
519
|
+
PYTHONPATH=$PWD uv run pytest cledar/storage/tests/ -v
|
|
520
|
+
```
|
|
521
|
+
|
|
522
|
+
## License
|
|
523
|
+
|
|
524
|
+
See the main repository LICENSE file.
|
|
525
|
+
|
|
526
|
+
## Support
|
|
527
|
+
|
|
528
|
+
For issues, questions, or contributions, please refer to the main repository's contribution guidelines.
|
|
529
|
+
|
cledar/storage/exceptions.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
class ObjectStorageError(Exception):
|
|
2
|
+
pass
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class RequiredBucketNotFoundError(ObjectStorageError):
|
|
6
|
+
pass
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class UploadBufferError(ObjectStorageError):
|
|
10
|
+
pass
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class UploadFileError(ObjectStorageError):
|
|
14
|
+
pass
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ReadFileError(ObjectStorageError):
|
|
18
|
+
pass
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DownloadFileError(ObjectStorageError):
|
|
22
|
+
pass
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ListObjectsError(ObjectStorageError):
|
|
26
|
+
pass
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class DeleteFileError(ObjectStorageError):
|
|
30
|
+
pass
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class GetFileSizeError(ObjectStorageError):
|
|
34
|
+
pass
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class GetFileInfoError(ObjectStorageError):
|
|
38
|
+
pass
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class CopyFileError(ObjectStorageError):
|
|
42
|
+
pass
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class MoveFileError(ObjectStorageError):
|
|
46
|
+
pass
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class CheckFileExistenceError(ObjectStorageError):
|
|
50
|
+
pass
|
cledar/storage/models.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from typing import Literal
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ObjectStorageServiceConfig(BaseModel):
|
|
7
|
+
# s3 configuration
|
|
8
|
+
s3_endpoint_url: str | None = None
|
|
9
|
+
s3_access_key: str | None = None
|
|
10
|
+
s3_secret_key: str | None = None
|
|
11
|
+
s3_max_concurrency: int | None = None
|
|
12
|
+
# azure configuration
|
|
13
|
+
azure_account_name: str | None = None
|
|
14
|
+
azure_account_key: str | None = None
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class TransferPath(BaseModel):
|
|
18
|
+
backend: Literal["s3", "abfs", "local"]
|
|
19
|
+
path: str
|