julee 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- julee/__init__.py +3 -0
- julee/api/__init__.py +20 -0
- julee/api/app.py +180 -0
- julee/api/dependencies.py +257 -0
- julee/api/requests.py +175 -0
- julee/api/responses.py +43 -0
- julee/api/routers/__init__.py +43 -0
- julee/api/routers/assembly_specifications.py +212 -0
- julee/api/routers/documents.py +182 -0
- julee/api/routers/knowledge_service_configs.py +79 -0
- julee/api/routers/knowledge_service_queries.py +293 -0
- julee/api/routers/system.py +137 -0
- julee/api/routers/workflows.py +234 -0
- julee/api/services/__init__.py +20 -0
- julee/api/services/system_initialization.py +214 -0
- julee/api/tests/__init__.py +14 -0
- julee/api/tests/routers/__init__.py +17 -0
- julee/api/tests/routers/test_assembly_specifications.py +749 -0
- julee/api/tests/routers/test_documents.py +301 -0
- julee/api/tests/routers/test_knowledge_service_configs.py +234 -0
- julee/api/tests/routers/test_knowledge_service_queries.py +738 -0
- julee/api/tests/routers/test_system.py +179 -0
- julee/api/tests/routers/test_workflows.py +393 -0
- julee/api/tests/test_app.py +285 -0
- julee/api/tests/test_dependencies.py +245 -0
- julee/api/tests/test_requests.py +250 -0
- julee/domain/__init__.py +22 -0
- julee/domain/models/__init__.py +49 -0
- julee/domain/models/assembly/__init__.py +17 -0
- julee/domain/models/assembly/assembly.py +103 -0
- julee/domain/models/assembly/tests/__init__.py +0 -0
- julee/domain/models/assembly/tests/factories.py +37 -0
- julee/domain/models/assembly/tests/test_assembly.py +430 -0
- julee/domain/models/assembly_specification/__init__.py +24 -0
- julee/domain/models/assembly_specification/assembly_specification.py +172 -0
- julee/domain/models/assembly_specification/knowledge_service_query.py +123 -0
- julee/domain/models/assembly_specification/tests/__init__.py +0 -0
- julee/domain/models/assembly_specification/tests/factories.py +78 -0
- julee/domain/models/assembly_specification/tests/test_assembly_specification.py +490 -0
- julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +310 -0
- julee/domain/models/custom_fields/__init__.py +0 -0
- julee/domain/models/custom_fields/content_stream.py +68 -0
- julee/domain/models/custom_fields/tests/__init__.py +0 -0
- julee/domain/models/custom_fields/tests/test_custom_fields.py +53 -0
- julee/domain/models/document/__init__.py +17 -0
- julee/domain/models/document/document.py +150 -0
- julee/domain/models/document/tests/__init__.py +0 -0
- julee/domain/models/document/tests/factories.py +76 -0
- julee/domain/models/document/tests/test_document.py +297 -0
- julee/domain/models/knowledge_service_config/__init__.py +17 -0
- julee/domain/models/knowledge_service_config/knowledge_service_config.py +86 -0
- julee/domain/models/policy/__init__.py +15 -0
- julee/domain/models/policy/document_policy_validation.py +220 -0
- julee/domain/models/policy/policy.py +203 -0
- julee/domain/models/policy/tests/__init__.py +0 -0
- julee/domain/models/policy/tests/factories.py +47 -0
- julee/domain/models/policy/tests/test_document_policy_validation.py +420 -0
- julee/domain/models/policy/tests/test_policy.py +546 -0
- julee/domain/repositories/__init__.py +27 -0
- julee/domain/repositories/assembly.py +45 -0
- julee/domain/repositories/assembly_specification.py +52 -0
- julee/domain/repositories/base.py +146 -0
- julee/domain/repositories/document.py +49 -0
- julee/domain/repositories/document_policy_validation.py +52 -0
- julee/domain/repositories/knowledge_service_config.py +54 -0
- julee/domain/repositories/knowledge_service_query.py +44 -0
- julee/domain/repositories/policy.py +49 -0
- julee/domain/use_cases/__init__.py +17 -0
- julee/domain/use_cases/decorators.py +107 -0
- julee/domain/use_cases/extract_assemble_data.py +649 -0
- julee/domain/use_cases/initialize_system_data.py +842 -0
- julee/domain/use_cases/tests/__init__.py +7 -0
- julee/domain/use_cases/tests/test_extract_assemble_data.py +548 -0
- julee/domain/use_cases/tests/test_initialize_system_data.py +455 -0
- julee/domain/use_cases/tests/test_validate_document.py +1228 -0
- julee/domain/use_cases/validate_document.py +736 -0
- julee/fixtures/assembly_specifications.yaml +70 -0
- julee/fixtures/documents.yaml +178 -0
- julee/fixtures/knowledge_service_configs.yaml +37 -0
- julee/fixtures/knowledge_service_queries.yaml +27 -0
- julee/repositories/__init__.py +17 -0
- julee/repositories/memory/__init__.py +31 -0
- julee/repositories/memory/assembly.py +84 -0
- julee/repositories/memory/assembly_specification.py +125 -0
- julee/repositories/memory/base.py +227 -0
- julee/repositories/memory/document.py +149 -0
- julee/repositories/memory/document_policy_validation.py +104 -0
- julee/repositories/memory/knowledge_service_config.py +123 -0
- julee/repositories/memory/knowledge_service_query.py +120 -0
- julee/repositories/memory/policy.py +87 -0
- julee/repositories/memory/tests/__init__.py +0 -0
- julee/repositories/memory/tests/test_document.py +212 -0
- julee/repositories/memory/tests/test_document_policy_validation.py +161 -0
- julee/repositories/memory/tests/test_policy.py +443 -0
- julee/repositories/minio/__init__.py +31 -0
- julee/repositories/minio/assembly.py +103 -0
- julee/repositories/minio/assembly_specification.py +170 -0
- julee/repositories/minio/client.py +570 -0
- julee/repositories/minio/document.py +530 -0
- julee/repositories/minio/document_policy_validation.py +120 -0
- julee/repositories/minio/knowledge_service_config.py +187 -0
- julee/repositories/minio/knowledge_service_query.py +211 -0
- julee/repositories/minio/policy.py +106 -0
- julee/repositories/minio/tests/__init__.py +0 -0
- julee/repositories/minio/tests/fake_client.py +213 -0
- julee/repositories/minio/tests/test_assembly.py +374 -0
- julee/repositories/minio/tests/test_assembly_specification.py +391 -0
- julee/repositories/minio/tests/test_client_protocol.py +57 -0
- julee/repositories/minio/tests/test_document.py +591 -0
- julee/repositories/minio/tests/test_document_policy_validation.py +192 -0
- julee/repositories/minio/tests/test_knowledge_service_config.py +374 -0
- julee/repositories/minio/tests/test_knowledge_service_query.py +438 -0
- julee/repositories/minio/tests/test_policy.py +559 -0
- julee/repositories/temporal/__init__.py +38 -0
- julee/repositories/temporal/activities.py +114 -0
- julee/repositories/temporal/activity_names.py +34 -0
- julee/repositories/temporal/proxies.py +159 -0
- julee/services/__init__.py +18 -0
- julee/services/knowledge_service/__init__.py +48 -0
- julee/services/knowledge_service/anthropic/__init__.py +12 -0
- julee/services/knowledge_service/anthropic/knowledge_service.py +331 -0
- julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +318 -0
- julee/services/knowledge_service/factory.py +138 -0
- julee/services/knowledge_service/knowledge_service.py +160 -0
- julee/services/knowledge_service/memory/__init__.py +13 -0
- julee/services/knowledge_service/memory/knowledge_service.py +278 -0
- julee/services/knowledge_service/memory/test_knowledge_service.py +345 -0
- julee/services/knowledge_service/test_factory.py +112 -0
- julee/services/temporal/__init__.py +38 -0
- julee/services/temporal/activities.py +86 -0
- julee/services/temporal/activity_names.py +22 -0
- julee/services/temporal/proxies.py +41 -0
- julee/util/__init__.py +0 -0
- julee/util/domain.py +119 -0
- julee/util/repos/__init__.py +0 -0
- julee/util/repos/minio/__init__.py +0 -0
- julee/util/repos/minio/file_storage.py +213 -0
- julee/util/repos/temporal/__init__.py +11 -0
- julee/util/repos/temporal/client_proxies/file_storage.py +68 -0
- julee/util/repos/temporal/data_converter.py +123 -0
- julee/util/repos/temporal/minio_file_storage.py +12 -0
- julee/util/repos/temporal/proxies/__init__.py +0 -0
- julee/util/repos/temporal/proxies/file_storage.py +58 -0
- julee/util/repositories.py +55 -0
- julee/util/temporal/__init__.py +22 -0
- julee/util/temporal/activities.py +123 -0
- julee/util/temporal/decorators.py +473 -0
- julee/util/tests/__init__.py +1 -0
- julee/util/tests/test_decorators.py +770 -0
- julee/util/validation/__init__.py +29 -0
- julee/util/validation/repository.py +100 -0
- julee/util/validation/type_guards.py +369 -0
- julee/worker.py +211 -0
- julee/workflows/__init__.py +26 -0
- julee/workflows/extract_assemble.py +215 -0
- julee/workflows/validate_document.py +228 -0
- julee-0.1.0.dist-info/METADATA +195 -0
- julee-0.1.0.dist-info/RECORD +161 -0
- julee-0.1.0.dist-info/WHEEL +5 -0
- julee-0.1.0.dist-info/licenses/LICENSE +674 -0
- julee-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,570 @@
|
|
|
1
|
+
"""
|
|
2
|
+
MinioClient protocol definition and repository utilities.
|
|
3
|
+
|
|
4
|
+
This module defines the protocol interface that both the real Minio client
|
|
5
|
+
and our fake test client must implement. This follows Clean Architecture
|
|
6
|
+
dependency inversion principles by depending on abstractions rather than
|
|
7
|
+
concrete implementations.
|
|
8
|
+
|
|
9
|
+
It also provides MinioRepositoryMixin, a mixin that encapsulates
|
|
10
|
+
common patterns used across all Minio repository implementations to reduce
|
|
11
|
+
code duplication and ensure consistent error handling and logging.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import io
|
|
15
|
+
import json
|
|
16
|
+
from datetime import datetime, timezone
|
|
17
|
+
from typing import (
|
|
18
|
+
Protocol,
|
|
19
|
+
Any,
|
|
20
|
+
Dict,
|
|
21
|
+
Optional,
|
|
22
|
+
runtime_checkable,
|
|
23
|
+
List,
|
|
24
|
+
Union,
|
|
25
|
+
TypeVar,
|
|
26
|
+
BinaryIO,
|
|
27
|
+
)
|
|
28
|
+
from urllib3.response import BaseHTTPResponse
|
|
29
|
+
from minio.datatypes import Object
|
|
30
|
+
from minio.api import ObjectWriteResult
|
|
31
|
+
from minio.error import S3Error # type: ignore[import-untyped]
|
|
32
|
+
from pydantic import BaseModel
|
|
33
|
+
|
|
34
|
+
# Import ContentStream here to avoid circular imports
|
|
35
|
+
from julee.domain.models.custom_fields.content_stream import (
|
|
36
|
+
ContentStream,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
T = TypeVar("T", bound=BaseModel)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@runtime_checkable
class MinioClient(Protocol):
    """Structural interface for the subset of the MinIO client API we call.

    Only the operations the repositories actually rely on are declared,
    which keeps the dependency explicit and easy to substitute. The real
    ``minio.Minio`` client and the ``FakeMinioClient`` used in tests are
    both expected to satisfy this protocol.

    Note: because the protocol is ``runtime_checkable``, ``isinstance``
    checks only verify that the methods exist, not that their signatures
    match.
    """

    def bucket_exists(self, bucket_name: str) -> bool:
        """Report whether a bucket is present.

        Args:
            bucket_name: Bucket to look up

        Returns:
            True when the bucket exists, False when it does not
        """
        ...

    def make_bucket(self, bucket_name: str) -> None:
        """Create a new bucket.

        Args:
            bucket_name: Bucket to create

        Raises:
            S3Error: When the bucket cannot be created
        """
        ...

    def put_object(
        self,
        bucket_name: str,
        object_name: str,
        data: BinaryIO,
        length: int,
        content_type: str = "application/octet-stream",
        metadata: Optional[Dict[str, Union[str, List[str], tuple[str]]]] = None,
    ) -> ObjectWriteResult:
        """Write an object into a bucket.

        Args:
            bucket_name: Destination bucket
            object_name: Key under which the object is stored
            data: Binary stream supplying the object content
            length: Number of bytes in the object
            content_type: MIME type recorded for the object
            metadata: Optional metadata mapping to attach to the object

        Returns:
            Result describing the completed upload

        Raises:
            S3Error: When the object cannot be stored
        """
        ...

    def get_object(self, bucket_name: str, object_name: str) -> BaseHTTPResponse:
        """Fetch an object from a bucket.

        Args:
            bucket_name: Bucket holding the object
            object_name: Key of the object to fetch

        Returns:
            HTTP response whose body is the object data

        Raises:
            S3Error: When retrieval fails (for example, NoSuchKey)
        """
        ...

    def stat_object(self, bucket_name: str, object_name: str) -> Object:
        """Fetch an object's metadata without downloading its content.

        Args:
            bucket_name: Bucket holding the object
            object_name: Key of the object to inspect

        Returns:
            Metadata describing the object

        Raises:
            S3Error: When the object is missing (NoSuchKey) or on other
                errors
        """
        ...

    def list_objects(self, bucket_name: str, prefix: str = "") -> Any:
        """Enumerate the objects in a bucket, optionally filtered by prefix.

        Args:
            bucket_name: Bucket to enumerate
            prefix: Only objects whose names start with this prefix are
                returned; the empty default applies no filter

        Returns:
            Iterator or list of the matching objects

        Raises:
            S3Error: When the bucket is missing or on other errors
        """
        ...
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class MinioRepositoryMixin:
|
|
148
|
+
"""
|
|
149
|
+
Mixin that provides common repository patterns for Minio implementations.
|
|
150
|
+
|
|
151
|
+
This mixin encapsulates common functionality used across all Minio
|
|
152
|
+
repository
|
|
153
|
+
implementations, including:
|
|
154
|
+
- Bucket creation and management
|
|
155
|
+
- JSON serialization/deserialization with proper error handling
|
|
156
|
+
- Standardized S3Error handling for NoSuchKey cases
|
|
157
|
+
- Consistent logging patterns
|
|
158
|
+
- Response cleanup
|
|
159
|
+
- ID generation with logging
|
|
160
|
+
|
|
161
|
+
Classes using this mixin must provide:
|
|
162
|
+
- self.client: MinioClient instance
|
|
163
|
+
- self.logger: logging.Logger instance (typically set in __init__)
|
|
164
|
+
"""
|
|
165
|
+
|
|
166
|
+
# Type annotations for attributes that implementing classes must provide
|
|
167
|
+
client: MinioClient
|
|
168
|
+
logger: Any # logging.Logger, but avoiding import
|
|
169
|
+
|
|
170
|
+
def ensure_buckets_exist(self, bucket_names: Union[str, List[str]]) -> None:
    """Make sure every named bucket is present, creating the missing ones.

    Args:
        bucket_names: One bucket name, or a list of bucket names

    Raises:
        S3Error: If checking for or creating a bucket fails; the error is
            logged and re-raised, and later buckets are not processed
    """
    # A bare string is treated as a single bucket name, never as an
    # iterable of characters.
    names = [bucket_names] if isinstance(bucket_names, str) else bucket_names

    for name in names:
        try:
            if self.client.bucket_exists(name):
                self.logger.debug(
                    "Bucket already exists",
                    extra={"bucket_name": name},
                )
                continue

            self.logger.info("Creating bucket", extra={"bucket_name": name})
            self.client.make_bucket(name)
        except S3Error as exc:
            self.logger.error(
                "Failed to create bucket",
                extra={"bucket_name": name, "error": str(exc)},
            )
            raise
|
|
201
|
+
|
|
202
|
+
def get_many_json_objects(
    self,
    bucket_name: str,
    object_names: List[str],
    model_class: type[T],
    not_found_log_message: str,
    error_log_message: str,
    extra_log_data: Optional[Dict[str, Any]] = None,
) -> Dict[str, Optional[T]]:
    """Get multiple JSON objects from Minio and deserialize them.

    Note: S3/MinIO does not have native batch retrieval operations.
    This method makes one GetObject call per object name, in the order
    given, and provides consolidated error handling and logging.

    Args:
        bucket_name: Name of the bucket
        object_names: List of object names to retrieve
        model_class: Pydantic model class to deserialize to
        not_found_log_message: Message to log when objects are not found
        error_log_message: Message to log on other errors
        extra_log_data: Additional data to include in log entries

    Returns:
        Dict mapping object_name to deserialized model (or None if not
        found)

    Raises:
        S3Error: For non-NoSuchKey errors; retrieval stops at the first
            such error. Exceptions raised while decoding the payload or
            constructing ``model_class`` are not caught here.
    """
    extra_log_data = extra_log_data or {}
    result: Dict[str, Optional[T]] = {}
    found_count = 0

    self.logger.debug(
        "Attempting to retrieve multiple objects",
        extra={
            **extra_log_data,
            "object_count": len(object_names),
            "bucket_name": bucket_name,
        },
    )

    for object_name in object_names:
        try:
            response = self.client.get_object(
                bucket_name=bucket_name, object_name=object_name
            )

            # Always close the response and hand the connection back to
            # the pool, even when reading the body fails part-way.
            try:
                data = response.read()
            finally:
                response.close()
                response.release_conn()

            # Deserialize JSON to Pydantic model
            json_dict = json.loads(data.decode("utf-8"))
            result[object_name] = model_class(**json_dict)
            found_count += 1

        except S3Error as e:
            if getattr(e, "code", None) == "NoSuchKey":
                # A missing object is an expected outcome: record it as
                # None and keep going with the remaining names.
                self.logger.debug(
                    not_found_log_message,
                    extra={**extra_log_data, "object_name": object_name},
                )
                result[object_name] = None
            else:
                self.logger.error(
                    error_log_message,
                    extra={
                        **extra_log_data,
                        "object_name": object_name,
                        "error": str(e),
                    },
                )
                raise

    self.logger.info(
        f"Retrieved {found_count}/{len(object_names)} objects",
        extra={
            **extra_log_data,
            "requested_count": len(object_names),
            "found_count": found_count,
            "missing_count": len(object_names) - found_count,
            "bucket_name": bucket_name,
        },
    )

    return result
|
|
296
|
+
|
|
297
|
+
def get_many_binary_objects(
    self,
    bucket_name: str,
    object_names: List[str],
    not_found_log_message: str,
    error_log_message: str,
    extra_log_data: Optional[Dict[str, Any]] = None,
) -> Dict[str, Optional[ContentStream]]:
    """Get multiple binary objects from Minio as ContentStreams.

    Note: S3/MinIO does not have native batch retrieval operations.
    This method makes one GetObject call per object name, in the order
    given, and provides consolidated error handling and logging.

    Each returned ContentStream wraps the still-open HTTP response, so
    the responses are deliberately NOT closed here on success.
    NOTE(review): presumably the caller is responsible for closing the
    returned streams -- confirm against ContentStream.

    Args:
        bucket_name: Name of the bucket
        object_names: List of object names to retrieve
        not_found_log_message: Message to log when objects are not found
        error_log_message: Message to log on other errors
        extra_log_data: Additional data to include in log entries

    Returns:
        Dict mapping object_name to ContentStream (or None if not found)

    Raises:
        S3Error: For non-NoSuchKey errors; retrieval stops at the first
            such error and every response opened so far is released.
    """
    extra_log_data = extra_log_data or {}
    result: Dict[str, Optional[ContentStream]] = {}
    found_count = 0
    # Raw responses currently held open on behalf of the streams in
    # ``result``; needed so they can be released if the batch fails.
    open_responses: List[Any] = []

    self.logger.debug(
        "Attempting to retrieve multiple binary objects",
        extra={
            **extra_log_data,
            "object_count": len(object_names),
            "bucket_name": bucket_name,
        },
    )

    try:
        for object_name in object_names:
            try:
                response = self.client.get_object(
                    bucket_name=bucket_name, object_name=object_name
                )
                open_responses.append(response)

                # Create ContentStream directly from the response
                result[object_name] = ContentStream(response)
                found_count += 1

            except S3Error as e:
                if getattr(e, "code", None) == "NoSuchKey":
                    self.logger.debug(
                        not_found_log_message,
                        extra={**extra_log_data, "object_name": object_name},
                    )
                    result[object_name] = None
                else:
                    self.logger.error(
                        error_log_message,
                        extra={
                            **extra_log_data,
                            "object_name": object_name,
                            "error": str(e),
                        },
                    )
                    raise
    except BaseException:
        # The partially built result never reaches the caller, so nothing
        # else can close these responses; release them before re-raising.
        for response in open_responses:
            try:
                response.close()
                response.release_conn()
            except Exception as cleanup_error:
                # Best effort: the original failure must stay the one
                # that propagates.
                self.logger.debug(
                    "Failed to release response during cleanup",
                    extra={**extra_log_data, "error": str(cleanup_error)},
                )
        raise

    self.logger.info(
        f"Retrieved {found_count}/{len(object_names)} binary objects",
        extra={
            **extra_log_data,
            "requested_count": len(object_names),
            "found_count": found_count,
            "missing_count": len(object_names) - found_count,
            "bucket_name": bucket_name,
        },
    )

    return result
|
|
378
|
+
|
|
379
|
+
def get_json_object(
    self,
    bucket_name: str,
    object_name: str,
    model_class: type[T],
    not_found_log_message: str,
    error_log_message: str,
    extra_log_data: Optional[Dict[str, Any]] = None,
) -> Optional[T]:
    """Get a JSON object from Minio and deserialize it to a Pydantic
    model.

    Args:
        bucket_name: Name of the bucket
        object_name: Name of the object
        model_class: Pydantic model class to deserialize to
        not_found_log_message: Message to log when object is not found
        error_log_message: Message to log on other errors
        extra_log_data: Additional data to include in log entries

    Returns:
        Deserialized Pydantic model instance, or None if not found

    Raises:
        S3Error: For non-NoSuchKey errors. Exceptions raised while
            decoding the payload or constructing ``model_class`` are not
            caught here.
    """
    extra_log_data = extra_log_data or {}

    try:
        response = self.client.get_object(
            bucket_name=bucket_name, object_name=object_name
        )

        # Always close the response and hand the connection back to the
        # pool, even when reading the body fails part-way.
        try:
            data = response.read()
        finally:
            response.close()
            response.release_conn()

        # Deserialize JSON to Pydantic model
        json_dict = json.loads(data.decode("utf-8"))
        return model_class(**json_dict)

    except S3Error as e:
        if getattr(e, "code", None) == "NoSuchKey":
            # A missing object is an expected outcome, not a failure.
            self.logger.debug(
                not_found_log_message,
                extra=extra_log_data,
            )
            return None

        self.logger.error(
            error_log_message,
            extra={**extra_log_data, "error": str(e)},
        )
        raise
|
|
436
|
+
|
|
437
|
+
def put_json_object(
    self,
    bucket_name: str,
    object_name: str,
    model: BaseModel,
    success_log_message: str,
    error_log_message: str,
    extra_log_data: Optional[Dict[str, Any]] = None,
) -> None:
    """Serialize a Pydantic model to JSON and store it in Minio.

    The model is dumped with Pydantic's own JSON serializer, encoded as
    UTF-8 and uploaded with content type ``application/json``.

    Args:
        bucket_name: Name of the bucket
        object_name: Name of the object
        model: Pydantic model instance to serialize
        success_log_message: Message to log once the object is stored
        error_log_message: Message to log when storage fails
        extra_log_data: Additional data to include in log entries

    Raises:
        S3Error: If object storage fails (logged, then re-raised)
    """
    log_context = extra_log_data or {}

    try:
        payload = model.model_dump_json().encode("utf-8")
        self.client.put_object(
            bucket_name=bucket_name,
            object_name=object_name,
            data=io.BytesIO(payload),
            length=len(payload),
            content_type="application/json",
        )
        self.logger.info(success_log_message, extra=log_context)
    except S3Error as exc:
        self.logger.error(
            error_log_message,
            extra={**log_context, "error": str(exc)},
        )
        raise
|
|
485
|
+
|
|
486
|
+
def update_timestamps(self, model: Any) -> None:
    """Stamp a model with the current UTC time.

    ``created_at`` is filled in only when the attribute exists and is
    currently None; ``updated_at`` is overwritten whenever the attribute
    exists. Both receive the same timestamp. Attributes the model does
    not have are left alone.

    Args:
        model: Pydantic model with created_at and updated_at fields
    """
    stamp = datetime.now(timezone.utc)

    # The sentinel lets a single getattr distinguish "attribute missing"
    # from "attribute present but None".
    absent = object()
    if getattr(model, "created_at", absent) is None:
        model.created_at = stamp

    if hasattr(model, "updated_at"):
        model.updated_at = stamp
|
|
502
|
+
|
|
503
|
+
def generate_id_with_prefix(self, prefix: str) -> str:
    """Generate a unique ID with the given prefix and log the generation.

    The unique part is a random UUID (``uuid.uuid4``). The generation is
    logged at debug level together with the prefix and the current UTC
    time in ISO 8601 format.

    Args:
        prefix: Prefix for the generated ID (e.g., "ks", "doc")

    Returns:
        Unique ID string in format "{prefix}-{uuid}"
    """
    # uuid is imported locally because the module does not import it at
    # the top level; datetime and timezone come from the module imports.
    import uuid

    generated_id = f"{prefix}-{uuid.uuid4()}"

    self.logger.debug(
        "Generated ID",
        extra={
            "generated_id": generated_id,
            "prefix": prefix,
            "generated_at": datetime.now(timezone.utc).isoformat(),
        },
    )

    return generated_id
|
|
527
|
+
|
|
528
|
+
def list_objects_with_prefix_extract_ids(
    self,
    bucket_name: str,
    prefix: str,
    entity_type_name: str,
) -> List[str]:
    """List the objects under a prefix and return their IDs.

    Shared implementation for the repositories' list-all operations: the
    ID of each object is its name with the first ``len(prefix)``
    characters dropped.

    Args:
        bucket_name: Name of the bucket to list objects from
        prefix: Object name prefix to filter by (e.g., "spec/", "query/")
        entity_type_name: Name for logging (e.g., "specs", "queries")

    Returns:
        List of entity IDs extracted from object names, in listing order

    Raises:
        Exception: If listing objects fails
    """
    self.logger.debug(
        f"Listing all {entity_type_name}",
        extra={"bucket": bucket_name, "prefix": prefix},
    )

    # The client filters by prefix, so stripping a fixed number of leading
    # characters from each name leaves just the ID.
    skip = len(prefix)
    listing = self.client.list_objects(bucket_name=bucket_name, prefix=prefix)
    entity_ids = [entry.object_name[skip:] for entry in listing]

    self.logger.debug(
        f"Found {entity_type_name} objects",
        extra={"count": len(entity_ids), "entity_ids": entity_ids},
    )

    return entity_ids
|