julee 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. julee/__init__.py +3 -0
  2. julee/api/__init__.py +20 -0
  3. julee/api/app.py +180 -0
  4. julee/api/dependencies.py +257 -0
  5. julee/api/requests.py +175 -0
  6. julee/api/responses.py +43 -0
  7. julee/api/routers/__init__.py +43 -0
  8. julee/api/routers/assembly_specifications.py +212 -0
  9. julee/api/routers/documents.py +182 -0
  10. julee/api/routers/knowledge_service_configs.py +79 -0
  11. julee/api/routers/knowledge_service_queries.py +293 -0
  12. julee/api/routers/system.py +137 -0
  13. julee/api/routers/workflows.py +234 -0
  14. julee/api/services/__init__.py +20 -0
  15. julee/api/services/system_initialization.py +214 -0
  16. julee/api/tests/__init__.py +14 -0
  17. julee/api/tests/routers/__init__.py +17 -0
  18. julee/api/tests/routers/test_assembly_specifications.py +749 -0
  19. julee/api/tests/routers/test_documents.py +301 -0
  20. julee/api/tests/routers/test_knowledge_service_configs.py +234 -0
  21. julee/api/tests/routers/test_knowledge_service_queries.py +738 -0
  22. julee/api/tests/routers/test_system.py +179 -0
  23. julee/api/tests/routers/test_workflows.py +393 -0
  24. julee/api/tests/test_app.py +285 -0
  25. julee/api/tests/test_dependencies.py +245 -0
  26. julee/api/tests/test_requests.py +250 -0
  27. julee/domain/__init__.py +22 -0
  28. julee/domain/models/__init__.py +49 -0
  29. julee/domain/models/assembly/__init__.py +17 -0
  30. julee/domain/models/assembly/assembly.py +103 -0
  31. julee/domain/models/assembly/tests/__init__.py +0 -0
  32. julee/domain/models/assembly/tests/factories.py +37 -0
  33. julee/domain/models/assembly/tests/test_assembly.py +430 -0
  34. julee/domain/models/assembly_specification/__init__.py +24 -0
  35. julee/domain/models/assembly_specification/assembly_specification.py +172 -0
  36. julee/domain/models/assembly_specification/knowledge_service_query.py +123 -0
  37. julee/domain/models/assembly_specification/tests/__init__.py +0 -0
  38. julee/domain/models/assembly_specification/tests/factories.py +78 -0
  39. julee/domain/models/assembly_specification/tests/test_assembly_specification.py +490 -0
  40. julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +310 -0
  41. julee/domain/models/custom_fields/__init__.py +0 -0
  42. julee/domain/models/custom_fields/content_stream.py +68 -0
  43. julee/domain/models/custom_fields/tests/__init__.py +0 -0
  44. julee/domain/models/custom_fields/tests/test_custom_fields.py +53 -0
  45. julee/domain/models/document/__init__.py +17 -0
  46. julee/domain/models/document/document.py +150 -0
  47. julee/domain/models/document/tests/__init__.py +0 -0
  48. julee/domain/models/document/tests/factories.py +76 -0
  49. julee/domain/models/document/tests/test_document.py +297 -0
  50. julee/domain/models/knowledge_service_config/__init__.py +17 -0
  51. julee/domain/models/knowledge_service_config/knowledge_service_config.py +86 -0
  52. julee/domain/models/policy/__init__.py +15 -0
  53. julee/domain/models/policy/document_policy_validation.py +220 -0
  54. julee/domain/models/policy/policy.py +203 -0
  55. julee/domain/models/policy/tests/__init__.py +0 -0
  56. julee/domain/models/policy/tests/factories.py +47 -0
  57. julee/domain/models/policy/tests/test_document_policy_validation.py +420 -0
  58. julee/domain/models/policy/tests/test_policy.py +546 -0
  59. julee/domain/repositories/__init__.py +27 -0
  60. julee/domain/repositories/assembly.py +45 -0
  61. julee/domain/repositories/assembly_specification.py +52 -0
  62. julee/domain/repositories/base.py +146 -0
  63. julee/domain/repositories/document.py +49 -0
  64. julee/domain/repositories/document_policy_validation.py +52 -0
  65. julee/domain/repositories/knowledge_service_config.py +54 -0
  66. julee/domain/repositories/knowledge_service_query.py +44 -0
  67. julee/domain/repositories/policy.py +49 -0
  68. julee/domain/use_cases/__init__.py +17 -0
  69. julee/domain/use_cases/decorators.py +107 -0
  70. julee/domain/use_cases/extract_assemble_data.py +649 -0
  71. julee/domain/use_cases/initialize_system_data.py +842 -0
  72. julee/domain/use_cases/tests/__init__.py +7 -0
  73. julee/domain/use_cases/tests/test_extract_assemble_data.py +548 -0
  74. julee/domain/use_cases/tests/test_initialize_system_data.py +455 -0
  75. julee/domain/use_cases/tests/test_validate_document.py +1228 -0
  76. julee/domain/use_cases/validate_document.py +736 -0
  77. julee/fixtures/assembly_specifications.yaml +70 -0
  78. julee/fixtures/documents.yaml +178 -0
  79. julee/fixtures/knowledge_service_configs.yaml +37 -0
  80. julee/fixtures/knowledge_service_queries.yaml +27 -0
  81. julee/repositories/__init__.py +17 -0
  82. julee/repositories/memory/__init__.py +31 -0
  83. julee/repositories/memory/assembly.py +84 -0
  84. julee/repositories/memory/assembly_specification.py +125 -0
  85. julee/repositories/memory/base.py +227 -0
  86. julee/repositories/memory/document.py +149 -0
  87. julee/repositories/memory/document_policy_validation.py +104 -0
  88. julee/repositories/memory/knowledge_service_config.py +123 -0
  89. julee/repositories/memory/knowledge_service_query.py +120 -0
  90. julee/repositories/memory/policy.py +87 -0
  91. julee/repositories/memory/tests/__init__.py +0 -0
  92. julee/repositories/memory/tests/test_document.py +212 -0
  93. julee/repositories/memory/tests/test_document_policy_validation.py +161 -0
  94. julee/repositories/memory/tests/test_policy.py +443 -0
  95. julee/repositories/minio/__init__.py +31 -0
  96. julee/repositories/minio/assembly.py +103 -0
  97. julee/repositories/minio/assembly_specification.py +170 -0
  98. julee/repositories/minio/client.py +570 -0
  99. julee/repositories/minio/document.py +530 -0
  100. julee/repositories/minio/document_policy_validation.py +120 -0
  101. julee/repositories/minio/knowledge_service_config.py +187 -0
  102. julee/repositories/minio/knowledge_service_query.py +211 -0
  103. julee/repositories/minio/policy.py +106 -0
  104. julee/repositories/minio/tests/__init__.py +0 -0
  105. julee/repositories/minio/tests/fake_client.py +213 -0
  106. julee/repositories/minio/tests/test_assembly.py +374 -0
  107. julee/repositories/minio/tests/test_assembly_specification.py +391 -0
  108. julee/repositories/minio/tests/test_client_protocol.py +57 -0
  109. julee/repositories/minio/tests/test_document.py +591 -0
  110. julee/repositories/minio/tests/test_document_policy_validation.py +192 -0
  111. julee/repositories/minio/tests/test_knowledge_service_config.py +374 -0
  112. julee/repositories/minio/tests/test_knowledge_service_query.py +438 -0
  113. julee/repositories/minio/tests/test_policy.py +559 -0
  114. julee/repositories/temporal/__init__.py +38 -0
  115. julee/repositories/temporal/activities.py +114 -0
  116. julee/repositories/temporal/activity_names.py +34 -0
  117. julee/repositories/temporal/proxies.py +159 -0
  118. julee/services/__init__.py +18 -0
  119. julee/services/knowledge_service/__init__.py +48 -0
  120. julee/services/knowledge_service/anthropic/__init__.py +12 -0
  121. julee/services/knowledge_service/anthropic/knowledge_service.py +331 -0
  122. julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +318 -0
  123. julee/services/knowledge_service/factory.py +138 -0
  124. julee/services/knowledge_service/knowledge_service.py +160 -0
  125. julee/services/knowledge_service/memory/__init__.py +13 -0
  126. julee/services/knowledge_service/memory/knowledge_service.py +278 -0
  127. julee/services/knowledge_service/memory/test_knowledge_service.py +345 -0
  128. julee/services/knowledge_service/test_factory.py +112 -0
  129. julee/services/temporal/__init__.py +38 -0
  130. julee/services/temporal/activities.py +86 -0
  131. julee/services/temporal/activity_names.py +22 -0
  132. julee/services/temporal/proxies.py +41 -0
  133. julee/util/__init__.py +0 -0
  134. julee/util/domain.py +119 -0
  135. julee/util/repos/__init__.py +0 -0
  136. julee/util/repos/minio/__init__.py +0 -0
  137. julee/util/repos/minio/file_storage.py +213 -0
  138. julee/util/repos/temporal/__init__.py +11 -0
  139. julee/util/repos/temporal/client_proxies/file_storage.py +68 -0
  140. julee/util/repos/temporal/data_converter.py +123 -0
  141. julee/util/repos/temporal/minio_file_storage.py +12 -0
  142. julee/util/repos/temporal/proxies/__init__.py +0 -0
  143. julee/util/repos/temporal/proxies/file_storage.py +58 -0
  144. julee/util/repositories.py +55 -0
  145. julee/util/temporal/__init__.py +22 -0
  146. julee/util/temporal/activities.py +123 -0
  147. julee/util/temporal/decorators.py +473 -0
  148. julee/util/tests/__init__.py +1 -0
  149. julee/util/tests/test_decorators.py +770 -0
  150. julee/util/validation/__init__.py +29 -0
  151. julee/util/validation/repository.py +100 -0
  152. julee/util/validation/type_guards.py +369 -0
  153. julee/worker.py +211 -0
  154. julee/workflows/__init__.py +26 -0
  155. julee/workflows/extract_assemble.py +215 -0
  156. julee/workflows/validate_document.py +228 -0
  157. julee-0.1.0.dist-info/METADATA +195 -0
  158. julee-0.1.0.dist-info/RECORD +161 -0
  159. julee-0.1.0.dist-info/WHEEL +5 -0
  160. julee-0.1.0.dist-info/licenses/LICENSE +674 -0
  161. julee-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,570 @@
1
+ """
2
+ MinioClient protocol definition and repository utilities.
3
+
4
+ This module defines the protocol interface that both the real Minio client
5
+ and our fake test client must implement. This follows Clean Architecture
6
+ dependency inversion principles by depending on abstractions rather than
7
+ concrete implementations.
8
+
9
+ It also provides MinioRepositoryMixin, a mixin that encapsulates
10
+ common patterns used across all Minio repository implementations to reduce
11
+ code duplication and ensure consistent error handling and logging.
12
+ """
13
+
14
+ import io
15
+ import json
16
+ from datetime import datetime, timezone
17
+ from typing import (
18
+ Protocol,
19
+ Any,
20
+ Dict,
21
+ Optional,
22
+ runtime_checkable,
23
+ List,
24
+ Union,
25
+ TypeVar,
26
+ BinaryIO,
27
+ )
28
+ from urllib3.response import BaseHTTPResponse
29
+ from minio.datatypes import Object
30
+ from minio.api import ObjectWriteResult
31
+ from minio.error import S3Error # type: ignore[import-untyped]
32
+ from pydantic import BaseModel
33
+
34
+ # Import ContentStream here to avoid circular imports
35
+ from julee.domain.models.custom_fields.content_stream import (
36
+ ContentStream,
37
+ )
38
+
39
# Type variable for the concrete Pydantic model class handled by the JSON
# helpers below: it links the ``model_class`` argument to the type of the
# instances those helpers return.
T = TypeVar("T", bound=BaseModel)
40
+
41
+
42
@runtime_checkable
class MinioClient(Protocol):
    """Structural interface for the part of the MinIO client API used here.

    Only the operations the repositories actually call are declared, which
    keeps the dependency explicit and easy to substitute in tests. Both the
    real ``minio.Minio`` client and the test ``FakeMinioClient`` are meant to
    satisfy this protocol.
    """

    def bucket_exists(self, bucket_name: str) -> bool:
        """Report whether a bucket exists.

        Args:
            bucket_name: Bucket to look up.

        Returns:
            True when the bucket exists, False otherwise.
        """

    def make_bucket(self, bucket_name: str) -> None:
        """Create a new bucket.

        Args:
            bucket_name: Bucket to create.

        Raises:
            S3Error: When the bucket cannot be created.
        """

    def put_object(
        self,
        bucket_name: str,
        object_name: str,
        data: BinaryIO,
        length: int,
        content_type: str = "application/octet-stream",
        metadata: Optional[Dict[str, Union[str, List[str], tuple[str]]]] = None,
    ) -> ObjectWriteResult:
        """Write an object into a bucket.

        Args:
            bucket_name: Destination bucket.
            object_name: Key under which the object is stored.
            data: Binary stream supplying the object's content.
            length: Number of bytes to store.
            content_type: MIME type recorded for the object.
            metadata: Optional metadata mapping to attach to the object.

        Returns:
            The write result reported by the client.

        Raises:
            S3Error: When the object cannot be stored.
        """

    def get_object(self, bucket_name: str, object_name: str) -> BaseHTTPResponse:
        """Fetch an object from a bucket.

        Args:
            bucket_name: Bucket holding the object.
            object_name: Key of the object to fetch.

        Returns:
            HTTP response whose body is the object's content.

        Raises:
            S3Error: When retrieval fails (e.g., NoSuchKey).
        """

    def stat_object(self, bucket_name: str, object_name: str) -> Object:
        """Fetch an object's metadata without downloading its content.

        Args:
            bucket_name: Bucket holding the object.
            object_name: Key of the object to inspect.

        Returns:
            Metadata describing the object.

        Raises:
            S3Error: When the object is missing (NoSuchKey) or on other errors.
        """

    def list_objects(self, bucket_name: str, prefix: str = "") -> Any:
        """Enumerate the objects of a bucket, optionally filtered by prefix.

        Args:
            bucket_name: Bucket to enumerate.
            prefix: Only objects whose name starts with this prefix are
                returned; the empty default applies no filter.

        Returns:
            Iterator or list of the matching objects.

        Raises:
            S3Error: When the bucket is missing or on other errors.
        """
145
+
146
+
147
class MinioRepositoryMixin:
    """Shared plumbing for repositories that persist entities in MinIO.

    The mixin bundles the patterns every Minio repository needs:
    - bucket creation on demand
    - JSON serialization/deserialization of Pydantic models stored as objects
    - uniform ``S3Error`` handling, with ``NoSuchKey`` mapped to ``None``
    - structured logging through ``extra`` dictionaries
    - release of HTTP responses once their body has been consumed
    - prefixed ID generation with logging

    Classes using this mixin must provide:
    - self.client: MinioClient instance
    - self.logger: logging.Logger instance (typically set in __init__)
    """

    # Attributes the host class is required to supply.
    client: MinioClient
    logger: Any  # logging.Logger, but avoiding import

    @staticmethod
    def _is_missing_key(error: S3Error) -> bool:
        """Return True when ``error`` reports that the object does not exist."""
        return getattr(error, "code", None) == "NoSuchKey"

    @staticmethod
    def _release_response(response: BaseHTTPResponse) -> None:
        """Close ``response`` and hand its connection back to the pool."""
        try:
            response.close()
        finally:
            response.release_conn()

    def _read_json_model(
        self, bucket_name: str, object_name: str, model_class: type[T]
    ) -> T:
        """Fetch one object and build a ``model_class`` instance from its JSON.

        The response is released in a ``finally`` block so the connection is
        returned even when reading the body fails.

        Raises:
            S3Error: Whatever the client raises, including NoSuchKey.
        """
        response = self.client.get_object(
            bucket_name=bucket_name, object_name=object_name
        )
        try:
            payload = response.read()
        finally:
            self._release_response(response)

        return model_class(**json.loads(payload.decode("utf-8")))

    def ensure_buckets_exist(self, bucket_names: Union[str, List[str]]) -> None:
        """Ensure one or more buckets exist, creating them if necessary.

        Args:
            bucket_names: Single bucket name or list of bucket names

        Raises:
            S3Error: If checking for or creating a bucket fails
        """
        names = [bucket_names] if isinstance(bucket_names, str) else bucket_names

        for bucket_name in names:
            try:
                if self.client.bucket_exists(bucket_name):
                    self.logger.debug(
                        "Bucket already exists",
                        extra={"bucket_name": bucket_name},
                    )
                    continue

                self.logger.info(
                    "Creating bucket",
                    extra={"bucket_name": bucket_name},
                )
                self.client.make_bucket(bucket_name)
            except S3Error as e:
                self.logger.error(
                    "Failed to create bucket",
                    extra={"bucket_name": bucket_name, "error": str(e)},
                )
                raise

    def get_many_json_objects(
        self,
        bucket_name: str,
        object_names: List[str],
        model_class: type[T],
        not_found_log_message: str,
        error_log_message: str,
        extra_log_data: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Optional[T]]:
        """Get multiple JSON objects from Minio and deserialize them.

        Note: S3/MinIO does not have native batch retrieval operations, so
        one GetObject call is made per object; this method only consolidates
        error handling and logging.

        Args:
            bucket_name: Name of the bucket
            object_names: List of object names to retrieve
            model_class: Pydantic model class to deserialize to
            not_found_log_message: Message to log when objects are not found
            error_log_message: Message to log on other errors
            extra_log_data: Additional data to include in log entries

        Returns:
            Dict mapping object_name to deserialized model (or None if not
            found)

        Raises:
            S3Error: For non-NoSuchKey errors
        """
        extra_log_data = extra_log_data or {}
        result: Dict[str, Optional[T]] = {}
        found_count = 0

        self.logger.debug(
            "Attempting to retrieve multiple objects",
            extra={
                **extra_log_data,
                "object_count": len(object_names),
                "bucket_name": bucket_name,
            },
        )

        for object_name in object_names:
            try:
                result[object_name] = self._read_json_model(
                    bucket_name, object_name, model_class
                )
                found_count += 1
            except S3Error as e:
                if self._is_missing_key(e):
                    self.logger.debug(
                        not_found_log_message,
                        extra={**extra_log_data, "object_name": object_name},
                    )
                    result[object_name] = None
                else:
                    self.logger.error(
                        error_log_message,
                        extra={
                            **extra_log_data,
                            "object_name": object_name,
                            "error": str(e),
                        },
                    )
                    raise

        self.logger.info(
            f"Retrieved {found_count}/{len(object_names)} objects",
            extra={
                **extra_log_data,
                "requested_count": len(object_names),
                "found_count": found_count,
                "missing_count": len(object_names) - found_count,
                "bucket_name": bucket_name,
            },
        )

        return result

    def get_many_binary_objects(
        self,
        bucket_name: str,
        object_names: List[str],
        not_found_log_message: str,
        error_log_message: str,
        extra_log_data: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Optional[ContentStream]]:
        """Get multiple binary objects from Minio as ContentStreams.

        Note: S3/MinIO does not have native batch retrieval operations, so
        one GetObject call is made per object. Each returned ContentStream
        wraps a still-open response. If retrieval fails part-way, the
        responses opened so far are released before the error propagates,
        because the partial result never reaches the caller.

        Args:
            bucket_name: Name of the bucket
            object_names: List of object names to retrieve
            not_found_log_message: Message to log when objects are not found
            error_log_message: Message to log on other errors
            extra_log_data: Additional data to include in log entries

        Returns:
            Dict mapping object_name to ContentStream (or None if not found)

        Raises:
            S3Error: For non-NoSuchKey errors
        """
        extra_log_data = extra_log_data or {}
        result: Dict[str, Optional[ContentStream]] = {}
        found_count = 0
        # Responses opened so far, kept so they can be released on failure.
        opened: List[BaseHTTPResponse] = []

        self.logger.debug(
            "Attempting to retrieve multiple binary objects",
            extra={
                **extra_log_data,
                "object_count": len(object_names),
                "bucket_name": bucket_name,
            },
        )

        try:
            for object_name in object_names:
                try:
                    response = self.client.get_object(
                        bucket_name=bucket_name, object_name=object_name
                    )
                    opened.append(response)

                    # Wrap the live response; the body is not read here.
                    result[object_name] = ContentStream(response)
                    found_count += 1
                except S3Error as e:
                    if self._is_missing_key(e):
                        self.logger.debug(
                            not_found_log_message,
                            extra={**extra_log_data, "object_name": object_name},
                        )
                        result[object_name] = None
                    else:
                        self.logger.error(
                            error_log_message,
                            extra={
                                **extra_log_data,
                                "object_name": object_name,
                                "error": str(e),
                            },
                        )
                        raise
        except Exception:
            # Best-effort cleanup: a failure while releasing must not mask
            # the error that aborted the retrieval.
            for response in opened:
                try:
                    self._release_response(response)
                except Exception as cleanup_error:
                    self.logger.debug(
                        "Failed to release response during cleanup",
                        extra={**extra_log_data, "error": str(cleanup_error)},
                    )
            raise

        self.logger.info(
            f"Retrieved {found_count}/{len(object_names)} binary objects",
            extra={
                **extra_log_data,
                "requested_count": len(object_names),
                "found_count": found_count,
                "missing_count": len(object_names) - found_count,
                "bucket_name": bucket_name,
            },
        )

        return result

    def get_json_object(
        self,
        bucket_name: str,
        object_name: str,
        model_class: type[T],
        not_found_log_message: str,
        error_log_message: str,
        extra_log_data: Optional[Dict[str, Any]] = None,
    ) -> Optional[T]:
        """Get a JSON object from Minio and deserialize it to a Pydantic
        model.

        Args:
            bucket_name: Name of the bucket
            object_name: Name of the object
            model_class: Pydantic model class to deserialize to
            not_found_log_message: Message to log when object is not found
            error_log_message: Message to log on other errors
            extra_log_data: Additional data to include in log entries

        Returns:
            Deserialized Pydantic model instance, or None if not found

        Raises:
            S3Error: For non-NoSuchKey errors
        """
        extra_log_data = extra_log_data or {}

        try:
            return self._read_json_model(bucket_name, object_name, model_class)
        except S3Error as e:
            if self._is_missing_key(e):
                self.logger.debug(
                    not_found_log_message,
                    extra=extra_log_data,
                )
                return None

            self.logger.error(
                error_log_message,
                extra={**extra_log_data, "error": str(e)},
            )
            raise

    def put_json_object(
        self,
        bucket_name: str,
        object_name: str,
        model: BaseModel,
        success_log_message: str,
        error_log_message: str,
        extra_log_data: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Store a Pydantic model as a JSON object in Minio.

        Args:
            bucket_name: Name of the bucket
            object_name: Name of the object
            model: Pydantic model instance to serialize
            success_log_message: Message to log on successful storage
            error_log_message: Message to log on error
            extra_log_data: Additional data to include in log entries

        Raises:
            S3Error: If object storage fails
        """
        extra_log_data = extra_log_data or {}

        try:
            # Pydantic produces the JSON text; it is uploaded as UTF-8 bytes.
            json_bytes = model.model_dump_json().encode("utf-8")
            self.client.put_object(
                bucket_name=bucket_name,
                object_name=object_name,
                data=io.BytesIO(json_bytes),
                length=len(json_bytes),
                content_type="application/json",
            )

            self.logger.info(
                success_log_message,
                extra=extra_log_data,
            )
        except S3Error as e:
            self.logger.error(
                error_log_message,
                extra={**extra_log_data, "error": str(e)},
            )
            raise

    def update_timestamps(self, model: Any) -> None:
        """Update timestamps on a model (created_at if None, always
        updated_at).

        Attributes the model does not have are left alone; both timestamps
        receive the same timezone-aware UTC value.

        Args:
            model: Pydantic model with created_at and updated_at fields
        """
        now = datetime.now(timezone.utc)

        # Only fill created_at for new objects, i.e. while it is still None.
        if hasattr(model, "created_at") and model.created_at is None:
            model.created_at = now

        if hasattr(model, "updated_at"):
            model.updated_at = now

    def generate_id_with_prefix(self, prefix: str) -> str:
        """Generate a unique ID with the given prefix and log the generation.

        Args:
            prefix: Prefix for the generated ID (e.g., "ks", "doc")

        Returns:
            Unique ID string in format "{prefix}-{uuid}"
        """
        # Imported locally because the module does not import uuid at the top.
        import uuid

        generated_id = f"{prefix}-{uuid.uuid4()}"

        self.logger.debug(
            "Generated ID",
            extra={
                "generated_id": generated_id,
                "prefix": prefix,
                "generated_at": datetime.now(timezone.utc).isoformat(),
            },
        )

        return generated_id

    def list_objects_with_prefix_extract_ids(
        self,
        bucket_name: str,
        prefix: str,
        entity_type_name: str,
    ) -> List[str]:
        """Extract entity IDs from objects with a given prefix.

        Lists the objects under ``prefix`` and strips the first
        ``len(prefix)`` characters from each object name to obtain the ID.

        Args:
            bucket_name: Name of the bucket to list objects from
            prefix: Object name prefix to filter by (e.g., "spec/", "query/")
            entity_type_name: Name for logging (e.g., "specs", "queries")

        Returns:
            List of entity IDs extracted from object names

        Raises:
            Exception: Whatever the client raises while listing; it is not
                caught or logged here
        """
        self.logger.debug(
            f"Listing all {entity_type_name}",
            extra={"bucket": bucket_name, "prefix": prefix},
        )

        objects = self.client.list_objects(bucket_name=bucket_name, prefix=prefix)

        # The ID is whatever follows the prefix in the object name.
        prefix_length = len(prefix)
        entity_ids = [obj.object_name[prefix_length:] for obj in objects]

        self.logger.debug(
            f"Found {entity_type_name} objects",
            extra={"count": len(entity_ids), "entity_ids": entity_ids},
        )

        return entity_ids