julee-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. julee/__init__.py +3 -0
  2. julee/api/__init__.py +20 -0
  3. julee/api/app.py +180 -0
  4. julee/api/dependencies.py +257 -0
  5. julee/api/requests.py +175 -0
  6. julee/api/responses.py +43 -0
  7. julee/api/routers/__init__.py +43 -0
  8. julee/api/routers/assembly_specifications.py +212 -0
  9. julee/api/routers/documents.py +182 -0
  10. julee/api/routers/knowledge_service_configs.py +79 -0
  11. julee/api/routers/knowledge_service_queries.py +293 -0
  12. julee/api/routers/system.py +137 -0
  13. julee/api/routers/workflows.py +234 -0
  14. julee/api/services/__init__.py +20 -0
  15. julee/api/services/system_initialization.py +214 -0
  16. julee/api/tests/__init__.py +14 -0
  17. julee/api/tests/routers/__init__.py +17 -0
  18. julee/api/tests/routers/test_assembly_specifications.py +749 -0
  19. julee/api/tests/routers/test_documents.py +301 -0
  20. julee/api/tests/routers/test_knowledge_service_configs.py +234 -0
  21. julee/api/tests/routers/test_knowledge_service_queries.py +738 -0
  22. julee/api/tests/routers/test_system.py +179 -0
  23. julee/api/tests/routers/test_workflows.py +393 -0
  24. julee/api/tests/test_app.py +285 -0
  25. julee/api/tests/test_dependencies.py +245 -0
  26. julee/api/tests/test_requests.py +250 -0
  27. julee/domain/__init__.py +22 -0
  28. julee/domain/models/__init__.py +49 -0
  29. julee/domain/models/assembly/__init__.py +17 -0
  30. julee/domain/models/assembly/assembly.py +103 -0
  31. julee/domain/models/assembly/tests/__init__.py +0 -0
  32. julee/domain/models/assembly/tests/factories.py +37 -0
  33. julee/domain/models/assembly/tests/test_assembly.py +430 -0
  34. julee/domain/models/assembly_specification/__init__.py +24 -0
  35. julee/domain/models/assembly_specification/assembly_specification.py +172 -0
  36. julee/domain/models/assembly_specification/knowledge_service_query.py +123 -0
  37. julee/domain/models/assembly_specification/tests/__init__.py +0 -0
  38. julee/domain/models/assembly_specification/tests/factories.py +78 -0
  39. julee/domain/models/assembly_specification/tests/test_assembly_specification.py +490 -0
  40. julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +310 -0
  41. julee/domain/models/custom_fields/__init__.py +0 -0
  42. julee/domain/models/custom_fields/content_stream.py +68 -0
  43. julee/domain/models/custom_fields/tests/__init__.py +0 -0
  44. julee/domain/models/custom_fields/tests/test_custom_fields.py +53 -0
  45. julee/domain/models/document/__init__.py +17 -0
  46. julee/domain/models/document/document.py +150 -0
  47. julee/domain/models/document/tests/__init__.py +0 -0
  48. julee/domain/models/document/tests/factories.py +76 -0
  49. julee/domain/models/document/tests/test_document.py +297 -0
  50. julee/domain/models/knowledge_service_config/__init__.py +17 -0
  51. julee/domain/models/knowledge_service_config/knowledge_service_config.py +86 -0
  52. julee/domain/models/policy/__init__.py +15 -0
  53. julee/domain/models/policy/document_policy_validation.py +220 -0
  54. julee/domain/models/policy/policy.py +203 -0
  55. julee/domain/models/policy/tests/__init__.py +0 -0
  56. julee/domain/models/policy/tests/factories.py +47 -0
  57. julee/domain/models/policy/tests/test_document_policy_validation.py +420 -0
  58. julee/domain/models/policy/tests/test_policy.py +546 -0
  59. julee/domain/repositories/__init__.py +27 -0
  60. julee/domain/repositories/assembly.py +45 -0
  61. julee/domain/repositories/assembly_specification.py +52 -0
  62. julee/domain/repositories/base.py +146 -0
  63. julee/domain/repositories/document.py +49 -0
  64. julee/domain/repositories/document_policy_validation.py +52 -0
  65. julee/domain/repositories/knowledge_service_config.py +54 -0
  66. julee/domain/repositories/knowledge_service_query.py +44 -0
  67. julee/domain/repositories/policy.py +49 -0
  68. julee/domain/use_cases/__init__.py +17 -0
  69. julee/domain/use_cases/decorators.py +107 -0
  70. julee/domain/use_cases/extract_assemble_data.py +649 -0
  71. julee/domain/use_cases/initialize_system_data.py +842 -0
  72. julee/domain/use_cases/tests/__init__.py +7 -0
  73. julee/domain/use_cases/tests/test_extract_assemble_data.py +548 -0
  74. julee/domain/use_cases/tests/test_initialize_system_data.py +455 -0
  75. julee/domain/use_cases/tests/test_validate_document.py +1228 -0
  76. julee/domain/use_cases/validate_document.py +736 -0
  77. julee/fixtures/assembly_specifications.yaml +70 -0
  78. julee/fixtures/documents.yaml +178 -0
  79. julee/fixtures/knowledge_service_configs.yaml +37 -0
  80. julee/fixtures/knowledge_service_queries.yaml +27 -0
  81. julee/repositories/__init__.py +17 -0
  82. julee/repositories/memory/__init__.py +31 -0
  83. julee/repositories/memory/assembly.py +84 -0
  84. julee/repositories/memory/assembly_specification.py +125 -0
  85. julee/repositories/memory/base.py +227 -0
  86. julee/repositories/memory/document.py +149 -0
  87. julee/repositories/memory/document_policy_validation.py +104 -0
  88. julee/repositories/memory/knowledge_service_config.py +123 -0
  89. julee/repositories/memory/knowledge_service_query.py +120 -0
  90. julee/repositories/memory/policy.py +87 -0
  91. julee/repositories/memory/tests/__init__.py +0 -0
  92. julee/repositories/memory/tests/test_document.py +212 -0
  93. julee/repositories/memory/tests/test_document_policy_validation.py +161 -0
  94. julee/repositories/memory/tests/test_policy.py +443 -0
  95. julee/repositories/minio/__init__.py +31 -0
  96. julee/repositories/minio/assembly.py +103 -0
  97. julee/repositories/minio/assembly_specification.py +170 -0
  98. julee/repositories/minio/client.py +570 -0
  99. julee/repositories/minio/document.py +530 -0
  100. julee/repositories/minio/document_policy_validation.py +120 -0
  101. julee/repositories/minio/knowledge_service_config.py +187 -0
  102. julee/repositories/minio/knowledge_service_query.py +211 -0
  103. julee/repositories/minio/policy.py +106 -0
  104. julee/repositories/minio/tests/__init__.py +0 -0
  105. julee/repositories/minio/tests/fake_client.py +213 -0
  106. julee/repositories/minio/tests/test_assembly.py +374 -0
  107. julee/repositories/minio/tests/test_assembly_specification.py +391 -0
  108. julee/repositories/minio/tests/test_client_protocol.py +57 -0
  109. julee/repositories/minio/tests/test_document.py +591 -0
  110. julee/repositories/minio/tests/test_document_policy_validation.py +192 -0
  111. julee/repositories/minio/tests/test_knowledge_service_config.py +374 -0
  112. julee/repositories/minio/tests/test_knowledge_service_query.py +438 -0
  113. julee/repositories/minio/tests/test_policy.py +559 -0
  114. julee/repositories/temporal/__init__.py +38 -0
  115. julee/repositories/temporal/activities.py +114 -0
  116. julee/repositories/temporal/activity_names.py +34 -0
  117. julee/repositories/temporal/proxies.py +159 -0
  118. julee/services/__init__.py +18 -0
  119. julee/services/knowledge_service/__init__.py +48 -0
  120. julee/services/knowledge_service/anthropic/__init__.py +12 -0
  121. julee/services/knowledge_service/anthropic/knowledge_service.py +331 -0
  122. julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +318 -0
  123. julee/services/knowledge_service/factory.py +138 -0
  124. julee/services/knowledge_service/knowledge_service.py +160 -0
  125. julee/services/knowledge_service/memory/__init__.py +13 -0
  126. julee/services/knowledge_service/memory/knowledge_service.py +278 -0
  127. julee/services/knowledge_service/memory/test_knowledge_service.py +345 -0
  128. julee/services/knowledge_service/test_factory.py +112 -0
  129. julee/services/temporal/__init__.py +38 -0
  130. julee/services/temporal/activities.py +86 -0
  131. julee/services/temporal/activity_names.py +22 -0
  132. julee/services/temporal/proxies.py +41 -0
  133. julee/util/__init__.py +0 -0
  134. julee/util/domain.py +119 -0
  135. julee/util/repos/__init__.py +0 -0
  136. julee/util/repos/minio/__init__.py +0 -0
  137. julee/util/repos/minio/file_storage.py +213 -0
  138. julee/util/repos/temporal/__init__.py +11 -0
  139. julee/util/repos/temporal/client_proxies/file_storage.py +68 -0
  140. julee/util/repos/temporal/data_converter.py +123 -0
  141. julee/util/repos/temporal/minio_file_storage.py +12 -0
  142. julee/util/repos/temporal/proxies/__init__.py +0 -0
  143. julee/util/repos/temporal/proxies/file_storage.py +58 -0
  144. julee/util/repositories.py +55 -0
  145. julee/util/temporal/__init__.py +22 -0
  146. julee/util/temporal/activities.py +123 -0
  147. julee/util/temporal/decorators.py +473 -0
  148. julee/util/tests/__init__.py +1 -0
  149. julee/util/tests/test_decorators.py +770 -0
  150. julee/util/validation/__init__.py +29 -0
  151. julee/util/validation/repository.py +100 -0
  152. julee/util/validation/type_guards.py +369 -0
  153. julee/worker.py +211 -0
  154. julee/workflows/__init__.py +26 -0
  155. julee/workflows/extract_assemble.py +215 -0
  156. julee/workflows/validate_document.py +228 -0
  157. julee-0.1.0.dist-info/METADATA +195 -0
  158. julee-0.1.0.dist-info/RECORD +161 -0
  159. julee-0.1.0.dist-info/WHEEL +5 -0
  160. julee-0.1.0.dist-info/licenses/LICENSE +674 -0
  161. julee-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,591 @@
1
+ """
2
+ Unit tests for MinioDocumentRepository.
3
+
4
+ These tests use a fake Minio client (FakeMinioClient) to test the repository implementation logic
5
+ without requiring a real MinIO instance. They follow the Clean Architecture
6
+ testing patterns and verify idempotency, error handling, and content storage.
7
+ """
8
+
9
+ import io
10
+ import pytest
11
+ import hashlib
12
+ import multihash
13
+ from typing import Any
14
+ from unittest.mock import Mock
15
+ from minio.error import S3Error
16
+
17
+
18
+ from julee.repositories.minio.document import MinioDocumentRepository
19
+ from julee.domain.models.document import Document, DocumentStatus
20
+ from julee.domain.models.custom_fields.content_stream import (
21
+ ContentStream,
22
+ )
23
+ from .fake_client import FakeMinioClient
24
+
25
+
26
@pytest.fixture
def fake_minio_client() -> FakeMinioClient:
    """Return a new FakeMinioClient so tests can assert on stored state."""
    client = FakeMinioClient()
    return client
30
+
31
+
32
@pytest.fixture
def repository(fake_minio_client: FakeMinioClient) -> MinioDocumentRepository:
    """Return a MinioDocumentRepository built on the fake client fixture."""
    repo = MinioDocumentRepository(fake_minio_client)
    return repo
36
+
37
+
38
@pytest.fixture
def sample_content() -> ContentStream:
    """Return a ContentStream wrapping a fixed test payload."""
    payload = io.BytesIO(b"This is test content for document storage")
    return ContentStream(payload)
43
+
44
+
45
@pytest.fixture
def sample_document(sample_content: ContentStream) -> Document:
    """Return a CAPTURED Document whose metadata matches ``sample_content``.

    The size and multihash are derived from the bytes actually held by the
    ``sample_content`` stream rather than from a second copy of the literal,
    so the two fixtures cannot drift apart.
    """
    # Read the payload from the stream itself, then rewind so that whoever
    # consumes the document's content starts from the beginning.
    content_bytes = sample_content.read()
    sample_content.seek(0)

    # SHA-256 digest, wrapped as a multihash and hex-encoded.
    sha256_hash = hashlib.sha256(content_bytes).digest()
    mh = multihash.encode(sha256_hash, multihash.SHA2_256)
    actual_multihash = str(mh.hex())

    return Document(
        document_id="test-doc-123",
        original_filename="test.txt",
        content_type="text/plain",
        size_bytes=len(content_bytes),
        content_multihash=actual_multihash,
        status=DocumentStatus.CAPTURED,
        content=sample_content,
    )
63
+
64
+
65
class TestMinioDocumentRepositoryInitialization:
    """Test repository initialization and bucket creation."""

    def test_init_creates_buckets_when_missing(self) -> None:
        """Test that missing buckets are created during initialization."""
        fake_client = FakeMinioClient()

        # Precondition: neither bucket exists on a fresh fake client.
        assert not fake_client.bucket_exists("documents")
        assert not fake_client.bucket_exists("documents-content")

        # Constructing the repository is expected to create both buckets.
        MinioDocumentRepository(fake_client)

        assert fake_client.bucket_exists("documents")
        assert fake_client.bucket_exists("documents-content")

    def test_init_skips_existing_buckets(self) -> None:
        """Test that existing buckets are not recreated."""
        fake_client = FakeMinioClient()

        # Pre-create both buckets before the repository is constructed.
        fake_client.make_bucket("documents")
        fake_client.make_bucket("documents-content")

        # Record every make_bucket call issued from here on, so the test can
        # prove that initialization did not try to create the buckets again
        # (checking bucket_exists alone cannot distinguish the two cases).
        make_bucket_calls: list[str] = []
        original_make_bucket = fake_client.make_bucket

        def recording_make_bucket(bucket_name: str) -> None:
            make_bucket_calls.append(bucket_name)
            return original_make_bucket(bucket_name)

        fake_client.make_bucket = recording_make_bucket  # type: ignore[method-assign]

        # Initialize repository - should not fail or recreate
        MinioDocumentRepository(fake_client)

        assert make_bucket_calls == []
        assert fake_client.bucket_exists("documents")
        assert fake_client.bucket_exists("documents-content")

    def test_init_handles_bucket_creation_error(self) -> None:
        """Test that an S3Error raised by make_bucket propagates from __init__."""
        fake_client = FakeMinioClient()

        # Only "documents-content" is missing, and its creation is made to fail.
        fake_client.make_bucket("documents")

        original_make_bucket = fake_client.make_bucket

        def failing_make_bucket(bucket_name: str) -> None:
            if bucket_name == "documents-content":
                raise S3Error(
                    code="AccessDenied",
                    message="Access denied",
                    resource="AccessDenied",
                    request_id="req123",
                    host_id="host123",
                    response=Mock(),
                )
            return original_make_bucket(bucket_name)

        fake_client.make_bucket = failing_make_bucket  # type: ignore[method-assign]

        with pytest.raises(S3Error):
            MinioDocumentRepository(fake_client)
124
+
125
+
126
class TestMinioDocumentRepositoryStore:
    """Exercise ``save`` for new documents, deduplication and failures."""

    async def test_store_new_document(
        self, fake_minio_client: FakeMinioClient, sample_document: Document
    ) -> None:
        """Saving a new document writes one metadata and one content object."""
        repo = MinioDocumentRepository(fake_minio_client)
        buckets = ("documents", "documents-content")

        # Both buckets start out empty.
        for bucket in buckets:
            assert fake_minio_client.get_object_count(bucket) == 0

        await repo.save(sample_document)

        # Exactly one object landed in each bucket.
        for bucket in buckets:
            assert fake_minio_client.get_object_count(bucket) == 1

        # Content is keyed by the document's multihash.
        stored_content = fake_minio_client.get_stored_objects("documents-content")
        assert sample_document.content_multihash in stored_content

        # Metadata is keyed by the document ID.
        stored_metadata = fake_minio_client.get_stored_objects("documents")
        assert sample_document.document_id in stored_metadata

    async def test_store_document_with_existing_content_deduplication(
        self, fake_minio_client: FakeMinioClient, sample_document: Document
    ) -> None:
        """Two documents with identical bytes share a single content object."""
        repo = MinioDocumentRepository(fake_minio_client)

        await repo.save(sample_document)
        shared_hash = sample_document.content_multihash

        # Copy the first document's bytes, rewinding before and after the read.
        first_stream = sample_document.content
        assert first_stream is not None
        first_stream.seek(0)
        raw_bytes = first_stream.read()
        first_stream.seek(0)

        # Same content and hash, different identity and filename.
        twin = Document(
            document_id="different-doc-456",
            original_filename="different.txt",
            content_type="text/plain",
            size_bytes=len(raw_bytes),
            content_multihash=shared_hash,
            status=DocumentStatus.CAPTURED,
            content=ContentStream(io.BytesIO(raw_bytes)),
        )

        await repo.save(twin)

        # Two metadata records, but the content bucket still holds one object.
        assert fake_minio_client.get_object_count("documents") == 2
        assert fake_minio_client.get_object_count("documents-content") == 1

        stored_content = fake_minio_client.get_stored_objects("documents-content")
        assert len(stored_content) == 1
        assert shared_hash in stored_content

        # Both documents point at the shared hash.
        assert sample_document.content_multihash == shared_hash
        assert twin.content_multihash == shared_hash

    async def test_store_updates_multihash_when_different(
        self, fake_minio_client: FakeMinioClient, sample_document: Document
    ) -> None:
        """A wrong multihash on the document is replaced by the computed one."""
        repo = MinioDocumentRepository(fake_minio_client)

        wrong_hash = "incorrect_hash_12345"
        expected_hash = sample_document.content_multihash
        sample_document.content_multihash = wrong_hash

        await repo.save(sample_document)

        # The document now carries the computed hash, not the bogus one.
        assert sample_document.content_multihash == expected_hash
        assert sample_document.content_multihash != wrong_hash

        # The content object is keyed by the computed hash only.
        stored_content = fake_minio_client.get_stored_objects("documents-content")
        assert expected_hash in stored_content
        assert wrong_hash not in stored_content

    async def test_store_handles_content_storage_error(
        self, fake_minio_client: FakeMinioClient, sample_document: Document
    ) -> None:
        """An S3Error while writing content propagates and stores nothing."""
        repo = MinioDocumentRepository(fake_minio_client)
        real_put_object = repo.client.put_object

        def deny_content_writes(
            bucket_name: str,
            object_name: str,
            data: Any,
            length: int,
            **kwargs: Any,
        ) -> Any:
            # Writes to any other bucket pass straight through.
            if bucket_name != "documents-content":
                return real_put_object(
                    bucket_name, object_name, data, length, **kwargs
                )
            raise S3Error(
                code="AccessDenied",
                message="Access denied",
                resource="AccessDenied",
                request_id="req123",
                host_id="host123",
                response=Mock(),
            )

        repo.client.put_object = deny_content_writes  # type: ignore[method-assign, assignment]

        with pytest.raises(S3Error):
            await repo.save(sample_document)

        # Neither metadata nor content was written.
        assert fake_minio_client.get_object_count("documents") == 0
        assert fake_minio_client.get_object_count("documents-content") == 0
257
+
258
+
259
class TestMinioDocumentRepositoryGet:
    """Test document retrieval operations."""

    async def test_get_existing_document(
        self, repository: MinioDocumentRepository, sample_document: Document
    ) -> None:
        """Test retrieving an existing document with content."""
        # Store a document first
        await repository.save(sample_document)

        # Act - retrieve the document
        result = await repository.get(sample_document.document_id)

        # Assert - metadata round-trips unchanged
        assert result is not None
        assert result.document_id == sample_document.document_id
        assert result.original_filename == sample_document.original_filename
        assert result.content_type == sample_document.content_type
        assert result.size_bytes == sample_document.size_bytes

        # Verify content can be read
        assert result.content is not None
        retrieved_content = result.content.read()

        # Rewind the original stream before reading it for comparison
        assert sample_document.content is not None
        sample_document.content.seek(0)
        original_content = sample_document.content.read()

        assert retrieved_content == original_content

    async def test_get_document_missing_content_multihash(
        self, repository: MinioDocumentRepository
    ) -> None:
        """Test handling document metadata without content_multihash."""
        # Manually store invalid metadata (missing content_multihash)
        invalid_metadata_json = (
            '{"document_id": "test-123", "original_filename": "test.txt"}'
        )
        # Encode once so the declared length is the byte length of the payload
        # actually uploaded, not the character count of the source string.
        invalid_metadata_bytes = invalid_metadata_json.encode("utf-8")
        repository.client.put_object(
            "documents",
            "test-123",
            io.BytesIO(invalid_metadata_bytes),
            len(invalid_metadata_bytes),
            content_type="application/json",
        )

        # Act
        result = await repository.get("test-123")

        # Assert
        assert result is None

    async def test_get_document_with_missing_content(
        self, repository: MinioDocumentRepository
    ) -> None:
        """Test that missing content returns None."""
        # Store metadata but not content
        metadata_json = (
            '{"document_id": "test-123", "content_multihash": "missing_hash",'
            ' "original_filename": "test.txt", "content_type": "text/plain",'
            ' "size_bytes": 100, "status": "captured"}'
        )
        # Encode once so the declared length matches the uploaded bytes.
        metadata_bytes = metadata_json.encode("utf-8")
        repository.client.put_object(
            "documents",
            "test-123",
            io.BytesIO(metadata_bytes),
            len(metadata_bytes),
            content_type="application/json",
        )

        # Note: content with multihash "missing_hash" does not exist

        # Act
        result = await repository.get("test-123")

        # Assert - should return None when content is missing
        assert result is None

    async def test_get_nonexistent_document(
        self, repository: MinioDocumentRepository
    ) -> None:
        """Test retrieving a document that doesn't exist."""
        # Act - try to get a document that was never stored
        result = await repository.get("nonexistent-123")

        # Assert
        assert result is None
347
+
348
+
349
class TestMinioDocumentRepositoryUpdate:
    """Exercise re-saving an already stored document."""

    async def test_update_document(
        self, repository: MinioDocumentRepository, sample_document: Document
    ) -> None:
        """Re-saving a modified document bumps updated_at and persists it."""
        await repository.save(sample_document)
        timestamp_before = sample_document.updated_at

        # Change the status and save again.
        sample_document.status = DocumentStatus.EXTRACTED
        await repository.save(sample_document)

        # The timestamp changed; when both values are set it moved forward.
        timestamp_after = sample_document.updated_at
        assert timestamp_after != timestamp_before
        if timestamp_before and timestamp_after:
            assert timestamp_after > timestamp_before

        # The stored copy reflects the new status and timestamp.
        stored = await repository.get(sample_document.document_id)
        assert stored is not None
        assert stored.status == DocumentStatus.EXTRACTED
        assert stored.updated_at == sample_document.updated_at
376
+
377
+
378
class TestMinioDocumentRepositoryGenerateId:
    """Exercise ``generate_id``."""

    async def test_generate_id(self, repository: MinioDocumentRepository) -> None:
        """Two consecutive IDs are distinct, non-empty strings."""
        first_id = await repository.generate_id()
        second_id = await repository.generate_id()
        generated_ids = (first_id, second_id)

        for generated in generated_ids:
            assert isinstance(generated, str)
        assert first_id != second_id
        for generated in generated_ids:
            assert len(generated) > 0
393
+
394
+
395
class TestMinioDocumentRepositoryMultihash:
    """Exercise ``_calculate_multihash_from_stream``."""

    def test_calculate_multihash_from_stream(
        self, repository: MinioDocumentRepository
    ) -> None:
        """Hashing the same stream twice gives the same non-empty string."""
        stream = ContentStream(io.BytesIO(b"test content for hashing"))

        first_digest = repository._calculate_multihash_from_stream(stream)

        assert isinstance(first_digest, str)
        assert len(first_digest) > 0

        # Rewind and hash again: identical content must give an identical hash.
        stream.seek(0)
        second_digest = repository._calculate_multihash_from_stream(stream)
        assert first_digest == second_digest

    def test_calculate_multihash_from_empty_stream(
        self, repository: MinioDocumentRepository
    ) -> None:
        """An empty stream still hashes to a non-empty string."""
        empty_stream = ContentStream(io.BytesIO(b""))

        digest = repository._calculate_multihash_from_stream(empty_stream)

        assert isinstance(digest, str)
        assert len(digest) > 0
429
+
430
+
431
class TestMinioDocumentRepositoryContentString:
    """Exercise saving documents that carry ``content_string``."""

    @staticmethod
    def _build_document(document_id: str, filename: str, content: str) -> Document:
        """Build a CAPTURED JSON document carrying *content* as content_string.

        ``size_bytes`` and ``content_multihash`` are deliberate placeholders;
        the tests below check what the repository stores in their place.
        """
        return Document(
            document_id=document_id,
            original_filename=filename,
            content_type="application/json",
            size_bytes=100,
            content_multihash="placeholder",
            status=DocumentStatus.CAPTURED,
            content_string=content,
        )

    async def test_save_document_with_content_string(
        self, repository: MinioDocumentRepository
    ) -> None:
        """A small content_string round-trips with recomputed size and hash."""
        text = '{"assembled": "document", "data": "test"}'
        document = self._build_document(
            "test-doc-content-string", "assembled.json", text
        )

        await repository.save(document)

        # The placeholders were replaced by values computed from the content.
        retrieved = await repository.get(document.document_id)
        assert retrieved is not None
        assert retrieved.content_multihash != "placeholder"
        assert retrieved.size_bytes == len(text.encode("utf-8"))

        # The stored bytes decode back to the original string.
        assert retrieved.content is not None
        assert retrieved.content.read().decode("utf-8") == text

    async def test_save_document_with_content_string_unicode(
        self, repository: MinioDocumentRepository
    ) -> None:
        """A non-ASCII content_string round-trips unchanged."""
        text = '{"title": "测试文档", "emoji": "🚀", "content": "éñ"}'
        document = self._build_document("test-doc-unicode", "unicode.json", text)

        await repository.save(document)
        retrieved = await repository.get(document.document_id)

        assert retrieved is not None
        assert retrieved.content is not None
        assert retrieved.content.read().decode("utf-8") == text

    # Note: Empty content test removed because domain model requires
    # size_bytes > 0

    async def test_save_excludes_content_string_from_metadata(
        self,
        repository: MinioDocumentRepository,
        fake_minio_client: FakeMinioClient,
    ) -> None:
        """The stored metadata JSON omits content and content_string."""
        import json

        document = self._build_document(
            "test-metadata-exclusion",
            "test.json",
            '{"test": "data that should not be in metadata"}',
        )

        await repository.save(document)

        # Read the raw metadata object straight from the fake client.
        raw_metadata = fake_minio_client.get_object(
            bucket_name="documents", object_name="test-metadata-exclusion"
        ).read()
        metadata = json.loads(raw_metadata.decode("utf-8"))

        # Neither content field is serialised into the metadata.
        for excluded_key in ("content_string", "content"):
            assert excluded_key not in metadata

        # The identifying fields are still there.
        assert metadata["document_id"] == "test-metadata-exclusion"
        for required_key in ("content_multihash", "status"):
            assert required_key in metadata
531
+
532
+
533
class TestMinioDocumentRepositoryErrorHandling:
    """Exercise failure paths of ``save`` and ``get``."""

    async def test_store_handles_metadata_storage_error(
        self, fake_minio_client: FakeMinioClient, sample_document: Document
    ) -> None:
        """An S3Error on the metadata write propagates; content stays stored."""
        repo = MinioDocumentRepository(fake_minio_client)
        real_put_object = repo.client.put_object

        def deny_metadata_writes(
            bucket_name: str,
            object_name: str,
            data: Any,
            length: int,
            **kwargs: Any,
        ) -> Any:
            # Writes to any bucket other than "documents" pass straight through.
            if bucket_name != "documents":
                return real_put_object(
                    bucket_name, object_name, data, length, **kwargs
                )
            raise S3Error(
                code="AccessDenied",
                message="Access denied",
                resource="AccessDenied",
                request_id="req123",
                host_id="host123",
                response=Mock(),
            )

        repo.client.put_object = deny_metadata_writes  # type: ignore[method-assign, assignment]

        with pytest.raises(S3Error):
            await repo.save(sample_document)

        # The content object exists, the metadata object does not.
        assert fake_minio_client.get_object_count("documents-content") == 1
        assert fake_minio_client.get_object_count("documents") == 0

    async def test_get_handles_unexpected_error(
        self, repository: MinioDocumentRepository
    ) -> None:
        """``get`` returns None when the metadata read raises a plain Exception."""
        real_get_object = repository.client.get_object

        def broken_metadata_read(bucket_name: str, object_name: str) -> Any:
            # Reads from any bucket other than "documents" pass straight through.
            if bucket_name != "documents":
                return real_get_object(bucket_name, object_name)
            raise Exception("Unexpected error")

        repository.client.get_object = broken_metadata_read  # type: ignore[method-assign]

        outcome = await repository.get("test-123")

        # The exception is swallowed and reported as "not found".
        assert outcome is None