julee 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. julee/api/app.py +9 -8
  2. julee/api/dependencies.py +15 -15
  3. julee/api/requests.py +10 -9
  4. julee/api/responses.py +2 -1
  5. julee/api/routers/__init__.py +5 -5
  6. julee/api/routers/assembly_specifications.py +5 -4
  7. julee/api/routers/documents.py +1 -1
  8. julee/api/routers/knowledge_service_configs.py +4 -3
  9. julee/api/routers/knowledge_service_queries.py +7 -6
  10. julee/api/routers/system.py +4 -3
  11. julee/api/routers/workflows.py +4 -5
  12. julee/api/services/system_initialization.py +6 -6
  13. julee/api/tests/routers/test_assembly_specifications.py +4 -3
  14. julee/api/tests/routers/test_documents.py +11 -10
  15. julee/api/tests/routers/test_knowledge_service_configs.py +7 -6
  16. julee/api/tests/routers/test_knowledge_service_queries.py +4 -3
  17. julee/api/tests/routers/test_system.py +5 -4
  18. julee/api/tests/routers/test_workflows.py +5 -4
  19. julee/api/tests/test_app.py +5 -4
  20. julee/api/tests/test_dependencies.py +3 -2
  21. julee/api/tests/test_requests.py +2 -1
  22. julee/contrib/__init__.py +15 -0
  23. julee/contrib/polling/__init__.py +47 -0
  24. julee/contrib/polling/domain/__init__.py +17 -0
  25. julee/contrib/polling/domain/models/__init__.py +13 -0
  26. julee/contrib/polling/domain/models/polling_config.py +39 -0
  27. julee/contrib/polling/domain/services/__init__.py +11 -0
  28. julee/contrib/polling/domain/services/poller.py +39 -0
  29. julee/contrib/polling/infrastructure/__init__.py +15 -0
  30. julee/contrib/polling/infrastructure/services/__init__.py +12 -0
  31. julee/contrib/polling/infrastructure/services/polling/__init__.py +12 -0
  32. julee/contrib/polling/infrastructure/services/polling/http/__init__.py +12 -0
  33. julee/contrib/polling/infrastructure/services/polling/http/http_poller_service.py +80 -0
  34. julee/contrib/polling/infrastructure/temporal/__init__.py +20 -0
  35. julee/contrib/polling/infrastructure/temporal/activities.py +42 -0
  36. julee/contrib/polling/infrastructure/temporal/activity_names.py +20 -0
  37. julee/contrib/polling/infrastructure/temporal/proxies.py +45 -0
  38. julee/contrib/polling/tests/__init__.py +6 -0
  39. julee/contrib/polling/tests/unit/__init__.py +6 -0
  40. julee/contrib/polling/tests/unit/infrastructure/__init__.py +7 -0
  41. julee/contrib/polling/tests/unit/infrastructure/services/__init__.py +6 -0
  42. julee/contrib/polling/tests/unit/infrastructure/services/polling/__init__.py +6 -0
  43. julee/contrib/polling/tests/unit/infrastructure/services/polling/http/__init__.py +7 -0
  44. julee/contrib/polling/tests/unit/infrastructure/services/polling/http/test_http_poller_service.py +163 -0
  45. julee/docs/__init__.py +5 -0
  46. julee/docs/sphinx_hcd/__init__.py +76 -0
  47. julee/docs/sphinx_hcd/accelerators.py +1175 -0
  48. julee/docs/sphinx_hcd/apps.py +518 -0
  49. julee/docs/sphinx_hcd/config.py +148 -0
  50. julee/docs/sphinx_hcd/epics.py +453 -0
  51. julee/docs/sphinx_hcd/integrations.py +310 -0
  52. julee/docs/sphinx_hcd/journeys.py +797 -0
  53. julee/docs/sphinx_hcd/personas.py +457 -0
  54. julee/docs/sphinx_hcd/stories.py +960 -0
  55. julee/docs/sphinx_hcd/utils.py +185 -0
  56. julee/domain/models/__init__.py +5 -6
  57. julee/domain/models/assembly/assembly.py +7 -7
  58. julee/domain/models/assembly/tests/factories.py +2 -1
  59. julee/domain/models/assembly/tests/test_assembly.py +16 -13
  60. julee/domain/models/assembly_specification/assembly_specification.py +11 -10
  61. julee/domain/models/assembly_specification/knowledge_service_query.py +7 -6
  62. julee/domain/models/assembly_specification/tests/factories.py +2 -1
  63. julee/domain/models/assembly_specification/tests/test_assembly_specification.py +9 -6
  64. julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +3 -1
  65. julee/domain/models/custom_fields/content_stream.py +3 -2
  66. julee/domain/models/custom_fields/tests/test_custom_fields.py +2 -1
  67. julee/domain/models/document/document.py +23 -30
  68. julee/domain/models/document/tests/factories.py +3 -2
  69. julee/domain/models/document/tests/test_document.py +20 -37
  70. julee/domain/models/knowledge_service_config/knowledge_service_config.py +4 -4
  71. julee/domain/models/policy/__init__.py +4 -4
  72. julee/domain/models/policy/document_policy_validation.py +17 -17
  73. julee/domain/models/policy/policy.py +10 -10
  74. julee/domain/models/policy/tests/factories.py +2 -1
  75. julee/domain/models/policy/tests/test_document_policy_validation.py +3 -1
  76. julee/domain/models/policy/tests/test_policy.py +2 -1
  77. julee/domain/repositories/__init__.py +3 -3
  78. julee/domain/repositories/assembly.py +3 -1
  79. julee/domain/repositories/assembly_specification.py +2 -0
  80. julee/domain/repositories/base.py +5 -4
  81. julee/domain/repositories/document.py +3 -1
  82. julee/domain/repositories/document_policy_validation.py +3 -1
  83. julee/domain/repositories/knowledge_service_config.py +2 -0
  84. julee/domain/repositories/knowledge_service_query.py +1 -0
  85. julee/domain/repositories/policy.py +3 -1
  86. julee/domain/use_cases/decorators.py +3 -2
  87. julee/domain/use_cases/extract_assemble_data.py +14 -13
  88. julee/domain/use_cases/initialize_system_data.py +88 -34
  89. julee/domain/use_cases/tests/test_extract_assemble_data.py +10 -10
  90. julee/domain/use_cases/tests/test_initialize_system_data.py +2 -2
  91. julee/domain/use_cases/tests/test_validate_document.py +11 -11
  92. julee/domain/use_cases/validate_document.py +14 -14
  93. julee/fixtures/documents.yaml +4 -43
  94. julee/fixtures/knowledge_service_queries.yaml +9 -0
  95. julee/maintenance/__init__.py +1 -0
  96. julee/maintenance/release.py +243 -0
  97. julee/repositories/memory/assembly.py +6 -5
  98. julee/repositories/memory/assembly_specification.py +8 -9
  99. julee/repositories/memory/base.py +12 -11
  100. julee/repositories/memory/document.py +27 -20
  101. julee/repositories/memory/document_policy_validation.py +7 -6
  102. julee/repositories/memory/knowledge_service_config.py +8 -7
  103. julee/repositories/memory/knowledge_service_query.py +8 -7
  104. julee/repositories/memory/policy.py +6 -5
  105. julee/repositories/memory/tests/test_document.py +24 -22
  106. julee/repositories/memory/tests/test_document_policy_validation.py +2 -1
  107. julee/repositories/memory/tests/test_policy.py +2 -1
  108. julee/repositories/minio/assembly.py +4 -4
  109. julee/repositories/minio/assembly_specification.py +6 -8
  110. julee/repositories/minio/client.py +22 -25
  111. julee/repositories/minio/document.py +36 -33
  112. julee/repositories/minio/document_policy_validation.py +5 -5
  113. julee/repositories/minio/knowledge_service_config.py +6 -6
  114. julee/repositories/minio/knowledge_service_query.py +6 -9
  115. julee/repositories/minio/policy.py +4 -4
  116. julee/repositories/minio/tests/fake_client.py +11 -9
  117. julee/repositories/minio/tests/test_assembly.py +3 -1
  118. julee/repositories/minio/tests/test_assembly_specification.py +2 -1
  119. julee/repositories/minio/tests/test_client_protocol.py +5 -5
  120. julee/repositories/minio/tests/test_document.py +23 -22
  121. julee/repositories/minio/tests/test_document_policy_validation.py +3 -1
  122. julee/repositories/minio/tests/test_knowledge_service_config.py +4 -2
  123. julee/repositories/minio/tests/test_knowledge_service_query.py +3 -2
  124. julee/repositories/minio/tests/test_policy.py +3 -1
  125. julee/repositories/temporal/activities.py +5 -5
  126. julee/repositories/temporal/proxies.py +5 -5
  127. julee/services/knowledge_service/__init__.py +1 -2
  128. julee/services/knowledge_service/anthropic/knowledge_service.py +8 -7
  129. julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +11 -10
  130. julee/services/knowledge_service/factory.py +8 -8
  131. julee/services/knowledge_service/knowledge_service.py +12 -14
  132. julee/services/knowledge_service/memory/knowledge_service.py +13 -12
  133. julee/services/knowledge_service/memory/test_knowledge_service.py +10 -7
  134. julee/services/knowledge_service/test_factory.py +11 -10
  135. julee/services/temporal/activities.py +10 -10
  136. julee/services/temporal/proxies.py +2 -2
  137. julee/util/domain.py +6 -6
  138. julee/util/repos/minio/file_storage.py +8 -9
  139. julee/util/repos/temporal/client_proxies/file_storage.py +3 -4
  140. julee/util/repos/temporal/data_converter.py +6 -6
  141. julee/util/repos/temporal/minio_file_storage.py +1 -1
  142. julee/util/repos/temporal/proxies/file_storage.py +2 -3
  143. julee/util/repositories.py +4 -3
  144. julee/util/temporal/decorators.py +20 -18
  145. julee/util/tests/test_decorators.py +13 -15
  146. julee/util/validation/repository.py +3 -3
  147. julee/util/validation/type_guards.py +12 -11
  148. julee/worker.py +9 -8
  149. julee/workflows/__init__.py +2 -2
  150. julee/workflows/extract_assemble.py +2 -1
  151. julee/workflows/validate_document.py +3 -2
  152. {julee-0.1.2.dist-info → julee-0.1.4.dist-info}/METADATA +3 -3
  153. julee-0.1.4.dist-info/RECORD +196 -0
  154. julee/fixtures/assembly_specifications.yaml +0 -70
  155. julee-0.1.2.dist-info/RECORD +0 -161
  156. {julee-0.1.2.dist-info → julee-0.1.4.dist-info}/WHEEL +0 -0
  157. {julee-0.1.2.dist-info → julee-0.1.4.dist-info}/licenses/LICENSE +0 -0
  158. {julee-0.1.2.dist-info → julee-0.1.4.dist-info}/top_level.txt +0 -0
@@ -13,7 +13,7 @@ should be avoided.
13
13
  """
14
14
 
15
15
  import logging
16
- from typing import Dict, Optional, Any, List
16
+ from typing import Any
17
17
 
18
18
  from julee.domain.models.assembly_specification import (
19
19
  KnowledgeServiceQuery,
@@ -21,6 +21,7 @@ from julee.domain.models.assembly_specification import (
21
21
  from julee.domain.repositories.knowledge_service_query import (
22
22
  KnowledgeServiceQueryRepository,
23
23
  )
24
+
24
25
  from .base import MemoryRepositoryMixin
25
26
 
26
27
  logger = logging.getLogger(__name__)
@@ -47,11 +48,11 @@ class MemoryKnowledgeServiceQueryRepository(
47
48
  """Initialize repository with empty in-memory storage."""
48
49
  self.logger = logger
49
50
  self.entity_name = "KnowledgeServiceQuery"
50
- self.storage_dict: Dict[str, KnowledgeServiceQuery] = {}
51
+ self.storage_dict: dict[str, KnowledgeServiceQuery] = {}
51
52
 
52
53
  logger.debug("Initializing MemoryKnowledgeServiceQueryRepository")
53
54
 
54
- async def get(self, query_id: str) -> Optional[KnowledgeServiceQuery]:
55
+ async def get(self, query_id: str) -> KnowledgeServiceQuery | None:
55
56
  """Retrieve a knowledge service query by ID.
56
57
 
57
58
  Args:
@@ -71,8 +72,8 @@ class MemoryKnowledgeServiceQueryRepository(
71
72
  self.save_entity(query, "query_id")
72
73
 
73
74
  async def get_many(
74
- self, query_ids: List[str]
75
- ) -> Dict[str, Optional[KnowledgeServiceQuery]]:
75
+ self, query_ids: list[str]
76
+ ) -> dict[str, KnowledgeServiceQuery | None]:
76
77
  """Retrieve multiple knowledge service queries by ID.
77
78
 
78
79
  Args:
@@ -92,7 +93,7 @@ class MemoryKnowledgeServiceQueryRepository(
92
93
  """
93
94
  return self.generate_entity_id("query")
94
95
 
95
- async def list_all(self) -> List[KnowledgeServiceQuery]:
96
+ async def list_all(self) -> list[KnowledgeServiceQuery]:
96
97
  """List all knowledge service queries.
97
98
 
98
99
  Returns:
@@ -113,7 +114,7 @@ class MemoryKnowledgeServiceQueryRepository(
113
114
  return entities
114
115
 
115
116
  def _add_entity_specific_log_data(
116
- self, entity: KnowledgeServiceQuery, log_data: Dict[str, Any]
117
+ self, entity: KnowledgeServiceQuery, log_data: dict[str, Any]
117
118
  ) -> None:
118
119
  """Add knowledge service query-specific data to log entries."""
119
120
  super()._add_entity_specific_log_data(entity, log_data)
@@ -12,10 +12,11 @@ All operations are still async to maintain interface compatibility.
12
12
  """
13
13
 
14
14
  import logging
15
- from typing import Optional, Dict, Any, List
15
+ from typing import Any
16
16
 
17
17
  from julee.domain.models.policy import Policy
18
18
  from julee.domain.repositories.policy import PolicyRepository
19
+
19
20
  from .base import MemoryRepositoryMixin
20
21
 
21
22
  logger = logging.getLogger(__name__)
@@ -34,11 +35,11 @@ class MemoryPolicyRepository(PolicyRepository, MemoryRepositoryMixin[Policy]):
34
35
  """Initialize repository with empty in-memory storage."""
35
36
  self.logger = logger
36
37
  self.entity_name = "Policy"
37
- self.storage_dict: Dict[str, Policy] = {}
38
+ self.storage_dict: dict[str, Policy] = {}
38
39
 
39
40
  logger.debug("Initializing MemoryPolicyRepository")
40
41
 
41
- async def get(self, policy_id: str) -> Optional[Policy]:
42
+ async def get(self, policy_id: str) -> Policy | None:
42
43
  """Retrieve a policy by ID.
43
44
 
44
45
  Args:
@@ -65,7 +66,7 @@ class MemoryPolicyRepository(PolicyRepository, MemoryRepositoryMixin[Policy]):
65
66
  """
66
67
  return self.generate_entity_id("policy")
67
68
 
68
- async def get_many(self, policy_ids: List[str]) -> Dict[str, Optional[Policy]]:
69
+ async def get_many(self, policy_ids: list[str]) -> dict[str, Policy | None]:
69
70
  """Retrieve multiple policies by ID.
70
71
 
71
72
  Args:
@@ -77,7 +78,7 @@ class MemoryPolicyRepository(PolicyRepository, MemoryRepositoryMixin[Policy]):
77
78
  return self.get_many_entities(policy_ids)
78
79
 
79
80
  def _add_entity_specific_log_data(
80
- self, entity: Policy, log_data: Dict[str, Any]
81
+ self, entity: Policy, log_data: dict[str, Any]
81
82
  ) -> None:
82
83
  """Add policy-specific data to log entries."""
83
84
  super()._add_entity_specific_log_data(entity, log_data)
@@ -3,18 +3,20 @@ Unit tests for MemoryDocumentRepository.
3
3
 
4
4
  These tests verify the memory implementation logic without requiring external
5
5
  dependencies. They follow the Clean Architecture testing patterns and verify
6
- idempotency, error handling, and content operations including content_string.
6
+ idempotency, error handling, and content operations including content_bytes.
7
7
  """
8
8
 
9
9
  import io
10
+
10
11
  import pytest
11
- from julee.repositories.memory.document import (
12
- MemoryDocumentRepository,
13
- )
14
- from julee.domain.models.document import Document, DocumentStatus
12
+
15
13
  from julee.domain.models.custom_fields.content_stream import (
16
14
  ContentStream,
17
15
  )
16
+ from julee.domain.models.document import Document, DocumentStatus
17
+ from julee.repositories.memory.document import (
18
+ MemoryDocumentRepository,
19
+ )
18
20
 
19
21
 
20
22
  @pytest.fixture
@@ -44,16 +46,16 @@ def sample_document(sample_content: ContentStream) -> Document:
44
46
  )
45
47
 
46
48
 
47
- class TestMemoryDocumentRepositoryContentString:
48
- """Test content_string functionality."""
49
+ class TestMemoryDocumentRepositoryContentBytes:
50
+ """Test content_bytes functionality."""
49
51
 
50
- async def test_save_document_with_content_string(
52
+ async def test_save_document_with_content_bytes(
51
53
  self, repository: MemoryDocumentRepository
52
54
  ) -> None:
53
- """Test saving document with content_string (small content)."""
55
+ """Test saving document with content_bytes."""
54
56
  content = '{"assembled": "document", "data": "test"}'
55
57
 
56
- # Create document with content_string
58
+ # Create document with content_bytes
57
59
  document = Document(
58
60
  document_id="test-doc-content-string",
59
61
  original_filename="assembled.json",
@@ -61,10 +63,10 @@ class TestMemoryDocumentRepositoryContentString:
61
63
  size_bytes=100, # Will be updated automatically
62
64
  content_multihash="placeholder", # Will be updated automatically
63
65
  status=DocumentStatus.CAPTURED,
64
- content_string=content,
66
+ content_bytes=content,
65
67
  )
66
68
 
67
- # Act - save should convert content_string to ContentStream
69
+ # Act - save should convert content_bytes to ContentStream
68
70
  await repository.save(document)
69
71
 
70
72
  # Assert document was saved successfully
@@ -78,10 +80,10 @@ class TestMemoryDocumentRepositoryContentString:
78
80
  retrieved_content = retrieved.content.read().decode("utf-8")
79
81
  assert retrieved_content == content
80
82
 
81
- async def test_save_document_with_content_string_unicode(
83
+ async def test_save_document_with_content_bytes_unicode(
82
84
  self, repository: MemoryDocumentRepository
83
85
  ) -> None:
84
- """Test saving document with unicode content_string."""
86
+ """Test saving document with unicode content_bytes."""
85
87
  content = '{"title": "测试文档", "emoji": "🚀", "content": "éñ"}'
86
88
 
87
89
  document = Document(
@@ -91,7 +93,7 @@ class TestMemoryDocumentRepositoryContentString:
91
93
  size_bytes=100,
92
94
  content_multihash="placeholder",
93
95
  status=DocumentStatus.CAPTURED,
94
- content_string=content,
96
+ content_bytes=content,
95
97
  )
96
98
 
97
99
  await repository.save(document)
@@ -105,10 +107,10 @@ class TestMemoryDocumentRepositoryContentString:
105
107
  # Note: Empty content test removed because domain model requires
106
108
  # size_bytes > 0
107
109
 
108
- async def test_save_excludes_content_string_from_storage(
110
+ async def test_save_excludes_content_bytes_from_storage(
109
111
  self, repository: MemoryDocumentRepository
110
112
  ) -> None:
111
- """Test that content_string is not stored in memory storage."""
113
+ """Test that content_bytes is not stored in memory storage."""
112
114
  content = '{"test": "data that should not be in storage"}'
113
115
 
114
116
  document = Document(
@@ -118,7 +120,7 @@ class TestMemoryDocumentRepositoryContentString:
118
120
  size_bytes=100,
119
121
  content_multihash="placeholder",
120
122
  status=DocumentStatus.CAPTURED,
121
- content_string=content,
123
+ content_bytes=content,
122
124
  )
123
125
 
124
126
  await repository.save(document)
@@ -127,8 +129,8 @@ class TestMemoryDocumentRepositoryContentString:
127
129
  stored_document = repository.storage_dict.get("test-storage-exclusion")
128
130
  assert stored_document is not None
129
131
 
130
- # Verify content_string is not in stored document
131
- assert stored_document.content_string is None
132
+ # Verify content_bytes is not in stored document
133
+ assert stored_document.content_bytes is None
132
134
 
133
135
  # Verify essential fields are still present
134
136
  assert stored_document.document_id == "test-storage-exclusion"
@@ -193,7 +195,7 @@ class TestMemoryDocumentRepositoryErrorHandling:
193
195
  size_bytes=100,
194
196
  content_multihash="test_hash",
195
197
  status=DocumentStatus.CAPTURED,
196
- content_string="test content",
198
+ content_bytes="test content",
197
199
  )
198
200
 
199
201
  async def test_save_handles_empty_filename(
@@ -208,5 +210,5 @@ class TestMemoryDocumentRepositoryErrorHandling:
208
210
  size_bytes=100,
209
211
  content_multihash="test_hash",
210
212
  status=DocumentStatus.CAPTURED,
211
- content_string="test content",
213
+ content_bytes="test content",
212
214
  )
@@ -6,10 +6,11 @@ repository implementation, focusing on functionality specific to this
6
6
  repository that differs from the inherited mixins.
7
7
  """
8
8
 
9
- import pytest
10
9
  from datetime import datetime, timezone
11
10
  from typing import Any
12
11
 
12
+ import pytest
13
+
13
14
  from julee.domain.models.policy import (
14
15
  DocumentPolicyValidation,
15
16
  DocumentPolicyValidationStatus,
@@ -6,9 +6,10 @@ repository implementation, following the testing patterns established in the
6
6
  project.
7
7
  """
8
8
 
9
- import pytest
10
9
  from datetime import datetime, timezone
11
10
 
11
+ import pytest
12
+
12
13
  from julee.domain.models.policy import Policy, PolicyStatus
13
14
  from julee.repositories.memory.policy import MemoryPolicyRepository
14
15
 
@@ -11,10 +11,10 @@ the large payload handling pattern from the architectural guidelines.
11
11
  """
12
12
 
13
13
  import logging
14
- from typing import Optional, List, Dict
15
14
 
16
15
  from julee.domain.models.assembly import Assembly
17
16
  from julee.domain.repositories.assembly import AssemblyRepository
17
+
18
18
  from .client import MinioClient, MinioRepositoryMixin
19
19
 
20
20
 
@@ -37,7 +37,7 @@ class MinioAssemblyRepository(AssemblyRepository, MinioRepositoryMixin):
37
37
  self.assembly_bucket = "assemblies"
38
38
  self.ensure_buckets_exist([self.assembly_bucket])
39
39
 
40
- async def get(self, assembly_id: str) -> Optional[Assembly]:
40
+ async def get(self, assembly_id: str) -> Assembly | None:
41
41
  """Retrieve an assembly by ID."""
42
42
  # Get the assembly using mixin methods
43
43
  assembly = self.get_json_object(
@@ -69,7 +69,7 @@ class MinioAssemblyRepository(AssemblyRepository, MinioRepositoryMixin):
69
69
  },
70
70
  )
71
71
 
72
- async def get_many(self, assembly_ids: List[str]) -> Dict[str, Optional[Assembly]]:
72
+ async def get_many(self, assembly_ids: list[str]) -> dict[str, Assembly | None]:
73
73
  """Retrieve multiple assemblies by ID.
74
74
 
75
75
  Args:
@@ -92,7 +92,7 @@ class MinioAssemblyRepository(AssemblyRepository, MinioRepositoryMixin):
92
92
  )
93
93
 
94
94
  # Convert object names back to assembly IDs for the result
95
- result: Dict[str, Optional[Assembly]] = {}
95
+ result: dict[str, Assembly | None] = {}
96
96
  for assembly_id in assembly_ids:
97
97
  result[assembly_id] = object_results[assembly_id]
98
98
 
@@ -14,7 +14,6 @@ schema and query mappings.
14
14
  """
15
15
 
16
16
  import logging
17
- from typing import Optional, List, Dict
18
17
 
19
18
  from julee.domain.models.assembly_specification import (
20
19
  AssemblySpecification,
@@ -22,6 +21,7 @@ from julee.domain.models.assembly_specification import (
22
21
  from julee.domain.repositories.assembly_specification import (
23
22
  AssemblySpecificationRepository,
24
23
  )
24
+
25
25
  from .client import MinioClient, MinioRepositoryMixin
26
26
 
27
27
 
@@ -48,9 +48,7 @@ class MinioAssemblySpecificationRepository(
48
48
  self.specifications_bucket = "assembly-specifications"
49
49
  self.ensure_buckets_exist(self.specifications_bucket)
50
50
 
51
- async def get(
52
- self, assembly_specification_id: str
53
- ) -> Optional[AssemblySpecification]:
51
+ async def get(self, assembly_specification_id: str) -> AssemblySpecification | None:
54
52
  """Retrieve an assembly specification by ID."""
55
53
  object_name = f"spec/{assembly_specification_id}"
56
54
 
@@ -87,8 +85,8 @@ class MinioAssemblySpecificationRepository(
87
85
  )
88
86
 
89
87
  async def get_many(
90
- self, assembly_specification_ids: List[str]
91
- ) -> Dict[str, Optional[AssemblySpecification]]:
88
+ self, assembly_specification_ids: list[str]
89
+ ) -> dict[str, AssemblySpecification | None]:
92
90
  """Retrieve multiple assembly specifications by ID.
93
91
 
94
92
  Args:
@@ -113,7 +111,7 @@ class MinioAssemblySpecificationRepository(
113
111
  )
114
112
 
115
113
  # Convert object names back to specification IDs for the result
116
- result: Dict[str, Optional[AssemblySpecification]] = {}
114
+ result: dict[str, AssemblySpecification | None] = {}
117
115
  for i, spec_id in enumerate(assembly_specification_ids):
118
116
  object_name = object_names[i]
119
117
  result[spec_id] = object_results[object_name]
@@ -124,7 +122,7 @@ class MinioAssemblySpecificationRepository(
124
122
  """Generate a unique assembly specification identifier."""
125
123
  return self.generate_id_with_prefix("spec")
126
124
 
127
- async def list_all(self) -> List[AssemblySpecification]:
125
+ async def list_all(self) -> list[AssemblySpecification]:
128
126
  """List all assembly specifications.
129
127
 
130
128
  Returns:
@@ -15,21 +15,18 @@ import io
15
15
  import json
16
16
  from datetime import datetime, timezone
17
17
  from typing import (
18
- Protocol,
19
18
  Any,
20
- Dict,
21
- Optional,
22
- runtime_checkable,
23
- List,
24
- Union,
25
- TypeVar,
26
19
  BinaryIO,
20
+ Protocol,
21
+ TypeVar,
22
+ runtime_checkable,
27
23
  )
28
- from urllib3.response import BaseHTTPResponse
29
- from minio.datatypes import Object
24
+
30
25
  from minio.api import ObjectWriteResult
26
+ from minio.datatypes import Object
31
27
  from minio.error import S3Error # type: ignore[import-untyped]
32
28
  from pydantic import BaseModel
29
+ from urllib3.response import BaseHTTPResponse
33
30
 
34
31
  # Import ContentStream here to avoid circular imports
35
32
  from julee.domain.models.custom_fields.content_stream import (
@@ -78,7 +75,7 @@ class MinioClient(Protocol):
78
75
  data: BinaryIO,
79
76
  length: int,
80
77
  content_type: str = "application/octet-stream",
81
- metadata: Optional[Dict[str, Union[str, List[str], tuple[str]]]] = None,
78
+ metadata: dict[str, str | list[str] | tuple[str]] | None = None,
82
79
  ) -> ObjectWriteResult:
83
80
  """Store an object in the bucket.
84
81
 
@@ -167,7 +164,7 @@ class MinioRepositoryMixin:
167
164
  client: MinioClient
168
165
  logger: Any # logging.Logger, but avoiding import
169
166
 
170
- def ensure_buckets_exist(self, bucket_names: Union[str, List[str]]) -> None:
167
+ def ensure_buckets_exist(self, bucket_names: str | list[str]) -> None:
171
168
  """Ensure one or more buckets exist, creating them if necessary.
172
169
 
173
170
  Args:
@@ -202,12 +199,12 @@ class MinioRepositoryMixin:
202
199
  def get_many_json_objects(
203
200
  self,
204
201
  bucket_name: str,
205
- object_names: List[str],
202
+ object_names: list[str],
206
203
  model_class: type[T],
207
204
  not_found_log_message: str,
208
205
  error_log_message: str,
209
- extra_log_data: Optional[Dict[str, Any]] = None,
210
- ) -> Dict[str, Optional[T]]:
206
+ extra_log_data: dict[str, Any] | None = None,
207
+ ) -> dict[str, T | None]:
211
208
  """Get multiple JSON objects from Minio and deserialize them.
212
209
 
213
210
  Note: S3/MinIO does not have native batch retrieval operations.
@@ -232,7 +229,7 @@ class MinioRepositoryMixin:
232
229
  S3Error: For non-NoSuchKey errors
233
230
  """
234
231
  extra_log_data = extra_log_data or {}
235
- result: Dict[str, Optional[T]] = {}
232
+ result: dict[str, T | None] = {}
236
233
  found_count = 0
237
234
 
238
235
  self.logger.debug(
@@ -297,11 +294,11 @@ class MinioRepositoryMixin:
297
294
  def get_many_binary_objects(
298
295
  self,
299
296
  bucket_name: str,
300
- object_names: List[str],
297
+ object_names: list[str],
301
298
  not_found_log_message: str,
302
299
  error_log_message: str,
303
- extra_log_data: Optional[Dict[str, Any]] = None,
304
- ) -> Dict[str, Optional[ContentStream]]:
300
+ extra_log_data: dict[str, Any] | None = None,
301
+ ) -> dict[str, ContentStream | None]:
305
302
  """Get multiple binary objects from Minio as ContentStreams.
306
303
 
307
304
  Note: S3/MinIO does not have native batch retrieval operations.
@@ -322,7 +319,7 @@ class MinioRepositoryMixin:
322
319
  S3Error: For non-NoSuchKey errors
323
320
  """
324
321
  extra_log_data = extra_log_data or {}
325
- result: Dict[str, Optional[ContentStream]] = {}
322
+ result: dict[str, ContentStream | None] = {}
326
323
  found_count = 0
327
324
 
328
325
  self.logger.debug(
@@ -383,8 +380,8 @@ class MinioRepositoryMixin:
383
380
  model_class: type[T],
384
381
  not_found_log_message: str,
385
382
  error_log_message: str,
386
- extra_log_data: Optional[Dict[str, Any]] = None,
387
- ) -> Optional[T]:
383
+ extra_log_data: dict[str, Any] | None = None,
384
+ ) -> T | None:
388
385
  """Get a JSON object from Minio and deserialize it to a Pydantic
389
386
  model.
390
387
 
@@ -441,7 +438,7 @@ class MinioRepositoryMixin:
441
438
  model: BaseModel,
442
439
  success_log_message: str,
443
440
  error_log_message: str,
444
- extra_log_data: Optional[Dict[str, Any]] = None,
441
+ extra_log_data: dict[str, Any] | None = None,
445
442
  ) -> None:
446
443
  """Store a Pydantic model as a JSON object in Minio.
447
444
 
@@ -494,11 +491,11 @@ class MinioRepositoryMixin:
494
491
 
495
492
  # Set created_at if it's None (for new objects)
496
493
  if hasattr(model, "created_at") and getattr(model, "created_at", None) is None:
497
- setattr(model, "created_at", now)
494
+ model.created_at = now
498
495
 
499
496
  # Always update updated_at
500
497
  if hasattr(model, "updated_at"):
501
- setattr(model, "updated_at", now)
498
+ model.updated_at = now
502
499
 
503
500
  def generate_id_with_prefix(self, prefix: str) -> str:
504
501
  """Generate a unique ID with the given prefix and log the generation.
@@ -530,7 +527,7 @@ class MinioRepositoryMixin:
530
527
  bucket_name: str,
531
528
  prefix: str,
532
529
  entity_type_name: str,
533
- ) -> List[str]:
530
+ ) -> list[str]:
534
531
  """Extract entity IDs from objects with a given prefix.
535
532
 
536
533
  This method provides a common implementation for listing objects
@@ -11,23 +11,23 @@ The implementation separates document metadata (stored as JSON) from content
11
11
  payload handling pattern from the architectural guidelines.
12
12
  """
13
13
 
14
+ import hashlib
14
15
  import io
15
16
  import json
16
- import hashlib
17
17
  import logging
18
18
  from datetime import datetime, timezone
19
- from typing import Optional, List, Dict
20
19
 
21
- from minio.error import S3Error # type: ignore[import-untyped]
22
20
  import multihash # type: ignore[import-untyped]
21
+ from minio.error import S3Error # type: ignore[import-untyped]
22
+ from pydantic import BaseModel, ConfigDict
23
23
 
24
- from julee.domain.models.document import Document
25
24
  from julee.domain.models.custom_fields.content_stream import (
26
25
  ContentStream,
27
26
  )
27
+ from julee.domain.models.document import Document
28
28
  from julee.domain.repositories.document import DocumentRepository
29
+
29
30
  from .client import MinioClient, MinioRepositoryMixin
30
- from pydantic import BaseModel, ConfigDict
31
31
 
32
32
 
33
33
  class RawMetadata(BaseModel):
@@ -36,7 +36,7 @@ class RawMetadata(BaseModel):
36
36
  model_config = ConfigDict(extra="allow") # Allow arbitrary fields
37
37
 
38
38
  # Only include fields we actually use for type safety
39
- content_multihash: Optional[str] = None
39
+ content_multihash: str | None = None
40
40
 
41
41
 
42
42
  class MinioDocumentRepository(DocumentRepository, MinioRepositoryMixin):
@@ -63,7 +63,7 @@ class MinioDocumentRepository(DocumentRepository, MinioRepositoryMixin):
63
63
  self.content_bucket = "documents-content"
64
64
  self.ensure_buckets_exist([self.metadata_bucket, self.content_bucket])
65
65
 
66
- async def get(self, document_id: str) -> Optional[Document]:
66
+ async def get(self, document_id: str) -> Document | None:
67
67
  """Retrieve a document with metadata and content."""
68
68
  try:
69
69
  # First, get the metadata
@@ -175,27 +175,7 @@ class MinioDocumentRepository(DocumentRepository, MinioRepositoryMixin):
175
175
 
176
176
  try:
177
177
  # Handle content_string conversion (only if no content provided)
178
- if document.content_string is not None:
179
- # Convert content_string to ContentStream
180
- assert document.content_string is not None # For MyPy
181
- content_bytes = document.content_string.encode("utf-8")
182
- content_stream = ContentStream(io.BytesIO(content_bytes))
183
-
184
- # Create new document with ContentStream
185
- document = document.model_copy(
186
- update={
187
- "content": content_stream,
188
- "size_bytes": len(content_bytes),
189
- }
190
- )
191
-
192
- self.logger.debug(
193
- "Converted content_string to ContentStream",
194
- extra={
195
- "document_id": document.document_id,
196
- "content_length": len(content_bytes),
197
- },
198
- )
178
+ document = self._normalize_document_content(document)
199
179
 
200
180
  # Store content first and get calculated multihash
201
181
  calculated_multihash = await self._store_content(document)
@@ -234,7 +214,7 @@ class MinioDocumentRepository(DocumentRepository, MinioRepositoryMixin):
234
214
  )
235
215
  raise
236
216
 
237
- async def get_many(self, document_ids: List[str]) -> Dict[str, Optional[Document]]:
217
+ async def get_many(self, document_ids: list[str]) -> dict[str, Document | None]:
238
218
  """Retrieve multiple documents by ID using batch operations.
239
219
 
240
220
  Args:
@@ -271,7 +251,7 @@ class MinioDocumentRepository(DocumentRepository, MinioRepositoryMixin):
271
251
  )
272
252
 
273
253
  # Use RawMetadata objects directly
274
- metadata_results: Dict[str, Optional[RawMetadata]] = raw_metadata_results
254
+ metadata_results: dict[str, RawMetadata | None] = raw_metadata_results
275
255
 
276
256
  # Step 2: Extract unique content multihashes from found metadata
277
257
  content_hashes = set()
@@ -294,7 +274,7 @@ class MinioDocumentRepository(DocumentRepository, MinioRepositoryMixin):
294
274
  )
295
275
 
296
276
  # Step 4: Splice metadata and content together into Documents
297
- result: Dict[str, Optional[Document]] = {}
277
+ result: dict[str, Document | None] = {}
298
278
  for document_id in document_ids:
299
279
  metadata = metadata_results.get(document_id)
300
280
  if not metadata:
@@ -335,7 +315,7 @@ class MinioDocumentRepository(DocumentRepository, MinioRepositoryMixin):
335
315
 
336
316
  return result
337
317
 
338
- async def list_all(self) -> List[Document]:
318
+ async def list_all(self) -> list[Document]:
339
319
  """List all documents.
340
320
 
341
321
  Returns:
@@ -449,6 +429,29 @@ class MinioDocumentRepository(DocumentRepository, MinioRepositoryMixin):
449
429
  )
450
430
  raise
451
431
 
432
+ def _normalize_document_content(self, document: Document) -> Document:
433
+ """Ensure document has a ContentStream in content"""
434
+ if document.content is not None:
435
+ return document
436
+
437
+ content_bytes = document.content_bytes
438
+ if content_bytes is not None:
439
+ if isinstance(content_bytes, str):
440
+ content_bytes = content_bytes.encode("utf-8")
441
+
442
+ stream = ContentStream(io.BytesIO(content_bytes))
443
+ size_bytes = len(content_bytes)
444
+ return document.model_copy(
445
+ update={
446
+ "content": stream,
447
+ "size_bytes": size_bytes,
448
+ }
449
+ )
450
+
451
+ raise ValueError(
452
+ f"Document {document.document_id} has no content, content_bytes"
453
+ )
454
+
452
455
  def _calculate_multihash_from_stream(self, content_stream: ContentStream) -> str:
453
456
  """Calculate multihash from content stream."""
454
457
  if not content_stream:
@@ -471,7 +474,7 @@ class MinioDocumentRepository(DocumentRepository, MinioRepositoryMixin):
471
474
 
472
475
  # Serialize metadata (content stream and content_string excluded)
473
476
  metadata_json = document.model_dump_json(
474
- exclude={"content", "content_string"}
477
+ exclude={"content", "content_string", "content_bytes"}
475
478
  ).encode("utf-8")
476
479
 
477
480
  try:
@@ -14,12 +14,12 @@ status, scores, transformation results, and metadata.
14
14
  """
15
15
 
16
16
  import logging
17
- from typing import Optional, List, Dict
18
17
 
19
18
  from julee.domain.models.policy import DocumentPolicyValidation
20
19
  from julee.domain.repositories.document_policy_validation import (
21
20
  DocumentPolicyValidationRepository,
22
21
  )
22
+
23
23
  from .client import MinioClient, MinioRepositoryMixin
24
24
 
25
25
 
@@ -47,7 +47,7 @@ class MinioDocumentPolicyValidationRepository(
47
47
  self.validations_bucket = "document-policy-validations"
48
48
  self.ensure_buckets_exist(self.validations_bucket)
49
49
 
50
- async def get(self, validation_id: str) -> Optional[DocumentPolicyValidation]:
50
+ async def get(self, validation_id: str) -> DocumentPolicyValidation | None:
51
51
  """Retrieve a document policy validation by ID."""
52
52
  return self.get_json_object(
53
53
  bucket_name=self.validations_bucket,
@@ -88,8 +88,8 @@ class MinioDocumentPolicyValidationRepository(
88
88
  return self.generate_id_with_prefix("validation")
89
89
 
90
90
  async def get_many(
91
- self, validation_ids: List[str]
92
- ) -> Dict[str, Optional[DocumentPolicyValidation]]:
91
+ self, validation_ids: list[str]
92
+ ) -> dict[str, DocumentPolicyValidation | None]:
93
93
  """Retrieve multiple document policy validations by ID.
94
94
 
95
95
  Args:
@@ -113,7 +113,7 @@ class MinioDocumentPolicyValidationRepository(
113
113
  )
114
114
 
115
115
  # Convert object names back to validation IDs for the result
116
- result: Dict[str, Optional[DocumentPolicyValidation]] = {}
116
+ result: dict[str, DocumentPolicyValidation | None] = {}
117
117
  for validation_id in validation_ids:
118
118
  result[validation_id] = object_results[validation_id]
119
119