julee 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. julee/__init__.py +3 -0
  2. julee/api/__init__.py +20 -0
  3. julee/api/app.py +180 -0
  4. julee/api/dependencies.py +257 -0
  5. julee/api/requests.py +175 -0
  6. julee/api/responses.py +43 -0
  7. julee/api/routers/__init__.py +43 -0
  8. julee/api/routers/assembly_specifications.py +212 -0
  9. julee/api/routers/documents.py +182 -0
  10. julee/api/routers/knowledge_service_configs.py +79 -0
  11. julee/api/routers/knowledge_service_queries.py +293 -0
  12. julee/api/routers/system.py +137 -0
  13. julee/api/routers/workflows.py +234 -0
  14. julee/api/services/__init__.py +20 -0
  15. julee/api/services/system_initialization.py +214 -0
  16. julee/api/tests/__init__.py +14 -0
  17. julee/api/tests/routers/__init__.py +17 -0
  18. julee/api/tests/routers/test_assembly_specifications.py +749 -0
  19. julee/api/tests/routers/test_documents.py +301 -0
  20. julee/api/tests/routers/test_knowledge_service_configs.py +234 -0
  21. julee/api/tests/routers/test_knowledge_service_queries.py +738 -0
  22. julee/api/tests/routers/test_system.py +179 -0
  23. julee/api/tests/routers/test_workflows.py +393 -0
  24. julee/api/tests/test_app.py +285 -0
  25. julee/api/tests/test_dependencies.py +245 -0
  26. julee/api/tests/test_requests.py +250 -0
  27. julee/domain/__init__.py +22 -0
  28. julee/domain/models/__init__.py +49 -0
  29. julee/domain/models/assembly/__init__.py +17 -0
  30. julee/domain/models/assembly/assembly.py +103 -0
  31. julee/domain/models/assembly/tests/__init__.py +0 -0
  32. julee/domain/models/assembly/tests/factories.py +37 -0
  33. julee/domain/models/assembly/tests/test_assembly.py +430 -0
  34. julee/domain/models/assembly_specification/__init__.py +24 -0
  35. julee/domain/models/assembly_specification/assembly_specification.py +172 -0
  36. julee/domain/models/assembly_specification/knowledge_service_query.py +123 -0
  37. julee/domain/models/assembly_specification/tests/__init__.py +0 -0
  38. julee/domain/models/assembly_specification/tests/factories.py +78 -0
  39. julee/domain/models/assembly_specification/tests/test_assembly_specification.py +490 -0
  40. julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +310 -0
  41. julee/domain/models/custom_fields/__init__.py +0 -0
  42. julee/domain/models/custom_fields/content_stream.py +68 -0
  43. julee/domain/models/custom_fields/tests/__init__.py +0 -0
  44. julee/domain/models/custom_fields/tests/test_custom_fields.py +53 -0
  45. julee/domain/models/document/__init__.py +17 -0
  46. julee/domain/models/document/document.py +150 -0
  47. julee/domain/models/document/tests/__init__.py +0 -0
  48. julee/domain/models/document/tests/factories.py +76 -0
  49. julee/domain/models/document/tests/test_document.py +297 -0
  50. julee/domain/models/knowledge_service_config/__init__.py +17 -0
  51. julee/domain/models/knowledge_service_config/knowledge_service_config.py +86 -0
  52. julee/domain/models/policy/__init__.py +15 -0
  53. julee/domain/models/policy/document_policy_validation.py +220 -0
  54. julee/domain/models/policy/policy.py +203 -0
  55. julee/domain/models/policy/tests/__init__.py +0 -0
  56. julee/domain/models/policy/tests/factories.py +47 -0
  57. julee/domain/models/policy/tests/test_document_policy_validation.py +420 -0
  58. julee/domain/models/policy/tests/test_policy.py +546 -0
  59. julee/domain/repositories/__init__.py +27 -0
  60. julee/domain/repositories/assembly.py +45 -0
  61. julee/domain/repositories/assembly_specification.py +52 -0
  62. julee/domain/repositories/base.py +146 -0
  63. julee/domain/repositories/document.py +49 -0
  64. julee/domain/repositories/document_policy_validation.py +52 -0
  65. julee/domain/repositories/knowledge_service_config.py +54 -0
  66. julee/domain/repositories/knowledge_service_query.py +44 -0
  67. julee/domain/repositories/policy.py +49 -0
  68. julee/domain/use_cases/__init__.py +17 -0
  69. julee/domain/use_cases/decorators.py +107 -0
  70. julee/domain/use_cases/extract_assemble_data.py +649 -0
  71. julee/domain/use_cases/initialize_system_data.py +842 -0
  72. julee/domain/use_cases/tests/__init__.py +7 -0
  73. julee/domain/use_cases/tests/test_extract_assemble_data.py +548 -0
  74. julee/domain/use_cases/tests/test_initialize_system_data.py +455 -0
  75. julee/domain/use_cases/tests/test_validate_document.py +1228 -0
  76. julee/domain/use_cases/validate_document.py +736 -0
  77. julee/fixtures/assembly_specifications.yaml +70 -0
  78. julee/fixtures/documents.yaml +178 -0
  79. julee/fixtures/knowledge_service_configs.yaml +37 -0
  80. julee/fixtures/knowledge_service_queries.yaml +27 -0
  81. julee/repositories/__init__.py +17 -0
  82. julee/repositories/memory/__init__.py +31 -0
  83. julee/repositories/memory/assembly.py +84 -0
  84. julee/repositories/memory/assembly_specification.py +125 -0
  85. julee/repositories/memory/base.py +227 -0
  86. julee/repositories/memory/document.py +149 -0
  87. julee/repositories/memory/document_policy_validation.py +104 -0
  88. julee/repositories/memory/knowledge_service_config.py +123 -0
  89. julee/repositories/memory/knowledge_service_query.py +120 -0
  90. julee/repositories/memory/policy.py +87 -0
  91. julee/repositories/memory/tests/__init__.py +0 -0
  92. julee/repositories/memory/tests/test_document.py +212 -0
  93. julee/repositories/memory/tests/test_document_policy_validation.py +161 -0
  94. julee/repositories/memory/tests/test_policy.py +443 -0
  95. julee/repositories/minio/__init__.py +31 -0
  96. julee/repositories/minio/assembly.py +103 -0
  97. julee/repositories/minio/assembly_specification.py +170 -0
  98. julee/repositories/minio/client.py +570 -0
  99. julee/repositories/minio/document.py +530 -0
  100. julee/repositories/minio/document_policy_validation.py +120 -0
  101. julee/repositories/minio/knowledge_service_config.py +187 -0
  102. julee/repositories/minio/knowledge_service_query.py +211 -0
  103. julee/repositories/minio/policy.py +106 -0
  104. julee/repositories/minio/tests/__init__.py +0 -0
  105. julee/repositories/minio/tests/fake_client.py +213 -0
  106. julee/repositories/minio/tests/test_assembly.py +374 -0
  107. julee/repositories/minio/tests/test_assembly_specification.py +391 -0
  108. julee/repositories/minio/tests/test_client_protocol.py +57 -0
  109. julee/repositories/minio/tests/test_document.py +591 -0
  110. julee/repositories/minio/tests/test_document_policy_validation.py +192 -0
  111. julee/repositories/minio/tests/test_knowledge_service_config.py +374 -0
  112. julee/repositories/minio/tests/test_knowledge_service_query.py +438 -0
  113. julee/repositories/minio/tests/test_policy.py +559 -0
  114. julee/repositories/temporal/__init__.py +38 -0
  115. julee/repositories/temporal/activities.py +114 -0
  116. julee/repositories/temporal/activity_names.py +34 -0
  117. julee/repositories/temporal/proxies.py +159 -0
  118. julee/services/__init__.py +18 -0
  119. julee/services/knowledge_service/__init__.py +48 -0
  120. julee/services/knowledge_service/anthropic/__init__.py +12 -0
  121. julee/services/knowledge_service/anthropic/knowledge_service.py +331 -0
  122. julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +318 -0
  123. julee/services/knowledge_service/factory.py +138 -0
  124. julee/services/knowledge_service/knowledge_service.py +160 -0
  125. julee/services/knowledge_service/memory/__init__.py +13 -0
  126. julee/services/knowledge_service/memory/knowledge_service.py +278 -0
  127. julee/services/knowledge_service/memory/test_knowledge_service.py +345 -0
  128. julee/services/knowledge_service/test_factory.py +112 -0
  129. julee/services/temporal/__init__.py +38 -0
  130. julee/services/temporal/activities.py +86 -0
  131. julee/services/temporal/activity_names.py +22 -0
  132. julee/services/temporal/proxies.py +41 -0
  133. julee/util/__init__.py +0 -0
  134. julee/util/domain.py +119 -0
  135. julee/util/repos/__init__.py +0 -0
  136. julee/util/repos/minio/__init__.py +0 -0
  137. julee/util/repos/minio/file_storage.py +213 -0
  138. julee/util/repos/temporal/__init__.py +11 -0
  139. julee/util/repos/temporal/client_proxies/file_storage.py +68 -0
  140. julee/util/repos/temporal/data_converter.py +123 -0
  141. julee/util/repos/temporal/minio_file_storage.py +12 -0
  142. julee/util/repos/temporal/proxies/__init__.py +0 -0
  143. julee/util/repos/temporal/proxies/file_storage.py +58 -0
  144. julee/util/repositories.py +55 -0
  145. julee/util/temporal/__init__.py +22 -0
  146. julee/util/temporal/activities.py +123 -0
  147. julee/util/temporal/decorators.py +473 -0
  148. julee/util/tests/__init__.py +1 -0
  149. julee/util/tests/test_decorators.py +770 -0
  150. julee/util/validation/__init__.py +29 -0
  151. julee/util/validation/repository.py +100 -0
  152. julee/util/validation/type_guards.py +369 -0
  153. julee/worker.py +211 -0
  154. julee/workflows/__init__.py +26 -0
  155. julee/workflows/extract_assemble.py +215 -0
  156. julee/workflows/validate_document.py +228 -0
  157. julee-0.1.0.dist-info/METADATA +195 -0
  158. julee-0.1.0.dist-info/RECORD +161 -0
  159. julee-0.1.0.dist-info/WHEEL +5 -0
  160. julee-0.1.0.dist-info/licenses/LICENSE +674 -0
  161. julee-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,120 @@
1
+ """
2
+ Memory implementation of KnowledgeServiceQueryRepository.
3
+
4
+ This module provides an in-memory implementation of the
5
+ KnowledgeServiceQueryRepository protocol that follows the Clean Architecture
6
+ patterns defined in the Fun-Police Framework. It handles knowledge service
7
+ query storage in memory dictionaries, ensuring idempotency and proper error
8
+ handling.
9
+
10
+ The implementation uses Python dictionaries to store knowledge service query
11
+ data, making it ideal for testing scenarios where external dependencies
12
+ should be avoided.
13
+ """
14
+
15
+ import logging
16
+ from typing import Dict, Optional, Any, List
17
+
18
+ from julee.domain.models.assembly_specification import (
19
+ KnowledgeServiceQuery,
20
+ )
21
+ from julee.domain.repositories.knowledge_service_query import (
22
+ KnowledgeServiceQueryRepository,
23
+ )
24
+ from .base import MemoryRepositoryMixin
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
class MemoryKnowledgeServiceQueryRepository(
    KnowledgeServiceQueryRepository,
    MemoryRepositoryMixin[KnowledgeServiceQuery],
):
    """
    Dictionary-backed KnowledgeServiceQueryRepository.

    Queries live in ``storage_dict`` (query_id -> KnowledgeServiceQuery).
    Single/bulk lookup, saving and ID generation are forwarded to the
    ``*_entity`` helpers available on ``self``; listing and the extra log
    fields are implemented directly in this class.
    """

    def __init__(self) -> None:
        """Create an empty query store and set the logger and entity label."""
        self.storage_dict: Dict[str, KnowledgeServiceQuery] = {}
        self.entity_name = "KnowledgeServiceQuery"
        self.logger = logger

        logger.debug("Initializing MemoryKnowledgeServiceQueryRepository")

    async def get(self, query_id: str) -> Optional[KnowledgeServiceQuery]:
        """Fetch one knowledge service query.

        Args:
            query_id: Identifier of the query to look up

        Returns:
            Whatever ``get_entity`` yields for that ID (the query, or None
            when nothing is stored under it)
        """
        found = self.get_entity(query_id)
        return found

    async def save(self, query: KnowledgeServiceQuery) -> None:
        """Persist a knowledge service query in memory.

        Args:
            query: The KnowledgeServiceQuery to store; ``save_entity`` is told
                to use its ``query_id`` field as the identifier
        """
        self.save_entity(query, "query_id")

    async def get_many(
        self, query_ids: List[str]
    ) -> Dict[str, Optional[KnowledgeServiceQuery]]:
        """Fetch several knowledge service queries in one call.

        Args:
            query_ids: Identifiers of the queries to look up

        Returns:
            Mapping of each query_id to its KnowledgeServiceQuery, or to None
            for IDs that are not stored
        """
        lookup = self.get_many_entities(query_ids)
        return lookup

    async def generate_id(self) -> str:
        """Produce an identifier for a new query.

        Returns:
            The string returned by ``generate_entity_id("query")``
        """
        new_id = self.generate_entity_id("query")
        return new_id

    async def list_all(self) -> List[KnowledgeServiceQuery]:
        """Return every stored query, ordered by query_id.

        Returns:
            A new list holding all stored queries in ascending query_id order
        """
        self.logger.debug("MemoryKnowledgeServiceQueryRepository: Listing all queries")

        # sorted() builds a fresh list, so the backing dict is never mutated
        ordered = sorted(self.storage_dict.values(), key=lambda item: item.query_id)
        total = len(ordered)

        self.logger.info(
            f"MemoryKnowledgeServiceQueryRepository: Retrieved {total} queries",
            extra={"count": total},
        )

        return ordered

    def _add_entity_specific_log_data(
        self, entity: KnowledgeServiceQuery, log_data: Dict[str, Any]
    ) -> None:
        """Extend ``log_data`` with the query's name and knowledge service ID."""
        super()._add_entity_specific_log_data(entity, log_data)
        log_data.update(
            query_name=entity.name,
            knowledge_service_id=entity.knowledge_service_id,
        )
@@ -0,0 +1,87 @@
1
+ """
2
+ Memory implementation of PolicyRepository.
3
+
4
+ This module provides an in-memory implementation of the PolicyRepository
5
+ protocol that follows the Clean Architecture patterns defined in the
6
+ Fun-Police Framework. It handles policy storage in memory dictionaries,
7
+ ensuring idempotency and proper error handling.
8
+
9
+ The implementation uses Python dictionaries to store policy data, making it
10
+ ideal for testing scenarios where external dependencies should be avoided.
11
+ All operations are still async to maintain interface compatibility.
12
+ """
13
+
14
+ import logging
15
+ from typing import Optional, Dict, Any, List
16
+
17
+ from julee.domain.models.policy import Policy
18
+ from julee.domain.repositories.policy import PolicyRepository
19
+ from .base import MemoryRepositoryMixin
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
class MemoryPolicyRepository(PolicyRepository, MemoryRepositoryMixin[Policy]):
    """
    Dictionary-backed PolicyRepository.

    Policies are kept in ``storage_dict`` keyed by policy_id. Lookup, saving
    and ID generation are forwarded to the ``*_entity`` helpers available on
    ``self``; this class only wires them up and adds policy-specific log
    fields.
    """

    def __init__(self) -> None:
        """Create an empty policy store and set the logger and entity label."""
        self.storage_dict: Dict[str, Policy] = {}
        self.entity_name = "Policy"
        self.logger = logger

        logger.debug("Initializing MemoryPolicyRepository")

    async def get(self, policy_id: str) -> Optional[Policy]:
        """Fetch one policy.

        Args:
            policy_id: Identifier of the policy to look up

        Returns:
            Whatever ``get_entity`` yields for that ID (the Policy, or None
            when nothing is stored under it)
        """
        found = self.get_entity(policy_id)
        return found

    async def save(self, policy: Policy) -> None:
        """Persist a policy in memory.

        Args:
            policy: The Policy to store; ``save_entity`` is told to use its
                ``policy_id`` field as the identifier
        """
        self.save_entity(policy, "policy_id")

    async def generate_id(self) -> str:
        """Produce an identifier for a new policy.

        Returns:
            The string returned by ``generate_entity_id("policy")``
        """
        new_id = self.generate_entity_id("policy")
        return new_id

    async def get_many(self, policy_ids: List[str]) -> Dict[str, Optional[Policy]]:
        """Fetch several policies in one call.

        Args:
            policy_ids: Identifiers of the policies to look up

        Returns:
            Mapping of each policy_id to its Policy, or to None for IDs that
            are not stored
        """
        lookup = self.get_many_entities(policy_ids)
        return lookup

    def _add_entity_specific_log_data(
        self, entity: Policy, log_data: Dict[str, Any]
    ) -> None:
        """Extend ``log_data`` with the policy's title and validation summary."""
        super()._add_entity_specific_log_data(entity, log_data)
        log_data["title"] = entity.title
        score_count = len(entity.validation_scores)
        log_data["validation_scores_count"] = score_count
        log_data["has_transformations"] = entity.has_transformations
        log_data["is_validation_only"] = entity.is_validation_only
File without changes
@@ -0,0 +1,212 @@
1
+ """
2
+ Unit tests for MemoryDocumentRepository.
3
+
4
+ These tests verify the memory implementation logic without requiring external
5
+ dependencies. They follow the Clean Architecture testing patterns and verify
6
+ idempotency, error handling, and content operations including content_string.
7
+ """
8
+
9
+ import io
10
+ import pytest
11
+ from julee.repositories.memory.document import (
12
+ MemoryDocumentRepository,
13
+ )
14
+ from julee.domain.models.document import Document, DocumentStatus
15
+ from julee.domain.models.custom_fields.content_stream import (
16
+ ContentStream,
17
+ )
18
+
19
+
20
@pytest.fixture
def repository() -> MemoryDocumentRepository:
    """Build a new MemoryDocumentRepository for the requesting test."""
    fresh_repository = MemoryDocumentRepository()
    return fresh_repository
24
+
25
+
26
@pytest.fixture
def sample_content() -> ContentStream:
    """Wrap a fixed byte payload in a ContentStream for use in tests."""
    raw_stream = io.BytesIO(b"This is test content for document storage")
    return ContentStream(raw_stream)
31
+
32
+
33
@pytest.fixture
def sample_document(sample_content: ContentStream) -> Document:
    """Build a CAPTURED plain-text Document backed by ``sample_content``."""
    document = Document(
        document_id="test-doc-123",
        original_filename="test.txt",
        content_type="text/plain",
        size_bytes=41,  # byte length of the sample_content payload
        content_multihash="test_hash_placeholder",
        status=DocumentStatus.CAPTURED,
        content=sample_content,
    )
    return document
45
+
46
+
47
class TestMemoryDocumentRepositoryContentString:
    """Test content_string functionality.

    Every test builds a Document from ``content_string``, saves it, and then
    checks what the repository stored and returns. Each coroutine is marked
    with ``pytest.mark.asyncio`` explicitly so it runs regardless of the
    configured pytest-asyncio mode.
    """

    @pytest.mark.asyncio
    async def test_save_document_with_content_string(
        self, repository: MemoryDocumentRepository
    ) -> None:
        """Test saving document with content_string (small content).

        After saving, the retrieved document must have a recalculated
        multihash, a size equal to the UTF-8 byte length of the content, and
        a readable content stream that yields the original text.
        """
        content = '{"assembled": "document", "data": "test"}'

        # Create document with content_string
        document = Document(
            document_id="test-doc-content-string",
            original_filename="assembled.json",
            content_type="application/json",
            size_bytes=100,  # Will be updated automatically
            content_multihash="placeholder",  # Will be updated automatically
            status=DocumentStatus.CAPTURED,
            content_string=content,
        )

        # Act - save should convert content_string to ContentStream
        await repository.save(document)

        # Assert document was saved successfully
        retrieved = await repository.get(document.document_id)
        assert retrieved is not None
        assert retrieved.content_multihash != "placeholder"  # Hash was calculated
        assert retrieved.size_bytes == len(content.encode("utf-8"))

        # Verify content can be read
        assert retrieved.content is not None
        retrieved_content = retrieved.content.read().decode("utf-8")
        assert retrieved_content == content

    @pytest.mark.asyncio
    async def test_save_document_with_content_string_unicode(
        self, repository: MemoryDocumentRepository
    ) -> None:
        """Test saving document with unicode content_string.

        The content mixes CJK characters, an emoji and accented Latin letters
        and must round-trip unchanged through save and get.
        """
        content = '{"title": "测试文档", "emoji": "🚀", "content": "éñ"}'

        document = Document(
            document_id="test-doc-unicode",
            original_filename="unicode.json",
            content_type="application/json",
            size_bytes=100,
            content_multihash="placeholder",
            status=DocumentStatus.CAPTURED,
            content_string=content,
        )

        await repository.save(document)
        retrieved = await repository.get(document.document_id)

        assert retrieved is not None
        assert retrieved.content is not None
        retrieved_content = retrieved.content.read().decode("utf-8")
        assert retrieved_content == content

    # Note: Empty content test removed because domain model requires
    # size_bytes > 0

    @pytest.mark.asyncio
    async def test_save_excludes_content_string_from_storage(
        self, repository: MemoryDocumentRepository
    ) -> None:
        """Test that content_string is not stored in memory storage.

        The stored entry is read straight from ``repository.storage_dict`` to
        check ``content_string`` is None there, then the document is fetched
        through ``get`` to confirm the content itself is still readable.
        """
        content = '{"test": "data that should not be in storage"}'

        document = Document(
            document_id="test-storage-exclusion",
            original_filename="test.json",
            content_type="application/json",
            size_bytes=100,
            content_multihash="placeholder",
            status=DocumentStatus.CAPTURED,
            content_string=content,
        )

        await repository.save(document)

        # Check stored document directly from internal storage
        stored_document = repository.storage_dict.get("test-storage-exclusion")
        assert stored_document is not None

        # Verify content_string is not in stored document
        assert stored_document.content_string is None

        # Verify essential fields are still present
        assert stored_document.document_id == "test-storage-exclusion"
        assert stored_document.content_multihash is not None
        assert stored_document.content_multihash != "placeholder"

        # Verify we can still retrieve with content
        retrieved = await repository.get("test-storage-exclusion")
        assert retrieved is not None
        assert retrieved.content is not None
        retrieved_content = retrieved.content.read().decode("utf-8")
        assert retrieved_content == content
144
+
145
+
146
class TestMemoryDocumentRepositoryBasicOperations:
    """Test basic repository operations.

    Each coroutine is marked with ``pytest.mark.asyncio`` explicitly so it
    runs regardless of the configured pytest-asyncio mode.
    """

    @pytest.mark.asyncio
    async def test_save_and_get_document_with_content_stream(
        self, repository: MemoryDocumentRepository, sample_document: Document
    ) -> None:
        """Test basic save and retrieve operations with ContentStream.

        The retrieved document must carry the same document_id and
        original_filename as the one that was saved.
        """
        # Act
        await repository.save(sample_document)
        retrieved = await repository.get(sample_document.document_id)

        # Assert
        assert retrieved is not None
        assert retrieved.document_id == sample_document.document_id
        assert retrieved.original_filename == sample_document.original_filename

    @pytest.mark.asyncio
    async def test_get_nonexistent_document(
        self, repository: MemoryDocumentRepository
    ) -> None:
        """Test retrieving a document that doesn't exist.

        ``get`` must return None for an ID that was never saved.
        """
        result = await repository.get("nonexistent-123")
        assert result is None

    @pytest.mark.asyncio
    async def test_generate_id(self, repository: MemoryDocumentRepository) -> None:
        """Test that generate_id returns a unique string.

        Two consecutive calls must yield two different, non-empty strings.
        """
        doc_id_1 = await repository.generate_id()
        doc_id_2 = await repository.generate_id()

        assert isinstance(doc_id_1, str)
        assert isinstance(doc_id_2, str)
        assert doc_id_1 != doc_id_2
        assert len(doc_id_1) > 0
        assert len(doc_id_2) > 0
179
+
180
+
181
class TestMemoryDocumentRepositoryErrorHandling:
    """Test error handling scenarios.

    Both tests expect the ValueError to be raised while the Document is being
    constructed; ``repository.save`` is never reached. Each coroutine is
    marked with ``pytest.mark.asyncio`` explicitly so it runs regardless of
    the configured pytest-asyncio mode.
    """

    @pytest.mark.asyncio
    async def test_save_handles_empty_document_id(
        self, repository: MemoryDocumentRepository
    ) -> None:
        """Test error handling for empty document ID.

        Constructing a Document with ``document_id=""`` must raise a
        ValueError whose message matches "Document ID cannot be empty".
        """
        with pytest.raises(ValueError, match="Document ID cannot be empty"):
            Document(
                document_id="",
                original_filename="test.txt",
                content_type="text/plain",
                size_bytes=100,
                content_multihash="test_hash",
                status=DocumentStatus.CAPTURED,
                content_string="test content",
            )

    @pytest.mark.asyncio
    async def test_save_handles_empty_filename(
        self, repository: MemoryDocumentRepository
    ) -> None:
        """Test error handling for empty filename.

        Constructing a Document with ``original_filename=""`` must raise a
        ValueError whose message matches "Original filename cannot be empty".
        """
        with pytest.raises(ValueError, match="Original filename cannot be empty"):
            Document(
                document_id="test-123",
                original_filename="",
                content_type="text/plain",
                size_bytes=100,
                content_multihash="test_hash",
                status=DocumentStatus.CAPTURED,
                content_string="test content",
            )
@@ -0,0 +1,161 @@
1
+ """
2
+ Tests for MemoryDocumentPolicyValidationRepository implementation.
3
+
4
+ This module provides tests for the memory-based document policy validation
5
+ repository implementation, focusing on functionality specific to this
6
+ repository that differs from the inherited mixins.
7
+ """
8
+
9
+ import pytest
10
+ from datetime import datetime, timezone
11
+ from typing import Any
12
+
13
+ from julee.domain.models.policy import (
14
+ DocumentPolicyValidation,
15
+ DocumentPolicyValidationStatus,
16
+ )
17
+ from julee.repositories.memory.document_policy_validation import (
18
+ MemoryDocumentPolicyValidationRepository,
19
+ )
20
+
21
+
22
+ @pytest.fixture
23
+ def validation_repo() -> MemoryDocumentPolicyValidationRepository:
24
+ """Create a fresh validation repository for each test."""
25
+ return MemoryDocumentPolicyValidationRepository()
26
+
27
+
28
@pytest.fixture
def sample_validation() -> DocumentPolicyValidation:
    """Build a PASSED validation with scores before and after transformation."""
    initial_scores = [
        ("quality-check-query", 85),
        ("completeness-check", 92),
    ]
    scores_after_transform = [
        ("quality-check-query", 95),
        ("completeness-check", 88),
    ]
    validation = DocumentPolicyValidation(
        validation_id="validation-test-123",
        input_document_id="doc-123",
        policy_id="policy-456",
        status=DocumentPolicyValidationStatus.PASSED,
        validation_scores=initial_scores,
        transformed_document_id="doc-123-transformed",
        post_transform_validation_scores=scores_after_transform,
        # Two separate clock reads, start first, as in the plain constructor call
        started_at=datetime.now(timezone.utc),
        completed_at=datetime.now(timezone.utc),
        passed=True,
    )
    return validation
49
+
50
+
51
class TestMemoryDocumentPolicyValidationRepositorySpecific:
    """Checks for behavior particular to the DocumentPolicyValidation repo."""

    @pytest.mark.asyncio
    async def test_generate_id_prefix(
        self, validation_repo: MemoryDocumentPolicyValidationRepository
    ) -> None:
        """Generated IDs are distinct, non-empty "validation-" strings."""
        first_id = await validation_repo.generate_id()
        second_id = await validation_repo.generate_id()

        assert first_id != second_id
        for generated in (first_id, second_id):
            assert isinstance(generated, str)
            assert len(generated) > 0
            assert generated.startswith("validation-")

    @pytest.mark.asyncio
    async def test_entity_specific_logging_data(
        self,
        validation_repo: MemoryDocumentPolicyValidationRepository,
        sample_validation: DocumentPolicyValidation,
    ) -> None:
        """The log hook records the validation's identifying fields."""
        collected: dict[str, Any] = {}
        validation_repo._add_entity_specific_log_data(sample_validation, collected)

        # Identifier and count fields copied from the sample validation
        assert collected["input_document_id"] == "doc-123"
        assert collected["policy_id"] == "policy-456"
        assert collected["validation_scores_count"] == 2

        # Boolean flags must be real booleans, hence the identity checks
        assert collected["has_transformations"] is True
        assert collected["passed"] is True

        # The sample carries no error message, so no error flag is expected
        assert "has_error" not in collected

    @pytest.mark.asyncio
    async def test_entity_specific_logging_data_with_error(
        self, validation_repo: MemoryDocumentPolicyValidationRepository
    ) -> None:
        """An error_message on the validation sets the has_error flag."""
        failed_validation = DocumentPolicyValidation(
            validation_id="validation-error-123",
            input_document_id="doc-456",
            policy_id="policy-789",
            status=DocumentPolicyValidationStatus.ERROR,
            validation_scores=[],
            error_message="Something went wrong",
            passed=False,
        )
        collected: dict[str, Any] = {}

        validation_repo._add_entity_specific_log_data(failed_validation, collected)

        assert collected["has_error"] is True
        assert collected["passed"] is False

    @pytest.mark.asyncio
    async def test_entity_specific_logging_data_no_transformations(
        self, validation_repo: MemoryDocumentPolicyValidationRepository
    ) -> None:
        """A validation without transformation data logs it as False."""
        untransformed_validation = DocumentPolicyValidation(
            validation_id="validation-no-transform-123",
            input_document_id="doc-789",
            policy_id="policy-abc",
            status=DocumentPolicyValidationStatus.VALIDATION_COMPLETE,
            validation_scores=[("basic-check", 75)],
            transformed_document_id=None,
            post_transform_validation_scores=None,
        )
        collected: dict[str, Any] = {}

        validation_repo._add_entity_specific_log_data(
            untransformed_validation, collected
        )

        assert collected["has_transformations"] is False
        assert collected["validation_scores_count"] == 1

    @pytest.mark.asyncio
    async def test_entity_specific_logging_data_passed_none(
        self, validation_repo: MemoryDocumentPolicyValidationRepository
    ) -> None:
        """A validation whose ``passed`` is None logs no "passed" key."""
        pending_validation = DocumentPolicyValidation(
            validation_id="validation-progress-123",
            input_document_id="doc-progress",
            policy_id="policy-progress",
            status=DocumentPolicyValidationStatus.IN_PROGRESS,
            validation_scores=[],
            passed=None,  # Still in progress
        )
        collected: dict[str, Any] = {}

        validation_repo._add_entity_specific_log_data(pending_validation, collected)

        # The key must be absent altogether, not present with a None value
        assert "passed" not in collected
        assert collected["has_transformations"] is False

    @pytest.mark.asyncio
    async def test_initialization_sets_correct_attributes(
        self, validation_repo: MemoryDocumentPolicyValidationRepository
    ) -> None:
        """A new repository has its label, an empty dict store and a logger."""
        assert validation_repo.entity_name == "DocumentPolicyValidation"

        store = validation_repo.storage_dict
        assert isinstance(store, dict)
        assert len(store) == 0

        assert validation_repo.logger is not None