julee 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- julee/__init__.py +3 -0
- julee/api/__init__.py +20 -0
- julee/api/app.py +180 -0
- julee/api/dependencies.py +257 -0
- julee/api/requests.py +175 -0
- julee/api/responses.py +43 -0
- julee/api/routers/__init__.py +43 -0
- julee/api/routers/assembly_specifications.py +212 -0
- julee/api/routers/documents.py +182 -0
- julee/api/routers/knowledge_service_configs.py +79 -0
- julee/api/routers/knowledge_service_queries.py +293 -0
- julee/api/routers/system.py +137 -0
- julee/api/routers/workflows.py +234 -0
- julee/api/services/__init__.py +20 -0
- julee/api/services/system_initialization.py +214 -0
- julee/api/tests/__init__.py +14 -0
- julee/api/tests/routers/__init__.py +17 -0
- julee/api/tests/routers/test_assembly_specifications.py +749 -0
- julee/api/tests/routers/test_documents.py +301 -0
- julee/api/tests/routers/test_knowledge_service_configs.py +234 -0
- julee/api/tests/routers/test_knowledge_service_queries.py +738 -0
- julee/api/tests/routers/test_system.py +179 -0
- julee/api/tests/routers/test_workflows.py +393 -0
- julee/api/tests/test_app.py +285 -0
- julee/api/tests/test_dependencies.py +245 -0
- julee/api/tests/test_requests.py +250 -0
- julee/domain/__init__.py +22 -0
- julee/domain/models/__init__.py +49 -0
- julee/domain/models/assembly/__init__.py +17 -0
- julee/domain/models/assembly/assembly.py +103 -0
- julee/domain/models/assembly/tests/__init__.py +0 -0
- julee/domain/models/assembly/tests/factories.py +37 -0
- julee/domain/models/assembly/tests/test_assembly.py +430 -0
- julee/domain/models/assembly_specification/__init__.py +24 -0
- julee/domain/models/assembly_specification/assembly_specification.py +172 -0
- julee/domain/models/assembly_specification/knowledge_service_query.py +123 -0
- julee/domain/models/assembly_specification/tests/__init__.py +0 -0
- julee/domain/models/assembly_specification/tests/factories.py +78 -0
- julee/domain/models/assembly_specification/tests/test_assembly_specification.py +490 -0
- julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +310 -0
- julee/domain/models/custom_fields/__init__.py +0 -0
- julee/domain/models/custom_fields/content_stream.py +68 -0
- julee/domain/models/custom_fields/tests/__init__.py +0 -0
- julee/domain/models/custom_fields/tests/test_custom_fields.py +53 -0
- julee/domain/models/document/__init__.py +17 -0
- julee/domain/models/document/document.py +150 -0
- julee/domain/models/document/tests/__init__.py +0 -0
- julee/domain/models/document/tests/factories.py +76 -0
- julee/domain/models/document/tests/test_document.py +297 -0
- julee/domain/models/knowledge_service_config/__init__.py +17 -0
- julee/domain/models/knowledge_service_config/knowledge_service_config.py +86 -0
- julee/domain/models/policy/__init__.py +15 -0
- julee/domain/models/policy/document_policy_validation.py +220 -0
- julee/domain/models/policy/policy.py +203 -0
- julee/domain/models/policy/tests/__init__.py +0 -0
- julee/domain/models/policy/tests/factories.py +47 -0
- julee/domain/models/policy/tests/test_document_policy_validation.py +420 -0
- julee/domain/models/policy/tests/test_policy.py +546 -0
- julee/domain/repositories/__init__.py +27 -0
- julee/domain/repositories/assembly.py +45 -0
- julee/domain/repositories/assembly_specification.py +52 -0
- julee/domain/repositories/base.py +146 -0
- julee/domain/repositories/document.py +49 -0
- julee/domain/repositories/document_policy_validation.py +52 -0
- julee/domain/repositories/knowledge_service_config.py +54 -0
- julee/domain/repositories/knowledge_service_query.py +44 -0
- julee/domain/repositories/policy.py +49 -0
- julee/domain/use_cases/__init__.py +17 -0
- julee/domain/use_cases/decorators.py +107 -0
- julee/domain/use_cases/extract_assemble_data.py +649 -0
- julee/domain/use_cases/initialize_system_data.py +842 -0
- julee/domain/use_cases/tests/__init__.py +7 -0
- julee/domain/use_cases/tests/test_extract_assemble_data.py +548 -0
- julee/domain/use_cases/tests/test_initialize_system_data.py +455 -0
- julee/domain/use_cases/tests/test_validate_document.py +1228 -0
- julee/domain/use_cases/validate_document.py +736 -0
- julee/fixtures/assembly_specifications.yaml +70 -0
- julee/fixtures/documents.yaml +178 -0
- julee/fixtures/knowledge_service_configs.yaml +37 -0
- julee/fixtures/knowledge_service_queries.yaml +27 -0
- julee/repositories/__init__.py +17 -0
- julee/repositories/memory/__init__.py +31 -0
- julee/repositories/memory/assembly.py +84 -0
- julee/repositories/memory/assembly_specification.py +125 -0
- julee/repositories/memory/base.py +227 -0
- julee/repositories/memory/document.py +149 -0
- julee/repositories/memory/document_policy_validation.py +104 -0
- julee/repositories/memory/knowledge_service_config.py +123 -0
- julee/repositories/memory/knowledge_service_query.py +120 -0
- julee/repositories/memory/policy.py +87 -0
- julee/repositories/memory/tests/__init__.py +0 -0
- julee/repositories/memory/tests/test_document.py +212 -0
- julee/repositories/memory/tests/test_document_policy_validation.py +161 -0
- julee/repositories/memory/tests/test_policy.py +443 -0
- julee/repositories/minio/__init__.py +31 -0
- julee/repositories/minio/assembly.py +103 -0
- julee/repositories/minio/assembly_specification.py +170 -0
- julee/repositories/minio/client.py +570 -0
- julee/repositories/minio/document.py +530 -0
- julee/repositories/minio/document_policy_validation.py +120 -0
- julee/repositories/minio/knowledge_service_config.py +187 -0
- julee/repositories/minio/knowledge_service_query.py +211 -0
- julee/repositories/minio/policy.py +106 -0
- julee/repositories/minio/tests/__init__.py +0 -0
- julee/repositories/minio/tests/fake_client.py +213 -0
- julee/repositories/minio/tests/test_assembly.py +374 -0
- julee/repositories/minio/tests/test_assembly_specification.py +391 -0
- julee/repositories/minio/tests/test_client_protocol.py +57 -0
- julee/repositories/minio/tests/test_document.py +591 -0
- julee/repositories/minio/tests/test_document_policy_validation.py +192 -0
- julee/repositories/minio/tests/test_knowledge_service_config.py +374 -0
- julee/repositories/minio/tests/test_knowledge_service_query.py +438 -0
- julee/repositories/minio/tests/test_policy.py +559 -0
- julee/repositories/temporal/__init__.py +38 -0
- julee/repositories/temporal/activities.py +114 -0
- julee/repositories/temporal/activity_names.py +34 -0
- julee/repositories/temporal/proxies.py +159 -0
- julee/services/__init__.py +18 -0
- julee/services/knowledge_service/__init__.py +48 -0
- julee/services/knowledge_service/anthropic/__init__.py +12 -0
- julee/services/knowledge_service/anthropic/knowledge_service.py +331 -0
- julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +318 -0
- julee/services/knowledge_service/factory.py +138 -0
- julee/services/knowledge_service/knowledge_service.py +160 -0
- julee/services/knowledge_service/memory/__init__.py +13 -0
- julee/services/knowledge_service/memory/knowledge_service.py +278 -0
- julee/services/knowledge_service/memory/test_knowledge_service.py +345 -0
- julee/services/knowledge_service/test_factory.py +112 -0
- julee/services/temporal/__init__.py +38 -0
- julee/services/temporal/activities.py +86 -0
- julee/services/temporal/activity_names.py +22 -0
- julee/services/temporal/proxies.py +41 -0
- julee/util/__init__.py +0 -0
- julee/util/domain.py +119 -0
- julee/util/repos/__init__.py +0 -0
- julee/util/repos/minio/__init__.py +0 -0
- julee/util/repos/minio/file_storage.py +213 -0
- julee/util/repos/temporal/__init__.py +11 -0
- julee/util/repos/temporal/client_proxies/file_storage.py +68 -0
- julee/util/repos/temporal/data_converter.py +123 -0
- julee/util/repos/temporal/minio_file_storage.py +12 -0
- julee/util/repos/temporal/proxies/__init__.py +0 -0
- julee/util/repos/temporal/proxies/file_storage.py +58 -0
- julee/util/repositories.py +55 -0
- julee/util/temporal/__init__.py +22 -0
- julee/util/temporal/activities.py +123 -0
- julee/util/temporal/decorators.py +473 -0
- julee/util/tests/__init__.py +1 -0
- julee/util/tests/test_decorators.py +770 -0
- julee/util/validation/__init__.py +29 -0
- julee/util/validation/repository.py +100 -0
- julee/util/validation/type_guards.py +369 -0
- julee/worker.py +211 -0
- julee/workflows/__init__.py +26 -0
- julee/workflows/extract_assemble.py +215 -0
- julee/workflows/validate_document.py +228 -0
- julee-0.1.0.dist-info/METADATA +195 -0
- julee-0.1.0.dist-info/RECORD +161 -0
- julee-0.1.0.dist-info/WHEEL +5 -0
- julee-0.1.0.dist-info/licenses/LICENSE +674 -0
- julee-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Memory implementation of KnowledgeServiceQueryRepository.
|
|
3
|
+
|
|
4
|
+
This module provides an in-memory implementation of the
|
|
5
|
+
KnowledgeServiceQueryRepository protocol that follows the Clean Architecture
|
|
6
|
+
patterns defined in the Fun-Police Framework. It handles knowledge service
|
|
7
|
+
query storage in memory dictionaries, ensuring idempotency and proper error
|
|
8
|
+
handling.
|
|
9
|
+
|
|
10
|
+
The implementation uses Python dictionaries to store knowledge service query
|
|
11
|
+
data, making it ideal for testing scenarios where external dependencies
|
|
12
|
+
should be avoided.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import logging
|
|
16
|
+
from typing import Dict, Optional, Any, List
|
|
17
|
+
|
|
18
|
+
from julee.domain.models.assembly_specification import (
|
|
19
|
+
KnowledgeServiceQuery,
|
|
20
|
+
)
|
|
21
|
+
from julee.domain.repositories.knowledge_service_query import (
|
|
22
|
+
KnowledgeServiceQueryRepository,
|
|
23
|
+
)
|
|
24
|
+
from .base import MemoryRepositoryMixin
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class MemoryKnowledgeServiceQueryRepository(
    KnowledgeServiceQueryRepository,
    MemoryRepositoryMixin[KnowledgeServiceQuery],
):
    """
    Dictionary-backed implementation of KnowledgeServiceQueryRepository.

    Queries live in ``storage_dict``, keyed by query_id. Lookup, save, bulk
    lookup and ID generation are delegated to the helper methods supplied by
    MemoryRepositoryMixin; only ``list_all`` and the logging hook carry logic
    of their own.
    """

    def __init__(self) -> None:
        """Create the repository with an empty storage dictionary."""
        self.storage_dict: Dict[str, KnowledgeServiceQuery] = {}
        self.entity_name = "KnowledgeServiceQuery"
        self.logger = logger

        logger.debug("Initializing MemoryKnowledgeServiceQueryRepository")

    async def get(self, query_id: str) -> Optional[KnowledgeServiceQuery]:
        """Look up a single knowledge service query.

        Args:
            query_id: Unique query identifier

        Returns:
            Whatever the mixin's ``get_entity`` yields for ``query_id``
            (a KnowledgeServiceQuery, or None when absent)
        """
        found = self.get_entity(query_id)
        return found

    async def save(self, query: KnowledgeServiceQuery) -> None:
        """Persist a knowledge service query via the mixin's ``save_entity``.

        Args:
            query: KnowledgeServiceQuery object to store; its ``query_id``
                field is named as the ID field
        """
        id_field = "query_id"
        self.save_entity(query, id_field)

    async def get_many(
        self, query_ids: List[str]
    ) -> Dict[str, Optional[KnowledgeServiceQuery]]:
        """Look up several knowledge service queries in one call.

        Args:
            query_ids: List of unique query identifiers

        Returns:
            Dict mapping query_id to KnowledgeServiceQuery (or None if not
            found)
        """
        results = self.get_many_entities(query_ids)
        return results

    async def generate_id(self) -> str:
        """Produce a new query identifier using the "query" prefix.

        Returns:
            Unique string identifier for a new query
        """
        prefix = "query"
        return self.generate_entity_id(prefix)

    async def list_all(self) -> List[KnowledgeServiceQuery]:
        """Return every stored knowledge service query.

        Returns:
            List of all knowledge service queries, sorted by query_id
        """
        self.logger.debug("MemoryKnowledgeServiceQueryRepository: Listing all queries")

        # Snapshot the stored values into a new list ordered by query_id
        ordered = sorted(
            self.storage_dict.values(), key=lambda query: query.query_id
        )
        count = len(ordered)

        self.logger.info(
            f"MemoryKnowledgeServiceQueryRepository: Retrieved {count} queries",
            extra={"count": count},
        )

        return ordered

    def _add_entity_specific_log_data(
        self, entity: KnowledgeServiceQuery, log_data: Dict[str, Any]
    ) -> None:
        """Extend ``log_data`` with the query's name and knowledge service ID."""
        super()._add_entity_specific_log_data(entity, log_data)

        # (log key, entity attribute) pairs, written in this order
        extra_fields = (
            ("query_name", "name"),
            ("knowledge_service_id", "knowledge_service_id"),
        )
        for log_key, attribute in extra_fields:
            log_data[log_key] = getattr(entity, attribute)
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Memory implementation of PolicyRepository.
|
|
3
|
+
|
|
4
|
+
This module provides an in-memory implementation of the PolicyRepository
|
|
5
|
+
protocol that follows the Clean Architecture patterns defined in the
|
|
6
|
+
Fun-Police Framework. It handles policy storage in memory dictionaries,
|
|
7
|
+
ensuring idempotency and proper error handling.
|
|
8
|
+
|
|
9
|
+
The implementation uses Python dictionaries to store policy data, making it
|
|
10
|
+
ideal for testing scenarios where external dependencies should be avoided.
|
|
11
|
+
All operations are still async to maintain interface compatibility.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import logging
|
|
15
|
+
from typing import Optional, Dict, Any, List
|
|
16
|
+
|
|
17
|
+
from julee.domain.models.policy import Policy
|
|
18
|
+
from julee.domain.repositories.policy import PolicyRepository
|
|
19
|
+
from .base import MemoryRepositoryMixin
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class MemoryPolicyRepository(PolicyRepository, MemoryRepositoryMixin[Policy]):
    """
    Dictionary-backed implementation of PolicyRepository.

    Policies are kept in ``storage_dict``, keyed by policy_id. Every public
    operation delegates to a helper supplied by MemoryRepositoryMixin, which
    keeps this class a lightweight, dependency-free option for testing.
    """

    def __init__(self) -> None:
        """Create the repository with an empty storage dictionary."""
        self.storage_dict: Dict[str, Policy] = {}
        self.entity_name = "Policy"
        self.logger = logger

        logger.debug("Initializing MemoryPolicyRepository")

    async def get(self, policy_id: str) -> Optional[Policy]:
        """Look up a single policy.

        Args:
            policy_id: Unique policy identifier

        Returns:
            Whatever the mixin's ``get_entity`` yields for ``policy_id``
            (a Policy, or None when absent)
        """
        found = self.get_entity(policy_id)
        return found

    async def save(self, policy: Policy) -> None:
        """Persist a policy via the mixin's ``save_entity``.

        Args:
            policy: Complete Policy to save; its ``policy_id`` field is
                named as the ID field
        """
        id_field = "policy_id"
        self.save_entity(policy, id_field)

    async def generate_id(self) -> str:
        """Produce a new policy identifier using the "policy" prefix.

        Returns:
            Unique policy ID string
        """
        prefix = "policy"
        return self.generate_entity_id(prefix)

    async def get_many(self, policy_ids: List[str]) -> Dict[str, Optional[Policy]]:
        """Look up several policies in one call.

        Args:
            policy_ids: List of unique policy identifiers

        Returns:
            Dict mapping policy_id to Policy (or None if not found)
        """
        results = self.get_many_entities(policy_ids)
        return results

    def _add_entity_specific_log_data(
        self, entity: Policy, log_data: Dict[str, Any]
    ) -> None:
        """Extend ``log_data`` with the policy's title, score count and flags."""
        super()._add_entity_specific_log_data(entity, log_data)
        log_data.update(
            {
                "title": entity.title,
                "validation_scores_count": len(entity.validation_scores),
                "has_transformations": entity.has_transformations,
                "is_validation_only": entity.is_validation_only,
            }
        )
|
|
File without changes
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Unit tests for MemoryDocumentRepository.
|
|
3
|
+
|
|
4
|
+
These tests verify the memory implementation logic without requiring external
|
|
5
|
+
dependencies. They follow the Clean Architecture testing patterns and verify
|
|
6
|
+
idempotency, error handling, and content operations including content_string.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import io
|
|
10
|
+
import pytest
|
|
11
|
+
from julee.repositories.memory.document import (
|
|
12
|
+
MemoryDocumentRepository,
|
|
13
|
+
)
|
|
14
|
+
from julee.domain.models.document import Document, DocumentStatus
|
|
15
|
+
from julee.domain.models.custom_fields.content_stream import (
|
|
16
|
+
ContentStream,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@pytest.fixture
def repository() -> MemoryDocumentRepository:
    """Build a new, empty MemoryDocumentRepository for the requesting test."""
    repo = MemoryDocumentRepository()
    return repo
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@pytest.fixture
def sample_content() -> ContentStream:
    """Wrap a fixed byte payload in a ContentStream for use in tests."""
    raw_stream = io.BytesIO(b"This is test content for document storage")
    return ContentStream(raw_stream)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@pytest.fixture
def sample_document(sample_content: ContentStream) -> Document:
    """Build a CAPTURED text/plain Document backed by ``sample_content``."""
    document = Document(
        document_id="test-doc-123",
        original_filename="test.txt",
        content_type="text/plain",
        size_bytes=41,
        content_multihash="test_hash_placeholder",
        status=DocumentStatus.CAPTURED,
        content=sample_content,
    )
    return document
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class TestMemoryDocumentRepositoryContentString:
    """Test content_string functionality.

    Every coroutine test is explicitly marked with ``pytest.mark.asyncio`` so
    that it is awaited even when pytest-asyncio runs in strict mode; the
    marker is redundant but harmless in auto mode.
    """

    @pytest.mark.asyncio
    async def test_save_document_with_content_string(
        self, repository: MemoryDocumentRepository
    ) -> None:
        """Test saving document with content_string (small content).

        Saves a document built from ``content_string`` and checks that the
        retrieved document has a recalculated hash, a size equal to the
        UTF-8 byte length, and readable content equal to the input string.
        """
        content = '{"assembled": "document", "data": "test"}'

        # Create document with content_string
        document = Document(
            document_id="test-doc-content-string",
            original_filename="assembled.json",
            content_type="application/json",
            size_bytes=100,  # Will be updated automatically
            content_multihash="placeholder",  # Will be updated automatically
            status=DocumentStatus.CAPTURED,
            content_string=content,
        )

        # Act - save should convert content_string to ContentStream
        await repository.save(document)

        # Assert document was saved successfully
        retrieved = await repository.get(document.document_id)
        assert retrieved is not None
        assert retrieved.content_multihash != "placeholder"  # Hash was calculated
        assert retrieved.size_bytes == len(content.encode("utf-8"))

        # Verify content can be read
        assert retrieved.content is not None
        retrieved_content = retrieved.content.read().decode("utf-8")
        assert retrieved_content == content

    @pytest.mark.asyncio
    async def test_save_document_with_content_string_unicode(
        self, repository: MemoryDocumentRepository
    ) -> None:
        """Test saving document with unicode content_string.

        The payload mixes CJK characters, an emoji and accented Latin
        letters; the content read back must decode to the same string.
        """
        content = '{"title": "测试文档", "emoji": "🚀", "content": "éñ"}'

        document = Document(
            document_id="test-doc-unicode",
            original_filename="unicode.json",
            content_type="application/json",
            size_bytes=100,
            content_multihash="placeholder",
            status=DocumentStatus.CAPTURED,
            content_string=content,
        )

        await repository.save(document)
        retrieved = await repository.get(document.document_id)

        assert retrieved is not None
        assert retrieved.content is not None
        retrieved_content = retrieved.content.read().decode("utf-8")
        assert retrieved_content == content

    # Note: Empty content test removed because domain model requires
    # size_bytes > 0

    @pytest.mark.asyncio
    async def test_save_excludes_content_string_from_storage(
        self, repository: MemoryDocumentRepository
    ) -> None:
        """Test that content_string is not stored in memory storage.

        Inspects ``repository.storage_dict`` directly after saving, then
        confirms the content is still retrievable through ``get``.
        """
        content = '{"test": "data that should not be in storage"}'

        document = Document(
            document_id="test-storage-exclusion",
            original_filename="test.json",
            content_type="application/json",
            size_bytes=100,
            content_multihash="placeholder",
            status=DocumentStatus.CAPTURED,
            content_string=content,
        )

        await repository.save(document)

        # Check stored document directly from internal storage
        stored_document = repository.storage_dict.get("test-storage-exclusion")
        assert stored_document is not None

        # Verify content_string is not in stored document
        assert stored_document.content_string is None

        # Verify essential fields are still present
        assert stored_document.document_id == "test-storage-exclusion"
        assert stored_document.content_multihash is not None
        assert stored_document.content_multihash != "placeholder"

        # Verify we can still retrieve with content
        retrieved = await repository.get("test-storage-exclusion")
        assert retrieved is not None
        assert retrieved.content is not None
        retrieved_content = retrieved.content.read().decode("utf-8")
        assert retrieved_content == content
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class TestMemoryDocumentRepositoryBasicOperations:
    """Test basic repository operations.

    Every coroutine test is explicitly marked with ``pytest.mark.asyncio`` so
    that it is awaited even when pytest-asyncio runs in strict mode; the
    marker is redundant but harmless in auto mode.
    """

    @pytest.mark.asyncio
    async def test_save_and_get_document_with_content_stream(
        self, repository: MemoryDocumentRepository, sample_document: Document
    ) -> None:
        """Test basic save and retrieve operations with ContentStream.

        Saves the ``sample_document`` fixture and checks that the retrieved
        document carries the same document_id and original_filename.
        """
        # Act
        await repository.save(sample_document)
        retrieved = await repository.get(sample_document.document_id)

        # Assert
        assert retrieved is not None
        assert retrieved.document_id == sample_document.document_id
        assert retrieved.original_filename == sample_document.original_filename

    @pytest.mark.asyncio
    async def test_get_nonexistent_document(
        self, repository: MemoryDocumentRepository
    ) -> None:
        """Test retrieving a document that doesn't exist returns None."""
        result = await repository.get("nonexistent-123")
        assert result is None

    @pytest.mark.asyncio
    async def test_generate_id(self, repository: MemoryDocumentRepository) -> None:
        """Test that generate_id returns a unique string.

        Two consecutive calls must each yield a non-empty ``str`` and the
        two values must differ.
        """
        doc_id_1 = await repository.generate_id()
        doc_id_2 = await repository.generate_id()

        assert isinstance(doc_id_1, str)
        assert isinstance(doc_id_2, str)
        assert doc_id_1 != doc_id_2
        assert len(doc_id_1) > 0
        assert len(doc_id_2) > 0
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class TestMemoryDocumentRepositoryErrorHandling:
    """Test error handling scenarios.

    Both tests exercise validation raised while constructing ``Document``;
    the ``repository`` fixture is requested but not used in the test bodies.
    Every coroutine test is explicitly marked with ``pytest.mark.asyncio`` so
    that it is awaited even when pytest-asyncio runs in strict mode; the
    marker is redundant but harmless in auto mode.
    """

    @pytest.mark.asyncio
    async def test_save_handles_empty_document_id(
        self, repository: MemoryDocumentRepository
    ) -> None:
        """Test error handling for empty document ID.

        Constructing a Document with ``document_id=""`` must raise a
        ValueError whose message matches "Document ID cannot be empty".
        """
        with pytest.raises(ValueError, match="Document ID cannot be empty"):
            Document(
                document_id="",
                original_filename="test.txt",
                content_type="text/plain",
                size_bytes=100,
                content_multihash="test_hash",
                status=DocumentStatus.CAPTURED,
                content_string="test content",
            )

    @pytest.mark.asyncio
    async def test_save_handles_empty_filename(
        self, repository: MemoryDocumentRepository
    ) -> None:
        """Test error handling for empty filename.

        Constructing a Document with ``original_filename=""`` must raise a
        ValueError whose message matches "Original filename cannot be empty".
        """
        with pytest.raises(ValueError, match="Original filename cannot be empty"):
            Document(
                document_id="test-123",
                original_filename="",
                content_type="text/plain",
                size_bytes=100,
                content_multihash="test_hash",
                status=DocumentStatus.CAPTURED,
                content_string="test content",
            )
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests for MemoryDocumentPolicyValidationRepository implementation.
|
|
3
|
+
|
|
4
|
+
This module provides tests for the memory-based document policy validation
|
|
5
|
+
repository implementation, focusing on functionality specific to this
|
|
6
|
+
repository that differs from the inherited mixins.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import pytest
|
|
10
|
+
from datetime import datetime, timezone
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from julee.domain.models.policy import (
|
|
14
|
+
DocumentPolicyValidation,
|
|
15
|
+
DocumentPolicyValidationStatus,
|
|
16
|
+
)
|
|
17
|
+
from julee.repositories.memory.document_policy_validation import (
|
|
18
|
+
MemoryDocumentPolicyValidationRepository,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@pytest.fixture
def validation_repo() -> MemoryDocumentPolicyValidationRepository:
    """Build a new MemoryDocumentPolicyValidationRepository per test."""
    repo = MemoryDocumentPolicyValidationRepository()
    return repo
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@pytest.fixture
def sample_validation() -> DocumentPolicyValidation:
    """Build a PASSED validation with pre- and post-transform scores."""
    initial_scores = [
        ("quality-check-query", 85),
        ("completeness-check", 92),
    ]
    post_transform_scores = [
        ("quality-check-query", 95),
        ("completeness-check", 88),
    ]
    # Two separate clock reads, taken in this order
    started = datetime.now(timezone.utc)
    completed = datetime.now(timezone.utc)

    validation = DocumentPolicyValidation(
        validation_id="validation-test-123",
        input_document_id="doc-123",
        policy_id="policy-456",
        status=DocumentPolicyValidationStatus.PASSED,
        validation_scores=initial_scores,
        transformed_document_id="doc-123-transformed",
        post_transform_validation_scores=post_transform_scores,
        started_at=started,
        completed_at=completed,
        passed=True,
    )
    return validation
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class TestMemoryDocumentPolicyValidationRepositorySpecific:
    """Tests for behaviour particular to the validation repository.

    Covers the ID prefix, the entity-specific logging hook under several
    validation states, and the attributes set at construction time.
    """

    @pytest.mark.asyncio
    async def test_generate_id_prefix(
        self, validation_repo: MemoryDocumentPolicyValidationRepository
    ) -> None:
        """Generated IDs are distinct, non-empty, "validation-"-prefixed strings."""
        first_id = await validation_repo.generate_id()
        second_id = await validation_repo.generate_id()

        assert first_id != second_id
        for generated in (first_id, second_id):
            assert isinstance(generated, str)
            assert len(generated) > 0
            assert generated.startswith("validation-")

    @pytest.mark.asyncio
    async def test_entity_specific_logging_data(
        self,
        validation_repo: MemoryDocumentPolicyValidationRepository,
        sample_validation: DocumentPolicyValidation,
    ) -> None:
        """The logging hook records the validation-specific fields."""
        captured: dict[str, Any] = {}
        validation_repo._add_entity_specific_log_data(sample_validation, captured)

        # Identifier fields copied from the validation
        assert captured["input_document_id"] == "doc-123"
        assert captured["policy_id"] == "policy-456"

        # Derived fields
        assert captured["validation_scores_count"] == 2
        assert captured["has_transformations"] is True
        assert captured["passed"] is True

        # The sample carries no error message, so no error flag is expected
        assert "has_error" not in captured

    @pytest.mark.asyncio
    async def test_entity_specific_logging_data_with_error(
        self, validation_repo: MemoryDocumentPolicyValidationRepository
    ) -> None:
        """The logging hook flags an error when error_message is present."""
        errored = DocumentPolicyValidation(
            validation_id="validation-error-123",
            input_document_id="doc-456",
            policy_id="policy-789",
            status=DocumentPolicyValidationStatus.ERROR,
            validation_scores=[],
            error_message="Something went wrong",
            passed=False,
        )
        captured: dict[str, Any] = {}

        validation_repo._add_entity_specific_log_data(errored, captured)

        assert captured["has_error"] is True
        assert captured["passed"] is False

    @pytest.mark.asyncio
    async def test_entity_specific_logging_data_no_transformations(
        self, validation_repo: MemoryDocumentPolicyValidationRepository
    ) -> None:
        """The logging hook reports has_transformations False without a transform."""
        untransformed = DocumentPolicyValidation(
            validation_id="validation-no-transform-123",
            input_document_id="doc-789",
            policy_id="policy-abc",
            status=DocumentPolicyValidationStatus.VALIDATION_COMPLETE,
            validation_scores=[("basic-check", 75)],
            transformed_document_id=None,
            post_transform_validation_scores=None,
        )
        captured: dict[str, Any] = {}

        validation_repo._add_entity_specific_log_data(untransformed, captured)

        assert captured["has_transformations"] is False
        assert captured["validation_scores_count"] == 1

    @pytest.mark.asyncio
    async def test_entity_specific_logging_data_passed_none(
        self, validation_repo: MemoryDocumentPolicyValidationRepository
    ) -> None:
        """The logging hook omits "passed" when the validation's value is None."""
        in_progress = DocumentPolicyValidation(
            validation_id="validation-progress-123",
            input_document_id="doc-progress",
            policy_id="policy-progress",
            status=DocumentPolicyValidationStatus.IN_PROGRESS,
            validation_scores=[],
            passed=None,  # Still in progress
        )
        captured: dict[str, Any] = {}

        validation_repo._add_entity_specific_log_data(in_progress, captured)

        # passed field should not be added when None
        assert "passed" not in captured
        assert captured["has_transformations"] is False

    @pytest.mark.asyncio
    async def test_initialization_sets_correct_attributes(
        self, validation_repo: MemoryDocumentPolicyValidationRepository
    ) -> None:
        """A new repository has its name, an empty dict store and a logger."""
        storage = validation_repo.storage_dict

        assert validation_repo.entity_name == "DocumentPolicyValidation"
        assert isinstance(storage, dict)
        assert len(storage) == 0
        assert validation_repo.logger is not None
|