julee 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- julee/__init__.py +3 -0
- julee/api/__init__.py +20 -0
- julee/api/app.py +180 -0
- julee/api/dependencies.py +257 -0
- julee/api/requests.py +175 -0
- julee/api/responses.py +43 -0
- julee/api/routers/__init__.py +43 -0
- julee/api/routers/assembly_specifications.py +212 -0
- julee/api/routers/documents.py +182 -0
- julee/api/routers/knowledge_service_configs.py +79 -0
- julee/api/routers/knowledge_service_queries.py +293 -0
- julee/api/routers/system.py +137 -0
- julee/api/routers/workflows.py +234 -0
- julee/api/services/__init__.py +20 -0
- julee/api/services/system_initialization.py +214 -0
- julee/api/tests/__init__.py +14 -0
- julee/api/tests/routers/__init__.py +17 -0
- julee/api/tests/routers/test_assembly_specifications.py +749 -0
- julee/api/tests/routers/test_documents.py +301 -0
- julee/api/tests/routers/test_knowledge_service_configs.py +234 -0
- julee/api/tests/routers/test_knowledge_service_queries.py +738 -0
- julee/api/tests/routers/test_system.py +179 -0
- julee/api/tests/routers/test_workflows.py +393 -0
- julee/api/tests/test_app.py +285 -0
- julee/api/tests/test_dependencies.py +245 -0
- julee/api/tests/test_requests.py +250 -0
- julee/domain/__init__.py +22 -0
- julee/domain/models/__init__.py +49 -0
- julee/domain/models/assembly/__init__.py +17 -0
- julee/domain/models/assembly/assembly.py +103 -0
- julee/domain/models/assembly/tests/__init__.py +0 -0
- julee/domain/models/assembly/tests/factories.py +37 -0
- julee/domain/models/assembly/tests/test_assembly.py +430 -0
- julee/domain/models/assembly_specification/__init__.py +24 -0
- julee/domain/models/assembly_specification/assembly_specification.py +172 -0
- julee/domain/models/assembly_specification/knowledge_service_query.py +123 -0
- julee/domain/models/assembly_specification/tests/__init__.py +0 -0
- julee/domain/models/assembly_specification/tests/factories.py +78 -0
- julee/domain/models/assembly_specification/tests/test_assembly_specification.py +490 -0
- julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +310 -0
- julee/domain/models/custom_fields/__init__.py +0 -0
- julee/domain/models/custom_fields/content_stream.py +68 -0
- julee/domain/models/custom_fields/tests/__init__.py +0 -0
- julee/domain/models/custom_fields/tests/test_custom_fields.py +53 -0
- julee/domain/models/document/__init__.py +17 -0
- julee/domain/models/document/document.py +150 -0
- julee/domain/models/document/tests/__init__.py +0 -0
- julee/domain/models/document/tests/factories.py +76 -0
- julee/domain/models/document/tests/test_document.py +297 -0
- julee/domain/models/knowledge_service_config/__init__.py +17 -0
- julee/domain/models/knowledge_service_config/knowledge_service_config.py +86 -0
- julee/domain/models/policy/__init__.py +15 -0
- julee/domain/models/policy/document_policy_validation.py +220 -0
- julee/domain/models/policy/policy.py +203 -0
- julee/domain/models/policy/tests/__init__.py +0 -0
- julee/domain/models/policy/tests/factories.py +47 -0
- julee/domain/models/policy/tests/test_document_policy_validation.py +420 -0
- julee/domain/models/policy/tests/test_policy.py +546 -0
- julee/domain/repositories/__init__.py +27 -0
- julee/domain/repositories/assembly.py +45 -0
- julee/domain/repositories/assembly_specification.py +52 -0
- julee/domain/repositories/base.py +146 -0
- julee/domain/repositories/document.py +49 -0
- julee/domain/repositories/document_policy_validation.py +52 -0
- julee/domain/repositories/knowledge_service_config.py +54 -0
- julee/domain/repositories/knowledge_service_query.py +44 -0
- julee/domain/repositories/policy.py +49 -0
- julee/domain/use_cases/__init__.py +17 -0
- julee/domain/use_cases/decorators.py +107 -0
- julee/domain/use_cases/extract_assemble_data.py +649 -0
- julee/domain/use_cases/initialize_system_data.py +842 -0
- julee/domain/use_cases/tests/__init__.py +7 -0
- julee/domain/use_cases/tests/test_extract_assemble_data.py +548 -0
- julee/domain/use_cases/tests/test_initialize_system_data.py +455 -0
- julee/domain/use_cases/tests/test_validate_document.py +1228 -0
- julee/domain/use_cases/validate_document.py +736 -0
- julee/fixtures/assembly_specifications.yaml +70 -0
- julee/fixtures/documents.yaml +178 -0
- julee/fixtures/knowledge_service_configs.yaml +37 -0
- julee/fixtures/knowledge_service_queries.yaml +27 -0
- julee/repositories/__init__.py +17 -0
- julee/repositories/memory/__init__.py +31 -0
- julee/repositories/memory/assembly.py +84 -0
- julee/repositories/memory/assembly_specification.py +125 -0
- julee/repositories/memory/base.py +227 -0
- julee/repositories/memory/document.py +149 -0
- julee/repositories/memory/document_policy_validation.py +104 -0
- julee/repositories/memory/knowledge_service_config.py +123 -0
- julee/repositories/memory/knowledge_service_query.py +120 -0
- julee/repositories/memory/policy.py +87 -0
- julee/repositories/memory/tests/__init__.py +0 -0
- julee/repositories/memory/tests/test_document.py +212 -0
- julee/repositories/memory/tests/test_document_policy_validation.py +161 -0
- julee/repositories/memory/tests/test_policy.py +443 -0
- julee/repositories/minio/__init__.py +31 -0
- julee/repositories/minio/assembly.py +103 -0
- julee/repositories/minio/assembly_specification.py +170 -0
- julee/repositories/minio/client.py +570 -0
- julee/repositories/minio/document.py +530 -0
- julee/repositories/minio/document_policy_validation.py +120 -0
- julee/repositories/minio/knowledge_service_config.py +187 -0
- julee/repositories/minio/knowledge_service_query.py +211 -0
- julee/repositories/minio/policy.py +106 -0
- julee/repositories/minio/tests/__init__.py +0 -0
- julee/repositories/minio/tests/fake_client.py +213 -0
- julee/repositories/minio/tests/test_assembly.py +374 -0
- julee/repositories/minio/tests/test_assembly_specification.py +391 -0
- julee/repositories/minio/tests/test_client_protocol.py +57 -0
- julee/repositories/minio/tests/test_document.py +591 -0
- julee/repositories/minio/tests/test_document_policy_validation.py +192 -0
- julee/repositories/minio/tests/test_knowledge_service_config.py +374 -0
- julee/repositories/minio/tests/test_knowledge_service_query.py +438 -0
- julee/repositories/minio/tests/test_policy.py +559 -0
- julee/repositories/temporal/__init__.py +38 -0
- julee/repositories/temporal/activities.py +114 -0
- julee/repositories/temporal/activity_names.py +34 -0
- julee/repositories/temporal/proxies.py +159 -0
- julee/services/__init__.py +18 -0
- julee/services/knowledge_service/__init__.py +48 -0
- julee/services/knowledge_service/anthropic/__init__.py +12 -0
- julee/services/knowledge_service/anthropic/knowledge_service.py +331 -0
- julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +318 -0
- julee/services/knowledge_service/factory.py +138 -0
- julee/services/knowledge_service/knowledge_service.py +160 -0
- julee/services/knowledge_service/memory/__init__.py +13 -0
- julee/services/knowledge_service/memory/knowledge_service.py +278 -0
- julee/services/knowledge_service/memory/test_knowledge_service.py +345 -0
- julee/services/knowledge_service/test_factory.py +112 -0
- julee/services/temporal/__init__.py +38 -0
- julee/services/temporal/activities.py +86 -0
- julee/services/temporal/activity_names.py +22 -0
- julee/services/temporal/proxies.py +41 -0
- julee/util/__init__.py +0 -0
- julee/util/domain.py +119 -0
- julee/util/repos/__init__.py +0 -0
- julee/util/repos/minio/__init__.py +0 -0
- julee/util/repos/minio/file_storage.py +213 -0
- julee/util/repos/temporal/__init__.py +11 -0
- julee/util/repos/temporal/client_proxies/file_storage.py +68 -0
- julee/util/repos/temporal/data_converter.py +123 -0
- julee/util/repos/temporal/minio_file_storage.py +12 -0
- julee/util/repos/temporal/proxies/__init__.py +0 -0
- julee/util/repos/temporal/proxies/file_storage.py +58 -0
- julee/util/repositories.py +55 -0
- julee/util/temporal/__init__.py +22 -0
- julee/util/temporal/activities.py +123 -0
- julee/util/temporal/decorators.py +473 -0
- julee/util/tests/__init__.py +1 -0
- julee/util/tests/test_decorators.py +770 -0
- julee/util/validation/__init__.py +29 -0
- julee/util/validation/repository.py +100 -0
- julee/util/validation/type_guards.py +369 -0
- julee/worker.py +211 -0
- julee/workflows/__init__.py +26 -0
- julee/workflows/extract_assemble.py +215 -0
- julee/workflows/validate_document.py +228 -0
- julee-0.1.0.dist-info/METADATA +195 -0
- julee-0.1.0.dist-info/RECORD +161 -0
- julee-0.1.0.dist-info/WHEEL +5 -0
- julee-0.1.0.dist-info/licenses/LICENSE +674 -0
- julee-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Memory repository base classes and mixins.
|
|
3
|
+
|
|
4
|
+
This module provides common functionality for in-memory repository
|
|
5
|
+
implementations, reducing code duplication and ensuring consistent patterns
|
|
6
|
+
across all memory-based repositories in the julee domain.
|
|
7
|
+
|
|
8
|
+
The MemoryRepositoryMixin encapsulates common patterns like:
|
|
9
|
+
- Dictionary-based storage management
|
|
10
|
+
- Standardized logging patterns
|
|
11
|
+
- ID generation with consistent prefixes
|
|
12
|
+
- Timestamp management (created_at, updated_at)
|
|
13
|
+
- Generic CRUD operations with proper error handling
|
|
14
|
+
|
|
15
|
+
Classes using this mixin must provide:
|
|
16
|
+
- self.storage_dict: Dict[str, T] for entity storage
|
|
17
|
+
- self.entity_name: str for logging and ID generation
|
|
18
|
+
- self.logger: logging.Logger instance
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import uuid
|
|
22
|
+
from datetime import datetime, timezone
|
|
23
|
+
from typing import Optional, Dict, Any, TypeVar, Generic, List
|
|
24
|
+
from pydantic import BaseModel
|
|
25
|
+
|
|
26
|
+
# Type variable for the entity type stored by a repository; bound to pydantic's
# BaseModel so only pydantic models can parameterize MemoryRepositoryMixin.
T = TypeVar("T", bound=BaseModel)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class MemoryRepositoryMixin(Generic[T]):
    """
    Shared building blocks for dictionary-backed (in-memory) repositories.

    The mixin supplies the plumbing that every memory repository repeats:

    - Lookup of one or many entities in a plain dict
    - Storing an entity under the value of a caller-named ID attribute
    - "{prefix}-{uuid4}" ID generation
    - Stamping created_at (only while it is None) and updated_at (on every
      save) when the entity exposes those attributes
    - Uniform log messages carrying structured ``extra`` payloads

    The host class must assign three attributes before using any method:

    - self.storage_dict: Dict[str, T] that holds the entities
    - self.entity_name: str used in log messages, log keys and default IDs
    - self.logger: logger object exposing ``debug`` and ``info``
    """

    # Attributes the host class must assign; declared here for type checkers
    storage_dict: Dict[str, T]
    entity_name: str
    logger: Any  # logging.Logger, but avoiding import

    def get_entity(self, entity_id: str) -> Optional[T]:
        """Look up a single entity by ID, logging the attempt and the outcome.

        Args:
            entity_id: Key of the entity in ``storage_dict``

        Returns:
            The stored entity, or None when the ID is unknown
        """
        id_key = f"{self.entity_name.lower()}_id"

        self.logger.debug(
            f"Memory{self.entity_name}Repository: Attempting to retrieve "
            f"{self.entity_name.lower()}",
            extra={id_key: entity_id},
        )

        found = self.storage_dict.get(entity_id)
        if found is not None:
            # Hit: give the (possibly overridden) hook a chance to enrich
            # the structured log payload before reporting success.
            hit_extra: Dict[str, Any] = {id_key: entity_id}
            self._add_entity_specific_log_data(found, hit_extra)
            self.logger.info(
                f"Memory{self.entity_name}Repository: {self.entity_name} "
                f"retrieved successfully",
                extra=hit_extra,
            )
            return found

        self.logger.debug(
            f"Memory{self.entity_name}Repository: {self.entity_name} not found",
            extra={id_key: entity_id},
        )
        return None

    def get_many_entities(self, entity_ids: List[str]) -> Dict[str, Optional[T]]:
        """Look up several entities at once, logging request and hit counts.

        Args:
            entity_ids: IDs to look up in ``storage_dict``

        Returns:
            Dict with one key per distinct requested ID; the value is the
            stored entity, or None when that ID is unknown
        """
        ids_key = f"{self.entity_name.lower()}_ids"
        requested = len(entity_ids)

        self.logger.debug(
            f"Memory{self.entity_name}Repository: Attempting to retrieve "
            f"multiple {self.entity_name.lower()}s",
            extra={ids_key: entity_ids, "count": requested},
        )

        result: Dict[str, Optional[T]] = {
            entity_id: self.storage_dict.get(entity_id) for entity_id in entity_ids
        }
        # Counted per requested ID (a repeated ID counts each time) so the
        # figure stays comparable with the request size reported below.
        found_count = sum(
            1 for entity_id in entity_ids if result[entity_id] is not None
        )

        self.logger.info(
            f"Memory{self.entity_name}Repository: Retrieved "
            f"{found_count}/{requested} {self.entity_name.lower()}s",
            extra={
                ids_key: entity_ids,
                "requested_count": requested,
                "found_count": found_count,
                "missing_count": requested - found_count,
            },
        )

        return result

    def save_entity(self, entity: T, entity_id_field: str) -> None:
        """Stamp the entity's timestamps and store it under its own ID.

        The entity object itself is stored (no copy is made here), replacing
        any entry already present under the same ID.

        Args:
            entity: Entity to store; its timestamps are updated in place
            entity_id_field: Name of the attribute on ``entity`` whose value
                is used as the storage key
        """
        entity_id = getattr(entity, entity_id_field)
        id_key = f"{self.entity_name.lower()}_id"

        # Payload describing the entity as it was handed in
        before_extra: Dict[str, Any] = {id_key: entity_id}
        self._add_entity_specific_log_data(entity, before_extra)
        self.logger.debug(
            f"Memory{self.entity_name}Repository: Saving "
            f"{self.entity_name.lower()}",
            extra=before_extra,
        )

        self.update_timestamps(entity)
        self.storage_dict[entity_id] = entity  # overwrite makes re-saves safe

        # Payload rebuilt after stamping so it reflects the stored state
        after_extra: Dict[str, Any] = {id_key: entity_id}
        self._add_entity_specific_log_data(entity, after_extra)
        self.logger.info(
            f"Memory{self.entity_name}Repository: {self.entity_name} "
            f"saved successfully",
            extra=after_extra,
        )

    def generate_entity_id(self, prefix: Optional[str] = None) -> str:
        """Build a fresh ID of the form "{prefix}-{uuid4}".

        Args:
            prefix: Text placed before the UUID; when None the lower-cased
                ``entity_name`` is used instead

        Returns:
            The newly generated ID string
        """
        effective_prefix = self.entity_name.lower() if prefix is None else prefix
        new_id = f"{effective_prefix}-{uuid.uuid4()}"

        self.logger.debug(
            f"Memory{self.entity_name}Repository: Generated "
            f"{self.entity_name.lower()} ID",
            extra={f"{self.entity_name.lower()}_id": new_id},
        )

        return new_id

    def update_timestamps(self, entity: T) -> None:
        """Set created_at when it is still None and refresh updated_at.

        Both fields receive the same timezone-aware UTC instant. An
        attribute the entity does not have is left alone.

        Args:
            entity: Model whose created_at / updated_at attributes (if
                present) are assigned in place
        """
        stamp = datetime.now(timezone.utc)

        created_unset = (
            hasattr(entity, "created_at") and getattr(entity, "created_at") is None
        )
        if created_unset:
            setattr(entity, "created_at", stamp)

        if hasattr(entity, "updated_at"):
            setattr(entity, "updated_at", stamp)

    def _add_entity_specific_log_data(
        self, entity: T, log_data: Dict[str, Any]
    ) -> None:
        """Hook that enriches a log payload with details of ``entity``.

        The base version records ``status`` (its ``.value`` when the status
        object has one, otherwise its string form) and, when truthy,
        ``updated_at`` in ISO format. Repositories override this to append
        their own fields.

        Args:
            entity: Entity the log entry is about
            log_data: Payload dict, modified in place
        """
        if hasattr(entity, "status"):
            status = getattr(entity, "status")
            if hasattr(status, "value"):
                log_data["status"] = status.value
            else:
                log_data["status"] = str(status)

        # A missing attribute and an unset (falsy) one are both skipped
        updated_at = getattr(entity, "updated_at", None)
        if updated_at:
            log_data["updated_at"] = updated_at.isoformat()
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Memory implementation of DocumentRepository.
|
|
3
|
+
|
|
4
|
+
This module provides an in-memory implementation of the DocumentRepository
|
|
5
|
+
protocol that follows the Clean Architecture patterns defined in the
|
|
6
|
+
Fun-Police Framework. It handles document storage with content and metadata
|
|
7
|
+
in memory dictionaries, ensuring idempotency and proper error handling.
|
|
8
|
+
|
|
9
|
+
The implementation uses Python dictionaries to store document data, making it
|
|
10
|
+
ideal for testing scenarios where external dependencies should be avoided.
|
|
11
|
+
All operations are still async to maintain interface compatibility.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import hashlib
|
|
15
|
+
import io
|
|
16
|
+
import logging
|
|
17
|
+
from typing import Optional, Dict, Any, List
|
|
18
|
+
|
|
19
|
+
from julee.domain.models.document import Document
|
|
20
|
+
from julee.domain.models.custom_fields.content_stream import (
|
|
21
|
+
ContentStream,
|
|
22
|
+
)
|
|
23
|
+
from julee.domain.repositories.document import DocumentRepository
|
|
24
|
+
from .base import MemoryRepositoryMixin
|
|
25
|
+
|
|
26
|
+
# Module-level logger named after this module; MemoryDocumentRepository
# assigns it to self.logger in __init__.
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class MemoryDocumentRepository(DocumentRepository, MemoryRepositoryMixin[Document]):
    """
    Memory implementation of DocumentRepository using Python dictionaries.

    Documents are kept in a single dictionary keyed by document_id; each
    value is the Document object (metadata plus its content stream).

    This provides a lightweight, dependency-free option for testing while
    exposing the same async methods as other implementations.
    """

    def __init__(self) -> None:
        """Initialize repository with empty in-memory storage."""
        self.logger = logger
        self.entity_name = "Document"
        self.storage_dict: Dict[str, Document] = {}

        logger.debug("Initializing MemoryDocumentRepository")

    async def get(self, document_id: str) -> Optional[Document]:
        """Retrieve a document with metadata and content.

        Args:
            document_id: Unique document identifier

        Returns:
            Document object if found, None otherwise
        """
        return self.get_entity(document_id)

    async def save(self, document: Document) -> None:
        """Save a document, materialising ``content_string`` first if set.

        When ``content_string`` is not None it is UTF-8 encoded and wrapped
        in a ContentStream. ``content``, ``content_multihash`` (SHA-256 hex
        digest of the bytes) and ``size_bytes`` are then replaced on a copy
        of the document, overriding whatever values the caller supplied for
        those fields.

        What gets stored is always a further copy with ``content_string``
        cleared; that copy, not the caller's instance, is the object passed
        on for timestamping and storage.

        This method performs no validation of its own: a document with
        neither content nor content_string is stored as-is.

        Args:
            document: Document object to save

        Raises:
            UnicodeEncodeError: If content_string cannot be encoded as UTF-8
                (for example, it contains lone surrogates)
        """
        # Bind once so the type checker narrows Optional[str] without an assert
        content_string = document.content_string
        if content_string is not None:
            content_bytes = content_string.encode("utf-8")
            content_stream = ContentStream(io.BytesIO(content_bytes))
            content_hash = hashlib.sha256(content_bytes).hexdigest()

            # Derived fields replace any caller-provided content/hash/size
            document = document.model_copy(
                update={
                    "content": content_stream,
                    "content_multihash": content_hash,
                    "size_bytes": len(content_bytes),
                }
            )

            self.logger.debug(
                "Converted content_string to ContentStream for document save",
                extra={
                    "document_id": document.document_id,
                    "content_hash": content_hash,
                    "content_length": len(content_bytes),
                },
            )

        # Store a copy without content_string; the text (if any) now lives
        # in the content stream set above.
        document_for_storage = document.model_copy(update={"content_string": None})
        self.save_entity(document_for_storage, "document_id")

    async def generate_id(self) -> str:
        """Generate a unique document identifier.

        Returns:
            Unique document ID string of the form "doc-{uuid4}"
        """
        return self.generate_entity_id("doc")

    async def get_many(self, document_ids: List[str]) -> Dict[str, Optional[Document]]:
        """Retrieve multiple documents by ID.

        Args:
            document_ids: List of unique document identifiers

        Returns:
            Dict mapping document_id to Document (or None if not found)
        """
        return self.get_many_entities(document_ids)

    async def list_all(self) -> List[Document]:
        """List all documents.

        Returns:
            New list holding every Document currently in the repository
        """
        self.logger.debug(
            f"Memory{self.entity_name}Repository: Listing all "
            f"{self.entity_name.lower()}s"
        )

        documents = list(self.storage_dict.values())

        self.logger.info(
            f"Memory{self.entity_name}Repository: Listed all "
            f"{self.entity_name.lower()}s",
            extra={"count": len(documents)},
        )

        return documents

    def _add_entity_specific_log_data(
        self, entity: Document, log_data: Dict[str, Any]
    ) -> None:
        """Add document-specific data to log entries.

        Extends the mixin's payload with ``content_length``, taken from the
        document's ``size_bytes``.

        Args:
            entity: Document the log entry is about
            log_data: Payload dict, modified in place
        """
        super()._add_entity_specific_log_data(entity, log_data)
        log_data["content_length"] = entity.size_bytes
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Memory implementation of DocumentPolicyValidationRepository.
|
|
3
|
+
|
|
4
|
+
This module provides an in-memory implementation of the
|
|
5
|
+
DocumentPolicyValidationRepository protocol that follows the Clean
|
|
6
|
+
Architecture patterns defined in the Fun-Police Framework. It handles
|
|
7
|
+
document policy validation storage in memory dictionaries, ensuring
|
|
8
|
+
idempotency and proper error handling.
|
|
9
|
+
|
|
10
|
+
The implementation uses Python dictionaries to store validation data, making
|
|
11
|
+
it ideal for testing scenarios where external dependencies should be avoided.
|
|
12
|
+
All operations are still async to maintain interface compatibility.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import logging
|
|
16
|
+
from typing import Optional, Dict, Any, List
|
|
17
|
+
|
|
18
|
+
from julee.domain.models.policy import DocumentPolicyValidation
|
|
19
|
+
from julee.domain.repositories.document_policy_validation import (
|
|
20
|
+
DocumentPolicyValidationRepository,
|
|
21
|
+
)
|
|
22
|
+
from .base import MemoryRepositoryMixin
|
|
23
|
+
|
|
24
|
+
# Module-level logger named after this module;
# MemoryDocumentPolicyValidationRepository assigns it to self.logger in __init__.
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class MemoryDocumentPolicyValidationRepository(
    DocumentPolicyValidationRepository,
    MemoryRepositoryMixin[DocumentPolicyValidation],
):
    """
    Dictionary-backed DocumentPolicyValidationRepository.

    Validations live in one in-memory dict keyed by validation_id. Storage,
    lookup, ID generation and logging are delegated to
    MemoryRepositoryMixin; this class only wires up the async interface and
    the validation-specific log fields.
    """

    def __init__(self) -> None:
        """Create the repository with an empty storage dict."""
        self.logger = logger
        self.entity_name = "DocumentPolicyValidation"
        self.storage_dict: Dict[str, DocumentPolicyValidation] = {}

        logger.debug("Initializing MemoryDocumentPolicyValidationRepository")

    async def get(self, validation_id: str) -> Optional[DocumentPolicyValidation]:
        """Fetch one validation by ID.

        Args:
            validation_id: ID the validation was stored under

        Returns:
            The stored DocumentPolicyValidation, or None for an unknown ID
        """
        return self.get_entity(validation_id)

    async def save(self, validation: DocumentPolicyValidation) -> None:
        """Store a validation under its ``validation_id``.

        Args:
            validation: DocumentPolicyValidation to store
        """
        self.save_entity(validation, "validation_id")

    async def generate_id(self) -> str:
        """Produce a new ID of the form "validation-{uuid4}".

        Returns:
            The generated validation ID
        """
        return self.generate_entity_id("validation")

    async def get_many(
        self, validation_ids: List[str]
    ) -> Dict[str, Optional[DocumentPolicyValidation]]:
        """Fetch several validations in one call.

        Args:
            validation_ids: IDs to look up

        Returns:
            Dict from each requested validation_id to its
            DocumentPolicyValidation, or to None when the ID is unknown
        """
        return self.get_many_entities(validation_ids)

    def _add_entity_specific_log_data(
        self, entity: DocumentPolicyValidation, log_data: Dict[str, Any]
    ) -> None:
        """Append validation details to a log payload.

        On top of the mixin's fields this records the input document and
        policy IDs, the number of validation scores, whether any
        transformation output is present, and - only when set - the pass
        result and an error flag.

        Args:
            entity: Validation the log entry is about
            log_data: Payload dict, modified in place
        """
        super()._add_entity_specific_log_data(entity, log_data)

        log_data.update(
            {
                "input_document_id": entity.input_document_id,
                "policy_id": entity.policy_id,
                "validation_scores_count": len(entity.validation_scores),
                # True as soon as either transformation output exists
                "has_transformations": not (
                    entity.transformed_document_id is None
                    and entity.post_transform_validation_scores is None
                ),
            }
        )

        # Optional fields are omitted entirely rather than logged as None
        if entity.passed is not None:
            log_data["passed"] = entity.passed
        if entity.error_message:
            log_data["has_error"] = True
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Memory implementation of KnowledgeServiceConfigRepository.
|
|
3
|
+
|
|
4
|
+
This module provides an in-memory implementation of the
|
|
5
|
+
KnowledgeServiceConfigRepository
|
|
6
|
+
protocol that follows the Clean Architecture patterns defined in the
|
|
7
|
+
Fun-Police Framework. It handles knowledge service configuration storage
|
|
8
|
+
in memory dictionaries, ensuring idempotency and proper error handling.
|
|
9
|
+
|
|
10
|
+
The implementation uses Python dictionaries to store knowledge service
|
|
11
|
+
configuration data, making it ideal for testing scenarios where external
|
|
12
|
+
dependencies should be avoided. All operations are still async to maintain
|
|
13
|
+
interface compatibility.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import logging
|
|
17
|
+
from typing import Optional, Dict, Any, List
|
|
18
|
+
|
|
19
|
+
from julee.domain.models.knowledge_service_config import (
|
|
20
|
+
KnowledgeServiceConfig,
|
|
21
|
+
)
|
|
22
|
+
from julee.domain.repositories.knowledge_service_config import (
|
|
23
|
+
KnowledgeServiceConfigRepository,
|
|
24
|
+
)
|
|
25
|
+
from .base import MemoryRepositoryMixin
|
|
26
|
+
|
|
27
|
+
# Module-level logger named after this module;
# MemoryKnowledgeServiceConfigRepository assigns it to self.logger in __init__.
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class MemoryKnowledgeServiceConfigRepository(
    KnowledgeServiceConfigRepository,
    MemoryRepositoryMixin[KnowledgeServiceConfig],
):
    """
    Dictionary-backed KnowledgeServiceConfigRepository.

    Configurations live in one in-memory dict keyed by
    knowledge_service_id. Storage, lookup, ID generation and logging are
    delegated to MemoryRepositoryMixin; this class wires up the async
    interface, adds ``list_all`` and contributes config-specific log fields.
    """

    def __init__(self) -> None:
        """Create the repository with an empty storage dict."""
        self.logger = logger
        self.entity_name = "KnowledgeServiceConfig"
        self.storage_dict: Dict[str, KnowledgeServiceConfig] = {}

        logger.debug("Initializing MemoryKnowledgeServiceConfigRepository")

    async def get(self, knowledge_service_id: str) -> Optional[KnowledgeServiceConfig]:
        """Fetch one knowledge service configuration by ID.

        Args:
            knowledge_service_id: ID the configuration was stored under

        Returns:
            The stored KnowledgeServiceConfig, or None for an unknown ID
        """
        return self.get_entity(knowledge_service_id)

    async def save(self, knowledge_service: KnowledgeServiceConfig) -> None:
        """Store a configuration under its ``knowledge_service_id``.

        Args:
            knowledge_service: KnowledgeServiceConfig to store
        """
        self.save_entity(knowledge_service, "knowledge_service_id")

    async def generate_id(self) -> str:
        """Produce a new ID of the form "ks-{uuid4}".

        Returns:
            The generated knowledge service ID
        """
        return self.generate_entity_id("ks")

    async def get_many(
        self, knowledge_service_ids: List[str]
    ) -> Dict[str, Optional[KnowledgeServiceConfig]]:
        """Fetch several configurations in one call.

        Args:
            knowledge_service_ids: IDs to look up

        Returns:
            Dict from each requested knowledge_service_id to its
            KnowledgeServiceConfig, or to None when the ID is unknown
        """
        return self.get_many_entities(knowledge_service_ids)

    async def list_all(self) -> List[KnowledgeServiceConfig]:
        """Return every stored configuration.

        Returns:
            New list holding all KnowledgeServiceConfig objects currently in
            the storage dict
        """
        self.logger.debug(
            f"Memory{self.entity_name}Repository: Listing all "
            f"{self.entity_name.lower()}s"
        )

        # Snapshot the values so callers get a list independent of the dict
        snapshot = [*self.storage_dict.values()]

        self.logger.info(
            f"Memory{self.entity_name}Repository: Listed all "
            f"{self.entity_name.lower()}s",
            extra={"count": len(snapshot)},
        )

        return snapshot

    def _add_entity_specific_log_data(
        self, entity: KnowledgeServiceConfig, log_data: Dict[str, Any]
    ) -> None:
        """Append configuration details to a log payload.

        On top of the mixin's fields this records the config's ``name`` as
        ``service_name`` and the ``.value`` of its ``service_api`` as
        ``service_api``.

        Args:
            entity: Configuration the log entry is about
            log_data: Payload dict, modified in place
        """
        super()._add_entity_specific_log_data(entity, log_data)
        log_data.update(
            service_name=entity.name,
            service_api=entity.service_api.value,
        )
|