julee 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- julee/api/app.py +9 -8
- julee/api/dependencies.py +15 -15
- julee/api/requests.py +10 -9
- julee/api/responses.py +2 -1
- julee/api/routers/__init__.py +5 -5
- julee/api/routers/assembly_specifications.py +5 -4
- julee/api/routers/documents.py +1 -1
- julee/api/routers/knowledge_service_configs.py +4 -3
- julee/api/routers/knowledge_service_queries.py +7 -6
- julee/api/routers/system.py +4 -3
- julee/api/routers/workflows.py +4 -5
- julee/api/services/system_initialization.py +6 -6
- julee/api/tests/routers/test_assembly_specifications.py +4 -3
- julee/api/tests/routers/test_documents.py +11 -10
- julee/api/tests/routers/test_knowledge_service_configs.py +7 -6
- julee/api/tests/routers/test_knowledge_service_queries.py +4 -3
- julee/api/tests/routers/test_system.py +5 -4
- julee/api/tests/routers/test_workflows.py +5 -4
- julee/api/tests/test_app.py +5 -4
- julee/api/tests/test_dependencies.py +3 -2
- julee/api/tests/test_requests.py +2 -1
- julee/contrib/__init__.py +15 -0
- julee/contrib/polling/__init__.py +47 -0
- julee/contrib/polling/domain/__init__.py +17 -0
- julee/contrib/polling/domain/models/__init__.py +13 -0
- julee/contrib/polling/domain/models/polling_config.py +39 -0
- julee/contrib/polling/domain/services/__init__.py +11 -0
- julee/contrib/polling/domain/services/poller.py +39 -0
- julee/contrib/polling/infrastructure/__init__.py +15 -0
- julee/contrib/polling/infrastructure/services/__init__.py +12 -0
- julee/contrib/polling/infrastructure/services/polling/__init__.py +12 -0
- julee/contrib/polling/infrastructure/services/polling/http/__init__.py +12 -0
- julee/contrib/polling/infrastructure/services/polling/http/http_poller_service.py +80 -0
- julee/contrib/polling/infrastructure/temporal/__init__.py +20 -0
- julee/contrib/polling/infrastructure/temporal/activities.py +42 -0
- julee/contrib/polling/infrastructure/temporal/activity_names.py +20 -0
- julee/contrib/polling/infrastructure/temporal/proxies.py +45 -0
- julee/contrib/polling/tests/__init__.py +6 -0
- julee/contrib/polling/tests/unit/__init__.py +6 -0
- julee/contrib/polling/tests/unit/infrastructure/__init__.py +7 -0
- julee/contrib/polling/tests/unit/infrastructure/services/__init__.py +6 -0
- julee/contrib/polling/tests/unit/infrastructure/services/polling/__init__.py +6 -0
- julee/contrib/polling/tests/unit/infrastructure/services/polling/http/__init__.py +7 -0
- julee/contrib/polling/tests/unit/infrastructure/services/polling/http/test_http_poller_service.py +163 -0
- julee/docs/__init__.py +5 -0
- julee/docs/sphinx_hcd/__init__.py +76 -0
- julee/docs/sphinx_hcd/accelerators.py +1175 -0
- julee/docs/sphinx_hcd/apps.py +518 -0
- julee/docs/sphinx_hcd/config.py +148 -0
- julee/docs/sphinx_hcd/epics.py +453 -0
- julee/docs/sphinx_hcd/integrations.py +310 -0
- julee/docs/sphinx_hcd/journeys.py +797 -0
- julee/docs/sphinx_hcd/personas.py +457 -0
- julee/docs/sphinx_hcd/stories.py +960 -0
- julee/docs/sphinx_hcd/utils.py +185 -0
- julee/domain/models/__init__.py +5 -6
- julee/domain/models/assembly/assembly.py +7 -7
- julee/domain/models/assembly/tests/factories.py +2 -1
- julee/domain/models/assembly/tests/test_assembly.py +16 -13
- julee/domain/models/assembly_specification/assembly_specification.py +11 -10
- julee/domain/models/assembly_specification/knowledge_service_query.py +7 -6
- julee/domain/models/assembly_specification/tests/factories.py +2 -1
- julee/domain/models/assembly_specification/tests/test_assembly_specification.py +9 -6
- julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +3 -1
- julee/domain/models/custom_fields/content_stream.py +3 -2
- julee/domain/models/custom_fields/tests/test_custom_fields.py +2 -1
- julee/domain/models/document/document.py +23 -30
- julee/domain/models/document/tests/factories.py +3 -2
- julee/domain/models/document/tests/test_document.py +20 -37
- julee/domain/models/knowledge_service_config/knowledge_service_config.py +4 -4
- julee/domain/models/policy/__init__.py +4 -4
- julee/domain/models/policy/document_policy_validation.py +17 -17
- julee/domain/models/policy/policy.py +10 -10
- julee/domain/models/policy/tests/factories.py +2 -1
- julee/domain/models/policy/tests/test_document_policy_validation.py +3 -1
- julee/domain/models/policy/tests/test_policy.py +2 -1
- julee/domain/repositories/__init__.py +3 -3
- julee/domain/repositories/assembly.py +3 -1
- julee/domain/repositories/assembly_specification.py +2 -0
- julee/domain/repositories/base.py +5 -4
- julee/domain/repositories/document.py +3 -1
- julee/domain/repositories/document_policy_validation.py +3 -1
- julee/domain/repositories/knowledge_service_config.py +2 -0
- julee/domain/repositories/knowledge_service_query.py +1 -0
- julee/domain/repositories/policy.py +3 -1
- julee/domain/use_cases/decorators.py +3 -2
- julee/domain/use_cases/extract_assemble_data.py +14 -13
- julee/domain/use_cases/initialize_system_data.py +88 -34
- julee/domain/use_cases/tests/test_extract_assemble_data.py +10 -10
- julee/domain/use_cases/tests/test_initialize_system_data.py +2 -2
- julee/domain/use_cases/tests/test_validate_document.py +11 -11
- julee/domain/use_cases/validate_document.py +14 -14
- julee/fixtures/documents.yaml +4 -43
- julee/fixtures/knowledge_service_queries.yaml +9 -0
- julee/maintenance/__init__.py +1 -0
- julee/maintenance/release.py +243 -0
- julee/repositories/memory/assembly.py +6 -5
- julee/repositories/memory/assembly_specification.py +8 -9
- julee/repositories/memory/base.py +12 -11
- julee/repositories/memory/document.py +27 -20
- julee/repositories/memory/document_policy_validation.py +7 -6
- julee/repositories/memory/knowledge_service_config.py +8 -7
- julee/repositories/memory/knowledge_service_query.py +8 -7
- julee/repositories/memory/policy.py +6 -5
- julee/repositories/memory/tests/test_document.py +24 -22
- julee/repositories/memory/tests/test_document_policy_validation.py +2 -1
- julee/repositories/memory/tests/test_policy.py +2 -1
- julee/repositories/minio/assembly.py +4 -4
- julee/repositories/minio/assembly_specification.py +6 -8
- julee/repositories/minio/client.py +22 -25
- julee/repositories/minio/document.py +36 -33
- julee/repositories/minio/document_policy_validation.py +5 -5
- julee/repositories/minio/knowledge_service_config.py +6 -6
- julee/repositories/minio/knowledge_service_query.py +6 -9
- julee/repositories/minio/policy.py +4 -4
- julee/repositories/minio/tests/fake_client.py +11 -9
- julee/repositories/minio/tests/test_assembly.py +3 -1
- julee/repositories/minio/tests/test_assembly_specification.py +2 -1
- julee/repositories/minio/tests/test_client_protocol.py +5 -5
- julee/repositories/minio/tests/test_document.py +23 -22
- julee/repositories/minio/tests/test_document_policy_validation.py +3 -1
- julee/repositories/minio/tests/test_knowledge_service_config.py +4 -2
- julee/repositories/minio/tests/test_knowledge_service_query.py +3 -2
- julee/repositories/minio/tests/test_policy.py +3 -1
- julee/repositories/temporal/activities.py +5 -5
- julee/repositories/temporal/proxies.py +5 -5
- julee/services/knowledge_service/__init__.py +1 -2
- julee/services/knowledge_service/anthropic/knowledge_service.py +8 -7
- julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +11 -10
- julee/services/knowledge_service/factory.py +8 -8
- julee/services/knowledge_service/knowledge_service.py +12 -14
- julee/services/knowledge_service/memory/knowledge_service.py +13 -12
- julee/services/knowledge_service/memory/test_knowledge_service.py +10 -7
- julee/services/knowledge_service/test_factory.py +11 -10
- julee/services/temporal/activities.py +10 -10
- julee/services/temporal/proxies.py +2 -2
- julee/util/domain.py +6 -6
- julee/util/repos/minio/file_storage.py +8 -9
- julee/util/repos/temporal/client_proxies/file_storage.py +3 -4
- julee/util/repos/temporal/data_converter.py +6 -6
- julee/util/repos/temporal/minio_file_storage.py +1 -1
- julee/util/repos/temporal/proxies/file_storage.py +2 -3
- julee/util/repositories.py +4 -3
- julee/util/temporal/decorators.py +20 -18
- julee/util/tests/test_decorators.py +13 -15
- julee/util/validation/repository.py +3 -3
- julee/util/validation/type_guards.py +12 -11
- julee/worker.py +9 -8
- julee/workflows/__init__.py +2 -2
- julee/workflows/extract_assemble.py +2 -1
- julee/workflows/validate_document.py +3 -2
- {julee-0.1.2.dist-info → julee-0.1.4.dist-info}/METADATA +3 -3
- julee-0.1.4.dist-info/RECORD +196 -0
- julee/fixtures/assembly_specifications.yaml +0 -70
- julee-0.1.2.dist-info/RECORD +0 -161
- {julee-0.1.2.dist-info → julee-0.1.4.dist-info}/WHEEL +0 -0
- {julee-0.1.2.dist-info → julee-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {julee-0.1.2.dist-info → julee-0.1.4.dist-info}/top_level.txt +0 -0
|
@@ -13,7 +13,7 @@ should be avoided.
|
|
|
13
13
|
"""
|
|
14
14
|
|
|
15
15
|
import logging
|
|
16
|
-
from typing import
|
|
16
|
+
from typing import Any
|
|
17
17
|
|
|
18
18
|
from julee.domain.models.assembly_specification import (
|
|
19
19
|
KnowledgeServiceQuery,
|
|
@@ -21,6 +21,7 @@ from julee.domain.models.assembly_specification import (
|
|
|
21
21
|
from julee.domain.repositories.knowledge_service_query import (
|
|
22
22
|
KnowledgeServiceQueryRepository,
|
|
23
23
|
)
|
|
24
|
+
|
|
24
25
|
from .base import MemoryRepositoryMixin
|
|
25
26
|
|
|
26
27
|
logger = logging.getLogger(__name__)
|
|
@@ -47,11 +48,11 @@ class MemoryKnowledgeServiceQueryRepository(
|
|
|
47
48
|
"""Initialize repository with empty in-memory storage."""
|
|
48
49
|
self.logger = logger
|
|
49
50
|
self.entity_name = "KnowledgeServiceQuery"
|
|
50
|
-
self.storage_dict:
|
|
51
|
+
self.storage_dict: dict[str, KnowledgeServiceQuery] = {}
|
|
51
52
|
|
|
52
53
|
logger.debug("Initializing MemoryKnowledgeServiceQueryRepository")
|
|
53
54
|
|
|
54
|
-
async def get(self, query_id: str) ->
|
|
55
|
+
async def get(self, query_id: str) -> KnowledgeServiceQuery | None:
|
|
55
56
|
"""Retrieve a knowledge service query by ID.
|
|
56
57
|
|
|
57
58
|
Args:
|
|
@@ -71,8 +72,8 @@ class MemoryKnowledgeServiceQueryRepository(
|
|
|
71
72
|
self.save_entity(query, "query_id")
|
|
72
73
|
|
|
73
74
|
async def get_many(
|
|
74
|
-
self, query_ids:
|
|
75
|
-
) ->
|
|
75
|
+
self, query_ids: list[str]
|
|
76
|
+
) -> dict[str, KnowledgeServiceQuery | None]:
|
|
76
77
|
"""Retrieve multiple knowledge service queries by ID.
|
|
77
78
|
|
|
78
79
|
Args:
|
|
@@ -92,7 +93,7 @@ class MemoryKnowledgeServiceQueryRepository(
|
|
|
92
93
|
"""
|
|
93
94
|
return self.generate_entity_id("query")
|
|
94
95
|
|
|
95
|
-
async def list_all(self) ->
|
|
96
|
+
async def list_all(self) -> list[KnowledgeServiceQuery]:
|
|
96
97
|
"""List all knowledge service queries.
|
|
97
98
|
|
|
98
99
|
Returns:
|
|
@@ -113,7 +114,7 @@ class MemoryKnowledgeServiceQueryRepository(
|
|
|
113
114
|
return entities
|
|
114
115
|
|
|
115
116
|
def _add_entity_specific_log_data(
|
|
116
|
-
self, entity: KnowledgeServiceQuery, log_data:
|
|
117
|
+
self, entity: KnowledgeServiceQuery, log_data: dict[str, Any]
|
|
117
118
|
) -> None:
|
|
118
119
|
"""Add knowledge service query-specific data to log entries."""
|
|
119
120
|
super()._add_entity_specific_log_data(entity, log_data)
|
|
@@ -12,10 +12,11 @@ All operations are still async to maintain interface compatibility.
|
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
14
|
import logging
|
|
15
|
-
from typing import
|
|
15
|
+
from typing import Any
|
|
16
16
|
|
|
17
17
|
from julee.domain.models.policy import Policy
|
|
18
18
|
from julee.domain.repositories.policy import PolicyRepository
|
|
19
|
+
|
|
19
20
|
from .base import MemoryRepositoryMixin
|
|
20
21
|
|
|
21
22
|
logger = logging.getLogger(__name__)
|
|
@@ -34,11 +35,11 @@ class MemoryPolicyRepository(PolicyRepository, MemoryRepositoryMixin[Policy]):
|
|
|
34
35
|
"""Initialize repository with empty in-memory storage."""
|
|
35
36
|
self.logger = logger
|
|
36
37
|
self.entity_name = "Policy"
|
|
37
|
-
self.storage_dict:
|
|
38
|
+
self.storage_dict: dict[str, Policy] = {}
|
|
38
39
|
|
|
39
40
|
logger.debug("Initializing MemoryPolicyRepository")
|
|
40
41
|
|
|
41
|
-
async def get(self, policy_id: str) ->
|
|
42
|
+
async def get(self, policy_id: str) -> Policy | None:
|
|
42
43
|
"""Retrieve a policy by ID.
|
|
43
44
|
|
|
44
45
|
Args:
|
|
@@ -65,7 +66,7 @@ class MemoryPolicyRepository(PolicyRepository, MemoryRepositoryMixin[Policy]):
|
|
|
65
66
|
"""
|
|
66
67
|
return self.generate_entity_id("policy")
|
|
67
68
|
|
|
68
|
-
async def get_many(self, policy_ids:
|
|
69
|
+
async def get_many(self, policy_ids: list[str]) -> dict[str, Policy | None]:
|
|
69
70
|
"""Retrieve multiple policies by ID.
|
|
70
71
|
|
|
71
72
|
Args:
|
|
@@ -77,7 +78,7 @@ class MemoryPolicyRepository(PolicyRepository, MemoryRepositoryMixin[Policy]):
|
|
|
77
78
|
return self.get_many_entities(policy_ids)
|
|
78
79
|
|
|
79
80
|
def _add_entity_specific_log_data(
|
|
80
|
-
self, entity: Policy, log_data:
|
|
81
|
+
self, entity: Policy, log_data: dict[str, Any]
|
|
81
82
|
) -> None:
|
|
82
83
|
"""Add policy-specific data to log entries."""
|
|
83
84
|
super()._add_entity_specific_log_data(entity, log_data)
|
|
@@ -3,18 +3,20 @@ Unit tests for MemoryDocumentRepository.
|
|
|
3
3
|
|
|
4
4
|
These tests verify the memory implementation logic without requiring external
|
|
5
5
|
dependencies. They follow the Clean Architecture testing patterns and verify
|
|
6
|
-
idempotency, error handling, and content operations including
|
|
6
|
+
idempotency, error handling, and content operations including content_bytes.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
9
|
import io
|
|
10
|
+
|
|
10
11
|
import pytest
|
|
11
|
-
|
|
12
|
-
MemoryDocumentRepository,
|
|
13
|
-
)
|
|
14
|
-
from julee.domain.models.document import Document, DocumentStatus
|
|
12
|
+
|
|
15
13
|
from julee.domain.models.custom_fields.content_stream import (
|
|
16
14
|
ContentStream,
|
|
17
15
|
)
|
|
16
|
+
from julee.domain.models.document import Document, DocumentStatus
|
|
17
|
+
from julee.repositories.memory.document import (
|
|
18
|
+
MemoryDocumentRepository,
|
|
19
|
+
)
|
|
18
20
|
|
|
19
21
|
|
|
20
22
|
@pytest.fixture
|
|
@@ -44,16 +46,16 @@ def sample_document(sample_content: ContentStream) -> Document:
|
|
|
44
46
|
)
|
|
45
47
|
|
|
46
48
|
|
|
47
|
-
class
|
|
48
|
-
"""Test
|
|
49
|
+
class TestMemoryDocumentRepositoryContentBytes:
|
|
50
|
+
"""Test content_bytes functionality."""
|
|
49
51
|
|
|
50
|
-
async def
|
|
52
|
+
async def test_save_document_with_content_bytes(
|
|
51
53
|
self, repository: MemoryDocumentRepository
|
|
52
54
|
) -> None:
|
|
53
|
-
"""Test saving document with
|
|
55
|
+
"""Test saving document with content_bytes."""
|
|
54
56
|
content = '{"assembled": "document", "data": "test"}'
|
|
55
57
|
|
|
56
|
-
# Create document with
|
|
58
|
+
# Create document with content_bytes
|
|
57
59
|
document = Document(
|
|
58
60
|
document_id="test-doc-content-string",
|
|
59
61
|
original_filename="assembled.json",
|
|
@@ -61,10 +63,10 @@ class TestMemoryDocumentRepositoryContentString:
|
|
|
61
63
|
size_bytes=100, # Will be updated automatically
|
|
62
64
|
content_multihash="placeholder", # Will be updated automatically
|
|
63
65
|
status=DocumentStatus.CAPTURED,
|
|
64
|
-
|
|
66
|
+
content_bytes=content,
|
|
65
67
|
)
|
|
66
68
|
|
|
67
|
-
# Act - save should convert
|
|
69
|
+
# Act - save should convert content_bytes to ContentStream
|
|
68
70
|
await repository.save(document)
|
|
69
71
|
|
|
70
72
|
# Assert document was saved successfully
|
|
@@ -78,10 +80,10 @@ class TestMemoryDocumentRepositoryContentString:
|
|
|
78
80
|
retrieved_content = retrieved.content.read().decode("utf-8")
|
|
79
81
|
assert retrieved_content == content
|
|
80
82
|
|
|
81
|
-
async def
|
|
83
|
+
async def test_save_document_with_content_bytes_unicode(
|
|
82
84
|
self, repository: MemoryDocumentRepository
|
|
83
85
|
) -> None:
|
|
84
|
-
"""Test saving document with unicode
|
|
86
|
+
"""Test saving document with unicode content_bytes."""
|
|
85
87
|
content = '{"title": "测试文档", "emoji": "🚀", "content": "éñ"}'
|
|
86
88
|
|
|
87
89
|
document = Document(
|
|
@@ -91,7 +93,7 @@ class TestMemoryDocumentRepositoryContentString:
|
|
|
91
93
|
size_bytes=100,
|
|
92
94
|
content_multihash="placeholder",
|
|
93
95
|
status=DocumentStatus.CAPTURED,
|
|
94
|
-
|
|
96
|
+
content_bytes=content,
|
|
95
97
|
)
|
|
96
98
|
|
|
97
99
|
await repository.save(document)
|
|
@@ -105,10 +107,10 @@ class TestMemoryDocumentRepositoryContentString:
|
|
|
105
107
|
# Note: Empty content test removed because domain model requires
|
|
106
108
|
# size_bytes > 0
|
|
107
109
|
|
|
108
|
-
async def
|
|
110
|
+
async def test_save_excludes_content_bytes_from_storage(
|
|
109
111
|
self, repository: MemoryDocumentRepository
|
|
110
112
|
) -> None:
|
|
111
|
-
"""Test that
|
|
113
|
+
"""Test that content_bytes is not stored in memory storage."""
|
|
112
114
|
content = '{"test": "data that should not be in storage"}'
|
|
113
115
|
|
|
114
116
|
document = Document(
|
|
@@ -118,7 +120,7 @@ class TestMemoryDocumentRepositoryContentString:
|
|
|
118
120
|
size_bytes=100,
|
|
119
121
|
content_multihash="placeholder",
|
|
120
122
|
status=DocumentStatus.CAPTURED,
|
|
121
|
-
|
|
123
|
+
content_bytes=content,
|
|
122
124
|
)
|
|
123
125
|
|
|
124
126
|
await repository.save(document)
|
|
@@ -127,8 +129,8 @@ class TestMemoryDocumentRepositoryContentString:
|
|
|
127
129
|
stored_document = repository.storage_dict.get("test-storage-exclusion")
|
|
128
130
|
assert stored_document is not None
|
|
129
131
|
|
|
130
|
-
# Verify
|
|
131
|
-
assert stored_document.
|
|
132
|
+
# Verify content_bytes is not in stored document
|
|
133
|
+
assert stored_document.content_bytes is None
|
|
132
134
|
|
|
133
135
|
# Verify essential fields are still present
|
|
134
136
|
assert stored_document.document_id == "test-storage-exclusion"
|
|
@@ -193,7 +195,7 @@ class TestMemoryDocumentRepositoryErrorHandling:
|
|
|
193
195
|
size_bytes=100,
|
|
194
196
|
content_multihash="test_hash",
|
|
195
197
|
status=DocumentStatus.CAPTURED,
|
|
196
|
-
|
|
198
|
+
content_bytes="test content",
|
|
197
199
|
)
|
|
198
200
|
|
|
199
201
|
async def test_save_handles_empty_filename(
|
|
@@ -208,5 +210,5 @@ class TestMemoryDocumentRepositoryErrorHandling:
|
|
|
208
210
|
size_bytes=100,
|
|
209
211
|
content_multihash="test_hash",
|
|
210
212
|
status=DocumentStatus.CAPTURED,
|
|
211
|
-
|
|
213
|
+
content_bytes="test content",
|
|
212
214
|
)
|
|
@@ -6,10 +6,11 @@ repository implementation, focusing on functionality specific to this
|
|
|
6
6
|
repository that differs from the inherited mixins.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
-
import pytest
|
|
10
9
|
from datetime import datetime, timezone
|
|
11
10
|
from typing import Any
|
|
12
11
|
|
|
12
|
+
import pytest
|
|
13
|
+
|
|
13
14
|
from julee.domain.models.policy import (
|
|
14
15
|
DocumentPolicyValidation,
|
|
15
16
|
DocumentPolicyValidationStatus,
|
|
@@ -6,9 +6,10 @@ repository implementation, following the testing patterns established in the
|
|
|
6
6
|
project.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
-
import pytest
|
|
10
9
|
from datetime import datetime, timezone
|
|
11
10
|
|
|
11
|
+
import pytest
|
|
12
|
+
|
|
12
13
|
from julee.domain.models.policy import Policy, PolicyStatus
|
|
13
14
|
from julee.repositories.memory.policy import MemoryPolicyRepository
|
|
14
15
|
|
|
@@ -11,10 +11,10 @@ the large payload handling pattern from the architectural guidelines.
|
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
13
|
import logging
|
|
14
|
-
from typing import Optional, List, Dict
|
|
15
14
|
|
|
16
15
|
from julee.domain.models.assembly import Assembly
|
|
17
16
|
from julee.domain.repositories.assembly import AssemblyRepository
|
|
17
|
+
|
|
18
18
|
from .client import MinioClient, MinioRepositoryMixin
|
|
19
19
|
|
|
20
20
|
|
|
@@ -37,7 +37,7 @@ class MinioAssemblyRepository(AssemblyRepository, MinioRepositoryMixin):
|
|
|
37
37
|
self.assembly_bucket = "assemblies"
|
|
38
38
|
self.ensure_buckets_exist([self.assembly_bucket])
|
|
39
39
|
|
|
40
|
-
async def get(self, assembly_id: str) ->
|
|
40
|
+
async def get(self, assembly_id: str) -> Assembly | None:
|
|
41
41
|
"""Retrieve an assembly by ID."""
|
|
42
42
|
# Get the assembly using mixin methods
|
|
43
43
|
assembly = self.get_json_object(
|
|
@@ -69,7 +69,7 @@ class MinioAssemblyRepository(AssemblyRepository, MinioRepositoryMixin):
|
|
|
69
69
|
},
|
|
70
70
|
)
|
|
71
71
|
|
|
72
|
-
async def get_many(self, assembly_ids:
|
|
72
|
+
async def get_many(self, assembly_ids: list[str]) -> dict[str, Assembly | None]:
|
|
73
73
|
"""Retrieve multiple assemblies by ID.
|
|
74
74
|
|
|
75
75
|
Args:
|
|
@@ -92,7 +92,7 @@ class MinioAssemblyRepository(AssemblyRepository, MinioRepositoryMixin):
|
|
|
92
92
|
)
|
|
93
93
|
|
|
94
94
|
# Convert object names back to assembly IDs for the result
|
|
95
|
-
result:
|
|
95
|
+
result: dict[str, Assembly | None] = {}
|
|
96
96
|
for assembly_id in assembly_ids:
|
|
97
97
|
result[assembly_id] = object_results[assembly_id]
|
|
98
98
|
|
|
@@ -14,7 +14,6 @@ schema and query mappings.
|
|
|
14
14
|
"""
|
|
15
15
|
|
|
16
16
|
import logging
|
|
17
|
-
from typing import Optional, List, Dict
|
|
18
17
|
|
|
19
18
|
from julee.domain.models.assembly_specification import (
|
|
20
19
|
AssemblySpecification,
|
|
@@ -22,6 +21,7 @@ from julee.domain.models.assembly_specification import (
|
|
|
22
21
|
from julee.domain.repositories.assembly_specification import (
|
|
23
22
|
AssemblySpecificationRepository,
|
|
24
23
|
)
|
|
24
|
+
|
|
25
25
|
from .client import MinioClient, MinioRepositoryMixin
|
|
26
26
|
|
|
27
27
|
|
|
@@ -48,9 +48,7 @@ class MinioAssemblySpecificationRepository(
|
|
|
48
48
|
self.specifications_bucket = "assembly-specifications"
|
|
49
49
|
self.ensure_buckets_exist(self.specifications_bucket)
|
|
50
50
|
|
|
51
|
-
async def get(
|
|
52
|
-
self, assembly_specification_id: str
|
|
53
|
-
) -> Optional[AssemblySpecification]:
|
|
51
|
+
async def get(self, assembly_specification_id: str) -> AssemblySpecification | None:
|
|
54
52
|
"""Retrieve an assembly specification by ID."""
|
|
55
53
|
object_name = f"spec/{assembly_specification_id}"
|
|
56
54
|
|
|
@@ -87,8 +85,8 @@ class MinioAssemblySpecificationRepository(
|
|
|
87
85
|
)
|
|
88
86
|
|
|
89
87
|
async def get_many(
|
|
90
|
-
self, assembly_specification_ids:
|
|
91
|
-
) ->
|
|
88
|
+
self, assembly_specification_ids: list[str]
|
|
89
|
+
) -> dict[str, AssemblySpecification | None]:
|
|
92
90
|
"""Retrieve multiple assembly specifications by ID.
|
|
93
91
|
|
|
94
92
|
Args:
|
|
@@ -113,7 +111,7 @@ class MinioAssemblySpecificationRepository(
|
|
|
113
111
|
)
|
|
114
112
|
|
|
115
113
|
# Convert object names back to specification IDs for the result
|
|
116
|
-
result:
|
|
114
|
+
result: dict[str, AssemblySpecification | None] = {}
|
|
117
115
|
for i, spec_id in enumerate(assembly_specification_ids):
|
|
118
116
|
object_name = object_names[i]
|
|
119
117
|
result[spec_id] = object_results[object_name]
|
|
@@ -124,7 +122,7 @@ class MinioAssemblySpecificationRepository(
|
|
|
124
122
|
"""Generate a unique assembly specification identifier."""
|
|
125
123
|
return self.generate_id_with_prefix("spec")
|
|
126
124
|
|
|
127
|
-
async def list_all(self) ->
|
|
125
|
+
async def list_all(self) -> list[AssemblySpecification]:
|
|
128
126
|
"""List all assembly specifications.
|
|
129
127
|
|
|
130
128
|
Returns:
|
|
@@ -15,21 +15,18 @@ import io
|
|
|
15
15
|
import json
|
|
16
16
|
from datetime import datetime, timezone
|
|
17
17
|
from typing import (
|
|
18
|
-
Protocol,
|
|
19
18
|
Any,
|
|
20
|
-
Dict,
|
|
21
|
-
Optional,
|
|
22
|
-
runtime_checkable,
|
|
23
|
-
List,
|
|
24
|
-
Union,
|
|
25
|
-
TypeVar,
|
|
26
19
|
BinaryIO,
|
|
20
|
+
Protocol,
|
|
21
|
+
TypeVar,
|
|
22
|
+
runtime_checkable,
|
|
27
23
|
)
|
|
28
|
-
|
|
29
|
-
from minio.datatypes import Object
|
|
24
|
+
|
|
30
25
|
from minio.api import ObjectWriteResult
|
|
26
|
+
from minio.datatypes import Object
|
|
31
27
|
from minio.error import S3Error # type: ignore[import-untyped]
|
|
32
28
|
from pydantic import BaseModel
|
|
29
|
+
from urllib3.response import BaseHTTPResponse
|
|
33
30
|
|
|
34
31
|
# Import ContentStream here to avoid circular imports
|
|
35
32
|
from julee.domain.models.custom_fields.content_stream import (
|
|
@@ -78,7 +75,7 @@ class MinioClient(Protocol):
|
|
|
78
75
|
data: BinaryIO,
|
|
79
76
|
length: int,
|
|
80
77
|
content_type: str = "application/octet-stream",
|
|
81
|
-
metadata:
|
|
78
|
+
metadata: dict[str, str | list[str] | tuple[str]] | None = None,
|
|
82
79
|
) -> ObjectWriteResult:
|
|
83
80
|
"""Store an object in the bucket.
|
|
84
81
|
|
|
@@ -167,7 +164,7 @@ class MinioRepositoryMixin:
|
|
|
167
164
|
client: MinioClient
|
|
168
165
|
logger: Any # logging.Logger, but avoiding import
|
|
169
166
|
|
|
170
|
-
def ensure_buckets_exist(self, bucket_names:
|
|
167
|
+
def ensure_buckets_exist(self, bucket_names: str | list[str]) -> None:
|
|
171
168
|
"""Ensure one or more buckets exist, creating them if necessary.
|
|
172
169
|
|
|
173
170
|
Args:
|
|
@@ -202,12 +199,12 @@ class MinioRepositoryMixin:
|
|
|
202
199
|
def get_many_json_objects(
|
|
203
200
|
self,
|
|
204
201
|
bucket_name: str,
|
|
205
|
-
object_names:
|
|
202
|
+
object_names: list[str],
|
|
206
203
|
model_class: type[T],
|
|
207
204
|
not_found_log_message: str,
|
|
208
205
|
error_log_message: str,
|
|
209
|
-
extra_log_data:
|
|
210
|
-
) ->
|
|
206
|
+
extra_log_data: dict[str, Any] | None = None,
|
|
207
|
+
) -> dict[str, T | None]:
|
|
211
208
|
"""Get multiple JSON objects from Minio and deserialize them.
|
|
212
209
|
|
|
213
210
|
Note: S3/MinIO does not have native batch retrieval operations.
|
|
@@ -232,7 +229,7 @@ class MinioRepositoryMixin:
|
|
|
232
229
|
S3Error: For non-NoSuchKey errors
|
|
233
230
|
"""
|
|
234
231
|
extra_log_data = extra_log_data or {}
|
|
235
|
-
result:
|
|
232
|
+
result: dict[str, T | None] = {}
|
|
236
233
|
found_count = 0
|
|
237
234
|
|
|
238
235
|
self.logger.debug(
|
|
@@ -297,11 +294,11 @@ class MinioRepositoryMixin:
|
|
|
297
294
|
def get_many_binary_objects(
|
|
298
295
|
self,
|
|
299
296
|
bucket_name: str,
|
|
300
|
-
object_names:
|
|
297
|
+
object_names: list[str],
|
|
301
298
|
not_found_log_message: str,
|
|
302
299
|
error_log_message: str,
|
|
303
|
-
extra_log_data:
|
|
304
|
-
) ->
|
|
300
|
+
extra_log_data: dict[str, Any] | None = None,
|
|
301
|
+
) -> dict[str, ContentStream | None]:
|
|
305
302
|
"""Get multiple binary objects from Minio as ContentStreams.
|
|
306
303
|
|
|
307
304
|
Note: S3/MinIO does not have native batch retrieval operations.
|
|
@@ -322,7 +319,7 @@ class MinioRepositoryMixin:
|
|
|
322
319
|
S3Error: For non-NoSuchKey errors
|
|
323
320
|
"""
|
|
324
321
|
extra_log_data = extra_log_data or {}
|
|
325
|
-
result:
|
|
322
|
+
result: dict[str, ContentStream | None] = {}
|
|
326
323
|
found_count = 0
|
|
327
324
|
|
|
328
325
|
self.logger.debug(
|
|
@@ -383,8 +380,8 @@ class MinioRepositoryMixin:
|
|
|
383
380
|
model_class: type[T],
|
|
384
381
|
not_found_log_message: str,
|
|
385
382
|
error_log_message: str,
|
|
386
|
-
extra_log_data:
|
|
387
|
-
) ->
|
|
383
|
+
extra_log_data: dict[str, Any] | None = None,
|
|
384
|
+
) -> T | None:
|
|
388
385
|
"""Get a JSON object from Minio and deserialize it to a Pydantic
|
|
389
386
|
model.
|
|
390
387
|
|
|
@@ -441,7 +438,7 @@ class MinioRepositoryMixin:
|
|
|
441
438
|
model: BaseModel,
|
|
442
439
|
success_log_message: str,
|
|
443
440
|
error_log_message: str,
|
|
444
|
-
extra_log_data:
|
|
441
|
+
extra_log_data: dict[str, Any] | None = None,
|
|
445
442
|
) -> None:
|
|
446
443
|
"""Store a Pydantic model as a JSON object in Minio.
|
|
447
444
|
|
|
@@ -494,11 +491,11 @@ class MinioRepositoryMixin:
|
|
|
494
491
|
|
|
495
492
|
# Set created_at if it's None (for new objects)
|
|
496
493
|
if hasattr(model, "created_at") and getattr(model, "created_at", None) is None:
|
|
497
|
-
|
|
494
|
+
model.created_at = now
|
|
498
495
|
|
|
499
496
|
# Always update updated_at
|
|
500
497
|
if hasattr(model, "updated_at"):
|
|
501
|
-
|
|
498
|
+
model.updated_at = now
|
|
502
499
|
|
|
503
500
|
def generate_id_with_prefix(self, prefix: str) -> str:
|
|
504
501
|
"""Generate a unique ID with the given prefix and log the generation.
|
|
@@ -530,7 +527,7 @@ class MinioRepositoryMixin:
|
|
|
530
527
|
bucket_name: str,
|
|
531
528
|
prefix: str,
|
|
532
529
|
entity_type_name: str,
|
|
533
|
-
) ->
|
|
530
|
+
) -> list[str]:
|
|
534
531
|
"""Extract entity IDs from objects with a given prefix.
|
|
535
532
|
|
|
536
533
|
This method provides a common implementation for listing objects
|
|
@@ -11,23 +11,23 @@ The implementation separates document metadata (stored as JSON) from content
|
|
|
11
11
|
payload handling pattern from the architectural guidelines.
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
|
+
import hashlib
|
|
14
15
|
import io
|
|
15
16
|
import json
|
|
16
|
-
import hashlib
|
|
17
17
|
import logging
|
|
18
18
|
from datetime import datetime, timezone
|
|
19
|
-
from typing import Optional, List, Dict
|
|
20
19
|
|
|
21
|
-
from minio.error import S3Error # type: ignore[import-untyped]
|
|
22
20
|
import multihash # type: ignore[import-untyped]
|
|
21
|
+
from minio.error import S3Error # type: ignore[import-untyped]
|
|
22
|
+
from pydantic import BaseModel, ConfigDict
|
|
23
23
|
|
|
24
|
-
from julee.domain.models.document import Document
|
|
25
24
|
from julee.domain.models.custom_fields.content_stream import (
|
|
26
25
|
ContentStream,
|
|
27
26
|
)
|
|
27
|
+
from julee.domain.models.document import Document
|
|
28
28
|
from julee.domain.repositories.document import DocumentRepository
|
|
29
|
+
|
|
29
30
|
from .client import MinioClient, MinioRepositoryMixin
|
|
30
|
-
from pydantic import BaseModel, ConfigDict
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
class RawMetadata(BaseModel):
|
|
@@ -36,7 +36,7 @@ class RawMetadata(BaseModel):
|
|
|
36
36
|
model_config = ConfigDict(extra="allow") # Allow arbitrary fields
|
|
37
37
|
|
|
38
38
|
# Only include fields we actually use for type safety
|
|
39
|
-
content_multihash:
|
|
39
|
+
content_multihash: str | None = None
|
|
40
40
|
|
|
41
41
|
|
|
42
42
|
class MinioDocumentRepository(DocumentRepository, MinioRepositoryMixin):
|
|
@@ -63,7 +63,7 @@ class MinioDocumentRepository(DocumentRepository, MinioRepositoryMixin):
|
|
|
63
63
|
self.content_bucket = "documents-content"
|
|
64
64
|
self.ensure_buckets_exist([self.metadata_bucket, self.content_bucket])
|
|
65
65
|
|
|
66
|
-
async def get(self, document_id: str) ->
|
|
66
|
+
async def get(self, document_id: str) -> Document | None:
|
|
67
67
|
"""Retrieve a document with metadata and content."""
|
|
68
68
|
try:
|
|
69
69
|
# First, get the metadata
|
|
@@ -175,27 +175,7 @@ class MinioDocumentRepository(DocumentRepository, MinioRepositoryMixin):
|
|
|
175
175
|
|
|
176
176
|
try:
|
|
177
177
|
# Handle content_string conversion (only if no content provided)
|
|
178
|
-
|
|
179
|
-
# Convert content_string to ContentStream
|
|
180
|
-
assert document.content_string is not None # For MyPy
|
|
181
|
-
content_bytes = document.content_string.encode("utf-8")
|
|
182
|
-
content_stream = ContentStream(io.BytesIO(content_bytes))
|
|
183
|
-
|
|
184
|
-
# Create new document with ContentStream
|
|
185
|
-
document = document.model_copy(
|
|
186
|
-
update={
|
|
187
|
-
"content": content_stream,
|
|
188
|
-
"size_bytes": len(content_bytes),
|
|
189
|
-
}
|
|
190
|
-
)
|
|
191
|
-
|
|
192
|
-
self.logger.debug(
|
|
193
|
-
"Converted content_string to ContentStream",
|
|
194
|
-
extra={
|
|
195
|
-
"document_id": document.document_id,
|
|
196
|
-
"content_length": len(content_bytes),
|
|
197
|
-
},
|
|
198
|
-
)
|
|
178
|
+
document = self._normalize_document_content(document)
|
|
199
179
|
|
|
200
180
|
# Store content first and get calculated multihash
|
|
201
181
|
calculated_multihash = await self._store_content(document)
|
|
@@ -234,7 +214,7 @@ class MinioDocumentRepository(DocumentRepository, MinioRepositoryMixin):
|
|
|
234
214
|
)
|
|
235
215
|
raise
|
|
236
216
|
|
|
237
|
-
async def get_many(self, document_ids:
|
|
217
|
+
async def get_many(self, document_ids: list[str]) -> dict[str, Document | None]:
|
|
238
218
|
"""Retrieve multiple documents by ID using batch operations.
|
|
239
219
|
|
|
240
220
|
Args:
|
|
@@ -271,7 +251,7 @@ class MinioDocumentRepository(DocumentRepository, MinioRepositoryMixin):
|
|
|
271
251
|
)
|
|
272
252
|
|
|
273
253
|
# Use RawMetadata objects directly
|
|
274
|
-
metadata_results:
|
|
254
|
+
metadata_results: dict[str, RawMetadata | None] = raw_metadata_results
|
|
275
255
|
|
|
276
256
|
# Step 2: Extract unique content multihashes from found metadata
|
|
277
257
|
content_hashes = set()
|
|
@@ -294,7 +274,7 @@ class MinioDocumentRepository(DocumentRepository, MinioRepositoryMixin):
|
|
|
294
274
|
)
|
|
295
275
|
|
|
296
276
|
# Step 4: Splice metadata and content together into Documents
|
|
297
|
-
result:
|
|
277
|
+
result: dict[str, Document | None] = {}
|
|
298
278
|
for document_id in document_ids:
|
|
299
279
|
metadata = metadata_results.get(document_id)
|
|
300
280
|
if not metadata:
|
|
@@ -335,7 +315,7 @@ class MinioDocumentRepository(DocumentRepository, MinioRepositoryMixin):
|
|
|
335
315
|
|
|
336
316
|
return result
|
|
337
317
|
|
|
338
|
-
async def list_all(self) ->
|
|
318
|
+
async def list_all(self) -> list[Document]:
|
|
339
319
|
"""List all documents.
|
|
340
320
|
|
|
341
321
|
Returns:
|
|
@@ -449,6 +429,29 @@ class MinioDocumentRepository(DocumentRepository, MinioRepositoryMixin):
|
|
|
449
429
|
)
|
|
450
430
|
raise
|
|
451
431
|
|
|
432
|
+
def _normalize_document_content(self, document: Document) -> Document:
|
|
433
|
+
"""Ensure document has a ContentStream in content"""
|
|
434
|
+
if document.content is not None:
|
|
435
|
+
return document
|
|
436
|
+
|
|
437
|
+
content_bytes = document.content_bytes
|
|
438
|
+
if content_bytes is not None:
|
|
439
|
+
if isinstance(content_bytes, str):
|
|
440
|
+
content_bytes = content_bytes.encode("utf-8")
|
|
441
|
+
|
|
442
|
+
stream = ContentStream(io.BytesIO(content_bytes))
|
|
443
|
+
size_bytes = len(content_bytes)
|
|
444
|
+
return document.model_copy(
|
|
445
|
+
update={
|
|
446
|
+
"content": stream,
|
|
447
|
+
"size_bytes": size_bytes,
|
|
448
|
+
}
|
|
449
|
+
)
|
|
450
|
+
|
|
451
|
+
raise ValueError(
|
|
452
|
+
f"Document {document.document_id} has no content, content_bytes"
|
|
453
|
+
)
|
|
454
|
+
|
|
452
455
|
def _calculate_multihash_from_stream(self, content_stream: ContentStream) -> str:
|
|
453
456
|
"""Calculate multihash from content stream."""
|
|
454
457
|
if not content_stream:
|
|
@@ -471,7 +474,7 @@ class MinioDocumentRepository(DocumentRepository, MinioRepositoryMixin):
|
|
|
471
474
|
|
|
472
475
|
# Serialize metadata (content stream and content_string excluded)
|
|
473
476
|
metadata_json = document.model_dump_json(
|
|
474
|
-
exclude={"content", "content_string"}
|
|
477
|
+
exclude={"content", "content_string", "content_bytes"}
|
|
475
478
|
).encode("utf-8")
|
|
476
479
|
|
|
477
480
|
try:
|
|
@@ -14,12 +14,12 @@ status, scores, transformation results, and metadata.
|
|
|
14
14
|
"""
|
|
15
15
|
|
|
16
16
|
import logging
|
|
17
|
-
from typing import Optional, List, Dict
|
|
18
17
|
|
|
19
18
|
from julee.domain.models.policy import DocumentPolicyValidation
|
|
20
19
|
from julee.domain.repositories.document_policy_validation import (
|
|
21
20
|
DocumentPolicyValidationRepository,
|
|
22
21
|
)
|
|
22
|
+
|
|
23
23
|
from .client import MinioClient, MinioRepositoryMixin
|
|
24
24
|
|
|
25
25
|
|
|
@@ -47,7 +47,7 @@ class MinioDocumentPolicyValidationRepository(
|
|
|
47
47
|
self.validations_bucket = "document-policy-validations"
|
|
48
48
|
self.ensure_buckets_exist(self.validations_bucket)
|
|
49
49
|
|
|
50
|
-
async def get(self, validation_id: str) ->
|
|
50
|
+
async def get(self, validation_id: str) -> DocumentPolicyValidation | None:
|
|
51
51
|
"""Retrieve a document policy validation by ID."""
|
|
52
52
|
return self.get_json_object(
|
|
53
53
|
bucket_name=self.validations_bucket,
|
|
@@ -88,8 +88,8 @@ class MinioDocumentPolicyValidationRepository(
|
|
|
88
88
|
return self.generate_id_with_prefix("validation")
|
|
89
89
|
|
|
90
90
|
async def get_many(
|
|
91
|
-
self, validation_ids:
|
|
92
|
-
) ->
|
|
91
|
+
self, validation_ids: list[str]
|
|
92
|
+
) -> dict[str, DocumentPolicyValidation | None]:
|
|
93
93
|
"""Retrieve multiple document policy validations by ID.
|
|
94
94
|
|
|
95
95
|
Args:
|
|
@@ -113,7 +113,7 @@ class MinioDocumentPolicyValidationRepository(
|
|
|
113
113
|
)
|
|
114
114
|
|
|
115
115
|
# Convert object names back to validation IDs for the result
|
|
116
|
-
result:
|
|
116
|
+
result: dict[str, DocumentPolicyValidation | None] = {}
|
|
117
117
|
for validation_id in validation_ids:
|
|
118
118
|
result[validation_id] = object_results[validation_id]
|
|
119
119
|
|