julee 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- julee/__init__.py +3 -0
- julee/api/__init__.py +20 -0
- julee/api/app.py +180 -0
- julee/api/dependencies.py +257 -0
- julee/api/requests.py +175 -0
- julee/api/responses.py +43 -0
- julee/api/routers/__init__.py +43 -0
- julee/api/routers/assembly_specifications.py +212 -0
- julee/api/routers/documents.py +182 -0
- julee/api/routers/knowledge_service_configs.py +79 -0
- julee/api/routers/knowledge_service_queries.py +293 -0
- julee/api/routers/system.py +137 -0
- julee/api/routers/workflows.py +234 -0
- julee/api/services/__init__.py +20 -0
- julee/api/services/system_initialization.py +214 -0
- julee/api/tests/__init__.py +14 -0
- julee/api/tests/routers/__init__.py +17 -0
- julee/api/tests/routers/test_assembly_specifications.py +749 -0
- julee/api/tests/routers/test_documents.py +301 -0
- julee/api/tests/routers/test_knowledge_service_configs.py +234 -0
- julee/api/tests/routers/test_knowledge_service_queries.py +738 -0
- julee/api/tests/routers/test_system.py +179 -0
- julee/api/tests/routers/test_workflows.py +393 -0
- julee/api/tests/test_app.py +285 -0
- julee/api/tests/test_dependencies.py +245 -0
- julee/api/tests/test_requests.py +250 -0
- julee/domain/__init__.py +22 -0
- julee/domain/models/__init__.py +49 -0
- julee/domain/models/assembly/__init__.py +17 -0
- julee/domain/models/assembly/assembly.py +103 -0
- julee/domain/models/assembly/tests/__init__.py +0 -0
- julee/domain/models/assembly/tests/factories.py +37 -0
- julee/domain/models/assembly/tests/test_assembly.py +430 -0
- julee/domain/models/assembly_specification/__init__.py +24 -0
- julee/domain/models/assembly_specification/assembly_specification.py +172 -0
- julee/domain/models/assembly_specification/knowledge_service_query.py +123 -0
- julee/domain/models/assembly_specification/tests/__init__.py +0 -0
- julee/domain/models/assembly_specification/tests/factories.py +78 -0
- julee/domain/models/assembly_specification/tests/test_assembly_specification.py +490 -0
- julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +310 -0
- julee/domain/models/custom_fields/__init__.py +0 -0
- julee/domain/models/custom_fields/content_stream.py +68 -0
- julee/domain/models/custom_fields/tests/__init__.py +0 -0
- julee/domain/models/custom_fields/tests/test_custom_fields.py +53 -0
- julee/domain/models/document/__init__.py +17 -0
- julee/domain/models/document/document.py +150 -0
- julee/domain/models/document/tests/__init__.py +0 -0
- julee/domain/models/document/tests/factories.py +76 -0
- julee/domain/models/document/tests/test_document.py +297 -0
- julee/domain/models/knowledge_service_config/__init__.py +17 -0
- julee/domain/models/knowledge_service_config/knowledge_service_config.py +86 -0
- julee/domain/models/policy/__init__.py +15 -0
- julee/domain/models/policy/document_policy_validation.py +220 -0
- julee/domain/models/policy/policy.py +203 -0
- julee/domain/models/policy/tests/__init__.py +0 -0
- julee/domain/models/policy/tests/factories.py +47 -0
- julee/domain/models/policy/tests/test_document_policy_validation.py +420 -0
- julee/domain/models/policy/tests/test_policy.py +546 -0
- julee/domain/repositories/__init__.py +27 -0
- julee/domain/repositories/assembly.py +45 -0
- julee/domain/repositories/assembly_specification.py +52 -0
- julee/domain/repositories/base.py +146 -0
- julee/domain/repositories/document.py +49 -0
- julee/domain/repositories/document_policy_validation.py +52 -0
- julee/domain/repositories/knowledge_service_config.py +54 -0
- julee/domain/repositories/knowledge_service_query.py +44 -0
- julee/domain/repositories/policy.py +49 -0
- julee/domain/use_cases/__init__.py +17 -0
- julee/domain/use_cases/decorators.py +107 -0
- julee/domain/use_cases/extract_assemble_data.py +649 -0
- julee/domain/use_cases/initialize_system_data.py +842 -0
- julee/domain/use_cases/tests/__init__.py +7 -0
- julee/domain/use_cases/tests/test_extract_assemble_data.py +548 -0
- julee/domain/use_cases/tests/test_initialize_system_data.py +455 -0
- julee/domain/use_cases/tests/test_validate_document.py +1228 -0
- julee/domain/use_cases/validate_document.py +736 -0
- julee/fixtures/assembly_specifications.yaml +70 -0
- julee/fixtures/documents.yaml +178 -0
- julee/fixtures/knowledge_service_configs.yaml +37 -0
- julee/fixtures/knowledge_service_queries.yaml +27 -0
- julee/repositories/__init__.py +17 -0
- julee/repositories/memory/__init__.py +31 -0
- julee/repositories/memory/assembly.py +84 -0
- julee/repositories/memory/assembly_specification.py +125 -0
- julee/repositories/memory/base.py +227 -0
- julee/repositories/memory/document.py +149 -0
- julee/repositories/memory/document_policy_validation.py +104 -0
- julee/repositories/memory/knowledge_service_config.py +123 -0
- julee/repositories/memory/knowledge_service_query.py +120 -0
- julee/repositories/memory/policy.py +87 -0
- julee/repositories/memory/tests/__init__.py +0 -0
- julee/repositories/memory/tests/test_document.py +212 -0
- julee/repositories/memory/tests/test_document_policy_validation.py +161 -0
- julee/repositories/memory/tests/test_policy.py +443 -0
- julee/repositories/minio/__init__.py +31 -0
- julee/repositories/minio/assembly.py +103 -0
- julee/repositories/minio/assembly_specification.py +170 -0
- julee/repositories/minio/client.py +570 -0
- julee/repositories/minio/document.py +530 -0
- julee/repositories/minio/document_policy_validation.py +120 -0
- julee/repositories/minio/knowledge_service_config.py +187 -0
- julee/repositories/minio/knowledge_service_query.py +211 -0
- julee/repositories/minio/policy.py +106 -0
- julee/repositories/minio/tests/__init__.py +0 -0
- julee/repositories/minio/tests/fake_client.py +213 -0
- julee/repositories/minio/tests/test_assembly.py +374 -0
- julee/repositories/minio/tests/test_assembly_specification.py +391 -0
- julee/repositories/minio/tests/test_client_protocol.py +57 -0
- julee/repositories/minio/tests/test_document.py +591 -0
- julee/repositories/minio/tests/test_document_policy_validation.py +192 -0
- julee/repositories/minio/tests/test_knowledge_service_config.py +374 -0
- julee/repositories/minio/tests/test_knowledge_service_query.py +438 -0
- julee/repositories/minio/tests/test_policy.py +559 -0
- julee/repositories/temporal/__init__.py +38 -0
- julee/repositories/temporal/activities.py +114 -0
- julee/repositories/temporal/activity_names.py +34 -0
- julee/repositories/temporal/proxies.py +159 -0
- julee/services/__init__.py +18 -0
- julee/services/knowledge_service/__init__.py +48 -0
- julee/services/knowledge_service/anthropic/__init__.py +12 -0
- julee/services/knowledge_service/anthropic/knowledge_service.py +331 -0
- julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +318 -0
- julee/services/knowledge_service/factory.py +138 -0
- julee/services/knowledge_service/knowledge_service.py +160 -0
- julee/services/knowledge_service/memory/__init__.py +13 -0
- julee/services/knowledge_service/memory/knowledge_service.py +278 -0
- julee/services/knowledge_service/memory/test_knowledge_service.py +345 -0
- julee/services/knowledge_service/test_factory.py +112 -0
- julee/services/temporal/__init__.py +38 -0
- julee/services/temporal/activities.py +86 -0
- julee/services/temporal/activity_names.py +22 -0
- julee/services/temporal/proxies.py +41 -0
- julee/util/__init__.py +0 -0
- julee/util/domain.py +119 -0
- julee/util/repos/__init__.py +0 -0
- julee/util/repos/minio/__init__.py +0 -0
- julee/util/repos/minio/file_storage.py +213 -0
- julee/util/repos/temporal/__init__.py +11 -0
- julee/util/repos/temporal/client_proxies/file_storage.py +68 -0
- julee/util/repos/temporal/data_converter.py +123 -0
- julee/util/repos/temporal/minio_file_storage.py +12 -0
- julee/util/repos/temporal/proxies/__init__.py +0 -0
- julee/util/repos/temporal/proxies/file_storage.py +58 -0
- julee/util/repositories.py +55 -0
- julee/util/temporal/__init__.py +22 -0
- julee/util/temporal/activities.py +123 -0
- julee/util/temporal/decorators.py +473 -0
- julee/util/tests/__init__.py +1 -0
- julee/util/tests/test_decorators.py +770 -0
- julee/util/validation/__init__.py +29 -0
- julee/util/validation/repository.py +100 -0
- julee/util/validation/type_guards.py +369 -0
- julee/worker.py +211 -0
- julee/workflows/__init__.py +26 -0
- julee/workflows/extract_assemble.py +215 -0
- julee/workflows/validate_document.py +228 -0
- julee-0.1.0.dist-info/METADATA +195 -0
- julee-0.1.0.dist-info/RECORD +161 -0
- julee-0.1.0.dist-info/WHEEL +5 -0
- julee-0.1.0.dist-info/licenses/LICENSE +674 -0
- julee-0.1.0.dist-info/top_level.txt +1 -0
julee/util/domain.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
from pydantic import (
|
|
2
|
+
BaseModel,
|
|
3
|
+
Field,
|
|
4
|
+
field_validator,
|
|
5
|
+
)
|
|
6
|
+
from typing import Optional, Dict
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class FileMetadata(BaseModel):
    """Descriptive record for a file held in storage.

    Only ``file_id`` is required. When omitted, ``uploaded_at`` is filled
    with the current UTC time rendered as an ISO-8601 string, and
    ``metadata`` starts as a fresh empty dict for each instance.
    """

    file_id: str
    filename: Optional[str] = Field(default=None)
    content_type: Optional[str] = Field(default=None)
    size_bytes: Optional[int] = Field(default=None)
    # Stored as a string (not a datetime); generated at construction time.
    uploaded_at: str = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc).isoformat()
    )
    # default_factory avoids sharing one dict between instances.
    metadata: Dict[str, str] = Field(default_factory=dict)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class FileUploadArgs(BaseModel):
    """Arguments for a file upload, validated at the domain level.

    The validators below run when the model is constructed, so an upload
    request is rejected before it reaches any repository implementation if
    its filename, payload size, or content type is unacceptable.
    """

    file_id: str
    filename: str
    data: bytes
    content_type: str
    metadata: dict = Field(default_factory=dict)

    @field_validator("filename")
    @classmethod
    def validate_filename(cls, v: str) -> str:
        """Validate and sanitize a filename to prevent path traversal.

        Args:
            v: The filename as supplied by the caller.

        Returns:
            The final path component of ``v`` with surrounding whitespace
            removed.

        Raises:
            ValueError: If the name is empty (before or after
                sanitization), contains control characters or one of the
                rejected shell/path patterns, or exceeds 255 characters.
        """
        import os

        if not v or not v.strip():
            raise ValueError("Filename cannot be empty")

        # os.path.basename only splits on the host platform's separators.
        # On POSIX a Windows-style name such as "C:\\dir\\evil.txt" would
        # otherwise pass through untouched, so normalize backslashes first.
        sanitized = os.path.basename(v.strip().replace("\\", "/"))

        # A trailing separator (e.g. "dir/") leaves nothing behind.
        if not sanitized:
            raise ValueError("Filename is empty after sanitization")

        # NUL and other control characters have no place in a filename and
        # can truncate or corrupt names in downstream systems.
        if any(ord(ch) < 32 or ord(ch) == 127 for ch in sanitized):
            raise ValueError("Filename contains control characters")

        # Substrings that enable traversal or shell interpretation.
        dangerous_patterns = (
            "..",
            "~",
            "$",
            "`",
            "|",
            "&",
            ";",
            "(",
            ")",
            "{",
            "}",
            "[",
            "]",
        )
        for pattern in dangerous_patterns:
            if pattern in sanitized:
                raise ValueError(f"Filename contains dangerous pattern: {pattern}")

        # Ensure filename has reasonable length
        if len(sanitized) > 255:
            raise ValueError("Filename too long (max 255 characters)")

        return sanitized

    @field_validator("data")
    @classmethod
    def validate_file_size(cls, v: bytes) -> bytes:
        """Validate the payload size to prevent resource exhaustion.

        Args:
            v: Raw file content.

        Returns:
            ``v`` unchanged.

        Raises:
            ValueError: If the payload is empty or larger than 50 MiB.
        """
        max_size = 50 * 1024 * 1024  # 50MB limit
        if len(v) > max_size:
            raise ValueError(
                f"File size {len(v)} bytes exceeds maximum allowed size of "
                f"{max_size} bytes"
            )

        if len(v) == 0:
            raise ValueError("File cannot be empty")

        return v

    @field_validator("content_type")
    @classmethod
    def validate_content_type(cls, v: str) -> str:
        """Validate the declared content type against an allow-list.

        Args:
            v: The declared MIME type. The comparison is an exact string
                match, so parameters such as ``; charset=utf-8`` are not
                accepted.

        Returns:
            ``v`` unchanged.

        Raises:
            ValueError: If ``v`` is not one of the allowed types.
        """
        allowed_types = {
            "text/plain",
            "text/csv",
            "application/json",
            "application/pdf",
            "image/jpeg",
            "image/png",
            "image/gif",
            "application/zip",
            "application/octet-stream",
        }

        if v not in allowed_types:
            raise ValueError(
                f"Content type '{v}' not allowed. Allowed types: "
                f"{', '.join(sorted(allowed_types))}"
            )

        return v
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from minio import Minio # type: ignore[import-untyped]
|
|
7
|
+
from minio.error import S3Error # type: ignore[import-untyped]
|
|
8
|
+
|
|
9
|
+
from julee.util.domain import FileMetadata, FileUploadArgs
|
|
10
|
+
from julee.util.repositories import FileStorageRepository
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class MinioFileStorageRepository(FileStorageRepository):
    """
    Minio implementation of FileStorageRepository.
    Uses Minio for persistence of large files/payloads.

    The Minio client is created lazily on first use; at that point the
    configured bucket is created if it does not exist yet.
    """

    def __init__(
        self,
        endpoint: Optional[str] = None,
        access_key: Optional[str] = None,
        secret_key: Optional[str] = None,
        secure: bool = False,
        bucket_name: Optional[str] = None,
    ):
        """Store connection settings; no network access happens here.

        Each ``None`` argument falls back to an environment variable and
        then to a built-in default:

        Args:
            endpoint: ``MINIO_ENDPOINT``, default ``localhost:9000``.
            access_key: ``MINIO_ROOT_USER``, default ``minioadmin``.
            secret_key: ``MINIO_ROOT_PASSWORD``, default ``minioadmin``.
            secure: Passed to the Minio client as its ``secure`` flag.
            bucket_name: ``MINIO_BUCKET_NAME``, default ``file-storage``.
        """
        self._endpoint = (
            endpoint
            if endpoint is not None
            else os.environ.get("MINIO_ENDPOINT", "localhost:9000")
        )
        self._access_key = (
            access_key
            if access_key is not None
            else os.environ.get("MINIO_ROOT_USER", "minioadmin")
        )
        self._secret_key = (
            secret_key
            if secret_key is not None
            else os.environ.get("MINIO_ROOT_PASSWORD", "minioadmin")
        )
        self._secure = secure
        self._bucket_name = (
            bucket_name
            if bucket_name is not None
            else os.environ.get("MINIO_BUCKET_NAME", "file-storage")
        )

        self._client: Optional[Minio] = None
        logger.debug(
            "MinioFileStorageRepository initialized",
            extra={
                "endpoint": self._endpoint,
                "bucket_name": self._bucket_name,
            },
        )

    async def _get_client(self) -> Minio:
        """Lazily initialize and return the Minio client.

        Returns:
            The cached client, created on first call.

        Raises:
            S3Error: If checking for or creating the bucket fails. In that
                case nothing is cached, so the next call retries the whole
                initialization instead of reusing a client whose bucket was
                never verified.
        """
        if self._client is not None:
            return self._client

        logger.debug(
            "Creating new Minio client instance",
            extra={"endpoint": self._endpoint, "secure": self._secure},
        )
        client = Minio(
            self._endpoint,
            access_key=self._access_key,
            secret_key=self._secret_key,
            secure=self._secure,
        )
        try:
            # Ensure bucket exists
            if not client.bucket_exists(self._bucket_name):
                logger.info(
                    "Minio bucket does not exist, creating now",
                    extra={"bucket_name": self._bucket_name},
                )
                client.make_bucket(self._bucket_name)
            else:
                logger.debug(
                    "Minio bucket already exists",
                    extra={"bucket_name": self._bucket_name},
                )
        except S3Error as e:
            logger.error(
                f"Error checking or creating Minio bucket: {e}",
                extra={
                    "bucket_name": self._bucket_name,
                    "error_code": e.code,
                },
            )
            raise

        # Cache only once the bucket is known to exist.
        self._client = client
        return client

    async def upload_file(self, args: FileUploadArgs) -> FileMetadata:
        """Upload a file to Minio storage.

        The object is stored under ``args.file_id`` with ``args.metadata``
        attached as object metadata.

        Args:
            args: Validated upload arguments.

        Returns:
            FileMetadata built from ``args`` (not re-read from Minio).

        Raises:
            S3Error: If Minio rejects the upload or bucket setup fails.
        """
        client = await self._get_client()
        logger.info(
            "Uploading file to Minio",
            extra={
                "file_id": args.file_id,
                # "filename" is a reserved LogRecord attribute; using it as
                # an ``extra`` key makes logging raise KeyError.
                "file_name": args.filename,
                "content_type": args.content_type,
                "size_bytes": len(args.data),
            },
        )
        try:
            # Minio put_object is idempotent if object name is the same
            client.put_object(
                self._bucket_name,
                args.file_id,
                io.BytesIO(args.data),
                len(args.data),
                content_type=args.content_type,
                metadata=args.metadata,
            )
            logger.info(
                "File uploaded successfully to Minio",
                extra={"file_id": args.file_id},
            )
            return FileMetadata(
                file_id=args.file_id,
                filename=args.filename,
                content_type=args.content_type,
                size_bytes=len(args.data),
                metadata=args.metadata,
            )
        except S3Error as e:
            logger.error(
                f"Error uploading file to Minio: {e}",
                extra={"file_id": args.file_id, "error_code": e.code},
            )
            raise

    async def download_file(self, file_id: str) -> Optional[bytes]:
        """Download a file from Minio storage by its ID.

        Args:
            file_id: Object name within the configured bucket.

        Returns:
            The object's content, or None if Minio reports ``NoSuchKey``.

        Raises:
            S3Error: For any Minio error other than ``NoSuchKey``.
        """
        client = await self._get_client()
        logger.info(
            "Attempting to download file from Minio",
            extra={"file_id": file_id},
        )
        response = None
        try:
            response = client.get_object(self._bucket_name, file_id)
            file_data: bytes = response.read()
        except S3Error as e:
            if e.code == "NoSuchKey":
                logger.warning("File not found in Minio", extra={"file_id": file_id})
                return None
            logger.error(
                f"Error downloading file from Minio: {e}",
                extra={"file_id": file_id, "error_code": e.code},
            )
            raise
        finally:
            # Always hand the connection back, even if read() failed.
            if response is not None:
                response.close()
                response.release_conn()

        logger.info(
            "File downloaded successfully from Minio",
            extra={"file_id": file_id, "size_bytes": len(file_data)},
        )
        return file_data

    async def get_file_metadata(self, file_id: str) -> Optional[FileMetadata]:
        """Retrieve metadata for a stored file from Minio.

        Args:
            file_id: Object name within the configured bucket.

        Returns:
            FileMetadata built from the object's stat information, or None
            if Minio reports ``NoSuchKey``.

        Raises:
            S3Error: For any Minio error other than ``NoSuchKey``.
        """
        client = await self._get_client()
        logger.info(
            "Attempting to get file metadata from Minio",
            extra={"file_id": file_id},
        )
        try:
            stat = client.stat_object(self._bucket_name, file_id)
            logger.info(
                "File metadata retrieved successfully from Minio",
                extra={
                    "file_id": file_id,
                    "size_bytes": stat.size,
                    "content_type": stat.content_type,
                },
            )
            uploaded_at_str: Optional[str] = (
                stat.last_modified.isoformat() if stat.last_modified else None
            )
            # NOTE(review): upload_file does not itself write a filename
            # entry into the object metadata, so this lookup only succeeds
            # when the caller supplied one in args.metadata - confirm that
            # is intended.
            filename = (
                stat.metadata.get("X-Amz-Meta-Filename") if stat.metadata else None
            )
            # Strip the "X-Amz-Meta-" prefix from keys that carry it; keys
            # without the prefix are kept unchanged.
            metadata = (
                {k.replace("X-Amz-Meta-", ""): v for k, v in stat.metadata.items()}
                if stat.metadata
                else {}
            )

            return FileMetadata(
                file_id=file_id,
                filename=filename,
                content_type=stat.content_type,
                size_bytes=stat.size,
                uploaded_at=uploaded_at_str or "",  # Provide empty string if None
                metadata=metadata,
            )
        except S3Error as e:
            if e.code == "NoSuchKey":
                logger.warning(
                    "File metadata not found in Minio",
                    extra={"file_id": file_id},
                )
                return None
            logger.error(
                f"Error getting file metadata from Minio: {e}",
                extra={"file_id": file_id, "error_code": e.code},
            )
            raise
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Temporal repository utilities.
|
|
3
|
+
|
|
4
|
+
This module provides utilities for working with Temporal repositories,
|
|
5
|
+
including the temporal_activity_registration decorator for automatically
|
|
6
|
+
wrapping repository methods as Temporal activities.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from julee.util.temporal.decorators import temporal_activity_registration
|
|
10
|
+
|
|
11
|
+
__all__ = ["temporal_activity_registration"]
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from temporalio.client import Client
|
|
5
|
+
|
|
6
|
+
from julee.util.domain import FileMetadata, FileUploadArgs
|
|
7
|
+
from julee.util.repositories import FileStorageRepository
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TemporalFileStorageRepository(FileStorageRepository):
    """
    Client-side proxy for FileStorageRepository backed by a Temporal client.

    Each method starts a workflow by name through the Temporal client and
    waits for its result.

    NOTE(review): the names passed to ``start_workflow`` are the same
    strings the workflow-side proxy uses as activity names
    ("util.file_storage.minio.*") - confirm workflows are registered under
    these names.
    """

    def __init__(
        self,
        client: Client,
        concrete_repo: Optional[FileStorageRepository] = None,
        task_queue: str = "order-fulfillment-queue",
    ):
        """Create the proxy.

        Args:
            client: Connected Temporal client used to start workflows.
            concrete_repo: Stored on the instance; not used by the methods
                of this class.
            task_queue: Task queue the workflows are started on. Defaults
                to the previously hard-coded queue name.
        """
        self.client = client
        self.concrete_repo = concrete_repo
        self.task_queue = task_queue
        logger.debug("Initialized TemporalFileStorageRepository")

    async def upload_file(self, args: FileUploadArgs) -> FileMetadata:
        """Upload a file via Temporal.

        Args:
            args: Validated upload arguments.

        Returns:
            The resulting FileMetadata.
        """
        logger.debug(f"Client calling activity to upload file: {args.file_id}")

        handle = await self.client.start_workflow(
            "util.file_storage.minio.upload_file",
            args,
            id=f"upload-{args.file_id}",
            task_queue=self.task_queue,
        )

        result = await handle.result()
        # The workflow is referenced by name, so the result type is not
        # known to the client; validate it into the declared return type,
        # as the workflow-side proxy does.
        return FileMetadata.model_validate(result)

    async def download_file(self, file_id: str) -> Optional[bytes]:
        """Download a file via Temporal.

        Args:
            file_id: Unique identifier of the file.

        Returns:
            Whatever the started workflow returns (declared as the file
            content, or None).
        """
        logger.debug(f"Client calling activity to download file: {file_id}")

        handle = await self.client.start_workflow(
            "util.file_storage.minio.download_file",
            file_id,
            id=f"download-{file_id}",
            task_queue=self.task_queue,
        )

        result = await handle.result()
        return result  # type: ignore[no-any-return]

    async def get_file_metadata(self, file_id: str) -> Optional[FileMetadata]:
        """Retrieve file metadata via Temporal.

        Args:
            file_id: Unique identifier of the file.

        Returns:
            FileMetadata if the workflow returned one, None otherwise.
        """
        logger.debug(f"Client calling activity to get file metadata: {file_id}")

        handle = await self.client.start_workflow(
            "util.file_storage.minio.get_file_metadata",
            file_id,
            id=f"metadata-{file_id}",
            task_queue=self.task_queue,
        )

        result = await handle.result()
        if result is None:
            return None
        return FileMetadata.model_validate(result)
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Custom Temporal data converter with support for temporal_validation context.
|
|
3
|
+
|
|
4
|
+
This module provides a custom Pydantic data converter that automatically
|
|
5
|
+
adds temporal_validation=True context when deserializing Pydantic models.
|
|
6
|
+
This allows domain models to implement context-aware validation that can
|
|
7
|
+
be more permissive during Temporal serialization/deserialization.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from typing import Any, Optional, Type
|
|
11
|
+
|
|
12
|
+
from pydantic import TypeAdapter
|
|
13
|
+
from temporalio.contrib.pydantic import (
|
|
14
|
+
PydanticJSONPlainPayloadConverter,
|
|
15
|
+
ToJsonOptions,
|
|
16
|
+
)
|
|
17
|
+
from temporalio.converter import (
|
|
18
|
+
DataConverter,
|
|
19
|
+
CompositePayloadConverter,
|
|
20
|
+
DefaultPayloadConverter,
|
|
21
|
+
JSONPlainPayloadConverter,
|
|
22
|
+
)
|
|
23
|
+
import temporalio.api.common.v1
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class TemporalValidationPydanticConverter(PydanticJSONPlainPayloadConverter):
    """Pydantic JSON payload converter that tags deserialization.

    Behaves like PydanticJSONPlainPayloadConverter except that
    ``from_payload`` always validates with the context
    ``{"temporal_validation": True}``, so Pydantic validators can tell
    that the data arrived through this converter.
    """

    def from_payload(
        self,
        payload: temporalio.api.common.v1.Payload,
        type_hint: Optional[Type] = None,
    ) -> Any:
        """Deserialize ``payload`` with the temporal_validation context.

        Args:
            payload: The Temporal payload whose ``data`` holds JSON.
            type_hint: Expected type of the result; ``Any`` is used when
                no hint is given.

        Returns:
            The value produced by validating the JSON against the target
            type.
        """
        target_type = Any if type_hint is None else type_hint
        adapter = TypeAdapter(target_type)
        validation_context = {"temporal_validation": True}
        return adapter.validate_json(payload.data, context=validation_context)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class TemporalValidationPayloadConverter(CompositePayloadConverter):
    """Composite payload converter using the temporal-validation JSON step.

    Built from the default encoding payload converters, with every
    JSONPlainPayloadConverter entry swapped for a
    TemporalValidationPydanticConverter.
    """

    def __init__(self, to_json_options: Optional[ToJsonOptions] = None) -> None:
        """Assemble the converter chain.

        Args:
            to_json_options: Forwarded to the
                TemporalValidationPydanticConverter constructor.
        """
        pydantic_json_converter = TemporalValidationPydanticConverter(to_json_options)

        # Walk the defaults in order so the chain keeps its original
        # ordering; only the plain JSON converter is substituted.
        converters = []
        for default_converter in DefaultPayloadConverter.default_encoding_payload_converters:
            if isinstance(default_converter, JSONPlainPayloadConverter):
                converters.append(pydantic_json_converter)
            else:
                converters.append(default_converter)

        super().__init__(*converters)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def create_temporal_data_converter(
    to_json_options: Optional[ToJsonOptions] = None,
) -> DataConverter:
    """Create a data converter with temporal validation support.

    The returned DataConverter uses TemporalValidationPayloadConverter, so
    Pydantic models are deserialized with the temporal_validation context.

    Args:
        to_json_options: Optional configuration for JSON serialization.
            When given, it is forwarded to the payload converter.

    Returns:
        DataConverter configured with temporal validation support
    """
    if to_json_options is None:
        return DataConverter(
            payload_converter_class=TemporalValidationPayloadConverter
        )

    # DataConverter takes a class rather than an instance, so bind the
    # options through a small subclass with a zero-argument constructor.
    # Previously the options were silently dropped.
    class _ConfiguredPayloadConverter(TemporalValidationPayloadConverter):
        """TemporalValidationPayloadConverter bound to caller options."""

        def __init__(self) -> None:
            super().__init__(to_json_options)

    return DataConverter(payload_converter_class=_ConfiguredPayloadConverter)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# Module-level default instance, built once at import time with no
# custom JSON options.
temporal_data_converter: DataConverter = create_temporal_data_converter()
"""Default Temporal data converter with temporal_validation context support.

Deserializing a Pydantic model through this converter supplies the
validation context ``temporal_validation=True``, which domain models can
inspect to apply context-aware validation rules.

Usage:
    client = Client(
        data_converter=temporal_data_converter,
        ...
    )
"""
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from julee.util.temporal.decorators import temporal_activity_registration
|
|
2
|
+
from julee.util.repos.minio.file_storage import MinioFileStorageRepository
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@temporal_activity_registration("util.file_storage.minio")
class TemporalMinioFileStorageRepository(MinioFileStorageRepository):
    """
    Temporal activity wrapper for MinioFileStorageRepository.

    Adds no behavior of its own. The class exists so that the
    temporal_activity_registration decorator, applied with the prefix
    "util.file_storage.minio", can wrap the inherited async methods as
    activities.
    """
|
|
File without changes
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from temporalio import workflow
|
|
5
|
+
|
|
6
|
+
from julee.util.domain import FileMetadata, FileUploadArgs
|
|
7
|
+
from julee.util.repositories import FileStorageRepository
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class WorkflowFileStorageRepositoryProxy(FileStorageRepository):
    """
    Workflow implementation of FileStorageRepository that calls activities.

    Every method delegates to ``workflow.execute_activity`` using an
    activity name of the form "util.file_storage.minio.<method>", so the
    actual storage I/O runs outside workflow code.
    """

    def __init__(self, activity_timeout_seconds: float = 600) -> None:
        """Configure the proxy.

        Args:
            activity_timeout_seconds: Start-to-close timeout applied to
                every activity call. The default of 10 minutes is meant to
                be generous enough for large file transfers.
        """
        # Import timedelta from its real home rather than relying on the
        # name happening to be importable from temporalio.workflow.
        from datetime import timedelta

        self.activity_timeout = timedelta(seconds=activity_timeout_seconds)
        logger.debug("Initialized WorkflowFileStorageRepositoryProxy")

    async def upload_file(self, args: FileUploadArgs) -> FileMetadata:
        """Upload a file to storage via Temporal activity.

        Args:
            args: Validated upload arguments.

        Returns:
            The activity result validated into FileMetadata.
        """
        logger.debug(f"Workflow calling activity to upload file: {args.file_id}")
        # The activity name follows the general util pattern:
        # {domain}.{subdomain}.{implementation}.{method}
        result = await workflow.execute_activity(
            "util.file_storage.minio.upload_file",
            args,
            start_to_close_timeout=self.activity_timeout,
        )
        # The activity is referenced by name, so its result is untyped
        # here; validate it into the declared return type.
        return FileMetadata.model_validate(result)

    async def download_file(self, file_id: str) -> Optional[bytes]:
        """Download a file from storage via Temporal activity.

        Args:
            file_id: Unique identifier of the file.

        Returns:
            The activity result as-is (declared as the file content, or
            None).
        """
        logger.debug(f"Workflow calling activity to download file: {file_id}")
        result = await workflow.execute_activity(
            "util.file_storage.minio.download_file",
            file_id,
            start_to_close_timeout=self.activity_timeout,
        )
        return result  # type: ignore[no-any-return]

    async def get_file_metadata(self, file_id: str) -> Optional[FileMetadata]:
        """Retrieve file metadata via Temporal activity.

        Args:
            file_id: Unique identifier of the file.

        Returns:
            The activity result validated into FileMetadata, or None if
            the activity returned None.
        """
        logger.debug(f"Workflow calling activity to get file metadata: {file_id}")
        result = await workflow.execute_activity(
            "util.file_storage.minio.get_file_metadata",
            file_id,
            start_to_close_timeout=self.activity_timeout,
        )
        if result is None:
            return None
        return FileMetadata.model_validate(result)
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
from typing import Protocol, Optional, runtime_checkable
|
|
2
|
+
from julee.util.domain import FileMetadata, FileUploadArgs
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@runtime_checkable
class FileStorageRepository(Protocol):
    """Storage and retrieval contract for large files/payloads.

    Architectural Purpose:
        Large payloads may exceed Temporal's payload size limits or simply
        be better kept in external storage. Implementations of this
        protocol hold the data so that workflows can pass around file
        references instead of file contents, which keeps workflows
        deterministic while still handling large data.
    """

    async def upload_file(self, args: FileUploadArgs) -> FileMetadata:
        """Store a file.

        Args:
            args: FileUploadArgs carrying the file_id, data, and metadata.

        Returns:
            A FileMetadata object describing the uploaded file.

        Implementation Notes:
            - Idempotency is required: repeating an upload with the same
              file_id must be safe.
            - The returned metadata should report the actual size and
              content type.
            - Security validation is mandatory: file size limits, content
              type verification, and filename sanitization.
            - Files whose content does not match the declared content type
              should be rejected.
        """
        ...

    async def download_file(self, file_id: str) -> Optional[bytes]:
        """Fetch a file's content by ID.

        Args:
            file_id: Unique identifier of the file.

        Returns:
            The file content as bytes when the file exists, otherwise None.
        """
        ...

    async def get_file_metadata(self, file_id: str) -> Optional[FileMetadata]:
        """Look up the metadata of a stored file.

        Args:
            file_id: Unique identifier of the file.

        Returns:
            The FileMetadata object when the file exists, otherwise None.
        """
        ...
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Temporal utilities package.
|
|
3
|
+
|
|
4
|
+
This package provides utility functions and classes for working with
|
|
5
|
+
Temporal workflows and activities.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .activities import (
|
|
9
|
+
collect_activities_from_instances,
|
|
10
|
+
discover_protocol_methods,
|
|
11
|
+
)
|
|
12
|
+
from .decorators import (
|
|
13
|
+
temporal_activity_registration,
|
|
14
|
+
temporal_workflow_proxy,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"collect_activities_from_instances",
|
|
19
|
+
"discover_protocol_methods",
|
|
20
|
+
"temporal_activity_registration",
|
|
21
|
+
"temporal_workflow_proxy",
|
|
22
|
+
]
|