julee 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- julee/__init__.py +3 -0
- julee/api/__init__.py +20 -0
- julee/api/app.py +180 -0
- julee/api/dependencies.py +257 -0
- julee/api/requests.py +175 -0
- julee/api/responses.py +43 -0
- julee/api/routers/__init__.py +43 -0
- julee/api/routers/assembly_specifications.py +212 -0
- julee/api/routers/documents.py +182 -0
- julee/api/routers/knowledge_service_configs.py +79 -0
- julee/api/routers/knowledge_service_queries.py +293 -0
- julee/api/routers/system.py +137 -0
- julee/api/routers/workflows.py +234 -0
- julee/api/services/__init__.py +20 -0
- julee/api/services/system_initialization.py +214 -0
- julee/api/tests/__init__.py +14 -0
- julee/api/tests/routers/__init__.py +17 -0
- julee/api/tests/routers/test_assembly_specifications.py +749 -0
- julee/api/tests/routers/test_documents.py +301 -0
- julee/api/tests/routers/test_knowledge_service_configs.py +234 -0
- julee/api/tests/routers/test_knowledge_service_queries.py +738 -0
- julee/api/tests/routers/test_system.py +179 -0
- julee/api/tests/routers/test_workflows.py +393 -0
- julee/api/tests/test_app.py +285 -0
- julee/api/tests/test_dependencies.py +245 -0
- julee/api/tests/test_requests.py +250 -0
- julee/domain/__init__.py +22 -0
- julee/domain/models/__init__.py +49 -0
- julee/domain/models/assembly/__init__.py +17 -0
- julee/domain/models/assembly/assembly.py +103 -0
- julee/domain/models/assembly/tests/__init__.py +0 -0
- julee/domain/models/assembly/tests/factories.py +37 -0
- julee/domain/models/assembly/tests/test_assembly.py +430 -0
- julee/domain/models/assembly_specification/__init__.py +24 -0
- julee/domain/models/assembly_specification/assembly_specification.py +172 -0
- julee/domain/models/assembly_specification/knowledge_service_query.py +123 -0
- julee/domain/models/assembly_specification/tests/__init__.py +0 -0
- julee/domain/models/assembly_specification/tests/factories.py +78 -0
- julee/domain/models/assembly_specification/tests/test_assembly_specification.py +490 -0
- julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +310 -0
- julee/domain/models/custom_fields/__init__.py +0 -0
- julee/domain/models/custom_fields/content_stream.py +68 -0
- julee/domain/models/custom_fields/tests/__init__.py +0 -0
- julee/domain/models/custom_fields/tests/test_custom_fields.py +53 -0
- julee/domain/models/document/__init__.py +17 -0
- julee/domain/models/document/document.py +150 -0
- julee/domain/models/document/tests/__init__.py +0 -0
- julee/domain/models/document/tests/factories.py +76 -0
- julee/domain/models/document/tests/test_document.py +297 -0
- julee/domain/models/knowledge_service_config/__init__.py +17 -0
- julee/domain/models/knowledge_service_config/knowledge_service_config.py +86 -0
- julee/domain/models/policy/__init__.py +15 -0
- julee/domain/models/policy/document_policy_validation.py +220 -0
- julee/domain/models/policy/policy.py +203 -0
- julee/domain/models/policy/tests/__init__.py +0 -0
- julee/domain/models/policy/tests/factories.py +47 -0
- julee/domain/models/policy/tests/test_document_policy_validation.py +420 -0
- julee/domain/models/policy/tests/test_policy.py +546 -0
- julee/domain/repositories/__init__.py +27 -0
- julee/domain/repositories/assembly.py +45 -0
- julee/domain/repositories/assembly_specification.py +52 -0
- julee/domain/repositories/base.py +146 -0
- julee/domain/repositories/document.py +49 -0
- julee/domain/repositories/document_policy_validation.py +52 -0
- julee/domain/repositories/knowledge_service_config.py +54 -0
- julee/domain/repositories/knowledge_service_query.py +44 -0
- julee/domain/repositories/policy.py +49 -0
- julee/domain/use_cases/__init__.py +17 -0
- julee/domain/use_cases/decorators.py +107 -0
- julee/domain/use_cases/extract_assemble_data.py +649 -0
- julee/domain/use_cases/initialize_system_data.py +842 -0
- julee/domain/use_cases/tests/__init__.py +7 -0
- julee/domain/use_cases/tests/test_extract_assemble_data.py +548 -0
- julee/domain/use_cases/tests/test_initialize_system_data.py +455 -0
- julee/domain/use_cases/tests/test_validate_document.py +1228 -0
- julee/domain/use_cases/validate_document.py +736 -0
- julee/fixtures/assembly_specifications.yaml +70 -0
- julee/fixtures/documents.yaml +178 -0
- julee/fixtures/knowledge_service_configs.yaml +37 -0
- julee/fixtures/knowledge_service_queries.yaml +27 -0
- julee/repositories/__init__.py +17 -0
- julee/repositories/memory/__init__.py +31 -0
- julee/repositories/memory/assembly.py +84 -0
- julee/repositories/memory/assembly_specification.py +125 -0
- julee/repositories/memory/base.py +227 -0
- julee/repositories/memory/document.py +149 -0
- julee/repositories/memory/document_policy_validation.py +104 -0
- julee/repositories/memory/knowledge_service_config.py +123 -0
- julee/repositories/memory/knowledge_service_query.py +120 -0
- julee/repositories/memory/policy.py +87 -0
- julee/repositories/memory/tests/__init__.py +0 -0
- julee/repositories/memory/tests/test_document.py +212 -0
- julee/repositories/memory/tests/test_document_policy_validation.py +161 -0
- julee/repositories/memory/tests/test_policy.py +443 -0
- julee/repositories/minio/__init__.py +31 -0
- julee/repositories/minio/assembly.py +103 -0
- julee/repositories/minio/assembly_specification.py +170 -0
- julee/repositories/minio/client.py +570 -0
- julee/repositories/minio/document.py +530 -0
- julee/repositories/minio/document_policy_validation.py +120 -0
- julee/repositories/minio/knowledge_service_config.py +187 -0
- julee/repositories/minio/knowledge_service_query.py +211 -0
- julee/repositories/minio/policy.py +106 -0
- julee/repositories/minio/tests/__init__.py +0 -0
- julee/repositories/minio/tests/fake_client.py +213 -0
- julee/repositories/minio/tests/test_assembly.py +374 -0
- julee/repositories/minio/tests/test_assembly_specification.py +391 -0
- julee/repositories/minio/tests/test_client_protocol.py +57 -0
- julee/repositories/minio/tests/test_document.py +591 -0
- julee/repositories/minio/tests/test_document_policy_validation.py +192 -0
- julee/repositories/minio/tests/test_knowledge_service_config.py +374 -0
- julee/repositories/minio/tests/test_knowledge_service_query.py +438 -0
- julee/repositories/minio/tests/test_policy.py +559 -0
- julee/repositories/temporal/__init__.py +38 -0
- julee/repositories/temporal/activities.py +114 -0
- julee/repositories/temporal/activity_names.py +34 -0
- julee/repositories/temporal/proxies.py +159 -0
- julee/services/__init__.py +18 -0
- julee/services/knowledge_service/__init__.py +48 -0
- julee/services/knowledge_service/anthropic/__init__.py +12 -0
- julee/services/knowledge_service/anthropic/knowledge_service.py +331 -0
- julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +318 -0
- julee/services/knowledge_service/factory.py +138 -0
- julee/services/knowledge_service/knowledge_service.py +160 -0
- julee/services/knowledge_service/memory/__init__.py +13 -0
- julee/services/knowledge_service/memory/knowledge_service.py +278 -0
- julee/services/knowledge_service/memory/test_knowledge_service.py +345 -0
- julee/services/knowledge_service/test_factory.py +112 -0
- julee/services/temporal/__init__.py +38 -0
- julee/services/temporal/activities.py +86 -0
- julee/services/temporal/activity_names.py +22 -0
- julee/services/temporal/proxies.py +41 -0
- julee/util/__init__.py +0 -0
- julee/util/domain.py +119 -0
- julee/util/repos/__init__.py +0 -0
- julee/util/repos/minio/__init__.py +0 -0
- julee/util/repos/minio/file_storage.py +213 -0
- julee/util/repos/temporal/__init__.py +11 -0
- julee/util/repos/temporal/client_proxies/file_storage.py +68 -0
- julee/util/repos/temporal/data_converter.py +123 -0
- julee/util/repos/temporal/minio_file_storage.py +12 -0
- julee/util/repos/temporal/proxies/__init__.py +0 -0
- julee/util/repos/temporal/proxies/file_storage.py +58 -0
- julee/util/repositories.py +55 -0
- julee/util/temporal/__init__.py +22 -0
- julee/util/temporal/activities.py +123 -0
- julee/util/temporal/decorators.py +473 -0
- julee/util/tests/__init__.py +1 -0
- julee/util/tests/test_decorators.py +770 -0
- julee/util/validation/__init__.py +29 -0
- julee/util/validation/repository.py +100 -0
- julee/util/validation/type_guards.py +369 -0
- julee/worker.py +211 -0
- julee/workflows/__init__.py +26 -0
- julee/workflows/extract_assemble.py +215 -0
- julee/workflows/validate_document.py +228 -0
- julee-0.1.0.dist-info/METADATA +195 -0
- julee-0.1.0.dist-info/RECORD +161 -0
- julee-0.1.0.dist-info/WHEEL +5 -0
- julee-0.1.0.dist-info/licenses/LICENSE +674 -0
- julee-0.1.0.dist-info/top_level.txt +1 -0
julee/worker.py
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Temporal worker for julee domain workflows and activities.
|
|
3
|
+
|
|
4
|
+
This worker runs workflows and activities for document processing,
|
|
5
|
+
assembly, and knowledge service operations within the julee domain.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import logging
|
|
10
|
+
import os
|
|
11
|
+
from temporalio.client import Client
|
|
12
|
+
from temporalio.service import RPCError
|
|
13
|
+
from temporalio.worker import Worker
|
|
14
|
+
from julee.util.repos.temporal.data_converter import temporal_data_converter
|
|
15
|
+
|
|
16
|
+
from julee.workflows import (
|
|
17
|
+
ExtractAssembleWorkflow,
|
|
18
|
+
ValidateDocumentWorkflow,
|
|
19
|
+
)
|
|
20
|
+
from julee.repositories.temporal.activities import (
|
|
21
|
+
TemporalMinioAssemblyRepository,
|
|
22
|
+
TemporalMinioAssemblySpecificationRepository,
|
|
23
|
+
TemporalMinioDocumentRepository,
|
|
24
|
+
TemporalMinioKnowledgeServiceConfigRepository,
|
|
25
|
+
TemporalMinioKnowledgeServiceQueryRepository,
|
|
26
|
+
TemporalMinioPolicyRepository,
|
|
27
|
+
TemporalMinioDocumentPolicyValidationRepository,
|
|
28
|
+
)
|
|
29
|
+
from julee.services.temporal.activities import (
|
|
30
|
+
TemporalKnowledgeService,
|
|
31
|
+
)
|
|
32
|
+
from minio import Minio
|
|
33
|
+
from julee.repositories.minio.client import MinioClient
|
|
34
|
+
from julee.util.temporal.activities import collect_activities_from_instances
|
|
35
|
+
|
|
36
|
+
logger = logging.getLogger(__name__)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def setup_logging() -> None:
    """Configure the root logger from environment variables.

    Reads ``LOG_LEVEL`` (default ``INFO``, upper-cased before lookup) and
    ``LOG_FORMAT`` (default is a timestamp/name/level/message layout).
    If ``LOG_LEVEL`` does not name an integer attribute of the ``logging``
    module, a notice is printed and ``INFO`` is used instead.
    """
    default_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    env = os.environ
    log_level = env.get("LOG_LEVEL", "INFO").upper()
    log_format = env.get("LOG_FORMAT", default_format)

    # Resolve the level name to its numeric value; anything that is not an
    # int attribute on the logging module is treated as invalid.
    candidate = getattr(logging, log_level, None)
    if isinstance(candidate, int):
        numeric_level = candidate
    else:
        # print() rather than logging: logging is not configured yet.
        print(f"Invalid log level: {log_level}, defaulting to INFO")
        numeric_level = logging.INFO

    # force=True replaces any configuration installed before this call.
    logging.basicConfig(level=numeric_level, format=log_format, force=True)

    logger.info(
        "Logging configured",
        extra={"log_level": log_level, "numeric_level": numeric_level},
    )
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
async def get_temporal_client_with_retries(
    endpoint: str, attempts: int = 10, delay: int = 5
) -> Client:
    """Connect to Temporal, retrying failed attempts with a fixed delay.

    Args:
        endpoint: Temporal frontend address, e.g. ``"localhost:7233"``.
        attempts: Maximum number of connection attempts.
        delay: Seconds to sleep between failed attempts.

    Returns:
        A connected ``Client`` for the ``default`` namespace, configured with
        the project's ``temporal_data_converter``.

    Raises:
        RPCError, RuntimeError: The error from the final failed attempt.
        RuntimeError: Also raised if ``attempts`` is less than 1, because no
            connection is ever tried.
    """
    logger.debug(
        "Attempting to connect to Temporal",
        extra={
            "endpoint": endpoint,
            "max_attempts": attempts,
            "delay_seconds": delay,
        },
    )

    for attempt in range(attempts):
        try:
            # Keep the try body to the one call that can fail to connect.
            client = await Client.connect(
                endpoint,
                data_converter=temporal_data_converter,
                namespace="default",
            )
        except (RPCError, RuntimeError) as e:
            # The SDK reports transport-level failures (for example the
            # server not listening yet) as RuntimeError, not RPCError, so
            # both must be retried for the startup wait to work.
            logger.warning(
                "Failed to connect to Temporal",
                extra={
                    "endpoint": endpoint,
                    "attempt": attempt + 1,
                    "max_attempts": attempts,
                    "error": str(e),
                    "retry_in_seconds": delay,
                },
            )
            if attempt + 1 == attempts:
                logger.error(
                    "All connection attempts to Temporal failed",
                    extra={"endpoint": endpoint, "total_attempts": attempts},
                )
                raise
            await asyncio.sleep(delay)
        else:
            logger.info(
                "Successfully connected to Temporal",
                extra={
                    "endpoint": endpoint,
                    "attempt": attempt + 1,
                    "data_converter_type": type(client.data_converter).__name__,
                },
            )
            return client

    # Only reachable when attempts < 1; also satisfies the type checker's
    # requirement that every path returns or raises.
    raise RuntimeError("Failed to connect to Temporal after all attempts")
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
async def run_worker() -> None:
    """Run the Temporal worker for the julee domain.

    Steps:
        1. Configure logging.
        2. Connect to Temporal at ``TEMPORAL_ENDPOINT``
           (default ``localhost:7234``), with retries.
        3. Build a MinIO client and the Temporal activity wrappers
           (repositories and knowledge service) that use it.
        4. Register both workflows and all collected activities on the
           ``julee-extract-assemble-queue`` task queue and run until stopped.

    Environment variables (all optional):
        TEMPORAL_ENDPOINT: Temporal address.
        MINIO_ENDPOINT: MinIO address (default ``localhost:9000``).
        MINIO_ACCESS_KEY: MinIO access key (default ``minioadmin``).
        MINIO_SECRET_KEY: MinIO secret key (default ``minioadmin``).
        MINIO_SECURE: ``1``/``true``/``yes``/``on`` enables TLS
            (default off).
    """
    # Setup logging first so everything below is captured.
    setup_logging()

    temporal_endpoint = os.environ.get("TEMPORAL_ENDPOINT", "localhost:7234")
    logger.info(
        "Starting julee Temporal worker",
        extra={"temporal_endpoint": temporal_endpoint},
    )

    client = await get_temporal_client_with_retries(temporal_endpoint)

    # MinIO connection settings. Defaults match the previous hard-coded
    # development values, so existing deployments behave the same.
    logger.debug("Preparing repository configurations")
    minio_endpoint = os.environ.get("MINIO_ENDPOINT", "localhost:9000")
    minio_access_key = os.environ.get("MINIO_ACCESS_KEY", "minioadmin")
    minio_secret_key = os.environ.get("MINIO_SECRET_KEY", "minioadmin")
    minio_secure = os.environ.get("MINIO_SECURE", "false").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }

    # minio.Minio is used where the MinioClient protocol is expected.
    minio_client: MinioClient = Minio(  # type: ignore[assignment]
        endpoint=minio_endpoint,
        access_key=minio_access_key,
        secret_key=minio_secret_key,
        secure=minio_secure,
    )

    # Instantiate temporal repository classes for activity registration.
    logger.debug("Creating Temporal Activity repository implementations")
    temporal_assembly_repo = TemporalMinioAssemblyRepository(client=minio_client)
    temporal_assembly_spec_repo = TemporalMinioAssemblySpecificationRepository(
        client=minio_client
    )
    temporal_document_repo = TemporalMinioDocumentRepository(client=minio_client)
    temporal_knowledge_config_repo = TemporalMinioKnowledgeServiceConfigRepository(
        client=minio_client
    )
    temporal_knowledge_query_repo = TemporalMinioKnowledgeServiceQueryRepository(
        client=minio_client
    )

    # Policy repositories used by the validation workflow.
    temporal_policy_repo = TemporalMinioPolicyRepository(client=minio_client)
    temporal_document_policy_validation_repo = (
        TemporalMinioDocumentPolicyValidationRepository(client=minio_client)
    )

    # The knowledge service is given the document repository it depends on.
    temporal_knowledge_service = TemporalKnowledgeService(
        document_repo=temporal_document_repo
    )

    # Collect the activity callables exposed by each decorated instance.
    activities = collect_activities_from_instances(
        temporal_assembly_repo,
        temporal_assembly_spec_repo,
        temporal_document_repo,
        temporal_knowledge_config_repo,
        temporal_knowledge_query_repo,
        temporal_policy_repo,
        temporal_document_policy_validation_repo,
        temporal_knowledge_service,
    )

    # Defined once so the log record and the Worker cannot disagree.
    task_queue = "julee-extract-assemble-queue"
    workflows = [ExtractAssembleWorkflow, ValidateDocumentWorkflow]

    logger.info(
        "Creating Temporal worker for julee domain",
        extra={
            "task_queue": task_queue,
            "workflow_count": len(workflows),
            "activity_count": len(activities),
            "data_converter_type": type(client.data_converter).__name__,
        },
    )

    worker = Worker(
        client,
        task_queue=task_queue,
        workflows=workflows,
        activities=activities,  # type: ignore[arg-type]
    )

    logger.info("Starting julee worker execution")

    # Blocks until the worker is shut down.
    await worker.run()
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
if __name__ == "__main__":
|
|
211
|
+
asyncio.run(run_worker())
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Temporal workflows for the julee domain.
|
|
3
|
+
|
|
4
|
+
This package contains Temporal workflow definitions that orchestrate
|
|
5
|
+
use cases with durability guarantees, retry logic, and state management.
|
|
6
|
+
|
|
7
|
+
Workflows in this package:
|
|
8
|
+
- ExtractAssembleWorkflow: Orchestrates document extraction and assembly
|
|
9
|
+
- ValidateDocumentWorkflow: Orchestrates document validation against policies
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from .extract_assemble import (
|
|
13
|
+
ExtractAssembleWorkflow,
|
|
14
|
+
EXTRACT_ASSEMBLE_RETRY_POLICY,
|
|
15
|
+
)
|
|
16
|
+
from .validate_document import (
|
|
17
|
+
ValidateDocumentWorkflow,
|
|
18
|
+
VALIDATE_DOCUMENT_RETRY_POLICY,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
"ExtractAssembleWorkflow",
|
|
23
|
+
"EXTRACT_ASSEMBLE_RETRY_POLICY",
|
|
24
|
+
"ValidateDocumentWorkflow",
|
|
25
|
+
"VALIDATE_DOCUMENT_RETRY_POLICY",
|
|
26
|
+
]
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Temporal workflow for extract and assemble data operations.
|
|
3
|
+
|
|
4
|
+
This workflow orchestrates the ExtractAssembleDataUseCase with Temporal's
|
|
5
|
+
durability guarantees, providing retry logic, state management, and
|
|
6
|
+
compensation for the complex document assembly process.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
from temporalio import workflow
|
|
11
|
+
from temporalio.common import RetryPolicy
|
|
12
|
+
from datetime import timedelta
|
|
13
|
+
|
|
14
|
+
from julee.domain.models.assembly import Assembly
|
|
15
|
+
from julee.domain.use_cases import ExtractAssembleDataUseCase
|
|
16
|
+
from julee.repositories.temporal.proxies import (
|
|
17
|
+
WorkflowAssemblyRepositoryProxy,
|
|
18
|
+
WorkflowAssemblySpecificationRepositoryProxy,
|
|
19
|
+
WorkflowDocumentRepositoryProxy,
|
|
20
|
+
WorkflowKnowledgeServiceConfigRepositoryProxy,
|
|
21
|
+
WorkflowKnowledgeServiceQueryRepositoryProxy,
|
|
22
|
+
)
|
|
23
|
+
from julee.services.temporal.proxies import (
|
|
24
|
+
WorkflowKnowledgeServiceProxy,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@workflow.defn
class ExtractAssembleWorkflow:
    """
    Temporal workflow that runs the extract-and-assemble use case.

    Given a document ID and an assembly specification ID, the workflow
    builds an ExtractAssembleDataUseCase on top of workflow proxy objects
    (repositories and knowledge service), awaits it, and returns the
    resulting Assembly.

    All business logic lives in the use case; this class only tracks
    progress (``current_step``, ``assembly_id``), logs, and exposes that
    state through queries.
    """

    def __init__(self) -> None:
        # Progress marker and result ID, readable by clients via queries.
        self.current_step = "initialized"
        self.assembly_id: str | None = None

    @workflow.query
    def get_current_step(self) -> str:
        """Return the name of the step the workflow is currently in."""
        return self.current_step

    @workflow.query
    def get_assembly_id(self) -> str | None:
        """Return the assembly ID, or None if no assembly has completed yet."""
        return self.assembly_id

    @workflow.run
    async def run(self, document_id: str, assembly_specification_id: str) -> Assembly:
        """
        Execute the extract and assemble workflow.

        Args:
            document_id: ID of the document to assemble
            assembly_specification_id: ID of the specification to use

        Returns:
            The Assembly returned by the use case

        Raises:
            Exception: Any error raised while building or running the use
                case is logged, ``current_step`` is set to "failed", and
                the error is re-raised unchanged.
        """
        info = workflow.info()
        # Identifiers attached to every log record emitted by this run.
        ids = {
            "document_id": document_id,
            "assembly_specification_id": assembly_specification_id,
        }

        workflow.logger.info(
            "Starting extract assemble workflow",
            extra={
                **ids,
                "workflow_id": info.workflow_id,
                "run_id": info.run_id,
            },
        )

        self.current_step = "initializing_repositories"

        try:
            # Proxy objects stand in for the real repositories inside the
            # workflow.
            docs = WorkflowDocumentRepositoryProxy()  # type: ignore[abstract]
            assemblies = WorkflowAssemblyRepositoryProxy()  # type: ignore[abstract]
            specs = WorkflowAssemblySpecificationRepositoryProxy()  # type: ignore[abstract]
            queries = WorkflowKnowledgeServiceQueryRepositoryProxy()  # type: ignore[abstract]
            configs = WorkflowKnowledgeServiceConfigRepositoryProxy()  # type: ignore[abstract]

            workflow.logger.debug("Repository proxies created", extra={**ids})

            self.current_step = "creating_use_case"

            # Knowledge service proxy, created after the repositories.
            knowledge = WorkflowKnowledgeServiceProxy()  # type: ignore[abstract]

            # The use case gets workflow.now as its clock function.
            use_case = ExtractAssembleDataUseCase(
                document_repo=docs,
                assembly_repo=assemblies,
                assembly_specification_repo=specs,
                knowledge_service_query_repo=queries,
                knowledge_service_config_repo=configs,
                knowledge_service=knowledge,
                now_fn=workflow.now,
            )

            workflow.logger.debug("Use case created successfully", extra={**ids})

            self.current_step = "executing_assembly"

            # Run the assembly; the workflow ID is handed to the use case.
            assembly = await use_case.assemble_data(
                document_id=document_id,
                assembly_specification_id=assembly_specification_id,
                workflow_id=info.workflow_id,
            )

            # Record the result so get_assembly_id can report it.
            self.assembly_id = assembly.assembly_id
            self.current_step = "completed"

            workflow.logger.info(
                "Extract assemble workflow completed successfully",
                extra={
                    **ids,
                    "assembly_id": assembly.assembly_id,
                    "assembled_document_id": assembly.assembled_document_id,
                    "status": assembly.status.value,
                },
            )

            return assembly

        except Exception as e:
            self.current_step = "failed"

            workflow.logger.error(
                "Extract assemble workflow failed",
                extra={
                    **ids,
                    "assembly_id": self.assembly_id,
                    "error": str(e),
                    "error_type": type(e).__name__,
                },
                exc_info=True,
            )

            # Propagate unchanged so Temporal sees the original failure.
            raise

    @workflow.signal
    async def cancel_assembly(self, reason: str) -> None:
        """
        Signal handler for a cancellation request.

        Args:
            reason: Reason for cancellation

        Note:
            Placeholder: the request is only logged. No state is changed
            and the run is not interrupted by this handler.
        """
        workflow.logger.info(
            "Assembly cancellation requested",
            extra={
                "assembly_id": self.assembly_id,
                "reason": reason,
                "current_step": self.current_step,
            },
        )

        # Future: implement graceful cancellation here.
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
# Retry policy exported for the extract/assemble workflow: up to 5 attempts
# with exponential backoff from 1 second, doubling, capped at 5 minutes.
EXTRACT_ASSEMBLE_RETRY_POLICY = RetryPolicy(
    maximum_attempts=5,
    initial_interval=timedelta(seconds=1),
    backoff_coefficient=2.0,
    maximum_interval=timedelta(minutes=5),
    # ValueError is treated as a validation error and is not retried.
    non_retryable_error_types=["ValueError"],
)
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Temporal workflow for document validation operations.
|
|
3
|
+
|
|
4
|
+
This workflow orchestrates the ValidateDocumentUseCase with Temporal's
|
|
5
|
+
durability guarantees, providing retry logic, state management, and
|
|
6
|
+
compensation for the document validation process.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
from temporalio import workflow
|
|
11
|
+
from temporalio.common import RetryPolicy
|
|
12
|
+
from datetime import timedelta
|
|
13
|
+
|
|
14
|
+
from julee.domain.models.policy import DocumentPolicyValidation
|
|
15
|
+
from julee.domain.use_cases import ValidateDocumentUseCase
|
|
16
|
+
from julee.repositories.temporal.proxies import (
|
|
17
|
+
WorkflowDocumentRepositoryProxy,
|
|
18
|
+
WorkflowKnowledgeServiceConfigRepositoryProxy,
|
|
19
|
+
WorkflowKnowledgeServiceQueryRepositoryProxy,
|
|
20
|
+
)
|
|
21
|
+
from julee.services.temporal.proxies import (
|
|
22
|
+
WorkflowKnowledgeServiceProxy,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@workflow.defn
class ValidateDocumentWorkflow:
    """
    Temporal workflow for document validation operations.

    This workflow:
    1. Receives document_id and policy_id
    2. Builds a ValidateDocumentUseCase on top of workflow proxy objects
    3. Awaits the use case and tracks progress in ``current_step``
    4. Returns the resulting DocumentPolicyValidation object

    All business logic is delegated to the use case; this class only
    handles orchestration, logging, and queryable state.
    """

    def __init__(self) -> None:
        # Progress marker and result ID, readable by clients via queries.
        self.current_step = "initialized"
        self.validation_id: str | None = None

    @workflow.query
    def get_current_step(self) -> str:
        """Query method to get the current workflow step"""
        return self.current_step

    @workflow.query
    def get_validation_id(self) -> str | None:
        """Query method to get the validation ID once created"""
        return self.validation_id

    @workflow.run
    async def run(self, document_id: str, policy_id: str) -> DocumentPolicyValidation:
        """
        Execute the document validation workflow.

        Args:
            document_id: ID of the document to validate
            policy_id: ID of the policy to validate against

        Returns:
            The DocumentPolicyValidation returned by the use case

        Raises:
            ValueError: If the policy repository proxies cannot be imported
                (the original ImportError is attached as the cause).
            Exception: Any other error from building or running the use
                case is logged and re-raised unchanged.
        """
        workflow.logger.info(
            "Starting document validation workflow",
            extra={
                "document_id": document_id,
                "policy_id": policy_id,
                "workflow_id": workflow.info().workflow_id,
                "run_id": workflow.info().run_id,
            },
        )

        self.current_step = "initializing_repositories"

        try:
            # Proxy objects stand in for the real repositories inside the
            # workflow.
            document_repo = WorkflowDocumentRepositoryProxy()  # type: ignore[abstract]
            knowledge_service_query_repo = (
                WorkflowKnowledgeServiceQueryRepositoryProxy()  # type: ignore[abstract]
            )
            knowledge_service_config_repo = (
                WorkflowKnowledgeServiceConfigRepositoryProxy()  # type: ignore[abstract]
            )

            workflow.logger.debug(
                "Repository proxies created",
                extra={
                    "document_id": document_id,
                    "policy_id": policy_id,
                },
            )

            self.current_step = "creating_use_case"

            # Knowledge service proxy, created after the repositories.
            knowledge_service = WorkflowKnowledgeServiceProxy()  # type: ignore[abstract]

            # The policy proxies are imported lazily so that a build
            # without them fails with a clear message.
            try:
                from julee.repositories.temporal.proxies import (
                    WorkflowPolicyRepositoryProxy,
                    WorkflowDocumentPolicyValidationRepositoryProxy,
                )

                policy_repo = WorkflowPolicyRepositoryProxy()  # type: ignore[abstract]
                document_policy_validation_repo = (
                    WorkflowDocumentPolicyValidationRepositoryProxy()  # type: ignore[abstract]
                )
            except ImportError as err:
                workflow.logger.warning(
                    "Policy repository proxies not found, workflow may fail"
                )
                # Chain explicitly so the ImportError is kept as __cause__.
                raise ValueError(
                    "Policy repository proxies required for validation workflow"
                ) from err

            # The use case gets workflow.now as its clock function.
            use_case = ValidateDocumentUseCase(
                document_repo=document_repo,
                knowledge_service_query_repo=knowledge_service_query_repo,
                knowledge_service_config_repo=knowledge_service_config_repo,
                policy_repo=policy_repo,
                document_policy_validation_repo=document_policy_validation_repo,
                knowledge_service=knowledge_service,
                now_fn=workflow.now,
            )

            workflow.logger.debug(
                "Use case created successfully",
                extra={
                    "document_id": document_id,
                    "policy_id": policy_id,
                },
            )

            self.current_step = "executing_validation"

            # Run the validation through the use case.
            validation = await use_case.validate_document(
                document_id=document_id,
                policy_id=policy_id,
            )

            # Record the result so get_validation_id can report it.
            self.validation_id = validation.validation_id

            self.current_step = "completed"

            workflow.logger.info(
                "Document validation workflow completed successfully",
                extra={
                    "document_id": document_id,
                    "policy_id": policy_id,
                    "validation_id": validation.validation_id,
                    "status": validation.status.value,
                    "passed": validation.passed,
                },
            )

            return validation

        except Exception as e:
            self.current_step = "failed"

            workflow.logger.error(
                "Document validation workflow failed",
                extra={
                    "document_id": document_id,
                    "policy_id": policy_id,
                    "validation_id": self.validation_id,
                    "error": str(e),
                    "error_type": type(e).__name__,
                },
                exc_info=True,
            )

            # Propagate unchanged so Temporal sees the original failure.
            raise

    @workflow.signal
    async def cancel_validation(self, reason: str) -> None:
        """
        Signal handler for a cancellation request.

        Args:
            reason: Reason for cancellation

        Note:
            Placeholder: the request is only logged. No state is changed
            and the run is not interrupted by this handler.
        """
        workflow.logger.info(
            "Validation cancellation requested",
            extra={
                "validation_id": self.validation_id,
                "reason": reason,
                "current_step": self.current_step,
            },
        )

        # Future: implement graceful cancellation here.
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
# Retry policy exported for the document validation workflow: up to 3
# attempts with exponential backoff from 1 second, doubling, capped at
# 5 minutes.
VALIDATE_DOCUMENT_RETRY_POLICY = RetryPolicy(
    maximum_attempts=3,
    initial_interval=timedelta(seconds=1),
    backoff_coefficient=2.0,
    maximum_interval=timedelta(minutes=5),
    # ValueError is treated as a validation error and is not retried.
    non_retryable_error_types=["ValueError"],
)
|