julee 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. julee/__init__.py +3 -0
  2. julee/api/__init__.py +20 -0
  3. julee/api/app.py +180 -0
  4. julee/api/dependencies.py +257 -0
  5. julee/api/requests.py +175 -0
  6. julee/api/responses.py +43 -0
  7. julee/api/routers/__init__.py +43 -0
  8. julee/api/routers/assembly_specifications.py +212 -0
  9. julee/api/routers/documents.py +182 -0
  10. julee/api/routers/knowledge_service_configs.py +79 -0
  11. julee/api/routers/knowledge_service_queries.py +293 -0
  12. julee/api/routers/system.py +137 -0
  13. julee/api/routers/workflows.py +234 -0
  14. julee/api/services/__init__.py +20 -0
  15. julee/api/services/system_initialization.py +214 -0
  16. julee/api/tests/__init__.py +14 -0
  17. julee/api/tests/routers/__init__.py +17 -0
  18. julee/api/tests/routers/test_assembly_specifications.py +749 -0
  19. julee/api/tests/routers/test_documents.py +301 -0
  20. julee/api/tests/routers/test_knowledge_service_configs.py +234 -0
  21. julee/api/tests/routers/test_knowledge_service_queries.py +738 -0
  22. julee/api/tests/routers/test_system.py +179 -0
  23. julee/api/tests/routers/test_workflows.py +393 -0
  24. julee/api/tests/test_app.py +285 -0
  25. julee/api/tests/test_dependencies.py +245 -0
  26. julee/api/tests/test_requests.py +250 -0
  27. julee/domain/__init__.py +22 -0
  28. julee/domain/models/__init__.py +49 -0
  29. julee/domain/models/assembly/__init__.py +17 -0
  30. julee/domain/models/assembly/assembly.py +103 -0
  31. julee/domain/models/assembly/tests/__init__.py +0 -0
  32. julee/domain/models/assembly/tests/factories.py +37 -0
  33. julee/domain/models/assembly/tests/test_assembly.py +430 -0
  34. julee/domain/models/assembly_specification/__init__.py +24 -0
  35. julee/domain/models/assembly_specification/assembly_specification.py +172 -0
  36. julee/domain/models/assembly_specification/knowledge_service_query.py +123 -0
  37. julee/domain/models/assembly_specification/tests/__init__.py +0 -0
  38. julee/domain/models/assembly_specification/tests/factories.py +78 -0
  39. julee/domain/models/assembly_specification/tests/test_assembly_specification.py +490 -0
  40. julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +310 -0
  41. julee/domain/models/custom_fields/__init__.py +0 -0
  42. julee/domain/models/custom_fields/content_stream.py +68 -0
  43. julee/domain/models/custom_fields/tests/__init__.py +0 -0
  44. julee/domain/models/custom_fields/tests/test_custom_fields.py +53 -0
  45. julee/domain/models/document/__init__.py +17 -0
  46. julee/domain/models/document/document.py +150 -0
  47. julee/domain/models/document/tests/__init__.py +0 -0
  48. julee/domain/models/document/tests/factories.py +76 -0
  49. julee/domain/models/document/tests/test_document.py +297 -0
  50. julee/domain/models/knowledge_service_config/__init__.py +17 -0
  51. julee/domain/models/knowledge_service_config/knowledge_service_config.py +86 -0
  52. julee/domain/models/policy/__init__.py +15 -0
  53. julee/domain/models/policy/document_policy_validation.py +220 -0
  54. julee/domain/models/policy/policy.py +203 -0
  55. julee/domain/models/policy/tests/__init__.py +0 -0
  56. julee/domain/models/policy/tests/factories.py +47 -0
  57. julee/domain/models/policy/tests/test_document_policy_validation.py +420 -0
  58. julee/domain/models/policy/tests/test_policy.py +546 -0
  59. julee/domain/repositories/__init__.py +27 -0
  60. julee/domain/repositories/assembly.py +45 -0
  61. julee/domain/repositories/assembly_specification.py +52 -0
  62. julee/domain/repositories/base.py +146 -0
  63. julee/domain/repositories/document.py +49 -0
  64. julee/domain/repositories/document_policy_validation.py +52 -0
  65. julee/domain/repositories/knowledge_service_config.py +54 -0
  66. julee/domain/repositories/knowledge_service_query.py +44 -0
  67. julee/domain/repositories/policy.py +49 -0
  68. julee/domain/use_cases/__init__.py +17 -0
  69. julee/domain/use_cases/decorators.py +107 -0
  70. julee/domain/use_cases/extract_assemble_data.py +649 -0
  71. julee/domain/use_cases/initialize_system_data.py +842 -0
  72. julee/domain/use_cases/tests/__init__.py +7 -0
  73. julee/domain/use_cases/tests/test_extract_assemble_data.py +548 -0
  74. julee/domain/use_cases/tests/test_initialize_system_data.py +455 -0
  75. julee/domain/use_cases/tests/test_validate_document.py +1228 -0
  76. julee/domain/use_cases/validate_document.py +736 -0
  77. julee/fixtures/assembly_specifications.yaml +70 -0
  78. julee/fixtures/documents.yaml +178 -0
  79. julee/fixtures/knowledge_service_configs.yaml +37 -0
  80. julee/fixtures/knowledge_service_queries.yaml +27 -0
  81. julee/repositories/__init__.py +17 -0
  82. julee/repositories/memory/__init__.py +31 -0
  83. julee/repositories/memory/assembly.py +84 -0
  84. julee/repositories/memory/assembly_specification.py +125 -0
  85. julee/repositories/memory/base.py +227 -0
  86. julee/repositories/memory/document.py +149 -0
  87. julee/repositories/memory/document_policy_validation.py +104 -0
  88. julee/repositories/memory/knowledge_service_config.py +123 -0
  89. julee/repositories/memory/knowledge_service_query.py +120 -0
  90. julee/repositories/memory/policy.py +87 -0
  91. julee/repositories/memory/tests/__init__.py +0 -0
  92. julee/repositories/memory/tests/test_document.py +212 -0
  93. julee/repositories/memory/tests/test_document_policy_validation.py +161 -0
  94. julee/repositories/memory/tests/test_policy.py +443 -0
  95. julee/repositories/minio/__init__.py +31 -0
  96. julee/repositories/minio/assembly.py +103 -0
  97. julee/repositories/minio/assembly_specification.py +170 -0
  98. julee/repositories/minio/client.py +570 -0
  99. julee/repositories/minio/document.py +530 -0
  100. julee/repositories/minio/document_policy_validation.py +120 -0
  101. julee/repositories/minio/knowledge_service_config.py +187 -0
  102. julee/repositories/minio/knowledge_service_query.py +211 -0
  103. julee/repositories/minio/policy.py +106 -0
  104. julee/repositories/minio/tests/__init__.py +0 -0
  105. julee/repositories/minio/tests/fake_client.py +213 -0
  106. julee/repositories/minio/tests/test_assembly.py +374 -0
  107. julee/repositories/minio/tests/test_assembly_specification.py +391 -0
  108. julee/repositories/minio/tests/test_client_protocol.py +57 -0
  109. julee/repositories/minio/tests/test_document.py +591 -0
  110. julee/repositories/minio/tests/test_document_policy_validation.py +192 -0
  111. julee/repositories/minio/tests/test_knowledge_service_config.py +374 -0
  112. julee/repositories/minio/tests/test_knowledge_service_query.py +438 -0
  113. julee/repositories/minio/tests/test_policy.py +559 -0
  114. julee/repositories/temporal/__init__.py +38 -0
  115. julee/repositories/temporal/activities.py +114 -0
  116. julee/repositories/temporal/activity_names.py +34 -0
  117. julee/repositories/temporal/proxies.py +159 -0
  118. julee/services/__init__.py +18 -0
  119. julee/services/knowledge_service/__init__.py +48 -0
  120. julee/services/knowledge_service/anthropic/__init__.py +12 -0
  121. julee/services/knowledge_service/anthropic/knowledge_service.py +331 -0
  122. julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +318 -0
  123. julee/services/knowledge_service/factory.py +138 -0
  124. julee/services/knowledge_service/knowledge_service.py +160 -0
  125. julee/services/knowledge_service/memory/__init__.py +13 -0
  126. julee/services/knowledge_service/memory/knowledge_service.py +278 -0
  127. julee/services/knowledge_service/memory/test_knowledge_service.py +345 -0
  128. julee/services/knowledge_service/test_factory.py +112 -0
  129. julee/services/temporal/__init__.py +38 -0
  130. julee/services/temporal/activities.py +86 -0
  131. julee/services/temporal/activity_names.py +22 -0
  132. julee/services/temporal/proxies.py +41 -0
  133. julee/util/__init__.py +0 -0
  134. julee/util/domain.py +119 -0
  135. julee/util/repos/__init__.py +0 -0
  136. julee/util/repos/minio/__init__.py +0 -0
  137. julee/util/repos/minio/file_storage.py +213 -0
  138. julee/util/repos/temporal/__init__.py +11 -0
  139. julee/util/repos/temporal/client_proxies/file_storage.py +68 -0
  140. julee/util/repos/temporal/data_converter.py +123 -0
  141. julee/util/repos/temporal/minio_file_storage.py +12 -0
  142. julee/util/repos/temporal/proxies/__init__.py +0 -0
  143. julee/util/repos/temporal/proxies/file_storage.py +58 -0
  144. julee/util/repositories.py +55 -0
  145. julee/util/temporal/__init__.py +22 -0
  146. julee/util/temporal/activities.py +123 -0
  147. julee/util/temporal/decorators.py +473 -0
  148. julee/util/tests/__init__.py +1 -0
  149. julee/util/tests/test_decorators.py +770 -0
  150. julee/util/validation/__init__.py +29 -0
  151. julee/util/validation/repository.py +100 -0
  152. julee/util/validation/type_guards.py +369 -0
  153. julee/worker.py +211 -0
  154. julee/workflows/__init__.py +26 -0
  155. julee/workflows/extract_assemble.py +215 -0
  156. julee/workflows/validate_document.py +228 -0
  157. julee-0.1.0.dist-info/METADATA +195 -0
  158. julee-0.1.0.dist-info/RECORD +161 -0
  159. julee-0.1.0.dist-info/WHEEL +5 -0
  160. julee-0.1.0.dist-info/licenses/LICENSE +674 -0
  161. julee-0.1.0.dist-info/top_level.txt +1 -0
julee/worker.py ADDED
@@ -0,0 +1,211 @@
1
+ """
2
+ Temporal worker for julee domain workflows and activities.
3
+
4
+ This worker runs workflows and activities for document processing,
5
+ assembly, and knowledge service operations within the julee domain.
6
+ """
7
+
8
+ import asyncio
9
+ import logging
10
+ import os
11
+ from temporalio.client import Client
12
+ from temporalio.service import RPCError
13
+ from temporalio.worker import Worker
14
+ from julee.util.repos.temporal.data_converter import temporal_data_converter
15
+
16
+ from julee.workflows import (
17
+ ExtractAssembleWorkflow,
18
+ ValidateDocumentWorkflow,
19
+ )
20
+ from julee.repositories.temporal.activities import (
21
+ TemporalMinioAssemblyRepository,
22
+ TemporalMinioAssemblySpecificationRepository,
23
+ TemporalMinioDocumentRepository,
24
+ TemporalMinioKnowledgeServiceConfigRepository,
25
+ TemporalMinioKnowledgeServiceQueryRepository,
26
+ TemporalMinioPolicyRepository,
27
+ TemporalMinioDocumentPolicyValidationRepository,
28
+ )
29
+ from julee.services.temporal.activities import (
30
+ TemporalKnowledgeService,
31
+ )
32
+ from minio import Minio
33
+ from julee.repositories.minio.client import MinioClient
34
+ from julee.util.temporal.activities import collect_activities_from_instances
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+
def setup_logging() -> None:
    """Configure root logging from the LOG_LEVEL and LOG_FORMAT environment variables.

    LOG_LEVEL defaults to "INFO" and is upper-cased before being looked up
    as an attribute of the ``logging`` module. A value that does not resolve
    to an integer is reported on stdout and replaced by ``logging.INFO``.
    LOG_FORMAT defaults to a timestamp / logger name / level / message layout.
    """
    requested_level = os.environ.get("LOG_LEVEL", "INFO").upper()
    record_format = os.environ.get(
        "LOG_FORMAT", "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )

    # Translate the level name into its numeric constant, falling back to INFO.
    # print() is used because logging is not configured yet at this point.
    resolved = getattr(logging, requested_level, None)
    if isinstance(resolved, int):
        effective_level = resolved
    else:
        print(f"Invalid log level: {requested_level}, defaulting to INFO")
        effective_level = logging.INFO

    # force=True overrides any logging configuration that already exists.
    logging.basicConfig(level=effective_level, format=record_format, force=True)

    logger.info(
        "Logging configured",
        extra={"log_level": requested_level, "numeric_level": effective_level},
    )
62
+
63
+
async def get_temporal_client_with_retries(
    endpoint: str, attempts: int = 10, delay: int = 5
) -> Client:
    """Connect to Temporal, retrying when the connection attempt fails.

    Args:
        endpoint: Address of the Temporal server, passed to ``Client.connect``.
        attempts: Maximum number of connection attempts.
        delay: Seconds to sleep between failed attempts.

    Returns:
        A client connected to the ``default`` namespace and configured with
        ``temporal_data_converter``.

    Raises:
        RPCError: If the final attempt fails with an RPC error.
        RuntimeError: If the final attempt fails with a runtime error, or if
            ``attempts`` is less than 1 so that no attempt is made at all.
    """
    logger.debug(
        "Attempting to connect to Temporal",
        extra={
            "endpoint": endpoint,
            "max_attempts": attempts,
            "delay_seconds": delay,
        },
    )

    for attempt_number in range(1, attempts + 1):
        try:
            # Use the proper Pydantic v2 data converter and connect to the
            # 'default' namespace
            client = await Client.connect(
                endpoint,
                data_converter=temporal_data_converter,
                namespace="default",
            )
        # NOTE(review): the SDK's native bridge appears to report an
        # unreachable server as RuntimeError rather than RPCError, so both are
        # treated as retryable here - confirm against the pinned temporalio
        # version.
        except (RPCError, RuntimeError) as exc:
            if attempt_number == attempts:
                logger.error(
                    "All connection attempts to Temporal failed",
                    extra={
                        "endpoint": endpoint,
                        "total_attempts": attempts,
                        "error": str(exc),
                    },
                )
                raise

            # Only announce a retry when one is actually going to happen.
            logger.warning(
                "Failed to connect to Temporal",
                extra={
                    "endpoint": endpoint,
                    "attempt": attempt_number,
                    "max_attempts": attempts,
                    "error": str(exc),
                    "retry_in_seconds": delay,
                },
            )
            await asyncio.sleep(delay)
        else:
            logger.info(
                "Successfully connected to Temporal",
                extra={
                    "endpoint": endpoint,
                    "attempt": attempt_number,
                    "data_converter_type": type(client.data_converter).__name__,
                },
            )
            return client

    # Reached only when attempts < 1 (the loop body never ran); it also keeps
    # mypy satisfied that every path returns or raises.
    raise RuntimeError("Failed to connect to Temporal after all attempts")
117
+
118
+
async def run_worker() -> None:
    """Run the Temporal worker for the julee domain.

    Configures logging, connects to Temporal, builds the MinIO-backed
    repository and knowledge-service activity implementations, registers
    them together with the julee workflows on a single task queue, and then
    runs the worker.

    Environment variables:
        TEMPORAL_ENDPOINT: Temporal server address (default ``localhost:7234``).
        MINIO_ENDPOINT: MinIO server address (default ``localhost:9000``).
        MINIO_ACCESS_KEY: MinIO access key (default ``minioadmin``).
        MINIO_SECRET_KEY: MinIO secret key (default ``minioadmin``).
        MINIO_SECURE: ``1``, ``true`` or ``yes`` (case-insensitive) enables
            TLS for the MinIO connection (default: disabled).
    """
    # Setup logging first
    setup_logging()

    # Connect to Temporal server using environment variable
    temporal_endpoint = os.environ.get("TEMPORAL_ENDPOINT", "localhost:7234")
    logger.info(
        "Starting julee Temporal worker",
        extra={"temporal_endpoint": temporal_endpoint},
    )

    client = await get_temporal_client_with_retries(temporal_endpoint)

    # Read MinIO connection settings. The defaults match the previously
    # hard-coded development values, so existing deployments are unaffected.
    logger.debug("Preparing repository configurations")
    minio_endpoint = os.environ.get("MINIO_ENDPOINT", "localhost:9000")
    minio_access_key = os.environ.get("MINIO_ACCESS_KEY", "minioadmin")
    minio_secret_key = os.environ.get("MINIO_SECRET_KEY", "minioadmin")
    minio_secure = os.environ.get("MINIO_SECURE", "false").strip().lower() in {
        "1",
        "true",
        "yes",
    }

    # Create Minio client for repositories
    # minio.Minio implements the MinioClient protocol
    minio_client: MinioClient = Minio(  # type: ignore[assignment]
        endpoint=minio_endpoint,
        access_key=minio_access_key,
        secret_key=minio_secret_key,
        secure=minio_secure,
    )

    # Instantiate temporal repository classes for activity registration
    logger.debug("Creating Temporal Activity repository implementations")
    temporal_assembly_repo = TemporalMinioAssemblyRepository(client=minio_client)
    temporal_assembly_spec_repo = TemporalMinioAssemblySpecificationRepository(
        client=minio_client
    )
    temporal_document_repo = TemporalMinioDocumentRepository(client=minio_client)
    temporal_knowledge_config_repo = TemporalMinioKnowledgeServiceConfigRepository(
        client=minio_client
    )
    temporal_knowledge_query_repo = TemporalMinioKnowledgeServiceQueryRepository(
        client=minio_client
    )

    # Create policy repositories for validation workflow
    temporal_policy_repo = TemporalMinioPolicyRepository(client=minio_client)
    temporal_document_policy_validation_repo = (
        TemporalMinioDocumentPolicyValidationRepository(client=minio_client)
    )

    # Create temporal knowledge service for activity registration
    # Pass the document repository for dependency injection
    temporal_knowledge_service = TemporalKnowledgeService(
        document_repo=temporal_document_repo
    )

    # Collect the activities exposed by every decorated instance in one call
    # instead of listing each activity method by hand.
    activities = collect_activities_from_instances(
        temporal_assembly_repo,
        temporal_assembly_spec_repo,
        temporal_document_repo,
        temporal_knowledge_config_repo,
        temporal_knowledge_query_repo,
        temporal_policy_repo,
        temporal_document_policy_validation_repo,
        temporal_knowledge_service,
    )

    # Single source of truth for the values that are both logged and passed
    # to the Worker, so the log line cannot drift from the real registration.
    task_queue = "julee-extract-assemble-queue"
    workflows = [ExtractAssembleWorkflow, ValidateDocumentWorkflow]

    logger.info(
        "Creating Temporal worker for julee domain",
        extra={
            "task_queue": task_queue,
            "workflow_count": len(workflows),
            "activity_count": len(activities),
            "data_converter_type": type(client.data_converter).__name__,
        },
    )

    # Create the worker. No retry policy is configured here; only the task
    # queue, workflows and activities are registered.
    worker = Worker(
        client,
        task_queue=task_queue,
        workflows=workflows,
        activities=activities,  # type: ignore[arg-type]
    )

    logger.info("Starting julee worker execution")

    # Run the worker
    await worker.run()
208
+
209
+
# Script entry point: only start the worker when this module is executed
# directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(run_worker())
julee/workflows/__init__.py ADDED
@@ -0,0 +1,26 @@
1
+ """
2
+ Temporal workflows for the julee domain.
3
+
4
+ This package contains Temporal workflow definitions that orchestrate
5
+ use cases with durability guarantees, retry logic, and state management.
6
+
7
+ Workflows in this package:
8
+ - ExtractAssembleWorkflow: Orchestrates document extraction and assembly
9
+ - ValidateDocumentWorkflow: Orchestrates document validation against policies
10
+ """
11
+
12
+ from .extract_assemble import (
13
+ ExtractAssembleWorkflow,
14
+ EXTRACT_ASSEMBLE_RETRY_POLICY,
15
+ )
16
+ from .validate_document import (
17
+ ValidateDocumentWorkflow,
18
+ VALIDATE_DOCUMENT_RETRY_POLICY,
19
+ )
20
+
21
+ __all__ = [
22
+ "ExtractAssembleWorkflow",
23
+ "EXTRACT_ASSEMBLE_RETRY_POLICY",
24
+ "ValidateDocumentWorkflow",
25
+ "VALIDATE_DOCUMENT_RETRY_POLICY",
26
+ ]
julee/workflows/extract_assemble.py ADDED
@@ -0,0 +1,215 @@
1
+ """
2
+ Temporal workflow for extract and assemble data operations.
3
+
4
+ This workflow orchestrates the ExtractAssembleDataUseCase with Temporal's
5
+ durability guarantees, providing retry logic, state management, and
6
+ compensation for the complex document assembly process.
7
+ """
8
+
9
+ import logging
10
+ from temporalio import workflow
11
+ from temporalio.common import RetryPolicy
12
+ from datetime import timedelta
13
+
14
+ from julee.domain.models.assembly import Assembly
15
+ from julee.domain.use_cases import ExtractAssembleDataUseCase
16
+ from julee.repositories.temporal.proxies import (
17
+ WorkflowAssemblyRepositoryProxy,
18
+ WorkflowAssemblySpecificationRepositoryProxy,
19
+ WorkflowDocumentRepositoryProxy,
20
+ WorkflowKnowledgeServiceConfigRepositoryProxy,
21
+ WorkflowKnowledgeServiceQueryRepositoryProxy,
22
+ )
23
+ from julee.services.temporal.proxies import (
24
+ WorkflowKnowledgeServiceProxy,
25
+ )
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
@workflow.defn
class ExtractAssembleWorkflow:
    """
    Temporal workflow that runs ExtractAssembleDataUseCase for one document.

    Given a document ID and an assembly specification ID, the workflow wires
    the use case to workflow-side repository and knowledge-service proxies,
    awaits ``assemble_data`` and returns the resulting Assembly.

    Progress is exposed through two queries: ``get_current_step`` and
    ``get_assembly_id``. All business logic lives in the use case; this class
    only handles orchestration, progress tracking and logging.
    """

    def __init__(self) -> None:
        # Progress marker reported by the get_current_step query.
        self.current_step = "initialized"
        # Set once assemble_data has returned an Assembly.
        self.assembly_id: str | None = None

    @workflow.query
    def get_current_step(self) -> str:
        """Return the name of the step the workflow is currently in."""
        return self.current_step

    @workflow.query
    def get_assembly_id(self) -> str | None:
        """Return the assembly ID, or None if no assembly has been produced yet."""
        return self.assembly_id

    @workflow.run
    async def run(self, document_id: str, assembly_specification_id: str) -> Assembly:
        """
        Execute the extract and assemble workflow.

        Args:
            document_id: ID of the document to assemble
            assembly_specification_id: ID of the specification to use

        Returns:
            The Assembly returned by the use case's ``assemble_data`` call

        Raises:
            Exception: Any error raised while building the proxies or running
                the use case is logged and re-raised unchanged.
        """
        info = workflow.info()
        # Fields shared by every log record emitted from this run.
        request_context = {
            "document_id": document_id,
            "assembly_specification_id": assembly_specification_id,
        }

        workflow.logger.info(
            "Starting extract assemble workflow",
            extra={
                **request_context,
                "workflow_id": info.workflow_id,
                "run_id": info.run_id,
            },
        )

        self.current_step = "initializing_repositories"

        try:
            # Workflow-side proxies standing in for the real repositories.
            document_repo = WorkflowDocumentRepositoryProxy()  # type: ignore[abstract]
            assembly_repo = WorkflowAssemblyRepositoryProxy()  # type: ignore[abstract]
            assembly_specification_repo = (
                WorkflowAssemblySpecificationRepositoryProxy()  # type: ignore[abstract]
            )
            knowledge_service_query_repo = (
                WorkflowKnowledgeServiceQueryRepositoryProxy()  # type: ignore[abstract]
            )
            knowledge_service_config_repo = (
                WorkflowKnowledgeServiceConfigRepositoryProxy()  # type: ignore[abstract]
            )

            workflow.logger.debug(
                "Repository proxies created", extra={**request_context}
            )

            self.current_step = "creating_use_case"

            # Workflow-side proxy for the knowledge service.
            knowledge_service = WorkflowKnowledgeServiceProxy()  # type: ignore[abstract]

            # The use case receives only proxies plus workflow.now as its
            # clock, so it needs no knowledge of Temporal.
            use_case = ExtractAssembleDataUseCase(
                document_repo=document_repo,
                assembly_repo=assembly_repo,
                assembly_specification_repo=assembly_specification_repo,
                knowledge_service_query_repo=knowledge_service_query_repo,
                knowledge_service_config_repo=knowledge_service_config_repo,
                knowledge_service=knowledge_service,
                now_fn=workflow.now,
            )

            workflow.logger.debug(
                "Use case created successfully", extra={**request_context}
            )

            self.current_step = "executing_assembly"

            assembly = await use_case.assemble_data(
                document_id=document_id,
                assembly_specification_id=assembly_specification_id,
                workflow_id=info.workflow_id,
            )

            # Make the result visible to the get_assembly_id query.
            self.assembly_id = assembly.assembly_id
            self.current_step = "completed"

            workflow.logger.info(
                "Extract assemble workflow completed successfully",
                extra={
                    **request_context,
                    "assembly_id": assembly.assembly_id,
                    "assembled_document_id": assembly.assembled_document_id,
                    "status": assembly.status.value,
                },
            )

            return assembly

        except Exception as e:
            self.current_step = "failed"

            workflow.logger.error(
                "Extract assemble workflow failed",
                extra={
                    **request_context,
                    "assembly_id": self.assembly_id,
                    "error": str(e),
                    "error_type": type(e).__name__,
                },
                exc_info=True,
            )

            # Propagate the original exception to Temporal unchanged.
            raise

    @workflow.signal
    async def cancel_assembly(self, reason: str) -> None:
        """
        Signal handler that records a cancellation request.

        Args:
            reason: Reason for cancellation

        Note:
            The handler only logs the request; it does not stop the run.
            Actual cancellation is left to Temporal's built-in workflow
            cancellation.
        """
        workflow.logger.info(
            "Assembly cancellation requested",
            extra={
                "assembly_id": self.assembly_id,
                "reason": reason,
                "current_step": self.current_step,
            },
        )

        # Future: Implement graceful cancellation logic here
        # For now, let the workflow be cancelled naturally by Temporal
206
+
207
+
# Retry settings for document extract/assemble processing: the first retry
# waits one second, the backoff coefficient is 2.0, the interval is capped at
# five minutes, and at most five attempts are made. ValueError is marked
# non-retryable so validation errors are not retried.
EXTRACT_ASSEMBLE_RETRY_POLICY = RetryPolicy(
    maximum_attempts=5,
    initial_interval=timedelta(seconds=1),
    backoff_coefficient=2.0,
    maximum_interval=timedelta(minutes=5),
    non_retryable_error_types=["ValueError"],
)
julee/workflows/validate_document.py ADDED
@@ -0,0 +1,228 @@
1
+ """
2
+ Temporal workflow for document validation operations.
3
+
4
+ This workflow orchestrates the ValidateDocumentUseCase with Temporal's
5
+ durability guarantees, providing retry logic, state management, and
6
+ compensation for the document validation process.
7
+ """
8
+
9
+ import logging
10
+ from temporalio import workflow
11
+ from temporalio.common import RetryPolicy
12
+ from datetime import timedelta
13
+
14
+ from julee.domain.models.policy import DocumentPolicyValidation
15
+ from julee.domain.use_cases import ValidateDocumentUseCase
16
+ from julee.repositories.temporal.proxies import (
17
+ WorkflowDocumentRepositoryProxy,
18
+ WorkflowKnowledgeServiceConfigRepositoryProxy,
19
+ WorkflowKnowledgeServiceQueryRepositoryProxy,
20
+ )
21
+ from julee.services.temporal.proxies import (
22
+ WorkflowKnowledgeServiceProxy,
23
+ )
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
@workflow.defn
class ValidateDocumentWorkflow:
    """
    Temporal workflow for document validation operations.

    This workflow:
    1. Receives document_id and policy_id
    2. Wires ValidateDocumentUseCase to workflow-side repository and
       knowledge-service proxies
    3. Awaits the use case's ``validate_document`` call
    4. Returns the resulting DocumentPolicyValidation object

    Progress is exposed through the ``get_current_step`` and
    ``get_validation_id`` queries. All business logic lives in the use case;
    this class only handles orchestration, progress tracking and logging.
    """

    def __init__(self) -> None:
        # Progress marker reported by the get_current_step query.
        self.current_step = "initialized"
        # Set once validate_document has returned a result.
        self.validation_id: str | None = None

    @workflow.query
    def get_current_step(self) -> str:
        """Query method to get the current workflow step"""
        return self.current_step

    @workflow.query
    def get_validation_id(self) -> str | None:
        """Query method to get the validation ID once created"""
        return self.validation_id

    @workflow.run
    async def run(self, document_id: str, policy_id: str) -> DocumentPolicyValidation:
        """
        Execute the document validation workflow.

        Args:
            document_id: ID of the document to validate
            policy_id: ID of the policy to validate against

        Returns:
            The DocumentPolicyValidation returned by the use case

        Raises:
            ValueError: If the policy repository proxies cannot be imported
                (chained from the underlying ImportError).
            Exception: Any other error raised while building the proxies or
                running the use case is logged and re-raised unchanged.
        """
        info = workflow.info()
        # Fields shared by every log record emitted from this run.
        request_context = {
            "document_id": document_id,
            "policy_id": policy_id,
        }

        workflow.logger.info(
            "Starting document validation workflow",
            extra={
                **request_context,
                "workflow_id": info.workflow_id,
                "run_id": info.run_id,
            },
        )

        self.current_step = "initializing_repositories"

        try:
            # Workflow-side proxies standing in for the real repositories.
            document_repo = WorkflowDocumentRepositoryProxy()  # type: ignore[abstract]
            knowledge_service_query_repo = (
                WorkflowKnowledgeServiceQueryRepositoryProxy()  # type: ignore[abstract]
            )
            knowledge_service_config_repo = (
                WorkflowKnowledgeServiceConfigRepositoryProxy()  # type: ignore[abstract]
            )

            workflow.logger.debug(
                "Repository proxies created", extra={**request_context}
            )

            self.current_step = "creating_use_case"

            # Create workflow-safe knowledge service proxy
            knowledge_service = WorkflowKnowledgeServiceProxy()  # type: ignore[abstract]

            # The policy proxies are imported lazily so that a missing proxy
            # implementation surfaces as a clear error from this workflow.
            # Only the import is guarded; constructor errors propagate as-is.
            try:
                from julee.repositories.temporal.proxies import (
                    WorkflowPolicyRepositoryProxy,
                    WorkflowDocumentPolicyValidationRepositoryProxy,
                )
            except ImportError as import_error:
                workflow.logger.warning(
                    "Policy repository proxies not found, workflow may fail"
                )
                # Chain the ImportError so the root cause stays in the traceback.
                raise ValueError(
                    "Policy repository proxies required for validation workflow"
                ) from import_error

            policy_repo = WorkflowPolicyRepositoryProxy()  # type: ignore[abstract]
            document_policy_validation_repo = (
                WorkflowDocumentPolicyValidationRepositoryProxy()  # type: ignore[abstract]
            )

            # The use case receives only proxies plus workflow.now as its
            # clock, so it needs no knowledge of Temporal.
            use_case = ValidateDocumentUseCase(
                document_repo=document_repo,
                knowledge_service_query_repo=knowledge_service_query_repo,
                knowledge_service_config_repo=knowledge_service_config_repo,
                policy_repo=policy_repo,
                document_policy_validation_repo=document_policy_validation_repo,
                knowledge_service=knowledge_service,
                now_fn=workflow.now,
            )

            workflow.logger.debug(
                "Use case created successfully", extra={**request_context}
            )

            self.current_step = "executing_validation"

            validation = await use_case.validate_document(
                document_id=document_id,
                policy_id=policy_id,
            )

            # Make the result visible to the get_validation_id query.
            self.validation_id = validation.validation_id
            self.current_step = "completed"

            workflow.logger.info(
                "Document validation workflow completed successfully",
                extra={
                    **request_context,
                    "validation_id": validation.validation_id,
                    "status": validation.status.value,
                    "passed": validation.passed,
                },
            )

            return validation

        except Exception as e:
            self.current_step = "failed"

            workflow.logger.error(
                "Document validation workflow failed",
                extra={
                    **request_context,
                    "validation_id": self.validation_id,
                    "error": str(e),
                    "error_type": type(e).__name__,
                },
                exc_info=True,
            )

            # Propagate the original exception to Temporal unchanged.
            raise

    @workflow.signal
    async def cancel_validation(self, reason: str) -> None:
        """
        Signal handler that records a cancellation request.

        Args:
            reason: Reason for cancellation

        Note:
            The handler only logs the request; it does not stop the run.
            Actual cancellation is left to Temporal's built-in workflow
            cancellation.
        """
        workflow.logger.info(
            "Validation cancellation requested",
            extra={
                "validation_id": self.validation_id,
                "reason": reason,
                "current_step": self.current_step,
            },
        )

        # Future: Implement graceful cancellation logic here
        # For now, let the workflow be cancelled naturally by Temporal
219
+
220
+
# Retry settings for document validation: the first retry waits one second,
# the backoff coefficient is 2.0, the interval is capped at five minutes, and
# at most three attempts are made. ValueError is marked non-retryable so
# validation errors are not retried.
VALIDATE_DOCUMENT_RETRY_POLICY = RetryPolicy(
    maximum_attempts=3,
    initial_interval=timedelta(seconds=1),
    backoff_coefficient=2.0,
    maximum_interval=timedelta(minutes=5),
    non_retryable_error_types=["ValueError"],
)