julee 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. julee/__init__.py +3 -0
  2. julee/api/__init__.py +20 -0
  3. julee/api/app.py +180 -0
  4. julee/api/dependencies.py +257 -0
  5. julee/api/requests.py +175 -0
  6. julee/api/responses.py +43 -0
  7. julee/api/routers/__init__.py +43 -0
  8. julee/api/routers/assembly_specifications.py +212 -0
  9. julee/api/routers/documents.py +182 -0
  10. julee/api/routers/knowledge_service_configs.py +79 -0
  11. julee/api/routers/knowledge_service_queries.py +293 -0
  12. julee/api/routers/system.py +137 -0
  13. julee/api/routers/workflows.py +234 -0
  14. julee/api/services/__init__.py +20 -0
  15. julee/api/services/system_initialization.py +214 -0
  16. julee/api/tests/__init__.py +14 -0
  17. julee/api/tests/routers/__init__.py +17 -0
  18. julee/api/tests/routers/test_assembly_specifications.py +749 -0
  19. julee/api/tests/routers/test_documents.py +301 -0
  20. julee/api/tests/routers/test_knowledge_service_configs.py +234 -0
  21. julee/api/tests/routers/test_knowledge_service_queries.py +738 -0
  22. julee/api/tests/routers/test_system.py +179 -0
  23. julee/api/tests/routers/test_workflows.py +393 -0
  24. julee/api/tests/test_app.py +285 -0
  25. julee/api/tests/test_dependencies.py +245 -0
  26. julee/api/tests/test_requests.py +250 -0
  27. julee/domain/__init__.py +22 -0
  28. julee/domain/models/__init__.py +49 -0
  29. julee/domain/models/assembly/__init__.py +17 -0
  30. julee/domain/models/assembly/assembly.py +103 -0
  31. julee/domain/models/assembly/tests/__init__.py +0 -0
  32. julee/domain/models/assembly/tests/factories.py +37 -0
  33. julee/domain/models/assembly/tests/test_assembly.py +430 -0
  34. julee/domain/models/assembly_specification/__init__.py +24 -0
  35. julee/domain/models/assembly_specification/assembly_specification.py +172 -0
  36. julee/domain/models/assembly_specification/knowledge_service_query.py +123 -0
  37. julee/domain/models/assembly_specification/tests/__init__.py +0 -0
  38. julee/domain/models/assembly_specification/tests/factories.py +78 -0
  39. julee/domain/models/assembly_specification/tests/test_assembly_specification.py +490 -0
  40. julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +310 -0
  41. julee/domain/models/custom_fields/__init__.py +0 -0
  42. julee/domain/models/custom_fields/content_stream.py +68 -0
  43. julee/domain/models/custom_fields/tests/__init__.py +0 -0
  44. julee/domain/models/custom_fields/tests/test_custom_fields.py +53 -0
  45. julee/domain/models/document/__init__.py +17 -0
  46. julee/domain/models/document/document.py +150 -0
  47. julee/domain/models/document/tests/__init__.py +0 -0
  48. julee/domain/models/document/tests/factories.py +76 -0
  49. julee/domain/models/document/tests/test_document.py +297 -0
  50. julee/domain/models/knowledge_service_config/__init__.py +17 -0
  51. julee/domain/models/knowledge_service_config/knowledge_service_config.py +86 -0
  52. julee/domain/models/policy/__init__.py +15 -0
  53. julee/domain/models/policy/document_policy_validation.py +220 -0
  54. julee/domain/models/policy/policy.py +203 -0
  55. julee/domain/models/policy/tests/__init__.py +0 -0
  56. julee/domain/models/policy/tests/factories.py +47 -0
  57. julee/domain/models/policy/tests/test_document_policy_validation.py +420 -0
  58. julee/domain/models/policy/tests/test_policy.py +546 -0
  59. julee/domain/repositories/__init__.py +27 -0
  60. julee/domain/repositories/assembly.py +45 -0
  61. julee/domain/repositories/assembly_specification.py +52 -0
  62. julee/domain/repositories/base.py +146 -0
  63. julee/domain/repositories/document.py +49 -0
  64. julee/domain/repositories/document_policy_validation.py +52 -0
  65. julee/domain/repositories/knowledge_service_config.py +54 -0
  66. julee/domain/repositories/knowledge_service_query.py +44 -0
  67. julee/domain/repositories/policy.py +49 -0
  68. julee/domain/use_cases/__init__.py +17 -0
  69. julee/domain/use_cases/decorators.py +107 -0
  70. julee/domain/use_cases/extract_assemble_data.py +649 -0
  71. julee/domain/use_cases/initialize_system_data.py +842 -0
  72. julee/domain/use_cases/tests/__init__.py +7 -0
  73. julee/domain/use_cases/tests/test_extract_assemble_data.py +548 -0
  74. julee/domain/use_cases/tests/test_initialize_system_data.py +455 -0
  75. julee/domain/use_cases/tests/test_validate_document.py +1228 -0
  76. julee/domain/use_cases/validate_document.py +736 -0
  77. julee/fixtures/assembly_specifications.yaml +70 -0
  78. julee/fixtures/documents.yaml +178 -0
  79. julee/fixtures/knowledge_service_configs.yaml +37 -0
  80. julee/fixtures/knowledge_service_queries.yaml +27 -0
  81. julee/repositories/__init__.py +17 -0
  82. julee/repositories/memory/__init__.py +31 -0
  83. julee/repositories/memory/assembly.py +84 -0
  84. julee/repositories/memory/assembly_specification.py +125 -0
  85. julee/repositories/memory/base.py +227 -0
  86. julee/repositories/memory/document.py +149 -0
  87. julee/repositories/memory/document_policy_validation.py +104 -0
  88. julee/repositories/memory/knowledge_service_config.py +123 -0
  89. julee/repositories/memory/knowledge_service_query.py +120 -0
  90. julee/repositories/memory/policy.py +87 -0
  91. julee/repositories/memory/tests/__init__.py +0 -0
  92. julee/repositories/memory/tests/test_document.py +212 -0
  93. julee/repositories/memory/tests/test_document_policy_validation.py +161 -0
  94. julee/repositories/memory/tests/test_policy.py +443 -0
  95. julee/repositories/minio/__init__.py +31 -0
  96. julee/repositories/minio/assembly.py +103 -0
  97. julee/repositories/minio/assembly_specification.py +170 -0
  98. julee/repositories/minio/client.py +570 -0
  99. julee/repositories/minio/document.py +530 -0
  100. julee/repositories/minio/document_policy_validation.py +120 -0
  101. julee/repositories/minio/knowledge_service_config.py +187 -0
  102. julee/repositories/minio/knowledge_service_query.py +211 -0
  103. julee/repositories/minio/policy.py +106 -0
  104. julee/repositories/minio/tests/__init__.py +0 -0
  105. julee/repositories/minio/tests/fake_client.py +213 -0
  106. julee/repositories/minio/tests/test_assembly.py +374 -0
  107. julee/repositories/minio/tests/test_assembly_specification.py +391 -0
  108. julee/repositories/minio/tests/test_client_protocol.py +57 -0
  109. julee/repositories/minio/tests/test_document.py +591 -0
  110. julee/repositories/minio/tests/test_document_policy_validation.py +192 -0
  111. julee/repositories/minio/tests/test_knowledge_service_config.py +374 -0
  112. julee/repositories/minio/tests/test_knowledge_service_query.py +438 -0
  113. julee/repositories/minio/tests/test_policy.py +559 -0
  114. julee/repositories/temporal/__init__.py +38 -0
  115. julee/repositories/temporal/activities.py +114 -0
  116. julee/repositories/temporal/activity_names.py +34 -0
  117. julee/repositories/temporal/proxies.py +159 -0
  118. julee/services/__init__.py +18 -0
  119. julee/services/knowledge_service/__init__.py +48 -0
  120. julee/services/knowledge_service/anthropic/__init__.py +12 -0
  121. julee/services/knowledge_service/anthropic/knowledge_service.py +331 -0
  122. julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +318 -0
  123. julee/services/knowledge_service/factory.py +138 -0
  124. julee/services/knowledge_service/knowledge_service.py +160 -0
  125. julee/services/knowledge_service/memory/__init__.py +13 -0
  126. julee/services/knowledge_service/memory/knowledge_service.py +278 -0
  127. julee/services/knowledge_service/memory/test_knowledge_service.py +345 -0
  128. julee/services/knowledge_service/test_factory.py +112 -0
  129. julee/services/temporal/__init__.py +38 -0
  130. julee/services/temporal/activities.py +86 -0
  131. julee/services/temporal/activity_names.py +22 -0
  132. julee/services/temporal/proxies.py +41 -0
  133. julee/util/__init__.py +0 -0
  134. julee/util/domain.py +119 -0
  135. julee/util/repos/__init__.py +0 -0
  136. julee/util/repos/minio/__init__.py +0 -0
  137. julee/util/repos/minio/file_storage.py +213 -0
  138. julee/util/repos/temporal/__init__.py +11 -0
  139. julee/util/repos/temporal/client_proxies/file_storage.py +68 -0
  140. julee/util/repos/temporal/data_converter.py +123 -0
  141. julee/util/repos/temporal/minio_file_storage.py +12 -0
  142. julee/util/repos/temporal/proxies/__init__.py +0 -0
  143. julee/util/repos/temporal/proxies/file_storage.py +58 -0
  144. julee/util/repositories.py +55 -0
  145. julee/util/temporal/__init__.py +22 -0
  146. julee/util/temporal/activities.py +123 -0
  147. julee/util/temporal/decorators.py +473 -0
  148. julee/util/tests/__init__.py +1 -0
  149. julee/util/tests/test_decorators.py +770 -0
  150. julee/util/validation/__init__.py +29 -0
  151. julee/util/validation/repository.py +100 -0
  152. julee/util/validation/type_guards.py +369 -0
  153. julee/worker.py +211 -0
  154. julee/workflows/__init__.py +26 -0
  155. julee/workflows/extract_assemble.py +215 -0
  156. julee/workflows/validate_document.py +228 -0
  157. julee-0.1.0.dist-info/METADATA +195 -0
  158. julee-0.1.0.dist-info/RECORD +161 -0
  159. julee-0.1.0.dist-info/WHEEL +5 -0
  160. julee-0.1.0.dist-info/licenses/LICENSE +674 -0
  161. julee-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,297 @@
1
+ """
2
+ Comprehensive tests for Document domain model.
3
+
4
+ This test module documents the design decisions made for the Document domain
5
+ model
6
+ using table-based tests. It covers:
7
+
8
+ - Document instantiation with various field combinations
9
+ - Content stream operations (read, seek, tell)
10
+ - Validation rules and error conditions
11
+ - JSON serialization behavior
12
+ - Stream-like interface compatibility
13
+
14
+ Design decisions documented:
15
+ - Documents must have all required fields
16
+ - Content streams are excluded from JSON serialization
17
+ - Size must be positive, filenames and content types non-empty
18
+ - Multihash is required and non-empty
19
+ - Documents act as readable streams with standard methods
20
+ """
21
+
22
+ import pytest
23
+ import json
24
+
25
+ from julee.domain.models.document import Document
26
+ from .factories import DocumentFactory, ContentStreamFactory
27
+
28
+
29
class TestDocumentInstantiation:
    """Table-driven checks of Document construction and field validation."""

    @pytest.mark.parametrize(
        "document_id,original_filename,content_type,size_bytes,multihash,expected_success",
        [
            # Valid cases
            pytest.param(
                "doc-1", "test.txt", "text/plain", 100, "sha256:hash", True,
                id="valid-text",
            ),
            pytest.param(
                "doc-2", "document.pdf", "application/pdf", 1024,
                "sha256:pdf-hash", True,
                id="valid-pdf",
            ),
            pytest.param(
                "doc-3", "data.json", "application/json", 50,
                "sha256:json-hash", True,
                id="valid-json",
            ),
            # Invalid cases - empty required fields
            pytest.param(
                "", "test.txt", "text/plain", 100, "sha256:hash", False,
                id="empty-document-id",
            ),
            pytest.param(
                "doc-4", "", "text/plain", 100, "sha256:hash", False,
                id="empty-filename",
            ),
            pytest.param(
                "doc-5", "test.txt", "", 100, "sha256:hash", False,
                id="empty-content-type",
            ),
            pytest.param(
                "doc-6", "test.txt", "text/plain", 100, "", False,
                id="empty-multihash",
            ),
            # Invalid cases - whitespace only
            pytest.param(
                " ", "test.txt", "text/plain", 100, "sha256:hash", False,
                id="whitespace-document-id",
            ),
            pytest.param(
                "doc-7", " ", "text/plain", 100, "sha256:hash", False,
                id="whitespace-filename",
            ),
            pytest.param(
                "doc-8", "test.txt", " ", 100, "sha256:hash", False,
                id="whitespace-content-type",
            ),
            pytest.param(
                "doc-9", "test.txt", "text/plain", 100, " ", False,
                id="whitespace-multihash",
            ),
            # Invalid cases - size validation
            pytest.param(
                "doc-10", "test.txt", "text/plain", 0, "sha256:hash", False,
                id="zero-size",
            ),
            pytest.param(
                "doc-11", "test.txt", "text/plain", -1, "sha256:hash", False,
                id="negative-size",
            ),
        ],
    )
    def test_document_creation_validation(
        self,
        document_id: str,
        original_filename: str,
        content_type: str,
        size_bytes: int,
        multihash: str,
        expected_success: bool,
    ) -> None:
        """Build a Document from the row and check it is accepted or rejected.

        Rows flagged ``expected_success`` must construct and echo their
        inputs back; every other row must be rejected at construction time.
        """
        content_stream = ContentStreamFactory.build()

        if not expected_success:
            # Narrowed from a bare Exception so unrelated failures (e.g. a
            # TypeError from a broken factory) no longer satisfy the test.
            # NOTE(review): relies on pydantic v2's ValidationError being a
            # ValueError subclass - confirm if the pydantic major changes.
            with pytest.raises(ValueError):
                Document(
                    document_id=document_id,
                    original_filename=original_filename,
                    content_type=content_type,
                    size_bytes=size_bytes,
                    content_multihash=multihash,
                    content=content_stream,
                )
            return

        doc = Document(
            document_id=document_id,
            original_filename=original_filename,
            content_type=content_type,
            size_bytes=size_bytes,
            content_multihash=multihash,
            content=content_stream,
        )

        assert doc.document_id == document_id
        # Compared after stripping on both sides, as the original test did.
        assert doc.original_filename.strip() == original_filename.strip()
        assert doc.content_type.strip() == content_type.strip()
        assert doc.size_bytes == size_bytes
        assert doc.content_multihash.strip() == multihash.strip()
176
+
177
+
178
class TestDocumentSerialization:
    """Test Document JSON serialization behavior."""

    def test_document_json_excludes_content(self) -> None:
        """Check that the content stream is left out of the JSON dump.

        All other asserted fields must round-trip through
        ``model_dump_json`` with the values held on the model.
        """
        payload = b"Secret content not for JSON"
        stream = ContentStreamFactory.build(content=payload)
        doc = DocumentFactory.build(content=stream, size_bytes=len(payload))

        json_data = json.loads(doc.model_dump_json())

        # Content should not be in JSON
        assert "content" not in json_data

        # But all other fields should be present
        assert json_data["document_id"] == doc.document_id
        assert json_data["original_filename"] == doc.original_filename
        assert json_data["content_type"] == doc.content_type
        assert json_data["size_bytes"] == doc.size_bytes
        assert json_data["content_multihash"] == doc.content_multihash
        # status is compared against the enum's value, not the enum member
        assert json_data["status"] == doc.status.value
200
+
201
+
202
class TestDocumentContentValidation:
    """Test Document content and content_string validation rules."""

    def test_document_with_both_content_and_content_string_fails(
        self,
    ) -> None:
        """Supplying both content and content_string must be rejected."""
        stream = ContentStreamFactory.build()
        text = '{"type": "string"}'

        with pytest.raises(
            ValueError, match="cannot have both content and content_string"
        ):
            Document(
                document_id="test-doc-both",
                original_filename="both.json",
                content_type="application/json",
                size_bytes=100,
                content_multihash="test_hash",
                content=stream,
                content_string=text,
            )

    def test_document_without_content_or_content_string_fails(self) -> None:
        """Supplying neither content nor content_string must be rejected."""
        with pytest.raises(
            ValueError, match="must have either content or content_string"
        ):
            Document(
                document_id="test-doc-no-content",
                original_filename="empty.json",
                content_type="application/json",
                size_bytes=100,
                content_multihash="test_hash",
                content=None,
                content_string=None,
            )

    def test_document_with_content_only_succeeds(self) -> None:
        """A document carrying only a content stream is accepted."""
        stream = ContentStreamFactory.build()

        doc = Document(
            document_id="test-doc-content",
            original_filename="content.json",
            content_type="application/json",
            size_bytes=100,
            content_multihash="test_hash",
            content=stream,
            content_string=None,
        )

        assert doc.content is not None
        assert doc.content_string is None

    def test_document_with_content_string_only_succeeds(self) -> None:
        """A document carrying only content_string is accepted."""
        text = '{"type": "string"}'

        doc = Document(
            document_id="test-doc-string",
            original_filename="string.json",
            content_type="application/json",
            size_bytes=100,
            content_multihash="test_hash",
            content=None,
            content_string=text,
        )

        assert doc.content is None
        assert doc.content_string == text

    def test_document_deserialization_with_empty_content_succeeds(
        self,
    ) -> None:
        """Validation with the temporal_validation context allows no content."""
        # This simulates what happens when a Document comes back from Temporal
        # activities - the ContentStream is excluded from serialization
        document_data = {
            "document_id": "test-temporal",
            "original_filename": "temporal.json",
            "content_type": "application/json",
            "size_bytes": 100,
            "content_multihash": "test_hash",
            "content": None,
            "content_string": None,
        }

        # Should succeed with temporal_validation context
        doc = Document.model_validate(
            document_data, context={"temporal_validation": True}
        )

        assert doc.document_id == "test-temporal"
        assert doc.content is None
        assert doc.content_string is None
@@ -0,0 +1,17 @@
1
+ """
2
+ Knowledge Service domain models for julee domain.
3
+
4
+ This module exports domain models for knowledge services in the Capture,
5
+ Extract, Assemble, Publish workflow. Knowledge services represent external
6
+ AI/ML services that can store documents and execute queries against them.
7
+ """
8
+
9
+ from .knowledge_service_config import (
10
+ KnowledgeServiceConfig,
11
+ ServiceApi,
12
+ )
13
+
14
+ __all__ = [
15
+ "KnowledgeServiceConfig",
16
+ "ServiceApi",
17
+ ]
@@ -0,0 +1,86 @@
1
+ """
2
+ KnowledgeService domain models for the Capture, Extract, Assemble,
3
+ Publish workflow.
4
+
5
+ This module contains the KnowledgeServiceConfig domain object that represents
6
+ knowledge services in the CEAP workflow system.
7
+
8
+ A KnowledgeService defines a service that can store documents and execute
9
+ queries against them. It acts as an interface to external AI/ML services
10
+ that can analyze and extract information from documents.
11
+
12
+ All domain models use Pydantic BaseModel for validation, serialization,
13
+ and type safety, following the patterns established in the sample project.
14
+ """
15
+
16
+ from pydantic import BaseModel, Field, field_validator
17
+ from typing import Optional
18
+ from datetime import datetime, timezone
19
+ from enum import Enum
20
+
21
+
22
class ServiceApi(str, Enum):
    """Supported knowledge service APIs.

    Members are also ``str`` instances, so they compare equal to their raw
    string value.
    """

    ANTHROPIC = "anthropic"
26
+
27
+
28
class KnowledgeServiceConfig(BaseModel):
    """Knowledge service configuration that defines how to interact with
    an external knowledge/AI service.

    A KnowledgeServiceConfig represents a service endpoint that can store
    documents and execute queries against them. This could be an AI service,
    vector database, search engine, or any other service that can analyze
    documents and answer questions about them.

    The three text fields are validated as non-blank and are stored with
    surrounding whitespace stripped.
    """

    # Core service identification
    knowledge_service_id: str = Field(
        description="Unique identifier for this knowledge service"
    )
    name: str = Field(description="Human-readable name for the knowledge service")
    description: str = Field(
        description="Description of what this knowledge service does"
    )
    service_api: ServiceApi = Field(
        description="The external API/service this knowledge service uses"
    )

    # Timestamps (both default to the current UTC time at construction)
    created_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    updated_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )

    @field_validator("knowledge_service_id")
    @classmethod
    def knowledge_service_id_must_not_be_empty(cls, v: str) -> str:
        """Reject empty/whitespace-only IDs; return the stripped value."""
        if not v or not v.strip():
            raise ValueError("Knowledge service ID cannot be empty")
        return v.strip()

    @field_validator("name")
    @classmethod
    def name_must_not_be_empty(cls, v: str) -> str:
        """Reject empty/whitespace-only names; return the stripped value."""
        if not v or not v.strip():
            raise ValueError("Knowledge service name cannot be empty")
        return v.strip()

    @field_validator("description")
    @classmethod
    def description_must_not_be_empty(cls, v: str) -> str:
        """Reject empty/whitespace-only descriptions; return the stripped value."""
        if not v or not v.strip():
            raise ValueError("Knowledge service description cannot be empty")
        return v.strip()

    @field_validator("service_api")
    @classmethod
    def service_api_must_be_valid(cls, v: ServiceApi) -> ServiceApi:
        """Ensure the value is a ServiceApi member.

        ``isinstance`` is used rather than ``v in ServiceApi`` because the
        containment test raises TypeError for non-member operands on some
        Python versions, which would mask the intended ValueError.
        """
        if not isinstance(v, ServiceApi):
            raise ValueError(
                f"Invalid service API: {v}. Must be one of {list(ServiceApi)}"
            )
        return v
@@ -0,0 +1,15 @@
1
+ from .policy import (
2
+ Policy,
3
+ PolicyStatus,
4
+ )
5
+ from .document_policy_validation import (
6
+ DocumentPolicyValidation,
7
+ DocumentPolicyValidationStatus,
8
+ )
9
+
10
+ __all__ = [
11
+ "Policy",
12
+ "PolicyStatus",
13
+ "DocumentPolicyValidation",
14
+ "DocumentPolicyValidationStatus",
15
+ ]
@@ -0,0 +1,220 @@
1
+ """
2
+ DocumentPolicyValidation domain models for the Capture, Extract, Assemble,
3
+ Publish workflow.
4
+
5
+ This module contains the DocumentPolicyValidation domain object that
6
+ represents
7
+ the result of validating a document against a policy configuration in the CEAP
8
+ workflow system.
9
+
10
+ A DocumentPolicyValidation captures the complete validation process including:
11
+ - The document being validated and the policy used
12
+ - Actual validation scores achieved against policy criteria
13
+ - Optional transformation results and post-transformation scores
14
+ - Status tracking throughout the validation lifecycle
15
+
16
+ All domain models use Pydantic BaseModel for validation, serialization,
17
+ and type safety, following the patterns established in the sample project.
18
+ """
19
+
20
+ from pydantic import BaseModel, Field, field_validator
21
+ from typing import Optional, List, Tuple
22
+ from datetime import datetime, timezone
23
+ from enum import Enum
24
+
25
+
26
class DocumentPolicyValidationStatus(str, Enum):
    """Status of a document policy validation process.

    Members are also ``str`` instances, so they compare equal to their raw
    string value.
    """

    PENDING = "pending"
    IN_PROGRESS = "in_progress"
    VALIDATION_COMPLETE = "validation_complete"
    TRANSFORMATION_REQUIRED = "transformation_required"
    TRANSFORMATION_IN_PROGRESS = "transformation_in_progress"
    TRANSFORMATION_COMPLETE = "transformation_complete"
    PASSED = "passed"
    FAILED = "failed"
    ERROR = "error"
38
+
39
+
40
class DocumentPolicyValidation(BaseModel):
    """Represents the validation of a document against a policy configuration.

    A DocumentPolicyValidation tracks the complete lifecycle of validating
    a document against policy criteria. It includes:

    1. Initial validation: Document is scored against policy validation
       queries
    2. Optional transformation: If policy includes transformation queries and
       initial validation fails, transformations are applied
    3. Re-validation: Transformed document is re-scored against policy
       criteria
    4. Final determination: Pass/fail based on final validation scores

    The validation process supports both validation-only policies and policies
    that include transformations for document quality improvement.
    """

    # Core validation identification
    validation_id: str = Field(
        description="Unique identifier for this validation instance"
    )
    input_document_id: str = Field(
        description="ID of the document being validated against the policy"
    )
    policy_id: str = Field(
        description="ID of the policy configuration used for validation"
    )

    # Validation process status
    status: DocumentPolicyValidationStatus = DocumentPolicyValidationStatus.PENDING

    # Initial validation results
    validation_scores: List[Tuple[str, int]] = Field(
        default_factory=list,
        description="List of (knowledge_service_query_id, actual_score) "
        "tuples representing the scores achieved during initial validation. "
        "Scores are between 0 and 100",
    )

    # Transformation results (if applicable)
    transformed_document_id: Optional[str] = Field(
        default=None,
        description="ID of the document after transformations have been "
        "applied. Only present if the policy includes transformation queries "
        "and they were executed",
    )
    post_transform_validation_scores: Optional[List[Tuple[str, int]]] = Field(
        default=None,
        description="List of (knowledge_service_query_id, actual_score) "
        "tuples representing scores achieved after transformation. "
        "Only present if transformations were applied and re-validation "
        "occurred",
    )

    # Validation metadata
    started_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        description="When the validation process was initiated",
    )
    completed_at: Optional[datetime] = Field(
        default=None, description="When the validation process completed"
    )
    error_message: Optional[str] = Field(
        default=None, description="Error message if validation process failed"
    )

    # Results summary
    passed: Optional[bool] = Field(
        default=None,
        description="Whether the document passed policy validation. "
        "None while validation is in progress, True/False when complete",
    )

    @field_validator("validation_id")
    @classmethod
    def validation_id_must_not_be_empty(cls, v: str) -> str:
        """Reject empty/whitespace-only validation IDs; return the stripped value.

        Added so this identifier is held to the same rule as
        ``input_document_id`` and ``policy_id``.
        """
        if not v or not v.strip():
            raise ValueError("Validation ID cannot be empty")
        return v.strip()

    @field_validator("input_document_id")
    @classmethod
    def input_document_id_must_not_be_empty(cls, v: str) -> str:
        """Reject empty/whitespace-only document IDs; return the stripped value."""
        if not v or not v.strip():
            raise ValueError("Input document ID cannot be empty")
        return v.strip()

    @field_validator("policy_id")
    @classmethod
    def policy_id_must_not_be_empty(cls, v: str) -> str:
        """Reject empty/whitespace-only policy IDs; return the stripped value."""
        if not v or not v.strip():
            raise ValueError("Policy ID cannot be empty")
        return v.strip()

    @field_validator("validation_scores")
    @classmethod
    def validation_scores_must_be_valid(
        cls, v: List[Tuple[str, int]]
    ) -> List[Tuple[str, int]]:
        """Validate the initial score list; an empty list is accepted as-is."""
        if not isinstance(v, list):
            raise ValueError("Validation scores must be a list")

        # Empty list is valid for pending validations
        if not v:
            return v

        return cls._validate_score_tuples(v, "validation_scores")

    @field_validator("post_transform_validation_scores")
    @classmethod
    def post_transform_scores_must_be_valid(
        cls, v: Optional[List[Tuple[str, int]]]
    ) -> Optional[List[Tuple[str, int]]]:
        """Validate post-transformation scores; None and [] pass through."""
        if v is None:
            return v

        if not isinstance(v, list):
            raise ValueError("Post-transform validation scores must be a list or None")

        # Empty list is valid
        if not v:
            return v

        return cls._validate_score_tuples(v, "post_transform_validation_scores")

    @field_validator("error_message")
    @classmethod
    def error_message_must_be_valid(cls, v: Optional[str]) -> Optional[str]:
        """Strip the message; a blank message is normalised to None."""
        if v is None:
            return v
        if not isinstance(v, str):
            raise ValueError("Error message must be a string or None")
        stripped = v.strip()
        return stripped if stripped else None

    @field_validator("transformed_document_id")
    @classmethod
    def transformed_document_id_must_be_valid(cls, v: Optional[str]) -> Optional[str]:
        """Allow None; otherwise require a non-blank string and strip it."""
        if v is None:
            return v
        if not isinstance(v, str) or not v.strip():
            raise ValueError(
                "Transformed document ID must be a non-empty string or None"
            )
        return v.strip()

    @classmethod
    def _validate_score_tuples(
        cls, scores: List[Tuple[str, int]], field_name: str
    ) -> List[Tuple[str, int]]:
        """Validate a list of (query_id, actual_score) tuples.

        Args:
            scores: The score tuples to check.
            field_name: Name of the field being validated, used only to
                build error messages.

        Returns:
            A new list of the same tuples with each query ID stripped of
            surrounding whitespace.

        Raises:
            ValueError: If an item is not a 2-tuple, a query ID is blank or
                not a string, a query ID repeats (after stripping), or a
                score is not an int in the range 0..100.
        """
        validated_scores: List[Tuple[str, int]] = []
        query_ids_seen = set()

        for item in scores:
            if not isinstance(item, tuple) or len(item) != 2:
                raise ValueError(
                    f"Each item in {field_name} must be a 2-tuple of "
                    f"(query_id, actual_score)"
                )

            query_id, actual_score = item

            # Validate query ID
            if not isinstance(query_id, str) or not query_id.strip():
                raise ValueError(f"Query ID in {field_name} must be a non-empty string")
            query_id = query_id.strip()

            # Check for duplicate query IDs within this field
            if query_id in query_ids_seen:
                raise ValueError(f"Duplicate query ID '{query_id}' in {field_name}")
            query_ids_seen.add(query_id)

            # Validate actual score
            if not isinstance(actual_score, int):
                raise ValueError(
                    f"Actual score in {field_name} must be an integer "
                    f"between 0 and 100"
                )
            if not 0 <= actual_score <= 100:
                raise ValueError(
                    f"Actual score {actual_score} in {field_name} must be "
                    f"between 0 and 100"
                )

            validated_scores.append((query_id, actual_score))

        return validated_scores