julee 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- julee/__init__.py +3 -0
- julee/api/__init__.py +20 -0
- julee/api/app.py +180 -0
- julee/api/dependencies.py +257 -0
- julee/api/requests.py +175 -0
- julee/api/responses.py +43 -0
- julee/api/routers/__init__.py +43 -0
- julee/api/routers/assembly_specifications.py +212 -0
- julee/api/routers/documents.py +182 -0
- julee/api/routers/knowledge_service_configs.py +79 -0
- julee/api/routers/knowledge_service_queries.py +293 -0
- julee/api/routers/system.py +137 -0
- julee/api/routers/workflows.py +234 -0
- julee/api/services/__init__.py +20 -0
- julee/api/services/system_initialization.py +214 -0
- julee/api/tests/__init__.py +14 -0
- julee/api/tests/routers/__init__.py +17 -0
- julee/api/tests/routers/test_assembly_specifications.py +749 -0
- julee/api/tests/routers/test_documents.py +301 -0
- julee/api/tests/routers/test_knowledge_service_configs.py +234 -0
- julee/api/tests/routers/test_knowledge_service_queries.py +738 -0
- julee/api/tests/routers/test_system.py +179 -0
- julee/api/tests/routers/test_workflows.py +393 -0
- julee/api/tests/test_app.py +285 -0
- julee/api/tests/test_dependencies.py +245 -0
- julee/api/tests/test_requests.py +250 -0
- julee/domain/__init__.py +22 -0
- julee/domain/models/__init__.py +49 -0
- julee/domain/models/assembly/__init__.py +17 -0
- julee/domain/models/assembly/assembly.py +103 -0
- julee/domain/models/assembly/tests/__init__.py +0 -0
- julee/domain/models/assembly/tests/factories.py +37 -0
- julee/domain/models/assembly/tests/test_assembly.py +430 -0
- julee/domain/models/assembly_specification/__init__.py +24 -0
- julee/domain/models/assembly_specification/assembly_specification.py +172 -0
- julee/domain/models/assembly_specification/knowledge_service_query.py +123 -0
- julee/domain/models/assembly_specification/tests/__init__.py +0 -0
- julee/domain/models/assembly_specification/tests/factories.py +78 -0
- julee/domain/models/assembly_specification/tests/test_assembly_specification.py +490 -0
- julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +310 -0
- julee/domain/models/custom_fields/__init__.py +0 -0
- julee/domain/models/custom_fields/content_stream.py +68 -0
- julee/domain/models/custom_fields/tests/__init__.py +0 -0
- julee/domain/models/custom_fields/tests/test_custom_fields.py +53 -0
- julee/domain/models/document/__init__.py +17 -0
- julee/domain/models/document/document.py +150 -0
- julee/domain/models/document/tests/__init__.py +0 -0
- julee/domain/models/document/tests/factories.py +76 -0
- julee/domain/models/document/tests/test_document.py +297 -0
- julee/domain/models/knowledge_service_config/__init__.py +17 -0
- julee/domain/models/knowledge_service_config/knowledge_service_config.py +86 -0
- julee/domain/models/policy/__init__.py +15 -0
- julee/domain/models/policy/document_policy_validation.py +220 -0
- julee/domain/models/policy/policy.py +203 -0
- julee/domain/models/policy/tests/__init__.py +0 -0
- julee/domain/models/policy/tests/factories.py +47 -0
- julee/domain/models/policy/tests/test_document_policy_validation.py +420 -0
- julee/domain/models/policy/tests/test_policy.py +546 -0
- julee/domain/repositories/__init__.py +27 -0
- julee/domain/repositories/assembly.py +45 -0
- julee/domain/repositories/assembly_specification.py +52 -0
- julee/domain/repositories/base.py +146 -0
- julee/domain/repositories/document.py +49 -0
- julee/domain/repositories/document_policy_validation.py +52 -0
- julee/domain/repositories/knowledge_service_config.py +54 -0
- julee/domain/repositories/knowledge_service_query.py +44 -0
- julee/domain/repositories/policy.py +49 -0
- julee/domain/use_cases/__init__.py +17 -0
- julee/domain/use_cases/decorators.py +107 -0
- julee/domain/use_cases/extract_assemble_data.py +649 -0
- julee/domain/use_cases/initialize_system_data.py +842 -0
- julee/domain/use_cases/tests/__init__.py +7 -0
- julee/domain/use_cases/tests/test_extract_assemble_data.py +548 -0
- julee/domain/use_cases/tests/test_initialize_system_data.py +455 -0
- julee/domain/use_cases/tests/test_validate_document.py +1228 -0
- julee/domain/use_cases/validate_document.py +736 -0
- julee/fixtures/assembly_specifications.yaml +70 -0
- julee/fixtures/documents.yaml +178 -0
- julee/fixtures/knowledge_service_configs.yaml +37 -0
- julee/fixtures/knowledge_service_queries.yaml +27 -0
- julee/repositories/__init__.py +17 -0
- julee/repositories/memory/__init__.py +31 -0
- julee/repositories/memory/assembly.py +84 -0
- julee/repositories/memory/assembly_specification.py +125 -0
- julee/repositories/memory/base.py +227 -0
- julee/repositories/memory/document.py +149 -0
- julee/repositories/memory/document_policy_validation.py +104 -0
- julee/repositories/memory/knowledge_service_config.py +123 -0
- julee/repositories/memory/knowledge_service_query.py +120 -0
- julee/repositories/memory/policy.py +87 -0
- julee/repositories/memory/tests/__init__.py +0 -0
- julee/repositories/memory/tests/test_document.py +212 -0
- julee/repositories/memory/tests/test_document_policy_validation.py +161 -0
- julee/repositories/memory/tests/test_policy.py +443 -0
- julee/repositories/minio/__init__.py +31 -0
- julee/repositories/minio/assembly.py +103 -0
- julee/repositories/minio/assembly_specification.py +170 -0
- julee/repositories/minio/client.py +570 -0
- julee/repositories/minio/document.py +530 -0
- julee/repositories/minio/document_policy_validation.py +120 -0
- julee/repositories/minio/knowledge_service_config.py +187 -0
- julee/repositories/minio/knowledge_service_query.py +211 -0
- julee/repositories/minio/policy.py +106 -0
- julee/repositories/minio/tests/__init__.py +0 -0
- julee/repositories/minio/tests/fake_client.py +213 -0
- julee/repositories/minio/tests/test_assembly.py +374 -0
- julee/repositories/minio/tests/test_assembly_specification.py +391 -0
- julee/repositories/minio/tests/test_client_protocol.py +57 -0
- julee/repositories/minio/tests/test_document.py +591 -0
- julee/repositories/minio/tests/test_document_policy_validation.py +192 -0
- julee/repositories/minio/tests/test_knowledge_service_config.py +374 -0
- julee/repositories/minio/tests/test_knowledge_service_query.py +438 -0
- julee/repositories/minio/tests/test_policy.py +559 -0
- julee/repositories/temporal/__init__.py +38 -0
- julee/repositories/temporal/activities.py +114 -0
- julee/repositories/temporal/activity_names.py +34 -0
- julee/repositories/temporal/proxies.py +159 -0
- julee/services/__init__.py +18 -0
- julee/services/knowledge_service/__init__.py +48 -0
- julee/services/knowledge_service/anthropic/__init__.py +12 -0
- julee/services/knowledge_service/anthropic/knowledge_service.py +331 -0
- julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +318 -0
- julee/services/knowledge_service/factory.py +138 -0
- julee/services/knowledge_service/knowledge_service.py +160 -0
- julee/services/knowledge_service/memory/__init__.py +13 -0
- julee/services/knowledge_service/memory/knowledge_service.py +278 -0
- julee/services/knowledge_service/memory/test_knowledge_service.py +345 -0
- julee/services/knowledge_service/test_factory.py +112 -0
- julee/services/temporal/__init__.py +38 -0
- julee/services/temporal/activities.py +86 -0
- julee/services/temporal/activity_names.py +22 -0
- julee/services/temporal/proxies.py +41 -0
- julee/util/__init__.py +0 -0
- julee/util/domain.py +119 -0
- julee/util/repos/__init__.py +0 -0
- julee/util/repos/minio/__init__.py +0 -0
- julee/util/repos/minio/file_storage.py +213 -0
- julee/util/repos/temporal/__init__.py +11 -0
- julee/util/repos/temporal/client_proxies/file_storage.py +68 -0
- julee/util/repos/temporal/data_converter.py +123 -0
- julee/util/repos/temporal/minio_file_storage.py +12 -0
- julee/util/repos/temporal/proxies/__init__.py +0 -0
- julee/util/repos/temporal/proxies/file_storage.py +58 -0
- julee/util/repositories.py +55 -0
- julee/util/temporal/__init__.py +22 -0
- julee/util/temporal/activities.py +123 -0
- julee/util/temporal/decorators.py +473 -0
- julee/util/tests/__init__.py +1 -0
- julee/util/tests/test_decorators.py +770 -0
- julee/util/validation/__init__.py +29 -0
- julee/util/validation/repository.py +100 -0
- julee/util/validation/type_guards.py +369 -0
- julee/worker.py +211 -0
- julee/workflows/__init__.py +26 -0
- julee/workflows/extract_assemble.py +215 -0
- julee/workflows/validate_document.py +228 -0
- julee-0.1.0.dist-info/METADATA +195 -0
- julee-0.1.0.dist-info/RECORD +161 -0
- julee-0.1.0.dist-info/WHEEL +5 -0
- julee-0.1.0.dist-info/licenses/LICENSE +674 -0
- julee-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Comprehensive tests for Document domain model.
|
|
3
|
+
|
|
4
|
+
This test module documents the design decisions made for the Document domain
|
|
5
|
+
model
|
|
6
|
+
using table-based tests. It covers:
|
|
7
|
+
|
|
8
|
+
- Document instantiation with various field combinations
|
|
9
|
+
- Content stream operations (read, seek, tell)
|
|
10
|
+
- Validation rules and error conditions
|
|
11
|
+
- JSON serialization behavior
|
|
12
|
+
- Stream-like interface compatibility
|
|
13
|
+
|
|
14
|
+
Design decisions documented:
|
|
15
|
+
- Documents must have all required fields
|
|
16
|
+
- Content streams are excluded from JSON serialization
|
|
17
|
+
- Size must be positive, filenames and content types non-empty
|
|
18
|
+
- Multihash is required and non-empty
|
|
19
|
+
- Documents act as readable streams with standard methods
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import pytest
|
|
23
|
+
import json
|
|
24
|
+
|
|
25
|
+
from julee.domain.models.document import Document
|
|
26
|
+
from .factories import DocumentFactory, ContentStreamFactory
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class TestDocumentInstantiation:
    """Table-driven tests for constructing a Document.

    Every row supplies the identifying fields of a Document together with a
    flag stating whether construction is expected to succeed.
    """

    @pytest.mark.parametrize(
        "document_id,original_filename,content_type,size_bytes,multihash,expected_success",
        [
            # Valid cases
            ("doc-1", "test.txt", "text/plain", 100, "sha256:hash", True),
            ("doc-2", "document.pdf", "application/pdf", 1024, "sha256:pdf-hash", True),
            ("doc-3", "data.json", "application/json", 50, "sha256:json-hash", True),
            # Invalid cases - empty required fields
            # Empty document_id
            ("", "test.txt", "text/plain", 100, "sha256:hash", False),
            # Empty filename
            ("doc-4", "", "text/plain", 100, "sha256:hash", False),
            # Empty content_type
            ("doc-5", "test.txt", "", 100, "sha256:hash", False),
            # Empty multihash
            ("doc-6", "test.txt", "text/plain", 100, "", False),
            # Invalid cases - whitespace only
            # Whitespace document_id
            (" ", "test.txt", "text/plain", 100, "sha256:hash", False),
            # Whitespace filename
            ("doc-7", " ", "text/plain", 100, "sha256:hash", False),
            # Whitespace content_type
            ("doc-8", "test.txt", " ", 100, "sha256:hash", False),
            # Whitespace multihash
            ("doc-9", "test.txt", "text/plain", 100, " ", False),
            # Invalid cases - size validation
            # Zero size
            ("doc-10", "test.txt", "text/plain", 0, "sha256:hash", False),
            # Negative size
            ("doc-11", "test.txt", "text/plain", -1, "sha256:hash", False),
        ],
    )
    def test_document_creation_validation(
        self,
        document_id: str,
        original_filename: str,
        content_type: str,
        size_bytes: int,
        multihash: str,
        expected_success: bool,
    ) -> None:
        """Build a Document from one table row and check the outcome.

        Rows flagged as successful must produce a Document whose fields match
        the inputs (string fields are compared after stripping). Rows flagged
        as failures must be rejected with a ValueError.
        """
        content_stream = ContentStreamFactory.build()

        if not expected_success:
            # pydantic's ValidationError is a ValueError subclass, so this
            # still matches model validation failures while no longer hiding
            # unrelated errors (e.g. a TypeError from a misspelled keyword).
            with pytest.raises(ValueError):
                Document(
                    document_id=document_id,
                    original_filename=original_filename,
                    content_type=content_type,
                    size_bytes=size_bytes,
                    content_multihash=multihash,
                    content=content_stream,
                )
            return

        # Should create successfully
        doc = Document(
            document_id=document_id,
            original_filename=original_filename,
            content_type=content_type,
            size_bytes=size_bytes,
            content_multihash=multihash,
            content=content_stream,
        )
        assert doc.document_id == document_id
        assert doc.original_filename.strip() == original_filename.strip()
        assert doc.content_type.strip() == content_type.strip()
        assert doc.size_bytes == size_bytes
        assert doc.content_multihash.strip() == multihash.strip()
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
class TestDocumentSerialization:
    """Checks on how a Document is rendered to JSON."""

    def test_document_json_excludes_content(self) -> None:
        """The content stream is left out of the JSON dump; metadata is kept."""
        payload = b"Secret content not for JSON"
        stream = ContentStreamFactory.build(content=payload)
        doc = DocumentFactory.build(content=stream, size_bytes=len(payload))

        dumped = json.loads(doc.model_dump_json())

        # The raw content must never leak into the serialized form.
        assert "content" not in dumped

        # Every plain metadata field round-trips unchanged.
        for field_name in (
            "document_id",
            "original_filename",
            "content_type",
            "size_bytes",
            "content_multihash",
        ):
            assert dumped[field_name] == getattr(doc, field_name)

        # The status is serialized as the enum's value.
        assert dumped["status"] == doc.status.value
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
class TestDocumentContentValidation:
    """Rules governing the `content` / `content_string` pair on a Document."""

    def test_document_with_both_content_and_content_string_fails(
        self,
    ) -> None:
        """Supplying content and content_string together is rejected."""
        fields = {
            "document_id": "test-doc-both",
            "original_filename": "both.json",
            "content_type": "application/json",
            "size_bytes": 100,
            "content_multihash": "test_hash",
            "content": ContentStreamFactory.build(),
            "content_string": '{"type": "string"}',
        }

        expected_message = "cannot have both content and content_string"
        with pytest.raises(ValueError, match=expected_message):
            Document(**fields)

    def test_document_without_content_or_content_string_fails(self) -> None:
        """Supplying neither content nor content_string is rejected."""
        fields = {
            "document_id": "test-doc-no-content",
            "original_filename": "empty.json",
            "content_type": "application/json",
            "size_bytes": 100,
            "content_multihash": "test_hash",
            "content": None,
            "content_string": None,
        }

        expected_message = "must have either content or content_string"
        with pytest.raises(ValueError, match=expected_message):
            Document(**fields)

    def test_document_with_content_only_succeeds(self) -> None:
        """A Document carrying only a content stream is accepted."""
        doc = Document(
            document_id="test-doc-content",
            original_filename="content.json",
            content_type="application/json",
            size_bytes=100,
            content_multihash="test_hash",
            content=ContentStreamFactory.build(),
            content_string=None,
        )

        assert doc.content is not None
        assert doc.content_string is None

    def test_document_with_content_string_only_succeeds(self) -> None:
        """A Document carrying only content_string is accepted."""
        text = '{"type": "string"}'

        doc = Document(
            document_id="test-doc-string",
            original_filename="string.json",
            content_type="application/json",
            size_bytes=100,
            content_multihash="test_hash",
            content=None,
            content_string=text,
        )

        assert doc.content is None
        assert doc.content_string == text

    def test_document_deserialization_with_empty_content_succeeds(
        self,
    ) -> None:
        """With the temporal_validation context, empty content is allowed."""
        # Mirrors a Document returning from a Temporal activity: the
        # ContentStream is excluded from serialization, so both content
        # fields arrive as None.
        serialized = {
            "document_id": "test-temporal",
            "original_filename": "temporal.json",
            "content_type": "application/json",
            "size_bytes": 100,
            "content_multihash": "test_hash",
            "content": None,
            "content_string": None,
        }

        # Validation is run with the temporal_validation flag in its context.
        validation_context = {"temporal_validation": True}
        doc = Document.model_validate(serialized, context=validation_context)

        assert doc.document_id == "test-temporal"
        assert doc.content is None
        assert doc.content_string is None
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Knowledge Service domain models for julee domain.
|
|
3
|
+
|
|
4
|
+
This module exports domain models for knowledge services in the Capture,
|
|
5
|
+
Extract, Assemble, Publish workflow. Knowledge services represent external
|
|
6
|
+
AI/ML services that can store documents and execute queries against them.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from .knowledge_service_config import (
|
|
10
|
+
KnowledgeServiceConfig,
|
|
11
|
+
ServiceApi,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"KnowledgeServiceConfig",
|
|
16
|
+
"ServiceApi",
|
|
17
|
+
]
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""
|
|
2
|
+
KnowledgeService domain models for the Capture, Extract, Assemble,
|
|
3
|
+
Publish workflow.
|
|
4
|
+
|
|
5
|
+
This module contains the KnowledgeServiceConfig domain object that represents
|
|
6
|
+
knowledge services in the CEAP workflow system.
|
|
7
|
+
|
|
8
|
+
A KnowledgeService defines a service that can store documents and execute
|
|
9
|
+
queries against them. It acts as an interface to external AI/ML services
|
|
10
|
+
that can analyze and extract information from documents.
|
|
11
|
+
|
|
12
|
+
All domain models use Pydantic BaseModel for validation, serialization,
|
|
13
|
+
and type safety, following the patterns established in the sample project.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from pydantic import BaseModel, Field, field_validator
|
|
17
|
+
from typing import Optional
|
|
18
|
+
from datetime import datetime, timezone
|
|
19
|
+
from enum import Enum
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ServiceApi(str, Enum):
    """External APIs that a knowledge service may be backed by.

    Members mix in ``str``, so each one compares equal to its string value.
    """

    ANTHROPIC = "anthropic"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class KnowledgeServiceConfig(BaseModel):
    """Configuration describing how to talk to an external knowledge/AI service.

    The configured service is an endpoint able to store documents and run
    queries against them, such as an AI service, vector database or search
    engine.

    The ID, name and description must be non-blank and are stored with
    surrounding whitespace removed. Both timestamps default to the current
    UTC time when the model is created.
    """

    # Core service identification
    knowledge_service_id: str = Field(
        description="Unique identifier for this knowledge service"
    )
    name: str = Field(description="Human-readable name for the knowledge service")
    description: str = Field(
        description="Description of what this knowledge service does"
    )
    service_api: ServiceApi = Field(
        description="The external API/service this knowledge service uses"
    )

    # Timestamps
    created_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    updated_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )

    @field_validator("knowledge_service_id")
    @classmethod
    def knowledge_service_id_must_not_be_empty(cls, v: str) -> str:
        """Return the ID stripped of outer whitespace; reject blank IDs."""
        trimmed = v.strip() if v else ""
        if not trimmed:
            raise ValueError("Knowledge service ID cannot be empty")
        return trimmed

    @field_validator("name")
    @classmethod
    def name_must_not_be_empty(cls, v: str) -> str:
        """Return the name stripped of outer whitespace; reject blank names."""
        trimmed = v.strip() if v else ""
        if not trimmed:
            raise ValueError("Knowledge service name cannot be empty")
        return trimmed

    @field_validator("description")
    @classmethod
    def description_must_not_be_empty(cls, v: str) -> str:
        """Return the description stripped of outer whitespace; reject blanks."""
        trimmed = v.strip() if v else ""
        if not trimmed:
            raise ValueError("Knowledge service description cannot be empty")
        return trimmed

    @field_validator("service_api")
    @classmethod
    def service_api_must_be_valid(cls, v: ServiceApi) -> ServiceApi:
        """Return the value unchanged when it is a member of ServiceApi."""
        if v in ServiceApi:
            return v
        raise ValueError(
            f"Invalid service API: {v}. Must be one of {list(ServiceApi)}"
        )
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from .policy import (
|
|
2
|
+
Policy,
|
|
3
|
+
PolicyStatus,
|
|
4
|
+
)
|
|
5
|
+
from .document_policy_validation import (
|
|
6
|
+
DocumentPolicyValidation,
|
|
7
|
+
DocumentPolicyValidationStatus,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"Policy",
|
|
12
|
+
"PolicyStatus",
|
|
13
|
+
"DocumentPolicyValidation",
|
|
14
|
+
"DocumentPolicyValidationStatus",
|
|
15
|
+
]
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DocumentPolicyValidation domain models for the Capture, Extract, Assemble,
|
|
3
|
+
Publish workflow.
|
|
4
|
+
|
|
5
|
+
This module contains the DocumentPolicyValidation domain object that
|
|
6
|
+
represents
|
|
7
|
+
the result of validating a document against a policy configuration in the CEAP
|
|
8
|
+
workflow system.
|
|
9
|
+
|
|
10
|
+
A DocumentPolicyValidation captures the complete validation process including:
|
|
11
|
+
- The document being validated and the policy used
|
|
12
|
+
- Actual validation scores achieved against policy criteria
|
|
13
|
+
- Optional transformation results and post-transformation scores
|
|
14
|
+
- Status tracking throughout the validation lifecycle
|
|
15
|
+
|
|
16
|
+
All domain models use Pydantic BaseModel for validation, serialization,
|
|
17
|
+
and type safety, following the patterns established in the sample project.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from pydantic import BaseModel, Field, field_validator
|
|
21
|
+
from typing import Optional, List, Tuple
|
|
22
|
+
from datetime import datetime, timezone
|
|
23
|
+
from enum import Enum
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class DocumentPolicyValidationStatus(str, Enum):
    """States a document policy validation can be in.

    Members mix in ``str``, so each one compares equal to its string value.
    """

    # Validation stages
    PENDING = "pending"
    IN_PROGRESS = "in_progress"
    VALIDATION_COMPLETE = "validation_complete"

    # Transformation stages
    TRANSFORMATION_REQUIRED = "transformation_required"
    TRANSFORMATION_IN_PROGRESS = "transformation_in_progress"
    TRANSFORMATION_COMPLETE = "transformation_complete"

    # Outcomes
    PASSED = "passed"
    FAILED = "failed"
    ERROR = "error"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class DocumentPolicyValidation(BaseModel):
    """Record of validating one document against one policy configuration.

    The record follows the validation lifecycle:

    1. Initial validation: the document is scored against the policy's
       validation queries.
    2. Optional transformation: when the policy includes transformation
       queries and the initial validation fails, transformations are applied.
    3. Re-validation: the transformed document is scored again.
    4. Final determination: pass/fail based on the final scores.

    Both validation-only policies and policies with transformations are
    supported.
    """

    # Core validation identification
    validation_id: str = Field(
        description="Unique identifier for this validation instance"
    )
    input_document_id: str = Field(
        description="ID of the document being validated against the policy"
    )
    policy_id: str = Field(
        description="ID of the policy configuration used for validation"
    )

    # Validation process status
    status: DocumentPolicyValidationStatus = DocumentPolicyValidationStatus.PENDING

    # Initial validation results
    validation_scores: List[Tuple[str, int]] = Field(
        default_factory=list,
        description="List of (knowledge_service_query_id, actual_score) "
        "tuples representing the scores achieved during initial validation. "
        "Scores are between 0 and 100",
    )

    # Transformation results (if applicable)
    transformed_document_id: Optional[str] = Field(
        default=None,
        description="ID of the document after transformations have been "
        "applied. Only present if the policy includes transformation queries "
        "and they were executed",
    )
    post_transform_validation_scores: Optional[List[Tuple[str, int]]] = Field(
        default=None,
        description="List of (knowledge_service_query_id, actual_score) "
        "tuples representing scores achieved after transformation. "
        "Only present if transformations were applied and re-validation "
        "occurred",
    )

    # Validation metadata
    started_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        description="When the validation process was initiated",
    )
    completed_at: Optional[datetime] = Field(
        default=None, description="When the validation process completed"
    )
    error_message: Optional[str] = Field(
        default=None, description="Error message if validation process failed"
    )

    # Results summary
    passed: Optional[bool] = Field(
        default=None,
        description="Whether the document passed policy validation. "
        "None while validation is in progress, True/False when complete",
    )

    @field_validator("input_document_id")
    @classmethod
    def input_document_id_must_not_be_empty(cls, v: str) -> str:
        """Return the ID stripped of outer whitespace; reject blank IDs."""
        trimmed = v.strip() if v else ""
        if not trimmed:
            raise ValueError("Input document ID cannot be empty")
        return trimmed

    @field_validator("policy_id")
    @classmethod
    def policy_id_must_not_be_empty(cls, v: str) -> str:
        """Return the ID stripped of outer whitespace; reject blank IDs."""
        trimmed = v.strip() if v else ""
        if not trimmed:
            raise ValueError("Policy ID cannot be empty")
        return trimmed

    @field_validator("validation_scores")
    @classmethod
    def validation_scores_must_be_valid(
        cls, v: List[Tuple[str, int]]
    ) -> List[Tuple[str, int]]:
        """Check the initial score list; an empty list is returned untouched."""
        if not isinstance(v, list):
            raise ValueError("Validation scores must be a list")

        # Pending validations legitimately have no scores yet.
        return cls._validate_score_tuples(v, "validation_scores") if v else v

    @field_validator("post_transform_validation_scores")
    @classmethod
    def post_transform_scores_must_be_valid(
        cls, v: Optional[List[Tuple[str, int]]]
    ) -> Optional[List[Tuple[str, int]]]:
        """Check the post-transform scores; None and empty lists pass through."""
        if v is None:
            return None

        if not isinstance(v, list):
            raise ValueError("Post-transform validation scores must be a list or None")

        if v:
            return cls._validate_score_tuples(v, "post_transform_validation_scores")
        return v

    @field_validator("error_message")
    @classmethod
    def error_message_must_be_valid(cls, v: Optional[str]) -> Optional[str]:
        """Strip the message; blank messages are normalized to None."""
        if v is None:
            return None
        if not isinstance(v, str):
            raise ValueError("Error message must be a string or None")
        trimmed = v.strip()
        return trimmed or None

    @field_validator("transformed_document_id")
    @classmethod
    def transformed_document_id_must_be_valid(cls, v: Optional[str]) -> Optional[str]:
        """Strip the ID; None is allowed but a blank string is rejected."""
        if v is None:
            return None
        if isinstance(v, str) and v.strip():
            return v.strip()
        raise ValueError(
            "Transformed document ID must be a non-empty string or None"
        )

    @classmethod
    def _validate_score_tuples(
        cls, scores: List[Tuple[str, int]], field_name: str
    ) -> List[Tuple[str, int]]:
        """Validate a list of (query_id, score) pairs and return a cleaned copy.

        Each entry must be a 2-tuple whose first element is a non-blank
        string, unique within the list after stripping, and whose second
        element is an integer from 0 to 100 inclusive. ``field_name`` is used
        only to label error messages.

        Raises:
            ValueError: if any entry breaks one of the rules above.
        """
        seen_ids = set()
        cleaned: List[Tuple[str, int]] = []

        for entry in scores:
            if not (isinstance(entry, tuple) and len(entry) == 2):
                raise ValueError(
                    f"Each item in {field_name} must be a 2-tuple of "
                    f"(query_id, actual_score)"
                )

            raw_id, score = entry

            # Query ID: non-blank string, compared after stripping.
            if not isinstance(raw_id, str) or not raw_id.strip():
                raise ValueError(f"Query ID in {field_name} must be a non-empty string")
            query_id = raw_id.strip()

            # Each query may be scored only once per field.
            if query_id in seen_ids:
                raise ValueError(f"Duplicate query ID '{query_id}' in {field_name}")
            seen_ids.add(query_id)

            # Score: integer within the inclusive 0..100 range.
            if not isinstance(score, int):
                raise ValueError(
                    f"Actual score in {field_name} must be an integer "
                    f"between 0 and 100"
                )
            if not 0 <= score <= 100:
                raise ValueError(
                    f"Actual score {score} in {field_name} must be "
                    f"between 0 and 100"
                )

            cleaned.append((query_id, score))

        return cleaned
|