julee 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- julee/__init__.py +3 -0
- julee/api/__init__.py +20 -0
- julee/api/app.py +180 -0
- julee/api/dependencies.py +257 -0
- julee/api/requests.py +175 -0
- julee/api/responses.py +43 -0
- julee/api/routers/__init__.py +43 -0
- julee/api/routers/assembly_specifications.py +212 -0
- julee/api/routers/documents.py +182 -0
- julee/api/routers/knowledge_service_configs.py +79 -0
- julee/api/routers/knowledge_service_queries.py +293 -0
- julee/api/routers/system.py +137 -0
- julee/api/routers/workflows.py +234 -0
- julee/api/services/__init__.py +20 -0
- julee/api/services/system_initialization.py +214 -0
- julee/api/tests/__init__.py +14 -0
- julee/api/tests/routers/__init__.py +17 -0
- julee/api/tests/routers/test_assembly_specifications.py +749 -0
- julee/api/tests/routers/test_documents.py +301 -0
- julee/api/tests/routers/test_knowledge_service_configs.py +234 -0
- julee/api/tests/routers/test_knowledge_service_queries.py +738 -0
- julee/api/tests/routers/test_system.py +179 -0
- julee/api/tests/routers/test_workflows.py +393 -0
- julee/api/tests/test_app.py +285 -0
- julee/api/tests/test_dependencies.py +245 -0
- julee/api/tests/test_requests.py +250 -0
- julee/domain/__init__.py +22 -0
- julee/domain/models/__init__.py +49 -0
- julee/domain/models/assembly/__init__.py +17 -0
- julee/domain/models/assembly/assembly.py +103 -0
- julee/domain/models/assembly/tests/__init__.py +0 -0
- julee/domain/models/assembly/tests/factories.py +37 -0
- julee/domain/models/assembly/tests/test_assembly.py +430 -0
- julee/domain/models/assembly_specification/__init__.py +24 -0
- julee/domain/models/assembly_specification/assembly_specification.py +172 -0
- julee/domain/models/assembly_specification/knowledge_service_query.py +123 -0
- julee/domain/models/assembly_specification/tests/__init__.py +0 -0
- julee/domain/models/assembly_specification/tests/factories.py +78 -0
- julee/domain/models/assembly_specification/tests/test_assembly_specification.py +490 -0
- julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +310 -0
- julee/domain/models/custom_fields/__init__.py +0 -0
- julee/domain/models/custom_fields/content_stream.py +68 -0
- julee/domain/models/custom_fields/tests/__init__.py +0 -0
- julee/domain/models/custom_fields/tests/test_custom_fields.py +53 -0
- julee/domain/models/document/__init__.py +17 -0
- julee/domain/models/document/document.py +150 -0
- julee/domain/models/document/tests/__init__.py +0 -0
- julee/domain/models/document/tests/factories.py +76 -0
- julee/domain/models/document/tests/test_document.py +297 -0
- julee/domain/models/knowledge_service_config/__init__.py +17 -0
- julee/domain/models/knowledge_service_config/knowledge_service_config.py +86 -0
- julee/domain/models/policy/__init__.py +15 -0
- julee/domain/models/policy/document_policy_validation.py +220 -0
- julee/domain/models/policy/policy.py +203 -0
- julee/domain/models/policy/tests/__init__.py +0 -0
- julee/domain/models/policy/tests/factories.py +47 -0
- julee/domain/models/policy/tests/test_document_policy_validation.py +420 -0
- julee/domain/models/policy/tests/test_policy.py +546 -0
- julee/domain/repositories/__init__.py +27 -0
- julee/domain/repositories/assembly.py +45 -0
- julee/domain/repositories/assembly_specification.py +52 -0
- julee/domain/repositories/base.py +146 -0
- julee/domain/repositories/document.py +49 -0
- julee/domain/repositories/document_policy_validation.py +52 -0
- julee/domain/repositories/knowledge_service_config.py +54 -0
- julee/domain/repositories/knowledge_service_query.py +44 -0
- julee/domain/repositories/policy.py +49 -0
- julee/domain/use_cases/__init__.py +17 -0
- julee/domain/use_cases/decorators.py +107 -0
- julee/domain/use_cases/extract_assemble_data.py +649 -0
- julee/domain/use_cases/initialize_system_data.py +842 -0
- julee/domain/use_cases/tests/__init__.py +7 -0
- julee/domain/use_cases/tests/test_extract_assemble_data.py +548 -0
- julee/domain/use_cases/tests/test_initialize_system_data.py +455 -0
- julee/domain/use_cases/tests/test_validate_document.py +1228 -0
- julee/domain/use_cases/validate_document.py +736 -0
- julee/fixtures/assembly_specifications.yaml +70 -0
- julee/fixtures/documents.yaml +178 -0
- julee/fixtures/knowledge_service_configs.yaml +37 -0
- julee/fixtures/knowledge_service_queries.yaml +27 -0
- julee/repositories/__init__.py +17 -0
- julee/repositories/memory/__init__.py +31 -0
- julee/repositories/memory/assembly.py +84 -0
- julee/repositories/memory/assembly_specification.py +125 -0
- julee/repositories/memory/base.py +227 -0
- julee/repositories/memory/document.py +149 -0
- julee/repositories/memory/document_policy_validation.py +104 -0
- julee/repositories/memory/knowledge_service_config.py +123 -0
- julee/repositories/memory/knowledge_service_query.py +120 -0
- julee/repositories/memory/policy.py +87 -0
- julee/repositories/memory/tests/__init__.py +0 -0
- julee/repositories/memory/tests/test_document.py +212 -0
- julee/repositories/memory/tests/test_document_policy_validation.py +161 -0
- julee/repositories/memory/tests/test_policy.py +443 -0
- julee/repositories/minio/__init__.py +31 -0
- julee/repositories/minio/assembly.py +103 -0
- julee/repositories/minio/assembly_specification.py +170 -0
- julee/repositories/minio/client.py +570 -0
- julee/repositories/minio/document.py +530 -0
- julee/repositories/minio/document_policy_validation.py +120 -0
- julee/repositories/minio/knowledge_service_config.py +187 -0
- julee/repositories/minio/knowledge_service_query.py +211 -0
- julee/repositories/minio/policy.py +106 -0
- julee/repositories/minio/tests/__init__.py +0 -0
- julee/repositories/minio/tests/fake_client.py +213 -0
- julee/repositories/minio/tests/test_assembly.py +374 -0
- julee/repositories/minio/tests/test_assembly_specification.py +391 -0
- julee/repositories/minio/tests/test_client_protocol.py +57 -0
- julee/repositories/minio/tests/test_document.py +591 -0
- julee/repositories/minio/tests/test_document_policy_validation.py +192 -0
- julee/repositories/minio/tests/test_knowledge_service_config.py +374 -0
- julee/repositories/minio/tests/test_knowledge_service_query.py +438 -0
- julee/repositories/minio/tests/test_policy.py +559 -0
- julee/repositories/temporal/__init__.py +38 -0
- julee/repositories/temporal/activities.py +114 -0
- julee/repositories/temporal/activity_names.py +34 -0
- julee/repositories/temporal/proxies.py +159 -0
- julee/services/__init__.py +18 -0
- julee/services/knowledge_service/__init__.py +48 -0
- julee/services/knowledge_service/anthropic/__init__.py +12 -0
- julee/services/knowledge_service/anthropic/knowledge_service.py +331 -0
- julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +318 -0
- julee/services/knowledge_service/factory.py +138 -0
- julee/services/knowledge_service/knowledge_service.py +160 -0
- julee/services/knowledge_service/memory/__init__.py +13 -0
- julee/services/knowledge_service/memory/knowledge_service.py +278 -0
- julee/services/knowledge_service/memory/test_knowledge_service.py +345 -0
- julee/services/knowledge_service/test_factory.py +112 -0
- julee/services/temporal/__init__.py +38 -0
- julee/services/temporal/activities.py +86 -0
- julee/services/temporal/activity_names.py +22 -0
- julee/services/temporal/proxies.py +41 -0
- julee/util/__init__.py +0 -0
- julee/util/domain.py +119 -0
- julee/util/repos/__init__.py +0 -0
- julee/util/repos/minio/__init__.py +0 -0
- julee/util/repos/minio/file_storage.py +213 -0
- julee/util/repos/temporal/__init__.py +11 -0
- julee/util/repos/temporal/client_proxies/file_storage.py +68 -0
- julee/util/repos/temporal/data_converter.py +123 -0
- julee/util/repos/temporal/minio_file_storage.py +12 -0
- julee/util/repos/temporal/proxies/__init__.py +0 -0
- julee/util/repos/temporal/proxies/file_storage.py +58 -0
- julee/util/repositories.py +55 -0
- julee/util/temporal/__init__.py +22 -0
- julee/util/temporal/activities.py +123 -0
- julee/util/temporal/decorators.py +473 -0
- julee/util/tests/__init__.py +1 -0
- julee/util/tests/test_decorators.py +770 -0
- julee/util/validation/__init__.py +29 -0
- julee/util/validation/repository.py +100 -0
- julee/util/validation/type_guards.py +369 -0
- julee/worker.py +211 -0
- julee/workflows/__init__.py +26 -0
- julee/workflows/extract_assemble.py +215 -0
- julee/workflows/validate_document.py +228 -0
- julee-0.1.0.dist-info/METADATA +195 -0
- julee-0.1.0.dist-info/RECORD +161 -0
- julee-0.1.0.dist-info/WHEEL +5 -0
- julee-0.1.0.dist-info/licenses/LICENSE +674 -0
- julee-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,430 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Comprehensive tests for Assembly domain model.
|
|
3
|
+
|
|
4
|
+
This test module documents the design decisions made for the Assembly domain
|
|
5
|
+
model using table-based tests. It covers:
|
|
6
|
+
|
|
7
|
+
- Assembly instantiation with various field combinations
|
|
8
|
+
- JSON serialization behavior
|
|
9
|
+
- Field validation for required fields
|
|
10
|
+
- Assembly status transitions
|
|
11
|
+
- Assembly document output management
|
|
12
|
+
|
|
13
|
+
Design decisions documented:
|
|
14
|
+
- Assemblies must have all required fields (assembly_id,
|
|
15
|
+
assembly_specification_id, input_document_id)
|
|
16
|
+
- All ID fields must be non-empty and non-whitespace
|
|
17
|
+
- Status defaults to PENDING
|
|
18
|
+
- assembled_document_id is optional and defaults to None
|
|
19
|
+
- Timestamps are automatically set with timezone-aware defaults
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import pytest
|
|
23
|
+
import json
|
|
24
|
+
from datetime import datetime, timezone
|
|
25
|
+
|
|
26
|
+
from julee.domain.models.assembly import Assembly, AssemblyStatus
|
|
27
|
+
from .factories import AssemblyFactory
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class TestAssemblyInstantiation:
    """Test Assembly creation with various field combinations.

    Each parametrized row supplies the three ID fields plus a flag stating
    whether constructing the Assembly is expected to succeed.
    """

    @pytest.mark.parametrize(
        "assembly_id,assembly_specification_id,input_document_id,expected_success",
        [
            # Valid cases
            ("asm-1", "spec-1", "doc-1", True),
            ("assembly-uuid-456", "spec-uuid-789", "input-doc-123", True),
            ("asm_abc", "spec_def", "doc_ghi", True),
            # Invalid cases - empty required fields
            ("", "spec-1", "doc-1", False),  # Empty assembly_id
            ("asm-1", "", "doc-1", False),  # Empty assembly_specification_id
            ("asm-1", "spec-1", "", False),  # Empty input_document_id
            # Invalid cases - whitespace only
            (" ", "spec-1", "doc-1", False),  # Whitespace assembly_id
            ("asm-1", " ", "doc-1", False),  # Whitespace assembly_specification_id
            ("asm-1", "spec-1", " ", False),  # Whitespace input_document_id
        ],
    )
    def test_assembly_creation_validation(
        self,
        assembly_id: str,
        assembly_specification_id: str,
        input_document_id: str,
        expected_success: bool,
    ) -> None:
        """Test assembly creation with various field validation scenarios."""
        if not expected_success:
            # pydantic's ValidationError subclasses ValueError, so this accepts
            # both a raw ValueError and a wrapped validation failure while no
            # longer passing on unrelated errors (TypeError, AttributeError...).
            with pytest.raises(ValueError):
                Assembly(
                    assembly_id=assembly_id,
                    assembly_specification_id=assembly_specification_id,
                    input_document_id=input_document_id,
                    workflow_id="test-workflow-123",
                )
            return

        # Should create successfully
        assembly = Assembly(
            assembly_id=assembly_id,
            assembly_specification_id=assembly_specification_id,
            input_document_id=input_document_id,
            workflow_id="test-workflow-123",
        )
        assert assembly.assembly_id == assembly_id.strip()
        assert (
            assembly.assembly_specification_id == assembly_specification_id.strip()
        )
        assert assembly.input_document_id == input_document_id.strip()
        assert assembly.status == AssemblyStatus.PENDING  # Default
        assert assembly.assembled_document_id is None  # Default None
        assert assembly.created_at is not None
        assert assembly.updated_at is not None
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class TestAssemblySerialization:
    """Check how an Assembly is rendered to JSON and rebuilt from it."""

    def test_assembly_json_serialization(self) -> None:
        """The JSON dump exposes every field with the model's own values."""
        built = AssemblyFactory.build(
            assembly_id="test-assembly-123",
            assembly_specification_id="spec-456",
            input_document_id="input-789",
            status=AssemblyStatus.IN_PROGRESS,
            assembled_document_id="output-doc-456",
        )

        payload = json.loads(built.model_dump_json())

        # Fields whose JSON value must match the model attribute exactly;
        # the status is compared against the enum's underlying value.
        expected_values = {
            "assembly_id": built.assembly_id,
            "assembly_specification_id": built.assembly_specification_id,
            "input_document_id": built.input_document_id,
            "workflow_id": built.workflow_id,
            "status": built.status.value,
            "assembled_document_id": built.assembled_document_id,
        }
        for key, expected in expected_values.items():
            assert payload[key] == expected

        # Timestamps only need to be present in the output.
        for timestamp_key in ("created_at", "updated_at"):
            assert timestamp_key in payload

    def test_assembly_json_roundtrip(self) -> None:
        """An Assembly dumped to JSON can be rebuilt with the same field
        values."""
        source = AssemblyFactory.build(assembled_document_id="test-output-doc")

        # Serialize, parse, then feed the parsed mapping back to the model.
        restored = Assembly(**json.loads(source.model_dump_json()))

        compared_fields = (
            "assembly_id",
            "assembly_specification_id",
            "input_document_id",
            "workflow_id",
            "status",
            "assembled_document_id",
        )
        for field_name in compared_fields:
            assert getattr(restored, field_name) == getattr(source, field_name)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
class TestAssemblyDefaults:
    """Check the values an Assembly takes when fields are omitted or set."""

    def test_assembly_default_values(self) -> None:
        """Only required fields supplied: defaults must be filled in."""
        bare = Assembly(
            assembly_id="test-id",
            assembly_specification_id="spec-id",
            input_document_id="doc-id",
            workflow_id="test-workflow-123",
        )

        assert bare.status == AssemblyStatus.PENDING
        assert bare.assembled_document_id is None

        stamps = (bare.created_at, bare.updated_at)
        for stamp in stamps:
            assert stamp is not None
        for stamp in stamps:
            assert isinstance(stamp, datetime)
        # Should be timezone-aware
        for stamp in stamps:
            assert stamp.tzinfo is not None

    def test_assembly_custom_values(self) -> None:
        """Explicitly supplied values must be kept as given."""
        created = datetime(2023, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
        updated = datetime(2023, 1, 2, 12, 0, 0, tzinfo=timezone.utc)

        customised = Assembly(
            assembly_id="custom-id",
            assembly_specification_id="custom-spec",
            input_document_id="custom-doc",
            workflow_id="custom-workflow-456",
            status=AssemblyStatus.COMPLETED,
            assembled_document_id="custom-output-doc",
            created_at=created,
            updated_at=updated,
        )

        observed = (
            customised.status,
            customised.assembled_document_id,
            customised.created_at,
            customised.updated_at,
        )
        wanted = (AssemblyStatus.COMPLETED, "custom-output-doc", created, updated)
        for actual, expected in zip(observed, wanted):
            assert actual == expected

    @pytest.mark.parametrize(
        "status",
        [
            AssemblyStatus.PENDING,
            AssemblyStatus.IN_PROGRESS,
            AssemblyStatus.COMPLETED,
            AssemblyStatus.FAILED,
            AssemblyStatus.CANCELLED,
        ],
    )
    def test_assembly_status_values(self, status: AssemblyStatus) -> None:
        """Every listed status is accepted and stored unchanged."""
        built = AssemblyFactory.build(status=status)
        assert built.status == status
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
class TestAssemblyFieldValidation:
    """Test Assembly field-specific validation.

    The rejection checks expect ``ValueError`` rather than a bare
    ``Exception``: pydantic's ``ValidationError`` subclasses ``ValueError``,
    so validation failures are still caught while unrelated errors are not
    silently accepted as a pass.
    """

    def test_assembly_id_validation(self) -> None:
        """Test assembly_id field validation."""
        # Valid cases
        valid_assembly = Assembly(
            assembly_id="valid-id",
            assembly_specification_id="spec-id",
            input_document_id="doc-id",
            workflow_id="test-workflow-123",
        )
        assert valid_assembly.assembly_id == "valid-id"

        # Invalid cases - empty string, then whitespace only
        for blank_value in ("", " "):
            with pytest.raises(ValueError):
                Assembly(
                    assembly_id=blank_value,
                    assembly_specification_id="spec-id",
                    input_document_id="doc-id",
                    workflow_id="test-workflow-123",
                )

    def test_assembly_specification_id_validation(self) -> None:
        """Test assembly_specification_id field validation."""
        # Valid cases
        valid_assembly = Assembly(
            assembly_id="asm-id",
            assembly_specification_id="valid-spec-id",
            input_document_id="doc-id",
            workflow_id="test-workflow-123",
        )
        assert valid_assembly.assembly_specification_id == "valid-spec-id"

        # Invalid cases - empty string, then whitespace only
        for blank_value in ("", " "):
            with pytest.raises(ValueError):
                Assembly(
                    assembly_id="asm-id",
                    assembly_specification_id=blank_value,
                    input_document_id="doc-id",
                    workflow_id="test-workflow-123",
                )

    def test_input_document_id_validation(self) -> None:
        """Test input_document_id field validation."""
        # Valid cases
        valid_assembly = Assembly(
            assembly_id="asm-id",
            assembly_specification_id="spec-id",
            input_document_id="valid-doc-id",
            workflow_id="test-workflow-123",
        )
        assert valid_assembly.input_document_id == "valid-doc-id"

        # Invalid cases - empty string, then whitespace only
        for blank_value in ("", " "):
            with pytest.raises(ValueError):
                Assembly(
                    assembly_id="asm-id",
                    assembly_specification_id="spec-id",
                    input_document_id=blank_value,
                    workflow_id="test-workflow-123",
                )

    def test_field_trimming(self) -> None:
        """Test that string fields are properly trimmed."""
        assembly = Assembly(
            assembly_id=" trim-asm ",
            assembly_specification_id=" trim-spec ",
            input_document_id=" trim-doc ",
            workflow_id=" trim-workflow ",
        )

        assert assembly.assembly_id == "trim-asm"
        assert assembly.assembly_specification_id == "trim-spec"
        assert assembly.input_document_id == "trim-doc"
        assert assembly.workflow_id == "trim-workflow"
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
class TestAssemblyDocumentManagement:
    """Test Assembly assembled document management.

    Rejection checks expect ``ValueError`` (the base class of pydantic's
    ``ValidationError``) instead of a bare ``Exception`` so that unrelated
    failures are not mistaken for a successful validation rejection.
    """

    def test_default_assembled_document_id(self) -> None:
        """Test Assembly with default assembled_document_id (None)."""
        assembly = AssemblyFactory.build(assembled_document_id=None)
        assert assembly.assembled_document_id is None

    def test_valid_assembled_document_id(self) -> None:
        """Test Assembly with valid assembled document ID."""
        assembly = AssemblyFactory.build(assembled_document_id="output-doc-123")
        assert assembly.assembled_document_id == "output-doc-123"

    def test_assembled_document_id_validation(self) -> None:
        """Test assembled_document_id field validation."""
        # Valid cases
        valid_assembly = Assembly(
            assembly_id="asm-id",
            assembly_specification_id="spec-id",
            input_document_id="doc-id",
            workflow_id="test-workflow-123",
            assembled_document_id="valid-output-doc",
        )
        assert valid_assembly.assembled_document_id == "valid-output-doc"

        # None is valid
        none_assembly = Assembly(
            assembly_id="asm-id",
            assembly_specification_id="spec-id",
            input_document_id="doc-id",
            workflow_id="test-workflow-123",
            assembled_document_id=None,
        )
        assert none_assembly.assembled_document_id is None

        # Invalid cases - empty string, then whitespace only
        for blank_value in ("", " "):
            with pytest.raises(ValueError):
                Assembly(
                    assembly_id="asm-id",
                    assembly_specification_id="spec-id",
                    input_document_id="doc-id",
                    workflow_id="test-workflow-123",
                    assembled_document_id=blank_value,
                )

    def test_assembled_document_id_trimming(self) -> None:
        """Test that assembled_document_id is properly trimmed."""
        assembly = Assembly(
            assembly_id="asm-id",
            assembly_specification_id="spec-id",
            input_document_id="doc-id",
            workflow_id="test-workflow-123",
            assembled_document_id=" trim-output-doc ",
        )
        assert assembly.assembled_document_id == "trim-output-doc"
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
class TestAssemblyWorkflowIdValidation:
    """Test Assembly workflow_id field validation.

    Rejection checks expect ``ValueError`` (the base class of pydantic's
    ``ValidationError``) instead of a bare ``Exception`` so that unrelated
    failures are not mistaken for a successful validation rejection.
    """

    def test_workflow_id_validation(self) -> None:
        """Test workflow_id field validation."""
        # Valid cases
        valid_assembly = Assembly(
            assembly_id="asm-id",
            assembly_specification_id="spec-id",
            input_document_id="doc-id",
            workflow_id="valid-workflow-id",
        )
        assert valid_assembly.workflow_id == "valid-workflow-id"

        # Invalid cases - empty string, then whitespace only
        for blank_value in ("", " "):
            with pytest.raises(ValueError):
                Assembly(
                    assembly_id="asm-id",
                    assembly_specification_id="spec-id",
                    input_document_id="doc-id",
                    workflow_id=blank_value,
                )

    def test_workflow_id_trimming(self) -> None:
        """Test that workflow_id is properly trimmed."""
        assembly = Assembly(
            assembly_id="asm-id",
            assembly_specification_id="spec-id",
            input_document_id="doc-id",
            workflow_id=" trim-workflow-id ",
        )
        assert assembly.workflow_id == "trim-workflow-id"

    def test_workflow_id_required(self) -> None:
        """Test that workflow_id is required."""
        # workflow_id is required and cannot be omitted; a missing required
        # field surfaces as a ValidationError, which is a ValueError.
        with pytest.raises(ValueError):
            Assembly(  # type: ignore[call-arg]
                assembly_id="asm-id",
                assembly_specification_id="spec-id",
                input_document_id="doc-id",
                # workflow_id is missing - should fail
            )
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Assembly domain package for the Capture, Extract, Assemble, Publish workflow.
|
|
3
|
+
|
|
4
|
+
This package contains the AssemblySpecification and KnowledgeServiceQuery
|
|
5
|
+
domain objects that work together to define assembly configurations in the
|
|
6
|
+
CEAP workflow.
|
|
7
|
+
|
|
8
|
+
AssemblySpecification defines document output types (like "meeting minutes")
|
|
9
|
+
with their JSON schemas and applicability rules. KnowledgeServiceQuery defines
|
|
10
|
+
specific extraction operations that can be performed against knowledge
|
|
11
|
+
services to populate the AssemblySpecification's schema.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from .assembly_specification import (
|
|
15
|
+
AssemblySpecification,
|
|
16
|
+
AssemblySpecificationStatus,
|
|
17
|
+
)
|
|
18
|
+
from .knowledge_service_query import KnowledgeServiceQuery
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"AssemblySpecification",
|
|
22
|
+
"AssemblySpecificationStatus",
|
|
23
|
+
"KnowledgeServiceQuery",
|
|
24
|
+
]
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AssemblySpecification domain models for the Capture, Extract, Assemble,
|
|
3
|
+
Publish workflow.
|
|
4
|
+
|
|
5
|
+
This module contains the AssemblySpecification domain object that represents
|
|
6
|
+
assembly configurations in the CEAP workflow system.
|
|
7
|
+
|
|
8
|
+
An AssemblySpecification defines a type of document output (like "meeting
|
|
9
|
+
minutes"), includes information about its applicability and specifies
|
|
10
|
+
which extractors are needed to collect the data for that output.
|
|
11
|
+
|
|
12
|
+
All domain models use Pydantic BaseModel for validation, serialization,
|
|
13
|
+
and type safety, following the patterns established in the sample project.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from pydantic import BaseModel, Field, field_validator
|
|
17
|
+
from typing import Optional, Dict, Any
|
|
18
|
+
from datetime import datetime, timezone
|
|
19
|
+
from enum import Enum
|
|
20
|
+
import jsonschema
|
|
21
|
+
import jsonpointer # type: ignore
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class AssemblySpecificationStatus(str, Enum):
    """Status of an assembly specification configuration.

    The enum mixes in ``str``, so each member compares equal to its plain
    string value (for example ``AssemblySpecificationStatus.ACTIVE ==
    "active"``).
    """

    ACTIVE = "active"
    INACTIVE = "inactive"
    DRAFT = "draft"
    DEPRECATED = "deprecated"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class AssemblySpecification(BaseModel):
    """Assembly specification configuration that defines how to assemble
    documents of a specific type.

    An AssemblySpecification represents a type of document output (like
    "meeting minutes", "project report", etc.) and defines which extractors
    should be used to collect the necessary data from source documents.

    The AssemblySpecification does not contain the template itself - templates
    will be handled separately during the assembly rendering (or publishing?)
    phase. This separation allows the same AssemblySpecification definition to
    be used with different templates over time.

    Validation performed by the field validators below:

    - ``assembly_specification_id``, ``name``, ``applicability`` and
      ``version`` must be non-blank and are stored stripped.
    - ``jsonschema`` must contain a ``type`` key and pass
      ``jsonschema.Draft7Validator.check_schema``.
    - ``knowledge_service_queries`` keys must be JSON Pointers that resolve
      inside ``jsonschema`` (the empty string refers to the schema root);
      values must be non-blank query IDs and are stored stripped.

    Every validation failure is raised as ``ValueError``.
    """

    # Core assembly identification
    assembly_specification_id: str = Field(
        description="Unique identifier for this assembly specification"
    )
    name: str = Field(description="Human-readable name like 'meeting minutes'")
    applicability: str = Field(
        description="Text description identifying to what type of "
        "information this assembly applies, such as an online transcript "
        "of a video meeting. This information may be used by knowledge "
        "service for document-assembly matching"
    )

    # NOTE: this field must stay declared before knowledge_service_queries;
    # that field's validator reads the already-validated schema from
    # ``info.data``.
    jsonschema: Dict[str, Any] = Field(
        description="JSON Schema defining the structure of data to be "
        "extracted for this assembly"
    )

    # AssemblySpecification configuration
    status: AssemblySpecificationStatus = AssemblySpecificationStatus.ACTIVE
    knowledge_service_queries: Dict[str, str] = Field(
        default_factory=dict,
        description="Mapping from JSON Pointer paths to "
        "KnowledgeServiceQuery IDs. Keys are JSON Pointer strings "
        "(e.g., '/properties/attendees', '') and values are query IDs "
        "for extracting data for that schema section",
    )

    # AssemblySpecification metadata
    version: str = Field(default="0.1.0", description="Assembly definition version")
    # Timestamps (default to the current time in UTC)
    created_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    updated_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    # May later add a detailed description, change log, additional metadata

    @field_validator("assembly_specification_id")
    @classmethod
    def assembly_specification_id_must_not_be_empty(cls, v: str) -> str:
        """Reject a blank ID and return the ID with surrounding whitespace
        removed."""
        if not v or not v.strip():
            raise ValueError("AssemblySpecification ID cannot be empty")
        return v.strip()

    @field_validator("name")
    @classmethod
    def name_must_not_be_empty(cls, v: str) -> str:
        """Reject a blank name and return the name with surrounding
        whitespace removed."""
        if not v or not v.strip():
            raise ValueError("AssemblySpecification name cannot be empty")
        return v.strip()

    @field_validator("applicability")
    @classmethod
    def applicability_must_not_be_empty(cls, v: str) -> str:
        """Reject a blank applicability text and return it stripped."""
        if not v or not v.strip():
            raise ValueError("AssemblySpecification applicability cannot be empty")
        return v.strip()

    @field_validator("jsonschema")
    @classmethod
    def jsonschema_must_be_valid(cls, v: Dict[str, Any]) -> Dict[str, Any]:
        """Check that the value is a dict carrying a ``type`` key and that it
        is a well-formed Draft 7 JSON Schema; return it unchanged.

        Raises:
            ValueError: if the value is not a dict, lacks ``type``, or is
                rejected by ``Draft7Validator.check_schema``.
        """
        if not isinstance(v, dict):
            raise ValueError("JSON Schema must be a dictionary")

        # Basic validation that it looks like a JSON schema
        if "type" not in v:
            raise ValueError("JSON Schema must have a 'type' field")

        # Validate that it's a proper JSON Schema using jsonschema library
        try:
            jsonschema.Draft7Validator.check_schema(v)
        except jsonschema.SchemaError as e:
            # Chain explicitly so the original schema error stays attached
            # as the cause of the ValueError.
            raise ValueError(f"Invalid JSON Schema: {e.message}") from e

        return v

    @field_validator("knowledge_service_queries")
    @classmethod
    def knowledge_service_queries_must_be_valid(
        cls, v: Dict[str, str], info: Any
    ) -> Dict[str, str]:
        """Validate the pointer-to-query mapping against the model's schema.

        Args:
            v: Mapping from JSON Pointer strings to query IDs.
            info: Validation info object; its ``data`` mapping is read to
                obtain the previously validated ``jsonschema`` value.

        Returns:
            A new dict with the same keys and each query ID stripped of
            surrounding whitespace.

        Raises:
            ValueError: if ``v`` is not a dict, no ``jsonschema`` value is
                available, a key is not a string or does not resolve inside
                the schema, or a query ID is not a non-blank string.
        """
        if not isinstance(v, dict):
            raise ValueError("Knowledge service queries must be a dictionary")

        # Get the jsonschema field value to validate pointers against it
        jsonschema_value = info.data.get("jsonschema")
        if not jsonschema_value:
            raise ValueError("Cannot validate schema pointers without jsonschema field")

        cleaned_queries = {}
        for schema_pointer, query_id in v.items():
            # Validate schema pointer keys are strings
            if not isinstance(schema_pointer, str):
                raise ValueError("Schema pointer keys must be strings")

            # Validate JSON Pointer format and that it exists in the schema.
            # The empty string is valid (it refers to the schema root) and
            # needs no resolution.
            try:
                if schema_pointer != "":
                    # Use jsonpointer to validate format and existence
                    ptr = jsonpointer.JsonPointer(schema_pointer)
                    ptr.resolve(jsonschema_value)
            except jsonpointer.JsonPointerException as e:
                raise ValueError(
                    f"Invalid JSON Pointer '{schema_pointer}': {e}"
                ) from e
            except (KeyError, IndexError, TypeError) as e:
                raise ValueError(
                    f"JSON Pointer '{schema_pointer}' does not exist in schema"
                ) from e

            # Validate query ID values
            if not isinstance(query_id, str) or not query_id.strip():
                raise ValueError("Query ID values must be non-empty strings")

            cleaned_queries[schema_pointer] = query_id.strip()

        return cleaned_queries

    @field_validator("version")
    @classmethod
    def version_must_not_be_empty(cls, v: str) -> str:
        """Reject a blank version string and return it stripped."""
        if not v or not v.strip():
            raise ValueError("AssemblySpecification version cannot be empty")
        return v.strip()
|