julee 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. julee/__init__.py +3 -0
  2. julee/api/__init__.py +20 -0
  3. julee/api/app.py +180 -0
  4. julee/api/dependencies.py +257 -0
  5. julee/api/requests.py +175 -0
  6. julee/api/responses.py +43 -0
  7. julee/api/routers/__init__.py +43 -0
  8. julee/api/routers/assembly_specifications.py +212 -0
  9. julee/api/routers/documents.py +182 -0
  10. julee/api/routers/knowledge_service_configs.py +79 -0
  11. julee/api/routers/knowledge_service_queries.py +293 -0
  12. julee/api/routers/system.py +137 -0
  13. julee/api/routers/workflows.py +234 -0
  14. julee/api/services/__init__.py +20 -0
  15. julee/api/services/system_initialization.py +214 -0
  16. julee/api/tests/__init__.py +14 -0
  17. julee/api/tests/routers/__init__.py +17 -0
  18. julee/api/tests/routers/test_assembly_specifications.py +749 -0
  19. julee/api/tests/routers/test_documents.py +301 -0
  20. julee/api/tests/routers/test_knowledge_service_configs.py +234 -0
  21. julee/api/tests/routers/test_knowledge_service_queries.py +738 -0
  22. julee/api/tests/routers/test_system.py +179 -0
  23. julee/api/tests/routers/test_workflows.py +393 -0
  24. julee/api/tests/test_app.py +285 -0
  25. julee/api/tests/test_dependencies.py +245 -0
  26. julee/api/tests/test_requests.py +250 -0
  27. julee/domain/__init__.py +22 -0
  28. julee/domain/models/__init__.py +49 -0
  29. julee/domain/models/assembly/__init__.py +17 -0
  30. julee/domain/models/assembly/assembly.py +103 -0
  31. julee/domain/models/assembly/tests/__init__.py +0 -0
  32. julee/domain/models/assembly/tests/factories.py +37 -0
  33. julee/domain/models/assembly/tests/test_assembly.py +430 -0
  34. julee/domain/models/assembly_specification/__init__.py +24 -0
  35. julee/domain/models/assembly_specification/assembly_specification.py +172 -0
  36. julee/domain/models/assembly_specification/knowledge_service_query.py +123 -0
  37. julee/domain/models/assembly_specification/tests/__init__.py +0 -0
  38. julee/domain/models/assembly_specification/tests/factories.py +78 -0
  39. julee/domain/models/assembly_specification/tests/test_assembly_specification.py +490 -0
  40. julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +310 -0
  41. julee/domain/models/custom_fields/__init__.py +0 -0
  42. julee/domain/models/custom_fields/content_stream.py +68 -0
  43. julee/domain/models/custom_fields/tests/__init__.py +0 -0
  44. julee/domain/models/custom_fields/tests/test_custom_fields.py +53 -0
  45. julee/domain/models/document/__init__.py +17 -0
  46. julee/domain/models/document/document.py +150 -0
  47. julee/domain/models/document/tests/__init__.py +0 -0
  48. julee/domain/models/document/tests/factories.py +76 -0
  49. julee/domain/models/document/tests/test_document.py +297 -0
  50. julee/domain/models/knowledge_service_config/__init__.py +17 -0
  51. julee/domain/models/knowledge_service_config/knowledge_service_config.py +86 -0
  52. julee/domain/models/policy/__init__.py +15 -0
  53. julee/domain/models/policy/document_policy_validation.py +220 -0
  54. julee/domain/models/policy/policy.py +203 -0
  55. julee/domain/models/policy/tests/__init__.py +0 -0
  56. julee/domain/models/policy/tests/factories.py +47 -0
  57. julee/domain/models/policy/tests/test_document_policy_validation.py +420 -0
  58. julee/domain/models/policy/tests/test_policy.py +546 -0
  59. julee/domain/repositories/__init__.py +27 -0
  60. julee/domain/repositories/assembly.py +45 -0
  61. julee/domain/repositories/assembly_specification.py +52 -0
  62. julee/domain/repositories/base.py +146 -0
  63. julee/domain/repositories/document.py +49 -0
  64. julee/domain/repositories/document_policy_validation.py +52 -0
  65. julee/domain/repositories/knowledge_service_config.py +54 -0
  66. julee/domain/repositories/knowledge_service_query.py +44 -0
  67. julee/domain/repositories/policy.py +49 -0
  68. julee/domain/use_cases/__init__.py +17 -0
  69. julee/domain/use_cases/decorators.py +107 -0
  70. julee/domain/use_cases/extract_assemble_data.py +649 -0
  71. julee/domain/use_cases/initialize_system_data.py +842 -0
  72. julee/domain/use_cases/tests/__init__.py +7 -0
  73. julee/domain/use_cases/tests/test_extract_assemble_data.py +548 -0
  74. julee/domain/use_cases/tests/test_initialize_system_data.py +455 -0
  75. julee/domain/use_cases/tests/test_validate_document.py +1228 -0
  76. julee/domain/use_cases/validate_document.py +736 -0
  77. julee/fixtures/assembly_specifications.yaml +70 -0
  78. julee/fixtures/documents.yaml +178 -0
  79. julee/fixtures/knowledge_service_configs.yaml +37 -0
  80. julee/fixtures/knowledge_service_queries.yaml +27 -0
  81. julee/repositories/__init__.py +17 -0
  82. julee/repositories/memory/__init__.py +31 -0
  83. julee/repositories/memory/assembly.py +84 -0
  84. julee/repositories/memory/assembly_specification.py +125 -0
  85. julee/repositories/memory/base.py +227 -0
  86. julee/repositories/memory/document.py +149 -0
  87. julee/repositories/memory/document_policy_validation.py +104 -0
  88. julee/repositories/memory/knowledge_service_config.py +123 -0
  89. julee/repositories/memory/knowledge_service_query.py +120 -0
  90. julee/repositories/memory/policy.py +87 -0
  91. julee/repositories/memory/tests/__init__.py +0 -0
  92. julee/repositories/memory/tests/test_document.py +212 -0
  93. julee/repositories/memory/tests/test_document_policy_validation.py +161 -0
  94. julee/repositories/memory/tests/test_policy.py +443 -0
  95. julee/repositories/minio/__init__.py +31 -0
  96. julee/repositories/minio/assembly.py +103 -0
  97. julee/repositories/minio/assembly_specification.py +170 -0
  98. julee/repositories/minio/client.py +570 -0
  99. julee/repositories/minio/document.py +530 -0
  100. julee/repositories/minio/document_policy_validation.py +120 -0
  101. julee/repositories/minio/knowledge_service_config.py +187 -0
  102. julee/repositories/minio/knowledge_service_query.py +211 -0
  103. julee/repositories/minio/policy.py +106 -0
  104. julee/repositories/minio/tests/__init__.py +0 -0
  105. julee/repositories/minio/tests/fake_client.py +213 -0
  106. julee/repositories/minio/tests/test_assembly.py +374 -0
  107. julee/repositories/minio/tests/test_assembly_specification.py +391 -0
  108. julee/repositories/minio/tests/test_client_protocol.py +57 -0
  109. julee/repositories/minio/tests/test_document.py +591 -0
  110. julee/repositories/minio/tests/test_document_policy_validation.py +192 -0
  111. julee/repositories/minio/tests/test_knowledge_service_config.py +374 -0
  112. julee/repositories/minio/tests/test_knowledge_service_query.py +438 -0
  113. julee/repositories/minio/tests/test_policy.py +559 -0
  114. julee/repositories/temporal/__init__.py +38 -0
  115. julee/repositories/temporal/activities.py +114 -0
  116. julee/repositories/temporal/activity_names.py +34 -0
  117. julee/repositories/temporal/proxies.py +159 -0
  118. julee/services/__init__.py +18 -0
  119. julee/services/knowledge_service/__init__.py +48 -0
  120. julee/services/knowledge_service/anthropic/__init__.py +12 -0
  121. julee/services/knowledge_service/anthropic/knowledge_service.py +331 -0
  122. julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +318 -0
  123. julee/services/knowledge_service/factory.py +138 -0
  124. julee/services/knowledge_service/knowledge_service.py +160 -0
  125. julee/services/knowledge_service/memory/__init__.py +13 -0
  126. julee/services/knowledge_service/memory/knowledge_service.py +278 -0
  127. julee/services/knowledge_service/memory/test_knowledge_service.py +345 -0
  128. julee/services/knowledge_service/test_factory.py +112 -0
  129. julee/services/temporal/__init__.py +38 -0
  130. julee/services/temporal/activities.py +86 -0
  131. julee/services/temporal/activity_names.py +22 -0
  132. julee/services/temporal/proxies.py +41 -0
  133. julee/util/__init__.py +0 -0
  134. julee/util/domain.py +119 -0
  135. julee/util/repos/__init__.py +0 -0
  136. julee/util/repos/minio/__init__.py +0 -0
  137. julee/util/repos/minio/file_storage.py +213 -0
  138. julee/util/repos/temporal/__init__.py +11 -0
  139. julee/util/repos/temporal/client_proxies/file_storage.py +68 -0
  140. julee/util/repos/temporal/data_converter.py +123 -0
  141. julee/util/repos/temporal/minio_file_storage.py +12 -0
  142. julee/util/repos/temporal/proxies/__init__.py +0 -0
  143. julee/util/repos/temporal/proxies/file_storage.py +58 -0
  144. julee/util/repositories.py +55 -0
  145. julee/util/temporal/__init__.py +22 -0
  146. julee/util/temporal/activities.py +123 -0
  147. julee/util/temporal/decorators.py +473 -0
  148. julee/util/tests/__init__.py +1 -0
  149. julee/util/tests/test_decorators.py +770 -0
  150. julee/util/validation/__init__.py +29 -0
  151. julee/util/validation/repository.py +100 -0
  152. julee/util/validation/type_guards.py +369 -0
  153. julee/worker.py +211 -0
  154. julee/workflows/__init__.py +26 -0
  155. julee/workflows/extract_assemble.py +215 -0
  156. julee/workflows/validate_document.py +228 -0
  157. julee-0.1.0.dist-info/METADATA +195 -0
  158. julee-0.1.0.dist-info/RECORD +161 -0
  159. julee-0.1.0.dist-info/WHEEL +5 -0
  160. julee-0.1.0.dist-info/licenses/LICENSE +674 -0
  161. julee-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,430 @@
1
+ """
2
+ Comprehensive tests for Assembly domain model.
3
+
4
+ This test module documents the design decisions made for the Assembly domain
5
+ model using table-based tests. It covers:
6
+
7
+ - Assembly instantiation with various field combinations
8
+ - JSON serialization behavior
9
+ - Field validation for required fields
10
+ - Assembly status values
11
+ - Assembly document output management
12
+
13
+ Design decisions documented:
14
+ - Assemblies must have all required fields (assembly_id,
15
+ assembly_specification_id, input_document_id)
16
+ - All ID fields must be non-empty and non-whitespace
17
+ - Status defaults to PENDING
18
+ - assembled_document_id is optional and defaults to None
19
+ - Timestamps are automatically set with timezone-aware defaults
20
+ """
21
+
22
+ import pytest
23
+ import json
24
+ from datetime import datetime, timezone
25
+
26
+ from julee.domain.models.assembly import Assembly, AssemblyStatus
27
+ from .factories import AssemblyFactory
28
+
29
+
30
class TestAssemblyInstantiation:
    """Test Assembly creation with various field combinations."""

    @pytest.mark.parametrize(
        "assembly_id,assembly_specification_id,input_document_id,expected_success",
        [
            # Valid cases
            ("asm-1", "spec-1", "doc-1", True),
            ("assembly-uuid-456", "spec-uuid-789", "input-doc-123", True),
            ("asm_abc", "spec_def", "doc_ghi", True),
            # Invalid cases - empty required fields
            ("", "spec-1", "doc-1", False),  # Empty assembly_id
            ("asm-1", "", "doc-1", False),  # Empty assembly_specification_id
            ("asm-1", "spec-1", "", False),  # Empty input_document_id
            # Invalid cases - whitespace only
            (" ", "spec-1", "doc-1", False),  # Whitespace assembly_id
            ("asm-1", " ", "doc-1", False),  # Whitespace assembly_specification_id
            ("asm-1", "spec-1", " ", False),  # Whitespace input_document_id
        ],
    )
    def test_assembly_creation_validation(
        self,
        assembly_id: str,
        assembly_specification_id: str,
        input_document_id: str,
        expected_success: bool,
    ) -> None:
        """Test assembly creation with various field validation scenarios.

        Each parametrized row supplies the three ID fields plus a flag saying
        whether construction is expected to succeed. ``workflow_id`` is always
        given a valid value so that only the ID fields under test can cause a
        rejection.
        """
        if not expected_success:
            # ValueError also covers pydantic's ValidationError (a ValueError
            # subclass) without masking unrelated failures such as TypeError,
            # which a bare ``Exception`` would silently accept.
            with pytest.raises(ValueError):
                Assembly(
                    assembly_id=assembly_id,
                    assembly_specification_id=assembly_specification_id,
                    input_document_id=input_document_id,
                    workflow_id="test-workflow-123",
                )
            return

        # Should create successfully
        assembly = Assembly(
            assembly_id=assembly_id,
            assembly_specification_id=assembly_specification_id,
            input_document_id=input_document_id,
            workflow_id="test-workflow-123",
        )

        # ID fields are compared against their stripped form.
        assert assembly.assembly_id == assembly_id.strip()
        assert (
            assembly.assembly_specification_id == assembly_specification_id.strip()
        )
        assert assembly.input_document_id == input_document_id.strip()

        # Fields not passed in must take their documented defaults.
        assert assembly.status == AssemblyStatus.PENDING  # Default
        assert assembly.assembled_document_id is None  # Default None
        assert assembly.created_at is not None
        assert assembly.updated_at is not None
89
+
90
+
91
class TestAssemblySerialization:
    """Test Assembly JSON serialization behavior."""

    def test_assembly_json_serialization(self) -> None:
        """Check that every Assembly field is present in its JSON dump."""
        assembly = AssemblyFactory.build(
            assembly_id="test-assembly-123",
            assembly_specification_id="spec-456",
            input_document_id="input-789",
            status=AssemblyStatus.IN_PROGRESS,
            assembled_document_id="output-doc-456",
        )

        payload = json.loads(assembly.model_dump_json())

        # Plain string fields must appear in the JSON with the model's value.
        for field_name in (
            "assembly_id",
            "assembly_specification_id",
            "input_document_id",
            "workflow_id",
        ):
            assert payload[field_name] == getattr(assembly, field_name)

        # The status enum is serialized as its underlying value.
        assert payload["status"] == assembly.status.value

        # Timestamps only need to be present; their format is not pinned here.
        assert "created_at" in payload
        assert "updated_at" in payload

        assert payload["assembled_document_id"] == assembly.assembled_document_id

    def test_assembly_json_roundtrip(self) -> None:
        """Check that an Assembly survives a JSON dump/load cycle."""
        original_assembly = AssemblyFactory.build(
            assembled_document_id="test-output-doc"
        )

        # Serialize to JSON, then rebuild a model from the decoded dict.
        decoded = json.loads(original_assembly.model_dump_json())
        reconstructed_assembly = Assembly(**decoded)

        # Identity, workflow, status and output fields must match.
        for field_name in (
            "assembly_id",
            "assembly_specification_id",
            "input_document_id",
            "workflow_id",
            "status",
            "assembled_document_id",
        ):
            assert getattr(reconstructed_assembly, field_name) == getattr(
                original_assembly, field_name
            )
149
+
150
+
151
class TestAssemblyDefaults:
    """Test Assembly default values and behavior."""

    def test_assembly_default_values(self) -> None:
        """Check the defaults applied when only required fields are given."""
        minimal_assembly = Assembly(
            assembly_id="test-id",
            assembly_specification_id="spec-id",
            input_document_id="doc-id",
            workflow_id="test-workflow-123",
        )

        assert minimal_assembly.status == AssemblyStatus.PENDING
        assert minimal_assembly.assembled_document_id is None

        created = minimal_assembly.created_at
        updated = minimal_assembly.updated_at

        assert created is not None
        assert updated is not None
        assert isinstance(created, datetime)
        assert isinstance(updated, datetime)
        # Should be timezone-aware
        assert created.tzinfo is not None
        assert updated.tzinfo is not None

    def test_assembly_custom_values(self) -> None:
        """Check that explicitly supplied values replace the defaults."""
        custom_created_at = datetime(2023, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
        custom_updated_at = datetime(2023, 1, 2, 12, 0, 0, tzinfo=timezone.utc)

        overrides = {
            "status": AssemblyStatus.COMPLETED,
            "assembled_document_id": "custom-output-doc",
            "created_at": custom_created_at,
            "updated_at": custom_updated_at,
        }
        custom_assembly = Assembly(
            assembly_id="custom-id",
            assembly_specification_id="custom-spec",
            input_document_id="custom-doc",
            workflow_id="custom-workflow-456",
            **overrides,
        )

        # Every override must be stored exactly as supplied.
        assert custom_assembly.status == AssemblyStatus.COMPLETED
        assert custom_assembly.assembled_document_id == "custom-output-doc"
        assert custom_assembly.created_at == custom_created_at
        assert custom_assembly.updated_at == custom_updated_at

    @pytest.mark.parametrize(
        "status",
        [
            AssemblyStatus.PENDING,
            AssemblyStatus.IN_PROGRESS,
            AssemblyStatus.COMPLETED,
            AssemblyStatus.FAILED,
            AssemblyStatus.CANCELLED,
        ],
    )
    def test_assembly_status_values(self, status: AssemblyStatus) -> None:
        """Check that an Assembly can be built with each status value."""
        built = AssemblyFactory.build(status=status)
        assert built.status == status
208
+
209
+
210
class TestAssemblyFieldValidation:
    """Test Assembly field-specific validation."""

    def test_assembly_id_validation(self) -> None:
        """Test assembly_id field validation."""
        # Valid cases
        valid_assembly = Assembly(
            assembly_id="valid-id",
            assembly_specification_id="spec-id",
            input_document_id="doc-id",
            workflow_id="test-workflow-123",
        )
        assert valid_assembly.assembly_id == "valid-id"

        # Invalid cases: empty, then whitespace-only.
        for blank in ("", " "):
            # ValueError also covers pydantic's ValidationError (a ValueError
            # subclass) without hiding unrelated errors the way a bare
            # ``Exception`` would.
            with pytest.raises(ValueError):
                Assembly(
                    assembly_id=blank,
                    assembly_specification_id="spec-id",
                    input_document_id="doc-id",
                    workflow_id="test-workflow-123",
                )

    def test_assembly_specification_id_validation(self) -> None:
        """Test assembly_specification_id field validation."""
        # Valid cases
        valid_assembly = Assembly(
            assembly_id="asm-id",
            assembly_specification_id="valid-spec-id",
            input_document_id="doc-id",
            workflow_id="test-workflow-123",
        )
        assert valid_assembly.assembly_specification_id == "valid-spec-id"

        # Invalid cases: empty, then whitespace-only.
        for blank in ("", " "):
            with pytest.raises(ValueError):
                Assembly(
                    assembly_id="asm-id",
                    assembly_specification_id=blank,
                    input_document_id="doc-id",
                    workflow_id="test-workflow-123",
                )

    def test_input_document_id_validation(self) -> None:
        """Test input_document_id field validation."""
        # Valid cases
        valid_assembly = Assembly(
            assembly_id="asm-id",
            assembly_specification_id="spec-id",
            input_document_id="valid-doc-id",
            workflow_id="test-workflow-123",
        )
        assert valid_assembly.input_document_id == "valid-doc-id"

        # Invalid cases: empty, then whitespace-only.
        for blank in ("", " "):
            with pytest.raises(ValueError):
                Assembly(
                    assembly_id="asm-id",
                    assembly_specification_id="spec-id",
                    input_document_id=blank,
                    workflow_id="test-workflow-123",
                )

    def test_field_trimming(self) -> None:
        """Test that string fields are properly trimmed."""
        assembly = Assembly(
            assembly_id=" trim-asm ",
            assembly_specification_id=" trim-spec ",
            input_document_id=" trim-doc ",
            workflow_id=" trim-workflow ",
        )

        # Surrounding whitespace must be removed from every string field.
        assert assembly.assembly_id == "trim-asm"
        assert assembly.assembly_specification_id == "trim-spec"
        assert assembly.input_document_id == "trim-doc"
        assert assembly.workflow_id == "trim-workflow"
310
+
311
+
312
class TestAssemblyDocumentManagement:
    """Test Assembly assembled document management."""

    def test_default_assembled_document_id(self) -> None:
        """Test Assembly with default assembled_document_id (None)."""
        assembly = AssemblyFactory.build(assembled_document_id=None)
        assert assembly.assembled_document_id is None

    def test_valid_assembled_document_id(self) -> None:
        """Test Assembly with valid assembled document ID."""
        assembly = AssemblyFactory.build(assembled_document_id="output-doc-123")
        assert assembly.assembled_document_id == "output-doc-123"

    def test_assembled_document_id_validation(self) -> None:
        """Test assembled_document_id field validation.

        A non-empty string and ``None`` are accepted; an empty or
        whitespace-only string is rejected.
        """
        # Valid cases
        valid_assembly = Assembly(
            assembly_id="asm-id",
            assembly_specification_id="spec-id",
            input_document_id="doc-id",
            workflow_id="test-workflow-123",
            assembled_document_id="valid-output-doc",
        )
        assert valid_assembly.assembled_document_id == "valid-output-doc"

        # None is valid
        none_assembly = Assembly(
            assembly_id="asm-id",
            assembly_specification_id="spec-id",
            input_document_id="doc-id",
            workflow_id="test-workflow-123",
            assembled_document_id=None,
        )
        assert none_assembly.assembled_document_id is None

        # Invalid cases: empty string, then whitespace only.
        for blank in ("", " "):
            # ValueError also covers pydantic's ValidationError (a ValueError
            # subclass) without hiding unrelated errors the way a bare
            # ``Exception`` would.
            with pytest.raises(ValueError):
                Assembly(
                    assembly_id="asm-id",
                    assembly_specification_id="spec-id",
                    input_document_id="doc-id",
                    workflow_id="test-workflow-123",
                    assembled_document_id=blank,
                )

    def test_assembled_document_id_trimming(self) -> None:
        """Test that assembled_document_id is properly trimmed."""
        assembly = Assembly(
            assembly_id="asm-id",
            assembly_specification_id="spec-id",
            input_document_id="doc-id",
            workflow_id="test-workflow-123",
            assembled_document_id=" trim-output-doc ",
        )
        assert assembly.assembled_document_id == "trim-output-doc"
377
+
378
+
379
class TestAssemblyWorkflowIdValidation:
    """Test Assembly workflow_id field validation."""

    def test_workflow_id_validation(self) -> None:
        """Test workflow_id field validation."""
        # Valid cases
        valid_assembly = Assembly(
            assembly_id="asm-id",
            assembly_specification_id="spec-id",
            input_document_id="doc-id",
            workflow_id="valid-workflow-id",
        )
        assert valid_assembly.workflow_id == "valid-workflow-id"

        # Invalid cases: empty string, then whitespace only.
        for blank in ("", " "):
            # ValueError also covers pydantic's ValidationError (a ValueError
            # subclass) without hiding unrelated errors the way a bare
            # ``Exception`` would.
            with pytest.raises(ValueError):
                Assembly(
                    assembly_id="asm-id",
                    assembly_specification_id="spec-id",
                    input_document_id="doc-id",
                    workflow_id=blank,
                )

    def test_workflow_id_trimming(self) -> None:
        """Test that workflow_id is properly trimmed."""
        assembly = Assembly(
            assembly_id="asm-id",
            assembly_specification_id="spec-id",
            input_document_id="doc-id",
            workflow_id=" trim-workflow-id ",
        )
        assert assembly.workflow_id == "trim-workflow-id"

    def test_workflow_id_required(self) -> None:
        """Test that workflow_id is required."""
        # workflow_id is required and cannot be omitted. The omission is
        # expected to surface as a validation error (a ValueError subclass in
        # pydantic), not as a TypeError from the call itself.
        with pytest.raises(ValueError):
            Assembly(  # type: ignore[call-arg]
                assembly_id="asm-id",
                assembly_specification_id="spec-id",
                input_document_id="doc-id",
                # workflow_id is missing - should fail
            )
@@ -0,0 +1,24 @@
1
+ """
2
+ Assembly domain package for the Capture, Extract, Assemble, Publish workflow.
3
+
4
+ This package contains the AssemblySpecification and KnowledgeServiceQuery
5
+ domain objects that work together to define assembly configurations in the
6
+ CEAP workflow.
7
+
8
+ AssemblySpecification defines document output types (like "meeting minutes")
9
+ with their JSON schemas and applicability rules. KnowledgeServiceQuery defines
10
+ specific extraction operations that can be performed against knowledge
11
+ services to populate the AssemblySpecification's schema.
12
+ """
13
+
14
+ from .assembly_specification import (
15
+ AssemblySpecification,
16
+ AssemblySpecificationStatus,
17
+ )
18
+ from .knowledge_service_query import KnowledgeServiceQuery
19
+
20
+ __all__ = [
21
+ "AssemblySpecification",
22
+ "AssemblySpecificationStatus",
23
+ "KnowledgeServiceQuery",
24
+ ]
@@ -0,0 +1,172 @@
1
+ """
2
+ AssemblySpecification domain models for the Capture, Extract, Assemble,
3
+ Publish workflow.
4
+
5
+ This module contains the AssemblySpecification domain object that represents
6
+ assembly configurations in the CEAP workflow system.
7
+
8
+ An AssemblySpecification defines a type of document output (like "meeting
9
+ minutes"), includes information about its applicability and specifies
10
+ which extractors are needed to collect the data for that output.
11
+
12
+ All domain models use Pydantic BaseModel for validation, serialization,
13
+ and type safety, following the patterns established in the sample project.
14
+ """
15
+
16
+ from pydantic import BaseModel, Field, field_validator
17
+ from typing import Optional, Dict, Any
18
+ from datetime import datetime, timezone
19
+ from enum import Enum
20
+ import jsonschema
21
+ import jsonpointer # type: ignore
22
+
23
+
24
class AssemblySpecificationStatus(str, Enum):
    """Lifecycle states an assembly specification can be in.

    The ``str`` mixin makes each member compare equal to, and serialize as,
    its lowercase string value.
    """

    # Each member's value is its lowercase name.
    ACTIVE = "active"
    INACTIVE = "inactive"
    DRAFT = "draft"
    DEPRECATED = "deprecated"
31
+
32
+
33
class AssemblySpecification(BaseModel):
    """Assembly specification configuration that defines how to assemble
    documents of a specific type.

    An AssemblySpecification represents a type of document output (like
    "meeting minutes", "project report", etc.) and defines which extractors
    should be used to collect the necessary data from source documents.

    The AssemblySpecification does not contain the template itself - templates
    will be handled separately during the assembly rendering (or publishing?)
    phase. This separation allows the same AssemblySpecification definition to
    be used with different templates over time.

    Validation summary:
    - ``assembly_specification_id``, ``name``, ``applicability`` and
      ``version`` must be non-blank and are stored stripped.
    - ``jsonschema`` must be a dict with a ``type`` key that passes the
      Draft 7 schema check.
    - ``knowledge_service_queries`` keys must be JSON Pointers that resolve
      inside ``jsonschema`` (or the empty string, meaning the schema root);
      values must be non-blank query IDs and are stored stripped.
    """

    # Core assembly identification
    assembly_specification_id: str = Field(
        description="Unique identifier for this assembly specification"
    )
    name: str = Field(description="Human-readable name like 'meeting minutes'")
    applicability: str = Field(
        description="Text description identifying to what type of "
        "information this assembly applies, such as an online transcript "
        "of a video meeting. This information may be used by knowledge "
        "service for document-assembly matching"
    )

    # Declared before knowledge_service_queries on purpose: that field's
    # validator reads the already-validated schema.
    jsonschema: Dict[str, Any] = Field(
        description="JSON Schema defining the structure of data to be "
        "extracted for this assembly"
    )

    # AssemblySpecification configuration
    status: AssemblySpecificationStatus = AssemblySpecificationStatus.ACTIVE
    knowledge_service_queries: Dict[str, str] = Field(
        default_factory=dict,
        description="Mapping from JSON Pointer paths to "
        "KnowledgeServiceQuery IDs. Keys are JSON Pointer strings "
        "(e.g., '/properties/attendees', '') and values are query IDs "
        "for extracting data for that schema section",
    )

    # AssemblySpecification metadata
    version: str = Field(default="0.1.0", description="Assembly definition version")

    # Timestamps (default to the current UTC time at construction)
    created_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    updated_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    # May later add a detailed description, change log, additional metadata

    @field_validator("assembly_specification_id")
    @classmethod
    def assembly_specification_id_must_not_be_empty(cls, v: str) -> str:
        """Reject a blank ID and return it with surrounding whitespace removed.

        Raises:
            ValueError: if ``v`` is empty or whitespace-only.
        """
        if not v or not v.strip():
            raise ValueError("AssemblySpecification ID cannot be empty")
        return v.strip()

    @field_validator("name")
    @classmethod
    def name_must_not_be_empty(cls, v: str) -> str:
        """Reject a blank name and return it with surrounding whitespace removed.

        Raises:
            ValueError: if ``v`` is empty or whitespace-only.
        """
        if not v or not v.strip():
            raise ValueError("AssemblySpecification name cannot be empty")
        return v.strip()

    @field_validator("applicability")
    @classmethod
    def applicability_must_not_be_empty(cls, v: str) -> str:
        """Reject a blank applicability text and return it stripped.

        Raises:
            ValueError: if ``v`` is empty or whitespace-only.
        """
        if not v or not v.strip():
            raise ValueError("AssemblySpecification applicability cannot be empty")
        return v.strip()

    @field_validator("jsonschema")
    @classmethod
    def jsonschema_must_be_valid(cls, v: Dict[str, Any]) -> Dict[str, Any]:
        """Check that the value is a usable JSON Schema and return it unchanged.

        Raises:
            ValueError: if ``v`` is not a dict, has no ``type`` key, or fails
                the Draft 7 schema check.
        """
        if not isinstance(v, dict):
            raise ValueError("JSON Schema must be a dictionary")

        # Basic validation that it looks like a JSON schema
        if "type" not in v:
            raise ValueError("JSON Schema must have a 'type' field")

        # Validate that it's a proper JSON Schema using jsonschema library
        try:
            jsonschema.Draft7Validator.check_schema(v)
        except jsonschema.SchemaError as e:
            # Chain the original error so the underlying cause stays visible.
            raise ValueError(f"Invalid JSON Schema: {e.message}") from e

        return v

    @field_validator("knowledge_service_queries")
    @classmethod
    def knowledge_service_queries_must_be_valid(
        cls, v: Dict[str, str], info: Any
    ) -> Dict[str, str]:
        """Validate the pointer-to-query mapping against the schema.

        Args:
            v: Mapping of JSON Pointer strings to query IDs.
            info: Validation context; ``info.data`` is read for the
                ``jsonschema`` value (expected to be pydantic's
                ``ValidationInfo`` - the annotation is left as ``Any``).

        Returns:
            A new dict with the same keys and each query ID stripped.

        Raises:
            ValueError: if ``v`` is not a dict, the schema is unavailable, a
                key is not a string or does not resolve inside the schema, or
                a query ID is not a non-blank string.
        """
        if not isinstance(v, dict):
            raise ValueError("Knowledge service queries must be a dictionary")

        # Get the jsonschema field value to validate pointers against it
        jsonschema_value = info.data.get("jsonschema")
        if not jsonschema_value:
            raise ValueError("Cannot validate schema pointers without jsonschema field")

        cleaned_queries = {}
        for schema_pointer, query_id in v.items():
            # Validate schema pointer keys are strings
            if not isinstance(schema_pointer, str):
                raise ValueError("Schema pointer keys must be strings")

            # Validate JSON Pointer format and that it exists in the schema.
            # The empty string is valid as-is: it refers to the schema root.
            if schema_pointer != "":
                try:
                    # Use jsonpointer to validate format and existence
                    ptr = jsonpointer.JsonPointer(schema_pointer)
                    ptr.resolve(jsonschema_value)
                except jsonpointer.JsonPointerException as e:
                    raise ValueError(
                        f"Invalid JSON Pointer '{schema_pointer}': {e}"
                    ) from e
                except (KeyError, IndexError, TypeError) as e:
                    raise ValueError(
                        f"JSON Pointer '{schema_pointer}' does not exist in schema"
                    ) from e

            # Validate query ID values
            if not isinstance(query_id, str) or not query_id.strip():
                raise ValueError("Query ID values must be non-empty strings")

            cleaned_queries[schema_pointer] = query_id.strip()

        return cleaned_queries

    @field_validator("version")
    @classmethod
    def version_must_not_be_empty(cls, v: str) -> str:
        """Reject a blank version and return it with surrounding whitespace removed.

        Raises:
            ValueError: if ``v`` is empty or whitespace-only.
        """
        if not v or not v.strip():
            raise ValueError("AssemblySpecification version cannot be empty")
        return v.strip()