julee 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. julee/__init__.py +3 -0
  2. julee/api/__init__.py +20 -0
  3. julee/api/app.py +180 -0
  4. julee/api/dependencies.py +257 -0
  5. julee/api/requests.py +175 -0
  6. julee/api/responses.py +43 -0
  7. julee/api/routers/__init__.py +43 -0
  8. julee/api/routers/assembly_specifications.py +212 -0
  9. julee/api/routers/documents.py +182 -0
  10. julee/api/routers/knowledge_service_configs.py +79 -0
  11. julee/api/routers/knowledge_service_queries.py +293 -0
  12. julee/api/routers/system.py +137 -0
  13. julee/api/routers/workflows.py +234 -0
  14. julee/api/services/__init__.py +20 -0
  15. julee/api/services/system_initialization.py +214 -0
  16. julee/api/tests/__init__.py +14 -0
  17. julee/api/tests/routers/__init__.py +17 -0
  18. julee/api/tests/routers/test_assembly_specifications.py +749 -0
  19. julee/api/tests/routers/test_documents.py +301 -0
  20. julee/api/tests/routers/test_knowledge_service_configs.py +234 -0
  21. julee/api/tests/routers/test_knowledge_service_queries.py +738 -0
  22. julee/api/tests/routers/test_system.py +179 -0
  23. julee/api/tests/routers/test_workflows.py +393 -0
  24. julee/api/tests/test_app.py +285 -0
  25. julee/api/tests/test_dependencies.py +245 -0
  26. julee/api/tests/test_requests.py +250 -0
  27. julee/domain/__init__.py +22 -0
  28. julee/domain/models/__init__.py +49 -0
  29. julee/domain/models/assembly/__init__.py +17 -0
  30. julee/domain/models/assembly/assembly.py +103 -0
  31. julee/domain/models/assembly/tests/__init__.py +0 -0
  32. julee/domain/models/assembly/tests/factories.py +37 -0
  33. julee/domain/models/assembly/tests/test_assembly.py +430 -0
  34. julee/domain/models/assembly_specification/__init__.py +24 -0
  35. julee/domain/models/assembly_specification/assembly_specification.py +172 -0
  36. julee/domain/models/assembly_specification/knowledge_service_query.py +123 -0
  37. julee/domain/models/assembly_specification/tests/__init__.py +0 -0
  38. julee/domain/models/assembly_specification/tests/factories.py +78 -0
  39. julee/domain/models/assembly_specification/tests/test_assembly_specification.py +490 -0
  40. julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +310 -0
  41. julee/domain/models/custom_fields/__init__.py +0 -0
  42. julee/domain/models/custom_fields/content_stream.py +68 -0
  43. julee/domain/models/custom_fields/tests/__init__.py +0 -0
  44. julee/domain/models/custom_fields/tests/test_custom_fields.py +53 -0
  45. julee/domain/models/document/__init__.py +17 -0
  46. julee/domain/models/document/document.py +150 -0
  47. julee/domain/models/document/tests/__init__.py +0 -0
  48. julee/domain/models/document/tests/factories.py +76 -0
  49. julee/domain/models/document/tests/test_document.py +297 -0
  50. julee/domain/models/knowledge_service_config/__init__.py +17 -0
  51. julee/domain/models/knowledge_service_config/knowledge_service_config.py +86 -0
  52. julee/domain/models/policy/__init__.py +15 -0
  53. julee/domain/models/policy/document_policy_validation.py +220 -0
  54. julee/domain/models/policy/policy.py +203 -0
  55. julee/domain/models/policy/tests/__init__.py +0 -0
  56. julee/domain/models/policy/tests/factories.py +47 -0
  57. julee/domain/models/policy/tests/test_document_policy_validation.py +420 -0
  58. julee/domain/models/policy/tests/test_policy.py +546 -0
  59. julee/domain/repositories/__init__.py +27 -0
  60. julee/domain/repositories/assembly.py +45 -0
  61. julee/domain/repositories/assembly_specification.py +52 -0
  62. julee/domain/repositories/base.py +146 -0
  63. julee/domain/repositories/document.py +49 -0
  64. julee/domain/repositories/document_policy_validation.py +52 -0
  65. julee/domain/repositories/knowledge_service_config.py +54 -0
  66. julee/domain/repositories/knowledge_service_query.py +44 -0
  67. julee/domain/repositories/policy.py +49 -0
  68. julee/domain/use_cases/__init__.py +17 -0
  69. julee/domain/use_cases/decorators.py +107 -0
  70. julee/domain/use_cases/extract_assemble_data.py +649 -0
  71. julee/domain/use_cases/initialize_system_data.py +842 -0
  72. julee/domain/use_cases/tests/__init__.py +7 -0
  73. julee/domain/use_cases/tests/test_extract_assemble_data.py +548 -0
  74. julee/domain/use_cases/tests/test_initialize_system_data.py +455 -0
  75. julee/domain/use_cases/tests/test_validate_document.py +1228 -0
  76. julee/domain/use_cases/validate_document.py +736 -0
  77. julee/fixtures/assembly_specifications.yaml +70 -0
  78. julee/fixtures/documents.yaml +178 -0
  79. julee/fixtures/knowledge_service_configs.yaml +37 -0
  80. julee/fixtures/knowledge_service_queries.yaml +27 -0
  81. julee/repositories/__init__.py +17 -0
  82. julee/repositories/memory/__init__.py +31 -0
  83. julee/repositories/memory/assembly.py +84 -0
  84. julee/repositories/memory/assembly_specification.py +125 -0
  85. julee/repositories/memory/base.py +227 -0
  86. julee/repositories/memory/document.py +149 -0
  87. julee/repositories/memory/document_policy_validation.py +104 -0
  88. julee/repositories/memory/knowledge_service_config.py +123 -0
  89. julee/repositories/memory/knowledge_service_query.py +120 -0
  90. julee/repositories/memory/policy.py +87 -0
  91. julee/repositories/memory/tests/__init__.py +0 -0
  92. julee/repositories/memory/tests/test_document.py +212 -0
  93. julee/repositories/memory/tests/test_document_policy_validation.py +161 -0
  94. julee/repositories/memory/tests/test_policy.py +443 -0
  95. julee/repositories/minio/__init__.py +31 -0
  96. julee/repositories/minio/assembly.py +103 -0
  97. julee/repositories/minio/assembly_specification.py +170 -0
  98. julee/repositories/minio/client.py +570 -0
  99. julee/repositories/minio/document.py +530 -0
  100. julee/repositories/minio/document_policy_validation.py +120 -0
  101. julee/repositories/minio/knowledge_service_config.py +187 -0
  102. julee/repositories/minio/knowledge_service_query.py +211 -0
  103. julee/repositories/minio/policy.py +106 -0
  104. julee/repositories/minio/tests/__init__.py +0 -0
  105. julee/repositories/minio/tests/fake_client.py +213 -0
  106. julee/repositories/minio/tests/test_assembly.py +374 -0
  107. julee/repositories/minio/tests/test_assembly_specification.py +391 -0
  108. julee/repositories/minio/tests/test_client_protocol.py +57 -0
  109. julee/repositories/minio/tests/test_document.py +591 -0
  110. julee/repositories/minio/tests/test_document_policy_validation.py +192 -0
  111. julee/repositories/minio/tests/test_knowledge_service_config.py +374 -0
  112. julee/repositories/minio/tests/test_knowledge_service_query.py +438 -0
  113. julee/repositories/minio/tests/test_policy.py +559 -0
  114. julee/repositories/temporal/__init__.py +38 -0
  115. julee/repositories/temporal/activities.py +114 -0
  116. julee/repositories/temporal/activity_names.py +34 -0
  117. julee/repositories/temporal/proxies.py +159 -0
  118. julee/services/__init__.py +18 -0
  119. julee/services/knowledge_service/__init__.py +48 -0
  120. julee/services/knowledge_service/anthropic/__init__.py +12 -0
  121. julee/services/knowledge_service/anthropic/knowledge_service.py +331 -0
  122. julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +318 -0
  123. julee/services/knowledge_service/factory.py +138 -0
  124. julee/services/knowledge_service/knowledge_service.py +160 -0
  125. julee/services/knowledge_service/memory/__init__.py +13 -0
  126. julee/services/knowledge_service/memory/knowledge_service.py +278 -0
  127. julee/services/knowledge_service/memory/test_knowledge_service.py +345 -0
  128. julee/services/knowledge_service/test_factory.py +112 -0
  129. julee/services/temporal/__init__.py +38 -0
  130. julee/services/temporal/activities.py +86 -0
  131. julee/services/temporal/activity_names.py +22 -0
  132. julee/services/temporal/proxies.py +41 -0
  133. julee/util/__init__.py +0 -0
  134. julee/util/domain.py +119 -0
  135. julee/util/repos/__init__.py +0 -0
  136. julee/util/repos/minio/__init__.py +0 -0
  137. julee/util/repos/minio/file_storage.py +213 -0
  138. julee/util/repos/temporal/__init__.py +11 -0
  139. julee/util/repos/temporal/client_proxies/file_storage.py +68 -0
  140. julee/util/repos/temporal/data_converter.py +123 -0
  141. julee/util/repos/temporal/minio_file_storage.py +12 -0
  142. julee/util/repos/temporal/proxies/__init__.py +0 -0
  143. julee/util/repos/temporal/proxies/file_storage.py +58 -0
  144. julee/util/repositories.py +55 -0
  145. julee/util/temporal/__init__.py +22 -0
  146. julee/util/temporal/activities.py +123 -0
  147. julee/util/temporal/decorators.py +473 -0
  148. julee/util/tests/__init__.py +1 -0
  149. julee/util/tests/test_decorators.py +770 -0
  150. julee/util/validation/__init__.py +29 -0
  151. julee/util/validation/repository.py +100 -0
  152. julee/util/validation/type_guards.py +369 -0
  153. julee/worker.py +211 -0
  154. julee/workflows/__init__.py +26 -0
  155. julee/workflows/extract_assemble.py +215 -0
  156. julee/workflows/validate_document.py +228 -0
  157. julee-0.1.0.dist-info/METADATA +195 -0
  158. julee-0.1.0.dist-info/RECORD +161 -0
  159. julee-0.1.0.dist-info/WHEEL +5 -0
  160. julee-0.1.0.dist-info/licenses/LICENSE +674 -0
  161. julee-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,7 @@
1
+ """
2
+ Tests for julee use cases.
3
+
4
+ This package contains test modules for all use cases in the julee
5
+ domain, following the Clean Architecture testing patterns established in
6
+ the sample application.
7
+ """
@@ -0,0 +1,548 @@
1
+ """
2
+ Tests for ExtractAssembleDataUseCase.
3
+
4
+ This module provides tests for the extract and assemble data use case,
5
+ ensuring proper business logic execution and repository interaction patterns
6
+ following the Clean Architecture principles.
7
+ """
8
+
9
+ import io
10
+ import json
11
+ import pytest
12
+
13
+ from unittest.mock import AsyncMock
14
+ from datetime import datetime, timezone
15
+
16
+ from julee.domain.use_cases import ExtractAssembleDataUseCase
17
+ from julee.domain.models import (
18
+ Assembly,
19
+ AssemblyStatus,
20
+ Document,
21
+ DocumentStatus,
22
+ ContentStream,
23
+ AssemblySpecification,
24
+ AssemblySpecificationStatus,
25
+ KnowledgeServiceQuery,
26
+ KnowledgeServiceConfig,
27
+ )
28
+ from julee.domain.models.knowledge_service_config import ServiceApi
29
+ from julee.repositories.memory import (
30
+ MemoryDocumentRepository,
31
+ MemoryAssemblyRepository,
32
+ MemoryAssemblySpecificationRepository,
33
+ MemoryKnowledgeServiceConfigRepository,
34
+ MemoryKnowledgeServiceQueryRepository,
35
+ )
36
+ from julee.services.knowledge_service.memory import (
37
+ MemoryKnowledgeService,
38
+ )
39
+ from julee.services.knowledge_service import QueryResult
40
+
41
+
42
class TestExtractAssembleDataUseCase:
    """Test cases for ExtractAssembleDataUseCase business logic.

    All tests wire the use case to the in-memory repositories and the
    in-memory knowledge service. The private ``_make_*`` helpers build the
    domain objects that several tests need, so each test body only spells
    out the values that matter for its scenario.
    """

    # ------------------------------------------------------------------
    # Builders for domain objects shared by several tests
    # ------------------------------------------------------------------

    @staticmethod
    def _make_ks_config(knowledge_service_id: str) -> KnowledgeServiceConfig:
        """Build a KnowledgeServiceConfig with the given ID."""
        return KnowledgeServiceConfig(
            knowledge_service_id=knowledge_service_id,
            name="Test Knowledge Service",
            description="Test service",
            service_api=ServiceApi.ANTHROPIC,
            created_at=datetime.now(timezone.utc),
            updated_at=datetime.now(timezone.utc),
        )

    @staticmethod
    def _make_document(
        content_text: str,
        original_filename: str = "test.txt",
        content_multihash: str = "test-hash",
    ) -> Document:
        """Build a captured plain-text Document with ID ``doc-123``."""
        content_bytes = content_text.encode("utf-8")
        return Document(
            document_id="doc-123",
            original_filename=original_filename,
            content_type="text/plain",
            size_bytes=len(content_bytes),
            content_multihash=content_multihash,
            status=DocumentStatus.CAPTURED,
            content=ContentStream(io.BytesIO(content_bytes)),
            created_at=datetime.now(timezone.utc),
            updated_at=datetime.now(timezone.utc),
        )

    @staticmethod
    def _make_specification(
        jsonschema: dict,
        knowledge_service_queries: dict,
    ) -> AssemblySpecification:
        """Build an active AssemblySpecification with ID ``spec-123``."""
        return AssemblySpecification(
            assembly_specification_id="spec-123",
            name="Test Assembly",
            applicability="Test documents",
            jsonschema=jsonschema,
            status=AssemblySpecificationStatus.ACTIVE,
            knowledge_service_queries=knowledge_service_queries,
            created_at=datetime.now(timezone.utc),
            updated_at=datetime.now(timezone.utc),
        )

    # ------------------------------------------------------------------
    # Fixtures
    # ------------------------------------------------------------------

    @pytest.fixture
    def document_repo(self) -> MemoryDocumentRepository:
        """Create a memory DocumentRepository for testing."""
        return MemoryDocumentRepository()

    @pytest.fixture
    def assembly_repo(self) -> MemoryAssemblyRepository:
        """Create a memory AssemblyRepository for testing."""
        return MemoryAssemblyRepository()

    @pytest.fixture
    def assembly_specification_repo(
        self,
    ) -> MemoryAssemblySpecificationRepository:
        """Create a memory AssemblySpecificationRepository for testing."""
        return MemoryAssemblySpecificationRepository()

    @pytest.fixture
    def knowledge_service_query_repo(
        self,
    ) -> MemoryKnowledgeServiceQueryRepository:
        """Create a memory KnowledgeServiceQueryRepository for testing."""
        return MemoryKnowledgeServiceQueryRepository()

    @pytest.fixture
    def knowledge_service_config_repo(
        self,
    ) -> MemoryKnowledgeServiceConfigRepository:
        """Create a memory KnowledgeServiceConfigRepository for testing."""
        return MemoryKnowledgeServiceConfigRepository()

    @pytest.fixture
    def knowledge_service(self) -> MemoryKnowledgeService:
        """Create a memory KnowledgeService with no canned results."""
        return MemoryKnowledgeService(self._make_ks_config("ks-test"))

    @pytest.fixture
    def configured_knowledge_service(self) -> MemoryKnowledgeService:
        """Create a memory KnowledgeService preloaded with two canned results.

        The canned responses are JSON-encoded strings; the success test
        expects them to surface as the assembled ``title`` and ``summary``.
        """
        memory_service = MemoryKnowledgeService(self._make_ks_config("ks-123"))
        memory_service.add_canned_query_results(
            [
                QueryResult(
                    query_id="result-1",
                    query_text="Extract the title from this document",
                    result_data={"response": '"Test Meeting"'},
                    execution_time_ms=100,
                    created_at=datetime.now(timezone.utc),
                ),
                QueryResult(
                    query_id="result-2",
                    query_text="Extract a summary from this document",
                    result_data={
                        "response": (
                            '"This was a test meeting about important topics"'
                        )
                    },
                    execution_time_ms=150,
                    created_at=datetime.now(timezone.utc),
                ),
            ]
        )
        return memory_service

    @pytest.fixture
    def use_case(
        self,
        document_repo: MemoryDocumentRepository,
        assembly_repo: MemoryAssemblyRepository,
        assembly_specification_repo: MemoryAssemblySpecificationRepository,
        knowledge_service_query_repo: MemoryKnowledgeServiceQueryRepository,
        knowledge_service_config_repo: MemoryKnowledgeServiceConfigRepository,
        knowledge_service: MemoryKnowledgeService,
    ) -> ExtractAssembleDataUseCase:
        """Create ExtractAssembleDataUseCase with memory repository
        dependencies and an unconfigured knowledge service."""
        return ExtractAssembleDataUseCase(
            document_repo=document_repo,
            assembly_repo=assembly_repo,
            assembly_specification_repo=assembly_specification_repo,
            knowledge_service_query_repo=knowledge_service_query_repo,
            knowledge_service_config_repo=knowledge_service_config_repo,
            knowledge_service=knowledge_service,
        )

    @pytest.fixture
    def configured_use_case(
        self,
        document_repo: MemoryDocumentRepository,
        assembly_repo: MemoryAssemblyRepository,
        assembly_specification_repo: MemoryAssemblySpecificationRepository,
        knowledge_service_query_repo: MemoryKnowledgeServiceQueryRepository,
        knowledge_service_config_repo: MemoryKnowledgeServiceConfigRepository,
        configured_knowledge_service: MemoryKnowledgeService,
    ) -> ExtractAssembleDataUseCase:
        """Create ExtractAssembleDataUseCase with the configured knowledge
        service for full workflow tests."""
        return ExtractAssembleDataUseCase(
            document_repo=document_repo,
            assembly_repo=assembly_repo,
            assembly_specification_repo=assembly_specification_repo,
            knowledge_service_query_repo=knowledge_service_query_repo,
            knowledge_service_config_repo=knowledge_service_config_repo,
            knowledge_service=configured_knowledge_service,
        )

    # ------------------------------------------------------------------
    # Failure paths: missing inputs
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_assemble_data_fails_without_specification(
        self, use_case: ExtractAssembleDataUseCase
    ) -> None:
        """Test that assemble_data fails when specification doesn't exist."""
        # Arrange
        document_id = "doc-456"
        assembly_specification_id = "spec-789"

        # Act & Assert
        with pytest.raises(ValueError, match="Assembly specification not found"):
            await use_case.assemble_data(
                document_id=document_id,
                assembly_specification_id=assembly_specification_id,
                workflow_id="test-workflow-123",
            )

    @pytest.mark.asyncio
    async def test_assemble_data_fails_without_document(
        self,
        use_case: ExtractAssembleDataUseCase,
        assembly_specification_repo: MemoryAssemblySpecificationRepository,
    ) -> None:
        """Test that assemble_data fails when document doesn't exist."""
        # Arrange - Create assembly specification but no document
        assembly_spec = self._make_specification(
            jsonschema={"type": "object", "properties": {}},
            knowledge_service_queries={},
        )
        await assembly_specification_repo.save(assembly_spec)

        document_id = "nonexistent-doc"
        assembly_specification_id = "spec-123"

        # Act & Assert
        with pytest.raises(ValueError, match="Document not found"):
            await use_case.assemble_data(
                document_id=document_id,
                assembly_specification_id=assembly_specification_id,
                workflow_id="test-workflow-123",
            )

    @pytest.mark.asyncio
    async def test_assemble_data_propagates_id_generation_error(
        self,
        use_case: ExtractAssembleDataUseCase,
        assembly_repo: MemoryAssemblyRepository,
    ) -> None:
        """Test that ID generation errors are properly propagated."""
        # Arrange
        document_id = "doc-456"
        assembly_specification_id = "spec-789"
        expected_error = RuntimeError("ID generation failed")

        # Mock the generate_id method to raise an error
        assembly_repo.generate_id = AsyncMock(  # type: ignore[method-assign]
            side_effect=expected_error
        )

        # NOTE(review): neither the specification nor the document is saved
        # here, so this test presumably relies on the use case generating the
        # assembly ID before it looks either of them up - confirm against
        # ExtractAssembleDataUseCase.assemble_data.

        # Act & Assert
        with pytest.raises(RuntimeError, match="ID generation failed"):
            await use_case.assemble_data(
                document_id=document_id,
                assembly_specification_id=assembly_specification_id,
                workflow_id="test-workflow-123",
            )

    # ------------------------------------------------------------------
    # Happy path
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_full_assembly_workflow_success(
        self,
        configured_use_case: ExtractAssembleDataUseCase,
        document_repo: MemoryDocumentRepository,
        assembly_repo: MemoryAssemblyRepository,
        assembly_specification_repo: MemoryAssemblySpecificationRepository,
        knowledge_service_query_repo: MemoryKnowledgeServiceQueryRepository,
        knowledge_service_config_repo: MemoryKnowledgeServiceConfigRepository,
    ) -> None:
        """Test complete assembly workflow with knowledge service."""
        # Arrange - Create test document
        document = self._make_document(
            "Sample meeting transcript for testing",
            original_filename="test_transcript.txt",
            content_multihash="test-hash-123",
        )
        await document_repo.save(document)

        # Create assembly specification with simple schema; each property
        # pointer is mapped to the query that should fill it
        schema = {
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "summary": {"type": "string"},
            },
            "required": ["title", "summary"],
        }
        assembly_spec = self._make_specification(
            jsonschema=schema,
            knowledge_service_queries={
                "/properties/title": "query-1",
                "/properties/summary": "query-2",
            },
        )
        await assembly_specification_repo.save(assembly_spec)

        # Create knowledge service config
        await knowledge_service_config_repo.save(self._make_ks_config("ks-123"))

        # Create knowledge service queries
        query1 = KnowledgeServiceQuery(
            query_id="query-1",
            name="Extract Title",
            knowledge_service_id="ks-123",
            prompt="Extract the title from this document",
            query_metadata={"max_tokens": 100},
            created_at=datetime.now(timezone.utc),
            updated_at=datetime.now(timezone.utc),
        )
        query2 = KnowledgeServiceQuery(
            query_id="query-2",
            name="Extract Summary",
            knowledge_service_id="ks-123",
            prompt="Extract a summary from this document",
            query_metadata={"max_tokens": 200},
            created_at=datetime.now(timezone.utc),
            updated_at=datetime.now(timezone.utc),
        )
        await knowledge_service_query_repo.save(query1)
        await knowledge_service_query_repo.save(query2)

        # Act - use configured_use_case which already has the configured
        # memory service
        result = await configured_use_case.assemble_data(
            document_id="doc-123",
            assembly_specification_id="spec-123",
            workflow_id="test-workflow-success",
        )

        # Assert
        assert isinstance(result, Assembly)
        assert result.status == AssemblyStatus.COMPLETED
        assert result.assembled_document_id is not None

        # Verify assembled document was created
        assembled_doc = await document_repo.get(result.assembled_document_id)
        assert assembled_doc is not None
        assert assembled_doc.status == DocumentStatus.ASSEMBLED

        # Check assembled content; a missing stream is a test failure, so it
        # is asserted rather than raised as a ValueError
        assert (
            assembled_doc.content is not None
        ), "Assembled document content is required"
        assembled_doc.content.seek(0)
        content = assembled_doc.content.read().decode("utf-8")
        assembled_data = json.loads(content)

        assert "title" in assembled_data
        assert "summary" in assembled_data
        assert assembled_data["title"] == "Test Meeting"
        assert (
            assembled_data["summary"]
            == "This was a test meeting about important topics"
        )

    # ------------------------------------------------------------------
    # Failure paths: assembly
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_assembly_fails_when_specification_not_found(
        self, use_case: ExtractAssembleDataUseCase
    ) -> None:
        """Test that assembly fails when specification is not found."""
        # NOTE(review): covers the same scenario as
        # test_assemble_data_fails_without_specification; kept so that the
        # collected test IDs stay unchanged.

        # Act & Assert
        with pytest.raises(ValueError, match="Assembly specification not found"):
            await use_case.assemble_data(
                document_id="doc-123",
                assembly_specification_id="nonexistent-spec",
                workflow_id="test-workflow-123",
            )

    @pytest.mark.asyncio
    async def test_assembly_fails_when_document_not_found(
        self,
        use_case: ExtractAssembleDataUseCase,
        assembly_specification_repo: MemoryAssemblySpecificationRepository,
    ) -> None:
        """Test that assembly fails when input document is not found."""
        # NOTE(review): covers the same scenario as
        # test_assemble_data_fails_without_document; kept so that the
        # collected test IDs stay unchanged.

        # Arrange - Create assembly specification but no document
        assembly_spec = self._make_specification(
            jsonschema={"type": "object", "properties": {}},
            knowledge_service_queries={},
        )
        await assembly_specification_repo.save(assembly_spec)

        # Act & Assert
        with pytest.raises(ValueError, match="Document not found"):
            await use_case.assemble_data(
                document_id="nonexistent-doc",
                assembly_specification_id="spec-123",
                workflow_id="test-workflow-123",
            )

    @pytest.mark.asyncio
    async def test_assembly_fails_when_query_not_found(
        self,
        use_case: ExtractAssembleDataUseCase,
        document_repo: MemoryDocumentRepository,
        assembly_specification_repo: MemoryAssemblySpecificationRepository,
    ) -> None:
        """Test that assembly fails when query is not found."""
        # Arrange - Create document and spec with non-existent query
        await document_repo.save(self._make_document("Sample content"))

        assembly_spec = self._make_specification(
            jsonschema={
                "type": "object",
                "properties": {"title": {"type": "string"}},
            },
            knowledge_service_queries={"/properties/title": "nonexistent-query"},
        )
        await assembly_specification_repo.save(assembly_spec)

        # Act & Assert
        with pytest.raises(ValueError, match="Knowledge service query not found"):
            await use_case.assemble_data(
                document_id="doc-123",
                assembly_specification_id="spec-123",
                workflow_id="test-workflow-123",
            )

    @pytest.mark.asyncio
    async def test_assembly_fails_with_invalid_json_schema(
        self,
        document_repo: MemoryDocumentRepository,
        assembly_repo: MemoryAssemblyRepository,
        assembly_specification_repo: MemoryAssemblySpecificationRepository,
        knowledge_service_query_repo: MemoryKnowledgeServiceQueryRepository,
        knowledge_service_config_repo: MemoryKnowledgeServiceConfigRepository,
    ) -> None:
        """Test that assembly fails when data doesn't match JSON schema.

        The schema itself is valid; it is the assembled data that violates
        it, because the required ``count`` property is never produced.
        """
        # Arrange - Create test document
        await document_repo.save(self._make_document("Sample content"))

        # Create assembly specification with strict schema
        schema = {
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "count": {"type": "integer"},  # Require integer
            },
            "required": ["title", "count"],
        }
        # Only "title" is mapped to a query, so "count" has no source
        assembly_spec = self._make_specification(
            jsonschema=schema,
            knowledge_service_queries={"/properties/title": "query-1"},
        )
        await assembly_specification_repo.save(assembly_spec)

        # Create knowledge service config and query
        ks_config = self._make_ks_config("ks-123")
        await knowledge_service_config_repo.save(ks_config)

        query = KnowledgeServiceQuery(
            query_id="query-1",
            name="Extract Title",
            knowledge_service_id="ks-123",
            prompt="Extract the title",
            created_at=datetime.now(timezone.utc),
            updated_at=datetime.now(timezone.utc),
        )
        await knowledge_service_query_repo.save(query)

        # Create memory service that returns invalid data (missing count)
        memory_service = MemoryKnowledgeService(ks_config)
        memory_service.add_canned_query_result(
            QueryResult(
                query_id="result-1",
                query_text="Extract the title",
                result_data={
                    "response": '"Test"'
                },  # Only returns title, missing "count" field
                execution_time_ms=100,
                created_at=datetime.now(timezone.utc),
            )
        )

        # Create use case with configured memory service
        test_use_case = ExtractAssembleDataUseCase(
            document_repo=document_repo,
            assembly_repo=assembly_repo,
            assembly_specification_repo=assembly_specification_repo,
            knowledge_service_query_repo=knowledge_service_query_repo,
            knowledge_service_config_repo=knowledge_service_config_repo,
            knowledge_service=memory_service,
        )

        # Act & Assert
        with pytest.raises(
            ValueError,
            match="Assembled data does not conform to JSON schema",
        ):
            await test_use_case.assemble_data(
                document_id="doc-123",
                assembly_specification_id="spec-123",
                workflow_id="test-workflow-123",
            )