julee 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- julee/__init__.py +3 -0
- julee/api/__init__.py +20 -0
- julee/api/app.py +180 -0
- julee/api/dependencies.py +257 -0
- julee/api/requests.py +175 -0
- julee/api/responses.py +43 -0
- julee/api/routers/__init__.py +43 -0
- julee/api/routers/assembly_specifications.py +212 -0
- julee/api/routers/documents.py +182 -0
- julee/api/routers/knowledge_service_configs.py +79 -0
- julee/api/routers/knowledge_service_queries.py +293 -0
- julee/api/routers/system.py +137 -0
- julee/api/routers/workflows.py +234 -0
- julee/api/services/__init__.py +20 -0
- julee/api/services/system_initialization.py +214 -0
- julee/api/tests/__init__.py +14 -0
- julee/api/tests/routers/__init__.py +17 -0
- julee/api/tests/routers/test_assembly_specifications.py +749 -0
- julee/api/tests/routers/test_documents.py +301 -0
- julee/api/tests/routers/test_knowledge_service_configs.py +234 -0
- julee/api/tests/routers/test_knowledge_service_queries.py +738 -0
- julee/api/tests/routers/test_system.py +179 -0
- julee/api/tests/routers/test_workflows.py +393 -0
- julee/api/tests/test_app.py +285 -0
- julee/api/tests/test_dependencies.py +245 -0
- julee/api/tests/test_requests.py +250 -0
- julee/domain/__init__.py +22 -0
- julee/domain/models/__init__.py +49 -0
- julee/domain/models/assembly/__init__.py +17 -0
- julee/domain/models/assembly/assembly.py +103 -0
- julee/domain/models/assembly/tests/__init__.py +0 -0
- julee/domain/models/assembly/tests/factories.py +37 -0
- julee/domain/models/assembly/tests/test_assembly.py +430 -0
- julee/domain/models/assembly_specification/__init__.py +24 -0
- julee/domain/models/assembly_specification/assembly_specification.py +172 -0
- julee/domain/models/assembly_specification/knowledge_service_query.py +123 -0
- julee/domain/models/assembly_specification/tests/__init__.py +0 -0
- julee/domain/models/assembly_specification/tests/factories.py +78 -0
- julee/domain/models/assembly_specification/tests/test_assembly_specification.py +490 -0
- julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +310 -0
- julee/domain/models/custom_fields/__init__.py +0 -0
- julee/domain/models/custom_fields/content_stream.py +68 -0
- julee/domain/models/custom_fields/tests/__init__.py +0 -0
- julee/domain/models/custom_fields/tests/test_custom_fields.py +53 -0
- julee/domain/models/document/__init__.py +17 -0
- julee/domain/models/document/document.py +150 -0
- julee/domain/models/document/tests/__init__.py +0 -0
- julee/domain/models/document/tests/factories.py +76 -0
- julee/domain/models/document/tests/test_document.py +297 -0
- julee/domain/models/knowledge_service_config/__init__.py +17 -0
- julee/domain/models/knowledge_service_config/knowledge_service_config.py +86 -0
- julee/domain/models/policy/__init__.py +15 -0
- julee/domain/models/policy/document_policy_validation.py +220 -0
- julee/domain/models/policy/policy.py +203 -0
- julee/domain/models/policy/tests/__init__.py +0 -0
- julee/domain/models/policy/tests/factories.py +47 -0
- julee/domain/models/policy/tests/test_document_policy_validation.py +420 -0
- julee/domain/models/policy/tests/test_policy.py +546 -0
- julee/domain/repositories/__init__.py +27 -0
- julee/domain/repositories/assembly.py +45 -0
- julee/domain/repositories/assembly_specification.py +52 -0
- julee/domain/repositories/base.py +146 -0
- julee/domain/repositories/document.py +49 -0
- julee/domain/repositories/document_policy_validation.py +52 -0
- julee/domain/repositories/knowledge_service_config.py +54 -0
- julee/domain/repositories/knowledge_service_query.py +44 -0
- julee/domain/repositories/policy.py +49 -0
- julee/domain/use_cases/__init__.py +17 -0
- julee/domain/use_cases/decorators.py +107 -0
- julee/domain/use_cases/extract_assemble_data.py +649 -0
- julee/domain/use_cases/initialize_system_data.py +842 -0
- julee/domain/use_cases/tests/__init__.py +7 -0
- julee/domain/use_cases/tests/test_extract_assemble_data.py +548 -0
- julee/domain/use_cases/tests/test_initialize_system_data.py +455 -0
- julee/domain/use_cases/tests/test_validate_document.py +1228 -0
- julee/domain/use_cases/validate_document.py +736 -0
- julee/fixtures/assembly_specifications.yaml +70 -0
- julee/fixtures/documents.yaml +178 -0
- julee/fixtures/knowledge_service_configs.yaml +37 -0
- julee/fixtures/knowledge_service_queries.yaml +27 -0
- julee/repositories/__init__.py +17 -0
- julee/repositories/memory/__init__.py +31 -0
- julee/repositories/memory/assembly.py +84 -0
- julee/repositories/memory/assembly_specification.py +125 -0
- julee/repositories/memory/base.py +227 -0
- julee/repositories/memory/document.py +149 -0
- julee/repositories/memory/document_policy_validation.py +104 -0
- julee/repositories/memory/knowledge_service_config.py +123 -0
- julee/repositories/memory/knowledge_service_query.py +120 -0
- julee/repositories/memory/policy.py +87 -0
- julee/repositories/memory/tests/__init__.py +0 -0
- julee/repositories/memory/tests/test_document.py +212 -0
- julee/repositories/memory/tests/test_document_policy_validation.py +161 -0
- julee/repositories/memory/tests/test_policy.py +443 -0
- julee/repositories/minio/__init__.py +31 -0
- julee/repositories/minio/assembly.py +103 -0
- julee/repositories/minio/assembly_specification.py +170 -0
- julee/repositories/minio/client.py +570 -0
- julee/repositories/minio/document.py +530 -0
- julee/repositories/minio/document_policy_validation.py +120 -0
- julee/repositories/minio/knowledge_service_config.py +187 -0
- julee/repositories/minio/knowledge_service_query.py +211 -0
- julee/repositories/minio/policy.py +106 -0
- julee/repositories/minio/tests/__init__.py +0 -0
- julee/repositories/minio/tests/fake_client.py +213 -0
- julee/repositories/minio/tests/test_assembly.py +374 -0
- julee/repositories/minio/tests/test_assembly_specification.py +391 -0
- julee/repositories/minio/tests/test_client_protocol.py +57 -0
- julee/repositories/minio/tests/test_document.py +591 -0
- julee/repositories/minio/tests/test_document_policy_validation.py +192 -0
- julee/repositories/minio/tests/test_knowledge_service_config.py +374 -0
- julee/repositories/minio/tests/test_knowledge_service_query.py +438 -0
- julee/repositories/minio/tests/test_policy.py +559 -0
- julee/repositories/temporal/__init__.py +38 -0
- julee/repositories/temporal/activities.py +114 -0
- julee/repositories/temporal/activity_names.py +34 -0
- julee/repositories/temporal/proxies.py +159 -0
- julee/services/__init__.py +18 -0
- julee/services/knowledge_service/__init__.py +48 -0
- julee/services/knowledge_service/anthropic/__init__.py +12 -0
- julee/services/knowledge_service/anthropic/knowledge_service.py +331 -0
- julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +318 -0
- julee/services/knowledge_service/factory.py +138 -0
- julee/services/knowledge_service/knowledge_service.py +160 -0
- julee/services/knowledge_service/memory/__init__.py +13 -0
- julee/services/knowledge_service/memory/knowledge_service.py +278 -0
- julee/services/knowledge_service/memory/test_knowledge_service.py +345 -0
- julee/services/knowledge_service/test_factory.py +112 -0
- julee/services/temporal/__init__.py +38 -0
- julee/services/temporal/activities.py +86 -0
- julee/services/temporal/activity_names.py +22 -0
- julee/services/temporal/proxies.py +41 -0
- julee/util/__init__.py +0 -0
- julee/util/domain.py +119 -0
- julee/util/repos/__init__.py +0 -0
- julee/util/repos/minio/__init__.py +0 -0
- julee/util/repos/minio/file_storage.py +213 -0
- julee/util/repos/temporal/__init__.py +11 -0
- julee/util/repos/temporal/client_proxies/file_storage.py +68 -0
- julee/util/repos/temporal/data_converter.py +123 -0
- julee/util/repos/temporal/minio_file_storage.py +12 -0
- julee/util/repos/temporal/proxies/__init__.py +0 -0
- julee/util/repos/temporal/proxies/file_storage.py +58 -0
- julee/util/repositories.py +55 -0
- julee/util/temporal/__init__.py +22 -0
- julee/util/temporal/activities.py +123 -0
- julee/util/temporal/decorators.py +473 -0
- julee/util/tests/__init__.py +1 -0
- julee/util/tests/test_decorators.py +770 -0
- julee/util/validation/__init__.py +29 -0
- julee/util/validation/repository.py +100 -0
- julee/util/validation/type_guards.py +369 -0
- julee/worker.py +211 -0
- julee/workflows/__init__.py +26 -0
- julee/workflows/extract_assemble.py +215 -0
- julee/workflows/validate_document.py +228 -0
- julee-0.1.0.dist-info/METADATA +195 -0
- julee-0.1.0.dist-info/RECORD +161 -0
- julee-0.1.0.dist-info/WHEEL +5 -0
- julee-0.1.0.dist-info/licenses/LICENSE +674 -0
- julee-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,548 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests for ExtractAssembleDataUseCase.
|
|
3
|
+
|
|
4
|
+
This module provides tests for the extract and assemble data use case,
|
|
5
|
+
ensuring proper business logic execution and repository interaction patterns
|
|
6
|
+
following the Clean Architecture principles.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import io
|
|
10
|
+
import json
|
|
11
|
+
import pytest
|
|
12
|
+
|
|
13
|
+
from unittest.mock import AsyncMock
|
|
14
|
+
from datetime import datetime, timezone
|
|
15
|
+
|
|
16
|
+
from julee.domain.use_cases import ExtractAssembleDataUseCase
|
|
17
|
+
from julee.domain.models import (
|
|
18
|
+
Assembly,
|
|
19
|
+
AssemblyStatus,
|
|
20
|
+
Document,
|
|
21
|
+
DocumentStatus,
|
|
22
|
+
ContentStream,
|
|
23
|
+
AssemblySpecification,
|
|
24
|
+
AssemblySpecificationStatus,
|
|
25
|
+
KnowledgeServiceQuery,
|
|
26
|
+
KnowledgeServiceConfig,
|
|
27
|
+
)
|
|
28
|
+
from julee.domain.models.knowledge_service_config import ServiceApi
|
|
29
|
+
from julee.repositories.memory import (
|
|
30
|
+
MemoryDocumentRepository,
|
|
31
|
+
MemoryAssemblyRepository,
|
|
32
|
+
MemoryAssemblySpecificationRepository,
|
|
33
|
+
MemoryKnowledgeServiceConfigRepository,
|
|
34
|
+
MemoryKnowledgeServiceQueryRepository,
|
|
35
|
+
)
|
|
36
|
+
from julee.services.knowledge_service.memory import (
|
|
37
|
+
MemoryKnowledgeService,
|
|
38
|
+
)
|
|
39
|
+
from julee.services.knowledge_service import QueryResult
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class TestExtractAssembleDataUseCase:
    """Behavioural tests for ``ExtractAssembleDataUseCase.assemble_data``.

    All collaborators are the in-memory repository and knowledge-service
    implementations imported at the top of this module. The
    underscore-prefixed helpers below only build objects that several
    tests share; they are not named ``test_*`` and hold no assertions.
    """

    # ------------------------------------------------------------------
    # Shared builders
    # ------------------------------------------------------------------

    @staticmethod
    def _utc_now() -> datetime:
        """Return the current timezone-aware UTC timestamp."""
        return datetime.now(timezone.utc)

    @classmethod
    def _make_ks_config(cls, service_id: str) -> KnowledgeServiceConfig:
        """Build the test knowledge-service config stored under ``service_id``."""
        return KnowledgeServiceConfig(
            knowledge_service_id=service_id,
            name="Test Knowledge Service",
            description="Test service",
            service_api=ServiceApi.ANTHROPIC,
            created_at=cls._utc_now(),
            updated_at=cls._utc_now(),
        )

    @classmethod
    def _make_document(cls, text: str, filename: str, multihash: str) -> Document:
        """Build the captured plain-text document ``doc-123`` holding ``text``."""
        raw = text.encode("utf-8")
        return Document(
            document_id="doc-123",
            original_filename=filename,
            content_type="text/plain",
            size_bytes=len(raw),
            content_multihash=multihash,
            status=DocumentStatus.CAPTURED,
            content=ContentStream(io.BytesIO(raw)),
            created_at=cls._utc_now(),
            updated_at=cls._utc_now(),
        )

    @classmethod
    def _make_spec(
        cls,
        jsonschema: dict[str, object],
        queries: dict[str, str],
    ) -> AssemblySpecification:
        """Build the active specification ``spec-123``.

        ``queries`` maps schema pointers to knowledge-service query ids.
        """
        return AssemblySpecification(
            assembly_specification_id="spec-123",
            name="Test Assembly",
            applicability="Test documents",
            jsonschema=jsonschema,
            status=AssemblySpecificationStatus.ACTIVE,
            knowledge_service_queries=queries,
            created_at=cls._utc_now(),
            updated_at=cls._utc_now(),
        )

    @classmethod
    def _make_canned_result(
        cls,
        result_id: str,
        query_text: str,
        raw_response: str,
        elapsed_ms: int,
    ) -> QueryResult:
        """Build a canned ``QueryResult`` whose payload is ``raw_response``."""
        return QueryResult(
            query_id=result_id,
            query_text=query_text,
            result_data={"response": raw_response},
            execution_time_ms=elapsed_ms,
            created_at=cls._utc_now(),
        )

    @staticmethod
    def _wire_use_case(
        document_repo: MemoryDocumentRepository,
        assembly_repo: MemoryAssemblyRepository,
        assembly_specification_repo: MemoryAssemblySpecificationRepository,
        knowledge_service_query_repo: MemoryKnowledgeServiceQueryRepository,
        knowledge_service_config_repo: MemoryKnowledgeServiceConfigRepository,
        knowledge_service: MemoryKnowledgeService,
    ) -> ExtractAssembleDataUseCase:
        """Construct the use case from the given collaborators."""
        return ExtractAssembleDataUseCase(
            document_repo=document_repo,
            assembly_repo=assembly_repo,
            assembly_specification_repo=assembly_specification_repo,
            knowledge_service_query_repo=knowledge_service_query_repo,
            knowledge_service_config_repo=knowledge_service_config_repo,
            knowledge_service=knowledge_service,
        )

    # ------------------------------------------------------------------
    # Fixtures
    # ------------------------------------------------------------------

    @pytest.fixture
    def document_repo(self) -> MemoryDocumentRepository:
        """Provide a fresh in-memory document repository."""
        return MemoryDocumentRepository()

    @pytest.fixture
    def assembly_repo(self) -> MemoryAssemblyRepository:
        """Provide a fresh in-memory assembly repository."""
        return MemoryAssemblyRepository()

    @pytest.fixture
    def assembly_specification_repo(
        self,
    ) -> MemoryAssemblySpecificationRepository:
        """Provide a fresh in-memory assembly-specification repository."""
        return MemoryAssemblySpecificationRepository()

    @pytest.fixture
    def knowledge_service_query_repo(
        self,
    ) -> MemoryKnowledgeServiceQueryRepository:
        """Provide a fresh in-memory knowledge-service-query repository."""
        return MemoryKnowledgeServiceQueryRepository()

    @pytest.fixture
    def knowledge_service_config_repo(
        self,
    ) -> MemoryKnowledgeServiceConfigRepository:
        """Provide a fresh in-memory knowledge-service-config repository."""
        return MemoryKnowledgeServiceConfigRepository()

    @pytest.fixture
    def knowledge_service(self) -> MemoryKnowledgeService:
        """Provide a memory knowledge service with no canned results."""
        return MemoryKnowledgeService(self._make_ks_config("ks-test"))

    @pytest.fixture
    def configured_knowledge_service(self) -> MemoryKnowledgeService:
        """Provide a memory knowledge service preloaded with two canned
        results (a title and a summary) for the full workflow test."""
        service = MemoryKnowledgeService(self._make_ks_config("ks-123"))
        canned = [
            self._make_canned_result(
                "result-1",
                "Extract the title from this document",
                '"Test Meeting"',
                100,
            ),
            self._make_canned_result(
                "result-2",
                "Extract a summary from this document",
                '"This was a test meeting about important topics"',
                150,
            ),
        ]
        service.add_canned_query_results(canned)
        return service

    @pytest.fixture
    def use_case(
        self,
        document_repo: MemoryDocumentRepository,
        assembly_repo: MemoryAssemblyRepository,
        assembly_specification_repo: MemoryAssemblySpecificationRepository,
        knowledge_service_query_repo: MemoryKnowledgeServiceQueryRepository,
        knowledge_service_config_repo: MemoryKnowledgeServiceConfigRepository,
        knowledge_service: MemoryKnowledgeService,
    ) -> ExtractAssembleDataUseCase:
        """Provide the use case wired to the memory repositories and the
        plain (unconfigured) knowledge service."""
        return self._wire_use_case(
            document_repo,
            assembly_repo,
            assembly_specification_repo,
            knowledge_service_query_repo,
            knowledge_service_config_repo,
            knowledge_service,
        )

    @pytest.fixture
    def configured_use_case(
        self,
        document_repo: MemoryDocumentRepository,
        assembly_repo: MemoryAssemblyRepository,
        assembly_specification_repo: MemoryAssemblySpecificationRepository,
        knowledge_service_query_repo: MemoryKnowledgeServiceQueryRepository,
        knowledge_service_config_repo: MemoryKnowledgeServiceConfigRepository,
        configured_knowledge_service: MemoryKnowledgeService,
    ) -> ExtractAssembleDataUseCase:
        """Provide the use case wired to the memory repositories and the
        knowledge service that carries canned results."""
        return self._wire_use_case(
            document_repo,
            assembly_repo,
            assembly_specification_repo,
            knowledge_service_query_repo,
            knowledge_service_config_repo,
            configured_knowledge_service,
        )

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    @pytest.mark.asyncio
    async def test_assemble_data_fails_without_specification(
        self, use_case: ExtractAssembleDataUseCase
    ) -> None:
        """assemble_data raises ValueError when no specification is stored."""
        # Nothing has been saved to any repository at this point.
        with pytest.raises(ValueError, match="Assembly specification not found"):
            await use_case.assemble_data(
                document_id="doc-456",
                assembly_specification_id="spec-789",
                workflow_id="test-workflow-123",
            )

    @pytest.mark.asyncio
    async def test_assemble_data_fails_without_document(
        self,
        use_case: ExtractAssembleDataUseCase,
        assembly_specification_repo: MemoryAssemblySpecificationRepository,
    ) -> None:
        """assemble_data raises ValueError when the document is missing."""
        # Arrange - store only the specification; the document stays absent.
        empty_schema = {"type": "object", "properties": {}}
        await assembly_specification_repo.save(self._make_spec(empty_schema, {}))

        # Act & Assert
        with pytest.raises(ValueError, match="Document not found"):
            await use_case.assemble_data(
                document_id="nonexistent-doc",
                assembly_specification_id="spec-123",
                workflow_id="test-workflow-123",
            )

    @pytest.mark.asyncio
    async def test_assemble_data_propagates_id_generation_error(
        self,
        use_case: ExtractAssembleDataUseCase,
        assembly_repo: MemoryAssemblyRepository,
    ) -> None:
        """A RuntimeError from assembly_repo.generate_id reaches the caller."""
        # Arrange - swap generate_id for a mock that always raises. No
        # document or specification is stored in this test.
        failing_generate_id = AsyncMock(
            side_effect=RuntimeError("ID generation failed")
        )
        assembly_repo.generate_id = failing_generate_id  # type: ignore[method-assign]

        # Act & Assert
        with pytest.raises(RuntimeError, match="ID generation failed"):
            await use_case.assemble_data(
                document_id="doc-456",
                assembly_specification_id="spec-789",
                workflow_id="test-workflow-123",
            )

    @pytest.mark.asyncio
    async def test_full_assembly_workflow_success(
        self,
        configured_use_case: ExtractAssembleDataUseCase,
        document_repo: MemoryDocumentRepository,
        assembly_repo: MemoryAssemblyRepository,
        assembly_specification_repo: MemoryAssemblySpecificationRepository,
        knowledge_service_query_repo: MemoryKnowledgeServiceQueryRepository,
        knowledge_service_config_repo: MemoryKnowledgeServiceConfigRepository,
    ) -> None:
        """The happy path yields a completed assembly and a JSON document
        holding the canned title and summary."""
        # Arrange - source document
        await document_repo.save(
            self._make_document(
                "Sample meeting transcript for testing",
                "test_transcript.txt",
                "test-hash-123",
            )
        )

        # Arrange - specification whose two required fields are each
        # backed by one knowledge-service query
        two_field_schema = {
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "summary": {"type": "string"},
            },
            "required": ["title", "summary"],
        }
        pointer_to_query = {
            "/properties/title": "query-1",
            "/properties/summary": "query-2",
        }
        await assembly_specification_repo.save(
            self._make_spec(two_field_schema, pointer_to_query)
        )

        # Arrange - knowledge service config
        await knowledge_service_config_repo.save(self._make_ks_config("ks-123"))

        # Arrange - the two queries referenced by the specification
        query_rows = (
            ("query-1", "Extract Title", "Extract the title from this document", 100),
            (
                "query-2",
                "Extract Summary",
                "Extract a summary from this document",
                200,
            ),
        )
        for query_id, query_name, prompt, max_tokens in query_rows:
            await knowledge_service_query_repo.save(
                KnowledgeServiceQuery(
                    query_id=query_id,
                    name=query_name,
                    knowledge_service_id="ks-123",
                    prompt=prompt,
                    query_metadata={"max_tokens": max_tokens},
                    created_at=self._utc_now(),
                    updated_at=self._utc_now(),
                )
            )

        # Act - configured_use_case already carries the canned results
        assembly = await configured_use_case.assemble_data(
            document_id="doc-123",
            assembly_specification_id="spec-123",
            workflow_id="test-workflow-success",
        )

        # Assert - assembly record
        assert isinstance(assembly, Assembly)
        assert assembly.status == AssemblyStatus.COMPLETED
        assert assembly.assembled_document_id is not None

        # Assert - the assembled document was stored
        stored = await document_repo.get(assembly.assembled_document_id)
        assert stored is not None
        assert stored.status == DocumentStatus.ASSEMBLED

        # Assert - assembled JSON content
        stream = stored.content
        if stream is None:
            raise ValueError("Assembled document content is required")
        stream.seek(0)
        payload = json.loads(stream.read().decode("utf-8"))

        assert "title" in payload
        assert "summary" in payload
        assert payload["title"] == "Test Meeting"
        expected_summary = "This was a test meeting about important topics"
        assert payload["summary"] == expected_summary

    @pytest.mark.asyncio
    async def test_assembly_fails_when_specification_not_found(
        self, use_case: ExtractAssembleDataUseCase
    ) -> None:
        """An unknown specification id makes assemble_data raise ValueError."""
        with pytest.raises(ValueError, match="Assembly specification not found"):
            await use_case.assemble_data(
                document_id="doc-123",
                assembly_specification_id="nonexistent-spec",
                workflow_id="test-workflow-123",
            )

    @pytest.mark.asyncio
    async def test_assembly_fails_when_document_not_found(
        self,
        use_case: ExtractAssembleDataUseCase,
        assembly_specification_repo: MemoryAssemblySpecificationRepository,
    ) -> None:
        """An unknown input document id makes assemble_data raise ValueError."""
        # Arrange - store only the specification; the document stays absent.
        spec = self._make_spec({"type": "object", "properties": {}}, {})
        await assembly_specification_repo.save(spec)

        # Act & Assert
        with pytest.raises(ValueError, match="Document not found"):
            await use_case.assemble_data(
                document_id="nonexistent-doc",
                assembly_specification_id="spec-123",
                workflow_id="test-workflow-123",
            )

    @pytest.mark.asyncio
    async def test_assembly_fails_when_query_not_found(
        self,
        use_case: ExtractAssembleDataUseCase,
        document_repo: MemoryDocumentRepository,
        assembly_specification_repo: MemoryAssemblySpecificationRepository,
    ) -> None:
        """A specification that names an unsaved query makes assemble_data
        raise ValueError."""
        # Arrange - document plus a spec pointing at a query never saved
        await document_repo.save(
            self._make_document("Sample content", "test.txt", "test-hash")
        )
        title_only_schema = {
            "type": "object",
            "properties": {"title": {"type": "string"}},
        }
        await assembly_specification_repo.save(
            self._make_spec(
                title_only_schema,
                {"/properties/title": "nonexistent-query"},
            )
        )

        # Act & Assert
        with pytest.raises(ValueError, match="Knowledge service query not found"):
            await use_case.assemble_data(
                document_id="doc-123",
                assembly_specification_id="spec-123",
                workflow_id="test-workflow-123",
            )

    @pytest.mark.asyncio
    async def test_assembly_fails_with_invalid_json_schema(
        self,
        document_repo: MemoryDocumentRepository,
        assembly_repo: MemoryAssemblyRepository,
        assembly_specification_repo: MemoryAssemblySpecificationRepository,
        knowledge_service_query_repo: MemoryKnowledgeServiceQueryRepository,
        knowledge_service_config_repo: MemoryKnowledgeServiceConfigRepository,
    ) -> None:
        """assemble_data raises ValueError when the assembled data does not
        satisfy the specification's JSON schema."""
        # Arrange - source document
        await document_repo.save(
            self._make_document("Sample content", "test.txt", "test-hash")
        )

        # Arrange - schema requires "title" and an integer "count", but
        # only "title" is mapped to a query
        strict_schema = {
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "count": {"type": "integer"},  # Require integer
            },
            "required": ["title", "count"],
        }
        await assembly_specification_repo.save(
            self._make_spec(strict_schema, {"/properties/title": "query-1"})
        )

        # Arrange - knowledge service config and the single query
        ks_config = self._make_ks_config("ks-123")
        await knowledge_service_config_repo.save(ks_config)
        await knowledge_service_query_repo.save(
            KnowledgeServiceQuery(
                query_id="query-1",
                name="Extract Title",
                knowledge_service_id="ks-123",
                prompt="Extract the title",
                created_at=self._utc_now(),
                updated_at=self._utc_now(),
            )
        )

        # Arrange - the canned answer supplies a title only, so the
        # required "count" field is never produced
        title_only_service = MemoryKnowledgeService(ks_config)
        title_only_service.add_canned_query_result(
            self._make_canned_result("result-1", "Extract the title", '"Test"', 100)
        )

        # Arrange - dedicated use case around that service
        test_use_case = self._wire_use_case(
            document_repo,
            assembly_repo,
            assembly_specification_repo,
            knowledge_service_query_repo,
            knowledge_service_config_repo,
            title_only_service,
        )

        # Act & Assert
        expected_message = "Assembled data does not conform to JSON schema"
        with pytest.raises(ValueError, match=expected_message):
            await test_use_case.assemble_data(
                document_id="doc-123",
                assembly_specification_id="spec-123",
                workflow_id="test-workflow-123",
            )
|