julee 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- julee/__init__.py +3 -0
- julee/api/__init__.py +20 -0
- julee/api/app.py +180 -0
- julee/api/dependencies.py +257 -0
- julee/api/requests.py +175 -0
- julee/api/responses.py +43 -0
- julee/api/routers/__init__.py +43 -0
- julee/api/routers/assembly_specifications.py +212 -0
- julee/api/routers/documents.py +182 -0
- julee/api/routers/knowledge_service_configs.py +79 -0
- julee/api/routers/knowledge_service_queries.py +293 -0
- julee/api/routers/system.py +137 -0
- julee/api/routers/workflows.py +234 -0
- julee/api/services/__init__.py +20 -0
- julee/api/services/system_initialization.py +214 -0
- julee/api/tests/__init__.py +14 -0
- julee/api/tests/routers/__init__.py +17 -0
- julee/api/tests/routers/test_assembly_specifications.py +749 -0
- julee/api/tests/routers/test_documents.py +301 -0
- julee/api/tests/routers/test_knowledge_service_configs.py +234 -0
- julee/api/tests/routers/test_knowledge_service_queries.py +738 -0
- julee/api/tests/routers/test_system.py +179 -0
- julee/api/tests/routers/test_workflows.py +393 -0
- julee/api/tests/test_app.py +285 -0
- julee/api/tests/test_dependencies.py +245 -0
- julee/api/tests/test_requests.py +250 -0
- julee/domain/__init__.py +22 -0
- julee/domain/models/__init__.py +49 -0
- julee/domain/models/assembly/__init__.py +17 -0
- julee/domain/models/assembly/assembly.py +103 -0
- julee/domain/models/assembly/tests/__init__.py +0 -0
- julee/domain/models/assembly/tests/factories.py +37 -0
- julee/domain/models/assembly/tests/test_assembly.py +430 -0
- julee/domain/models/assembly_specification/__init__.py +24 -0
- julee/domain/models/assembly_specification/assembly_specification.py +172 -0
- julee/domain/models/assembly_specification/knowledge_service_query.py +123 -0
- julee/domain/models/assembly_specification/tests/__init__.py +0 -0
- julee/domain/models/assembly_specification/tests/factories.py +78 -0
- julee/domain/models/assembly_specification/tests/test_assembly_specification.py +490 -0
- julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +310 -0
- julee/domain/models/custom_fields/__init__.py +0 -0
- julee/domain/models/custom_fields/content_stream.py +68 -0
- julee/domain/models/custom_fields/tests/__init__.py +0 -0
- julee/domain/models/custom_fields/tests/test_custom_fields.py +53 -0
- julee/domain/models/document/__init__.py +17 -0
- julee/domain/models/document/document.py +150 -0
- julee/domain/models/document/tests/__init__.py +0 -0
- julee/domain/models/document/tests/factories.py +76 -0
- julee/domain/models/document/tests/test_document.py +297 -0
- julee/domain/models/knowledge_service_config/__init__.py +17 -0
- julee/domain/models/knowledge_service_config/knowledge_service_config.py +86 -0
- julee/domain/models/policy/__init__.py +15 -0
- julee/domain/models/policy/document_policy_validation.py +220 -0
- julee/domain/models/policy/policy.py +203 -0
- julee/domain/models/policy/tests/__init__.py +0 -0
- julee/domain/models/policy/tests/factories.py +47 -0
- julee/domain/models/policy/tests/test_document_policy_validation.py +420 -0
- julee/domain/models/policy/tests/test_policy.py +546 -0
- julee/domain/repositories/__init__.py +27 -0
- julee/domain/repositories/assembly.py +45 -0
- julee/domain/repositories/assembly_specification.py +52 -0
- julee/domain/repositories/base.py +146 -0
- julee/domain/repositories/document.py +49 -0
- julee/domain/repositories/document_policy_validation.py +52 -0
- julee/domain/repositories/knowledge_service_config.py +54 -0
- julee/domain/repositories/knowledge_service_query.py +44 -0
- julee/domain/repositories/policy.py +49 -0
- julee/domain/use_cases/__init__.py +17 -0
- julee/domain/use_cases/decorators.py +107 -0
- julee/domain/use_cases/extract_assemble_data.py +649 -0
- julee/domain/use_cases/initialize_system_data.py +842 -0
- julee/domain/use_cases/tests/__init__.py +7 -0
- julee/domain/use_cases/tests/test_extract_assemble_data.py +548 -0
- julee/domain/use_cases/tests/test_initialize_system_data.py +455 -0
- julee/domain/use_cases/tests/test_validate_document.py +1228 -0
- julee/domain/use_cases/validate_document.py +736 -0
- julee/fixtures/assembly_specifications.yaml +70 -0
- julee/fixtures/documents.yaml +178 -0
- julee/fixtures/knowledge_service_configs.yaml +37 -0
- julee/fixtures/knowledge_service_queries.yaml +27 -0
- julee/repositories/__init__.py +17 -0
- julee/repositories/memory/__init__.py +31 -0
- julee/repositories/memory/assembly.py +84 -0
- julee/repositories/memory/assembly_specification.py +125 -0
- julee/repositories/memory/base.py +227 -0
- julee/repositories/memory/document.py +149 -0
- julee/repositories/memory/document_policy_validation.py +104 -0
- julee/repositories/memory/knowledge_service_config.py +123 -0
- julee/repositories/memory/knowledge_service_query.py +120 -0
- julee/repositories/memory/policy.py +87 -0
- julee/repositories/memory/tests/__init__.py +0 -0
- julee/repositories/memory/tests/test_document.py +212 -0
- julee/repositories/memory/tests/test_document_policy_validation.py +161 -0
- julee/repositories/memory/tests/test_policy.py +443 -0
- julee/repositories/minio/__init__.py +31 -0
- julee/repositories/minio/assembly.py +103 -0
- julee/repositories/minio/assembly_specification.py +170 -0
- julee/repositories/minio/client.py +570 -0
- julee/repositories/minio/document.py +530 -0
- julee/repositories/minio/document_policy_validation.py +120 -0
- julee/repositories/minio/knowledge_service_config.py +187 -0
- julee/repositories/minio/knowledge_service_query.py +211 -0
- julee/repositories/minio/policy.py +106 -0
- julee/repositories/minio/tests/__init__.py +0 -0
- julee/repositories/minio/tests/fake_client.py +213 -0
- julee/repositories/minio/tests/test_assembly.py +374 -0
- julee/repositories/minio/tests/test_assembly_specification.py +391 -0
- julee/repositories/minio/tests/test_client_protocol.py +57 -0
- julee/repositories/minio/tests/test_document.py +591 -0
- julee/repositories/minio/tests/test_document_policy_validation.py +192 -0
- julee/repositories/minio/tests/test_knowledge_service_config.py +374 -0
- julee/repositories/minio/tests/test_knowledge_service_query.py +438 -0
- julee/repositories/minio/tests/test_policy.py +559 -0
- julee/repositories/temporal/__init__.py +38 -0
- julee/repositories/temporal/activities.py +114 -0
- julee/repositories/temporal/activity_names.py +34 -0
- julee/repositories/temporal/proxies.py +159 -0
- julee/services/__init__.py +18 -0
- julee/services/knowledge_service/__init__.py +48 -0
- julee/services/knowledge_service/anthropic/__init__.py +12 -0
- julee/services/knowledge_service/anthropic/knowledge_service.py +331 -0
- julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +318 -0
- julee/services/knowledge_service/factory.py +138 -0
- julee/services/knowledge_service/knowledge_service.py +160 -0
- julee/services/knowledge_service/memory/__init__.py +13 -0
- julee/services/knowledge_service/memory/knowledge_service.py +278 -0
- julee/services/knowledge_service/memory/test_knowledge_service.py +345 -0
- julee/services/knowledge_service/test_factory.py +112 -0
- julee/services/temporal/__init__.py +38 -0
- julee/services/temporal/activities.py +86 -0
- julee/services/temporal/activity_names.py +22 -0
- julee/services/temporal/proxies.py +41 -0
- julee/util/__init__.py +0 -0
- julee/util/domain.py +119 -0
- julee/util/repos/__init__.py +0 -0
- julee/util/repos/minio/__init__.py +0 -0
- julee/util/repos/minio/file_storage.py +213 -0
- julee/util/repos/temporal/__init__.py +11 -0
- julee/util/repos/temporal/client_proxies/file_storage.py +68 -0
- julee/util/repos/temporal/data_converter.py +123 -0
- julee/util/repos/temporal/minio_file_storage.py +12 -0
- julee/util/repos/temporal/proxies/__init__.py +0 -0
- julee/util/repos/temporal/proxies/file_storage.py +58 -0
- julee/util/repositories.py +55 -0
- julee/util/temporal/__init__.py +22 -0
- julee/util/temporal/activities.py +123 -0
- julee/util/temporal/decorators.py +473 -0
- julee/util/tests/__init__.py +1 -0
- julee/util/tests/test_decorators.py +770 -0
- julee/util/validation/__init__.py +29 -0
- julee/util/validation/repository.py +100 -0
- julee/util/validation/type_guards.py +369 -0
- julee/worker.py +211 -0
- julee/workflows/__init__.py +26 -0
- julee/workflows/extract_assemble.py +215 -0
- julee/workflows/validate_document.py +228 -0
- julee-0.1.0.dist-info/METADATA +195 -0
- julee-0.1.0.dist-info/RECORD +161 -0
- julee-0.1.0.dist-info/WHEEL +5 -0
- julee-0.1.0.dist-info/licenses/LICENSE +674 -0
- julee-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Comprehensive tests for KnowledgeServiceQuery domain model.
|
|
3
|
+
|
|
4
|
+
This test module documents the design decisions made for the
|
|
5
|
+
KnowledgeServiceQuery domain model using table-based tests. It covers:
|
|
6
|
+
|
|
7
|
+
- KnowledgeServiceQuery instantiation with various field combinations
|
|
8
|
+
- JSON Pointer validation for schema_pointer field
|
|
9
|
+
- JSON serialization behavior
|
|
10
|
+
- Field validation for required fields
|
|
11
|
+
|
|
12
|
+
Design decisions documented:
|
|
13
|
+
- KnowledgeServiceQuery must have all required fields (query_id, name,
|
|
14
|
+
knowledge_service_id, prompt, schema_pointer)
|
|
15
|
+
- Schema pointer must be a valid JSON Pointer (RFC 6901)
|
|
16
|
+
- All text fields must be non-empty and non-whitespace
|
|
17
|
+
- Version field has a default but can be customized
|
|
18
|
+
- Status defaults to ACTIVE
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import pytest
|
|
22
|
+
|
|
23
|
+
from julee.domain.models.assembly_specification import (
|
|
24
|
+
KnowledgeServiceQuery,
|
|
25
|
+
)
|
|
26
|
+
from .factories import KnowledgeServiceQueryFactory
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class TestKnowledgeServiceQueryInstantiation:
    """Test KnowledgeServiceQuery creation with various field combinations.

    The table below pairs constructor arguments with the expected outcome:
    valid rows must build a model whose fields equal the stripped inputs,
    invalid rows (empty or whitespace-only text) must be rejected.
    """

    @pytest.mark.parametrize(
        "query_id,name,knowledge_service_id,prompt,expected_success",
        [
            # Valid cases
            (
                "query-1",
                "Extract Attendees",
                "ragflow-service-1",
                "Extract list of meeting attendees with their roles",
                True,
            ),
            (
                "query-2",
                "Extract Action Items",
                "knowledge-service-2",
                "Extract action items with assignees and due dates",
                True,
            ),
            (
                "query-3",
                "Extract Meeting Metadata",
                "service-alpha",
                "Extract basic meeting information like title, date, time",
                True,
            ),
            # Invalid cases - empty required fields
            (
                "",
                "Test Query",
                "service-1",
                "Test prompt",
                False,
            ),  # Empty query_id
            (
                "query-4",
                "",
                "service-1",
                "Test prompt",
                False,
            ),  # Empty name
            (
                "query-5",
                "Test Query",
                "",
                "Test prompt",
                False,
            ),  # Empty knowledge_service_id
            (
                "query-6",
                "Test Query",
                "service-1",
                "",
                False,
            ),  # Empty prompt
            # Invalid cases - whitespace only
            (
                " ",
                "Test Query",
                "service-1",
                "Test prompt",
                False,
            ),  # Whitespace query_id
            (
                "query-8",
                " ",
                "service-1",
                "Test prompt",
                False,
            ),  # Whitespace name
            (
                "query-9",
                "Test Query",
                " ",
                "Test prompt",
                False,
            ),  # Whitespace knowledge_service_id
            (
                "query-10",
                "Test Query",
                "service-1",
                " ",
                False,
            ),  # Whitespace prompt
        ],
    )
    def test_knowledge_service_query_creation_validation(
        self,
        query_id: str,
        name: str,
        knowledge_service_id: str,
        prompt: str,
        expected_success: bool,
    ) -> None:
        """Test query creation with various field validation scenarios."""
        if expected_success:
            # Should create successfully
            query = KnowledgeServiceQuery(
                query_id=query_id,
                name=name,
                knowledge_service_id=knowledge_service_id,
                prompt=prompt,
            )
            assert query.query_id == query_id.strip()
            assert query.name == name.strip()
            assert query.knowledge_service_id == knowledge_service_id.strip()
            assert query.prompt == prompt.strip()
        else:
            # Should raise a validation error. ValueError is used instead of
            # a bare Exception: pydantic's ValidationError is a ValueError
            # subclass, so both a raw ValueError and a wrapped
            # ValidationError are accepted, while unrelated failures
            # (TypeError, NameError, ...) still fail the test.
            with pytest.raises(ValueError):
                KnowledgeServiceQuery(
                    query_id=query_id,
                    name=name,
                    knowledge_service_id=knowledge_service_id,
                    prompt=prompt,
                )
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class TestKnowledgeServiceQuerySerialization:
    """Checks on how KnowledgeServiceQuery is written to and read from JSON."""

    def test_knowledge_service_query_json_serialization(self) -> None:
        """The JSON dump carries the core fields with the model's values."""
        import json

        query = KnowledgeServiceQueryFactory.build(
            query_id="attendee-extractor",
            name="Meeting Attendee Extractor",
            knowledge_service_id="ragflow-primary",
            prompt="Extract meeting attendees with names, roles",
        )

        payload = json.loads(query.model_dump_json())

        # Each core field must appear in the JSON with the model's value.
        for field_name in ("query_id", "name", "knowledge_service_id", "prompt"):
            assert payload[field_name] == getattr(query, field_name)

    def test_knowledge_service_query_json_roundtrip(self) -> None:
        """A query dumped to JSON can be rebuilt with matching core fields."""
        import json

        source_query = KnowledgeServiceQueryFactory.build()

        # Dump to a JSON string, parse it, and feed the dict back in.
        payload = json.loads(source_query.model_dump_json())
        rebuilt_query = KnowledgeServiceQuery(**payload)

        # The rebuilt model must agree with the source on every core field.
        for field_name in ("query_id", "name", "knowledge_service_id", "prompt"):
            assert getattr(rebuilt_query, field_name) == getattr(
                source_query, field_name
            )
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
class TestKnowledgeServiceQueryDefaults:
    """Checks on values KnowledgeServiceQuery fills in or keeps as given."""

    def test_knowledge_service_query_default_values(self) -> None:
        """Timestamps are populated when only the required fields are given."""
        required_only = {
            "query_id": "test-id",
            "name": "Test Query",
            "knowledge_service_id": "test-service",
            "prompt": "Test prompt",
        }

        minimal_query = KnowledgeServiceQuery(**required_only)

        for timestamp_field in ("created_at", "updated_at"):
            assert getattr(minimal_query, timestamp_field) is not None

    def test_knowledge_service_query_custom_values(self) -> None:
        """Explicitly supplied identifiers are stored unchanged."""
        supplied = {
            "query_id": "custom-id",
            "name": "Custom Query",
            "knowledge_service_id": "custom-service",
            "prompt": "Custom prompt",
        }

        custom_query = KnowledgeServiceQuery(**supplied)

        assert custom_query.query_id == "custom-id"
        assert custom_query.name == "Custom Query"
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
class TestKnowledgeServiceQueryMetadata:
    """Checks on the free-form query_metadata field."""

    def test_query_metadata_defaults_to_empty_dict(self) -> None:
        """Omitting query_metadata yields an empty dict."""
        bare_query = KnowledgeServiceQuery(
            query_id="test-id",
            name="Test Query",
            knowledge_service_id="test-service",
            prompt="Test prompt",
        )

        assert bare_query.query_metadata == {}

    def test_query_metadata_accepts_custom_values(self) -> None:
        """Service-specific settings passed as query_metadata are kept."""
        settings = {
            "model": "claude-sonnet-4-20250514",
            "max_tokens": 4000,
            "temperature": 0.1,
        }

        configured = KnowledgeServiceQuery(
            query_id="test-id",
            name="Test Query",
            knowledge_service_id="anthropic-service",
            prompt="Test prompt",
            query_metadata=settings,
        )

        stored = configured.query_metadata
        assert stored == settings
        assert stored["model"] == "claude-sonnet-4-20250514"
        assert stored["max_tokens"] == 4000
        assert stored["temperature"] == 0.1

    def test_query_metadata_serialization(self) -> None:
        """Nested query_metadata appears intact in the JSON dump."""
        import json

        settings = {
            "model": "gpt-4",
            "temperature": 0.2,
            "top_p": 0.9,
            "custom_config": {"endpoint": "v2", "retries": 3},
        }

        configured = KnowledgeServiceQuery(
            query_id="openai-query",
            name="OpenAI Query",
            knowledge_service_id="openai-service",
            prompt="Test prompt for OpenAI",
            query_metadata=settings,
        )

        dumped_metadata = json.loads(configured.model_dump_json())["query_metadata"]

        assert dumped_metadata == settings
        assert dumped_metadata["model"] == "gpt-4"
        assert dumped_metadata["custom_config"]["endpoint"] == "v2"

    def test_query_metadata_roundtrip_serialization(self) -> None:
        """query_metadata is unchanged after dump-to-JSON and rebuild."""
        import json

        settings = {
            "model": "claude-sonnet-4-20250514",
            "max_tokens": 2000,
            "temperature": 0.0,
            "citations": True,
        }

        first = KnowledgeServiceQuery(
            query_id="roundtrip-test",
            name="Roundtrip Test",
            knowledge_service_id="test-service",
            prompt="Test roundtrip serialization",
            query_metadata=settings,
        )

        # Dump, parse, and construct a second model from the parsed dict.
        second = KnowledgeServiceQuery(**json.loads(first.model_dump_json()))

        assert second.query_metadata == first.query_metadata
        assert second.query_metadata == settings
|
|
File without changes
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Custom Pydantic field types for the CEAP workflow domain.
|
|
3
|
+
|
|
4
|
+
This module contains custom field types that provide proper Pydantic
|
|
5
|
+
validation
|
|
6
|
+
for specialized data types used in the document processing workflow.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from pydantic import GetCoreSchemaHandler
|
|
10
|
+
from pydantic_core import core_schema
|
|
11
|
+
from typing import Any
|
|
12
|
+
import io
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ContentStream:
    """Wrapper for IO streams that provides proper Pydantic validation.

    This class wraps io.IOBase instances to provide proper Pydantic validation
    without requiring arbitrary_types_allowed. It ensures that only valid
    stream objects are accepted while providing a clean interface for
    stream operations (read, seek, tell).
    """

    def __init__(self, stream: io.IOBase):
        """Wrap ``stream``.

        Raises:
            ValueError: If ``stream`` is not an ``io.IOBase`` instance.
        """
        if not isinstance(stream, io.IOBase):
            raise ValueError(
                f"ContentStream requires an io.IOBase instance, got {type(stream)}"
            )
        self._stream = stream

    def read(self, size: int = -1) -> bytes:
        """Read from the underlying stream and always return ``bytes``.

        Args:
            size: Passed straight to the wrapped stream's ``read``.

        Returns:
            The data read. ``str`` results (e.g. from StringIO) are encoded
            as UTF-8; bytes-like results (``bytearray``/``memoryview``) are
            copied into ``bytes``; any other result (such as ``None``)
            yields ``b""``.
        """
        result = self._stream.read(size)
        if isinstance(result, bytes):
            return result
        if isinstance(result, str):
            # Text streams hand back str; normalise to UTF-8 bytes.
            return result.encode("utf-8")
        if isinstance(result, (bytearray, memoryview)):
            # Bytes-like data must not be dropped by the fallback below.
            return bytes(result)
        return b""  # Fallback for other types

    def seek(self, offset: int, whence: int = 0) -> int:
        """Seek in the underlying stream and return its result."""
        return self._stream.seek(offset, whence)

    def tell(self) -> int:
        """Get current position in stream."""
        return self._stream.tell()

    @property
    def stream(self) -> io.IOBase:
        """Access the underlying stream."""
        return self._stream

    # The pydantic annotations below are quoted so they are not evaluated
    # when the class body runs.
    @classmethod
    def __get_pydantic_core_schema__(
        cls, source_type: type, handler: "GetCoreSchemaHandler"
    ) -> "core_schema.CoreSchema":
        """Define how Pydantic should validate this type.

        Validation is delegated entirely to ``_validate``.
        """
        return core_schema.no_info_plain_validator_function(cls._validate)

    @classmethod
    def _validate(cls, value: Any) -> "ContentStream":
        """Validate input and convert to ContentStream.

        An existing ContentStream is returned as-is; an ``io.IOBase`` is
        wrapped.

        Raises:
            ValueError: For any other input type.
        """
        if isinstance(value, cls):
            return value
        if isinstance(value, io.IOBase):
            return cls(value)
        raise ValueError(f"ContentStream expects io.IOBase, got {type(value)}")
|
|
File without changes
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests for custom Pydantic field types.
|
|
3
|
+
|
|
4
|
+
This test module validates the behavior of custom field types used in
|
|
5
|
+
the domain models, particularly focusing on ContentStream which wraps
|
|
6
|
+
io.IOBase instances for proper Pydantic validation.
|
|
7
|
+
|
|
8
|
+
Design decisions documented:
|
|
9
|
+
- ContentStream accepts any io.IOBase instance
|
|
10
|
+
- ContentStream provides read, seek, tell interface
|
|
11
|
+
- ContentStream validates input types at creation
|
|
12
|
+
- ContentStream works with Pydantic validation without arbitrary_types_allowed
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import pytest
|
|
16
|
+
import io
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
from julee.domain.models.custom_fields.content_stream import (
|
|
20
|
+
ContentStream,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@pytest.mark.parametrize(
    "stream_input,error_message",
    [
        # Accepted: anything that is an io.IOBase instance
        *(
            (accepted, None)
            for accepted in (
                io.BytesIO(b"test content"),
                io.StringIO("text content"),
                io.BufferedReader(io.BytesIO(b"buffered")),
                ContentStream(io.BytesIO(b"nested content")).stream,
            )
        ),
        # Rejected: values that are not io.IOBase instances
        *(
            (rejected, "ContentStream requires an io.IOBase instance")
            for rejected in (b"raw bytes", "string", 123, None, [], {})
        ),
    ],
)
def test_content_stream_validation(
    stream_input: Any, error_message: str | None
) -> None:
    """Check which constructor inputs ContentStream accepts or rejects.

    Rows with an error message must raise ValueError matching it; rows
    without one must produce a wrapper exposing the very same stream object.
    """
    if error_message is not None:
        # Rejection path: the constructor must fail with the given message.
        with pytest.raises(ValueError, match=error_message):
            ContentStream(stream_input)
        return

    # Acceptance path: the wrapper holds the original object, not a copy.
    wrapped = ContentStream(stream_input)
    assert wrapped.stream is stream_input
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Document domain package for the Capture, Extract, Assemble, Publish workflow.
|
|
3
|
+
|
|
4
|
+
This package contains the Document domain object and its related functionality
|
|
5
|
+
for the CEAP workflow system.
|
|
6
|
+
|
|
7
|
+
Document represents complete document entities including content and metadata,
|
|
8
|
+
providing a stream-like interface for efficient handling of both small and
|
|
9
|
+
large documents.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from .document import Document, DocumentStatus
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"Document",
|
|
16
|
+
"DocumentStatus",
|
|
17
|
+
]
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Document domain models for the Capture, Extract, Assemble, Publish workflow.
|
|
3
|
+
|
|
4
|
+
This module contains the core document domain objects that represent
|
|
5
|
+
documents and their metadata in the CEAP workflow system.
|
|
6
|
+
|
|
7
|
+
All domain models use Pydantic BaseModel for validation, serialization,
|
|
8
|
+
and type safety, following the patterns established in the sample project.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from pydantic import BaseModel, Field, field_validator, model_validator
|
|
12
|
+
from pydantic import ValidationInfo
|
|
13
|
+
from typing import Callable, Optional, List, Dict, Any
|
|
14
|
+
from datetime import datetime, timezone
|
|
15
|
+
from enum import Enum
|
|
16
|
+
from julee.domain.models.custom_fields.content_stream import (
|
|
17
|
+
ContentStream,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def delegate_to_content(*method_names: str) -> Callable[[type], type]:
|
|
22
|
+
"""Decorator to delegate IO methods to the content stream property."""
|
|
23
|
+
|
|
24
|
+
def decorator(cls: type) -> type:
|
|
25
|
+
for method_name in method_names:
|
|
26
|
+
|
|
27
|
+
def make_delegated_method(name: str) -> Callable[..., Any]:
|
|
28
|
+
def delegated_method(self: Any, *args: Any, **kwargs: Any) -> Any:
|
|
29
|
+
return getattr(self.content, name)(*args, **kwargs)
|
|
30
|
+
|
|
31
|
+
delegated_method.__name__ = name
|
|
32
|
+
delegated_method.__doc__ = f"Delegate {name} to content stream."
|
|
33
|
+
return delegated_method
|
|
34
|
+
|
|
35
|
+
setattr(cls, method_name, make_delegated_method(method_name))
|
|
36
|
+
return cls
|
|
37
|
+
|
|
38
|
+
return decorator
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class DocumentStatus(str, Enum):
    """Stage a document has reached in the Capture, Extract, Assemble,
    Publish pipeline.

    Members are also ``str`` instances, so they compare equal to their
    string values.
    """

    CAPTURED = "captured"
    # Registered with knowledge service
    REGISTERED = "registered"
    # Assembly specification types determined
    ASSEMBLY_SPECIFICATION_IDENTIFIED = "assembly_specification_identified"
    # Extractions completed
    EXTRACTED = "extracted"
    # Template rendered and policies applied
    ASSEMBLED = "assembled"
    PUBLISHED = "published"
    FAILED = "failed"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@delegate_to_content("read", "seek", "tell")
class Document(BaseModel):
    """A document in the CEAP workflow: its metadata plus its content.

    Content is carried either as a ContentStream (``content``) or as a short
    string (``content_string``); outside the Temporal validation context
    exactly one of the two must be supplied.

    ``content`` is declared with ``exclude=True``, so it is left out of
    serialized output - use separate content endpoints for streaming binary
    data over HTTP. ``read``, ``seek`` and ``tell`` are forwarded to
    ``content`` by the class decorator.
    """

    # --- Identification ---
    document_id: str
    original_filename: str
    content_type: str
    size_bytes: int = Field(gt=0, description="Size must be positive")
    content_multihash: str = Field(
        description="Multihash of document content for integrity verification"
    )

    # --- Processing state ---
    status: DocumentStatus = DocumentStatus.CAPTURED
    knowledge_service_id: Optional[str] = None
    assembly_types: List[str] = Field(default_factory=list)

    # --- Timestamps (default to the current UTC time) ---
    created_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    updated_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )

    # --- Extra metadata and the content itself ---
    additional_metadata: Dict[str, Any] = Field(default_factory=dict)
    content: Optional[ContentStream] = Field(default=None, exclude=True)
    content_string: Optional[str] = Field(
        default=None,
        description="Small content as string (few KB max). Use for "
        "workflow-generated content to avoid ContentStream serialization "
        "issues. For larger content, ensure calling from concrete "
        "implementations (ie. outside workflows and use-cases) and use "
        "content field instead.",
    )

    @field_validator("document_id")
    @classmethod
    def document_id_must_not_be_empty(cls, v: str) -> str:
        """Reject empty/whitespace-only IDs; return the stripped value."""
        stripped = v.strip() if v else ""
        if not stripped:
            raise ValueError("Document ID cannot be empty")
        return stripped

    @field_validator("original_filename")
    @classmethod
    def filename_must_not_be_empty(cls, v: str) -> str:
        """Reject empty/whitespace-only filenames; return the stripped value."""
        stripped = v.strip() if v else ""
        if not stripped:
            raise ValueError("Original filename cannot be empty")
        return stripped

    @field_validator("content_type")
    @classmethod
    def content_type_must_not_be_empty(cls, v: str) -> str:
        """Reject empty/whitespace-only content types; return the stripped value."""
        stripped = v.strip() if v else ""
        if not stripped:
            raise ValueError("Content type cannot be empty")
        return stripped

    @field_validator("content_multihash")
    @classmethod
    def content_multihash_must_not_be_empty(cls, v: str) -> str:
        """Reject empty/whitespace-only multihashes; return the stripped value."""
        # TODO: actually validate the multihash against the content?
        stripped = v.strip() if v else ""
        if not stripped:
            raise ValueError("Content multihash cannot be empty")
        return stripped

    @model_validator(mode="after")
    def validate_content_fields(self, info: ValidationInfo) -> "Document":
        """Require exactly one of ``content`` / ``content_string``.

        The check is skipped when the validation context carries a truthy
        ``temporal_validation`` entry.
        """
        context = info.context
        if context and context.get("temporal_validation"):
            # Temporal deserialization context: accept the model as-is.
            return self

        stream_given = self.content is not None
        string_given = self.content_string is not None

        if stream_given and string_given:
            raise ValueError(
                "Document cannot have both content and content_string. "
                "Provide only one."
            )
        if not (stream_given or string_given):
            raise ValueError(
                "Document must have either content or content_string. Provide one."
            )
        return self
|
|
File without changes
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Test factories for Document domain objects using factory_boy.
|
|
3
|
+
|
|
4
|
+
This module provides factory_boy factories for creating test instances of
|
|
5
|
+
Document domain objects with sensible defaults.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import io
|
|
9
|
+
from datetime import datetime, timezone
|
|
10
|
+
from typing import Any
|
|
11
|
+
from factory.base import Factory
|
|
12
|
+
from factory.faker import Faker
|
|
13
|
+
from factory.declarations import LazyAttribute, LazyFunction
|
|
14
|
+
|
|
15
|
+
from julee.domain.models.document import Document, DocumentStatus
|
|
16
|
+
from julee.domain.models.custom_fields.content_stream import (
|
|
17
|
+
ContentStream,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Helper functions to generate content bytes consistently
|
|
22
|
+
def _get_default_content_bytes() -> bytes:
|
|
23
|
+
"""Generate the default content bytes for documents."""
|
|
24
|
+
return b"Test document content for testing purposes"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class ContentStreamFactory(Factory):
    """Factory producing ContentStream objects backed by an in-memory BytesIO.

    Both hooks behave the same way: the optional ``content`` keyword supplies
    the bytes to wrap, otherwise a fixed test payload is used.
    """

    class Meta:
        model = ContentStream

    @classmethod
    def _create(cls, model_class: type[ContentStream], **kwargs: Any) -> ContentStream:
        """Wrap the requested (or default) bytes in a new ContentStream."""
        payload = kwargs["content"] if "content" in kwargs else b"Test stream content"
        buffer = io.BytesIO(payload)
        return model_class(buffer)

    @classmethod
    def _build(cls, model_class: type[ContentStream], **kwargs: Any) -> ContentStream:
        """Wrap the requested (or default) bytes in a new ContentStream."""
        payload = kwargs["content"] if "content" in kwargs else b"Test stream content"
        buffer = io.BytesIO(payload)
        return model_class(buffer)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class DocumentFactory(Factory):
    """Factory producing Document instances populated with test defaults.

    The default content is a ContentStream over the bytes returned by
    ``_get_default_content_bytes``; ``size_bytes`` is the length of those
    same bytes.
    """

    class Meta:
        model = Document

    # Identification
    document_id = Faker("uuid4")
    original_filename = "test_document.txt"
    content_type = "text/plain"
    content_multihash = Faker("sha256")

    # Processing state
    status = DocumentStatus.CAPTURED
    knowledge_service_id = None
    assembly_types: list[str] = []

    # Timestamps, evaluated per instance
    created_at = LazyFunction(lambda: datetime.now(timezone.utc))
    updated_at = LazyFunction(lambda: datetime.now(timezone.utc))

    # Extra metadata
    additional_metadata: dict[str, Any] = {}

    # Size is derived from the default content bytes.
    size_bytes = LazyAttribute(lambda _obj: len(_get_default_content_bytes()))

    # A new BytesIO-backed stream is created for every instance.
    content = LazyAttribute(
        lambda _obj: ContentStream(io.BytesIO(_get_default_content_bytes()))
    )
|