julee 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. julee/__init__.py +3 -0
  2. julee/api/__init__.py +20 -0
  3. julee/api/app.py +180 -0
  4. julee/api/dependencies.py +257 -0
  5. julee/api/requests.py +175 -0
  6. julee/api/responses.py +43 -0
  7. julee/api/routers/__init__.py +43 -0
  8. julee/api/routers/assembly_specifications.py +212 -0
  9. julee/api/routers/documents.py +182 -0
  10. julee/api/routers/knowledge_service_configs.py +79 -0
  11. julee/api/routers/knowledge_service_queries.py +293 -0
  12. julee/api/routers/system.py +137 -0
  13. julee/api/routers/workflows.py +234 -0
  14. julee/api/services/__init__.py +20 -0
  15. julee/api/services/system_initialization.py +214 -0
  16. julee/api/tests/__init__.py +14 -0
  17. julee/api/tests/routers/__init__.py +17 -0
  18. julee/api/tests/routers/test_assembly_specifications.py +749 -0
  19. julee/api/tests/routers/test_documents.py +301 -0
  20. julee/api/tests/routers/test_knowledge_service_configs.py +234 -0
  21. julee/api/tests/routers/test_knowledge_service_queries.py +738 -0
  22. julee/api/tests/routers/test_system.py +179 -0
  23. julee/api/tests/routers/test_workflows.py +393 -0
  24. julee/api/tests/test_app.py +285 -0
  25. julee/api/tests/test_dependencies.py +245 -0
  26. julee/api/tests/test_requests.py +250 -0
  27. julee/domain/__init__.py +22 -0
  28. julee/domain/models/__init__.py +49 -0
  29. julee/domain/models/assembly/__init__.py +17 -0
  30. julee/domain/models/assembly/assembly.py +103 -0
  31. julee/domain/models/assembly/tests/__init__.py +0 -0
  32. julee/domain/models/assembly/tests/factories.py +37 -0
  33. julee/domain/models/assembly/tests/test_assembly.py +430 -0
  34. julee/domain/models/assembly_specification/__init__.py +24 -0
  35. julee/domain/models/assembly_specification/assembly_specification.py +172 -0
  36. julee/domain/models/assembly_specification/knowledge_service_query.py +123 -0
  37. julee/domain/models/assembly_specification/tests/__init__.py +0 -0
  38. julee/domain/models/assembly_specification/tests/factories.py +78 -0
  39. julee/domain/models/assembly_specification/tests/test_assembly_specification.py +490 -0
  40. julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +310 -0
  41. julee/domain/models/custom_fields/__init__.py +0 -0
  42. julee/domain/models/custom_fields/content_stream.py +68 -0
  43. julee/domain/models/custom_fields/tests/__init__.py +0 -0
  44. julee/domain/models/custom_fields/tests/test_custom_fields.py +53 -0
  45. julee/domain/models/document/__init__.py +17 -0
  46. julee/domain/models/document/document.py +150 -0
  47. julee/domain/models/document/tests/__init__.py +0 -0
  48. julee/domain/models/document/tests/factories.py +76 -0
  49. julee/domain/models/document/tests/test_document.py +297 -0
  50. julee/domain/models/knowledge_service_config/__init__.py +17 -0
  51. julee/domain/models/knowledge_service_config/knowledge_service_config.py +86 -0
  52. julee/domain/models/policy/__init__.py +15 -0
  53. julee/domain/models/policy/document_policy_validation.py +220 -0
  54. julee/domain/models/policy/policy.py +203 -0
  55. julee/domain/models/policy/tests/__init__.py +0 -0
  56. julee/domain/models/policy/tests/factories.py +47 -0
  57. julee/domain/models/policy/tests/test_document_policy_validation.py +420 -0
  58. julee/domain/models/policy/tests/test_policy.py +546 -0
  59. julee/domain/repositories/__init__.py +27 -0
  60. julee/domain/repositories/assembly.py +45 -0
  61. julee/domain/repositories/assembly_specification.py +52 -0
  62. julee/domain/repositories/base.py +146 -0
  63. julee/domain/repositories/document.py +49 -0
  64. julee/domain/repositories/document_policy_validation.py +52 -0
  65. julee/domain/repositories/knowledge_service_config.py +54 -0
  66. julee/domain/repositories/knowledge_service_query.py +44 -0
  67. julee/domain/repositories/policy.py +49 -0
  68. julee/domain/use_cases/__init__.py +17 -0
  69. julee/domain/use_cases/decorators.py +107 -0
  70. julee/domain/use_cases/extract_assemble_data.py +649 -0
  71. julee/domain/use_cases/initialize_system_data.py +842 -0
  72. julee/domain/use_cases/tests/__init__.py +7 -0
  73. julee/domain/use_cases/tests/test_extract_assemble_data.py +548 -0
  74. julee/domain/use_cases/tests/test_initialize_system_data.py +455 -0
  75. julee/domain/use_cases/tests/test_validate_document.py +1228 -0
  76. julee/domain/use_cases/validate_document.py +736 -0
  77. julee/fixtures/assembly_specifications.yaml +70 -0
  78. julee/fixtures/documents.yaml +178 -0
  79. julee/fixtures/knowledge_service_configs.yaml +37 -0
  80. julee/fixtures/knowledge_service_queries.yaml +27 -0
  81. julee/repositories/__init__.py +17 -0
  82. julee/repositories/memory/__init__.py +31 -0
  83. julee/repositories/memory/assembly.py +84 -0
  84. julee/repositories/memory/assembly_specification.py +125 -0
  85. julee/repositories/memory/base.py +227 -0
  86. julee/repositories/memory/document.py +149 -0
  87. julee/repositories/memory/document_policy_validation.py +104 -0
  88. julee/repositories/memory/knowledge_service_config.py +123 -0
  89. julee/repositories/memory/knowledge_service_query.py +120 -0
  90. julee/repositories/memory/policy.py +87 -0
  91. julee/repositories/memory/tests/__init__.py +0 -0
  92. julee/repositories/memory/tests/test_document.py +212 -0
  93. julee/repositories/memory/tests/test_document_policy_validation.py +161 -0
  94. julee/repositories/memory/tests/test_policy.py +443 -0
  95. julee/repositories/minio/__init__.py +31 -0
  96. julee/repositories/minio/assembly.py +103 -0
  97. julee/repositories/minio/assembly_specification.py +170 -0
  98. julee/repositories/minio/client.py +570 -0
  99. julee/repositories/minio/document.py +530 -0
  100. julee/repositories/minio/document_policy_validation.py +120 -0
  101. julee/repositories/minio/knowledge_service_config.py +187 -0
  102. julee/repositories/minio/knowledge_service_query.py +211 -0
  103. julee/repositories/minio/policy.py +106 -0
  104. julee/repositories/minio/tests/__init__.py +0 -0
  105. julee/repositories/minio/tests/fake_client.py +213 -0
  106. julee/repositories/minio/tests/test_assembly.py +374 -0
  107. julee/repositories/minio/tests/test_assembly_specification.py +391 -0
  108. julee/repositories/minio/tests/test_client_protocol.py +57 -0
  109. julee/repositories/minio/tests/test_document.py +591 -0
  110. julee/repositories/minio/tests/test_document_policy_validation.py +192 -0
  111. julee/repositories/minio/tests/test_knowledge_service_config.py +374 -0
  112. julee/repositories/minio/tests/test_knowledge_service_query.py +438 -0
  113. julee/repositories/minio/tests/test_policy.py +559 -0
  114. julee/repositories/temporal/__init__.py +38 -0
  115. julee/repositories/temporal/activities.py +114 -0
  116. julee/repositories/temporal/activity_names.py +34 -0
  117. julee/repositories/temporal/proxies.py +159 -0
  118. julee/services/__init__.py +18 -0
  119. julee/services/knowledge_service/__init__.py +48 -0
  120. julee/services/knowledge_service/anthropic/__init__.py +12 -0
  121. julee/services/knowledge_service/anthropic/knowledge_service.py +331 -0
  122. julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +318 -0
  123. julee/services/knowledge_service/factory.py +138 -0
  124. julee/services/knowledge_service/knowledge_service.py +160 -0
  125. julee/services/knowledge_service/memory/__init__.py +13 -0
  126. julee/services/knowledge_service/memory/knowledge_service.py +278 -0
  127. julee/services/knowledge_service/memory/test_knowledge_service.py +345 -0
  128. julee/services/knowledge_service/test_factory.py +112 -0
  129. julee/services/temporal/__init__.py +38 -0
  130. julee/services/temporal/activities.py +86 -0
  131. julee/services/temporal/activity_names.py +22 -0
  132. julee/services/temporal/proxies.py +41 -0
  133. julee/util/__init__.py +0 -0
  134. julee/util/domain.py +119 -0
  135. julee/util/repos/__init__.py +0 -0
  136. julee/util/repos/minio/__init__.py +0 -0
  137. julee/util/repos/minio/file_storage.py +213 -0
  138. julee/util/repos/temporal/__init__.py +11 -0
  139. julee/util/repos/temporal/client_proxies/file_storage.py +68 -0
  140. julee/util/repos/temporal/data_converter.py +123 -0
  141. julee/util/repos/temporal/minio_file_storage.py +12 -0
  142. julee/util/repos/temporal/proxies/__init__.py +0 -0
  143. julee/util/repos/temporal/proxies/file_storage.py +58 -0
  144. julee/util/repositories.py +55 -0
  145. julee/util/temporal/__init__.py +22 -0
  146. julee/util/temporal/activities.py +123 -0
  147. julee/util/temporal/decorators.py +473 -0
  148. julee/util/tests/__init__.py +1 -0
  149. julee/util/tests/test_decorators.py +770 -0
  150. julee/util/validation/__init__.py +29 -0
  151. julee/util/validation/repository.py +100 -0
  152. julee/util/validation/type_guards.py +369 -0
  153. julee/worker.py +211 -0
  154. julee/workflows/__init__.py +26 -0
  155. julee/workflows/extract_assemble.py +215 -0
  156. julee/workflows/validate_document.py +228 -0
  157. julee-0.1.0.dist-info/METADATA +195 -0
  158. julee-0.1.0.dist-info/RECORD +161 -0
  159. julee-0.1.0.dist-info/WHEEL +5 -0
  160. julee-0.1.0.dist-info/licenses/LICENSE +674 -0
  161. julee-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,842 @@
1
+ """
2
+ Initialize System Data Use Case for the julee CEAP system.
3
+
4
+ This module provides the use case for initializing required system data
5
+ on application startup, such as knowledge service configurations that
6
+ are needed for the system to function properly.
7
+
8
+ The use case follows clean architecture principles:
9
+ - Contains business logic for what system data is required
10
+ - Uses repository interfaces for persistence
11
+ - Is idempotent and safe to run multiple times
12
+ - Can be tested independently of infrastructure concerns
13
+ """
14
+
15
+ import hashlib
16
+ import logging
17
+ from datetime import datetime, timezone
18
+ from pathlib import Path
19
+ from typing import Any, Dict, List
20
+
21
+ import yaml
22
+
23
+ from julee.domain.models.assembly_specification import (
24
+ AssemblySpecification,
25
+ AssemblySpecificationStatus,
26
+ KnowledgeServiceQuery,
27
+ )
28
+ from julee.domain.models.document import Document, DocumentStatus
29
+ from julee.domain.models.knowledge_service_config import (
30
+ KnowledgeServiceConfig,
31
+ ServiceApi,
32
+ )
33
+ from julee.domain.repositories.assembly_specification import (
34
+ AssemblySpecificationRepository,
35
+ )
36
+ from julee.domain.repositories.document import DocumentRepository
37
+ from julee.domain.repositories.knowledge_service_config import (
38
+ KnowledgeServiceConfigRepository,
39
+ )
40
+ from julee.domain.repositories.knowledge_service_query import (
41
+ KnowledgeServiceQueryRepository,
42
+ )
43
+
44
+ logger = logging.getLogger(__name__)
45
+
46
+
47
+ class InitializeSystemDataUseCase:
48
+ """
49
+ Use case for initializing required system data on application startup.
50
+
51
+ This use case ensures that essential configuration data exists in the
52
+ system, such as knowledge service configurations that are required
53
+ for the application to function properly.
54
+
55
+ All operations are idempotent - running this multiple times will not
56
+ create duplicate data or cause errors.
57
+ """
58
+
59
+ def __init__(
60
+ self,
61
+ knowledge_service_config_repository: KnowledgeServiceConfigRepository,
62
+ document_repository: DocumentRepository,
63
+ knowledge_service_query_repository: KnowledgeServiceQueryRepository,
64
+ assembly_specification_repository: AssemblySpecificationRepository,
65
+ ) -> None:
66
+ """Initialize the use case with required repositories.
67
+
68
+ Args:
69
+ knowledge_service_config_repository: Repository for knowledge
70
+ service configurations
71
+ document_repository: Repository for documents
72
+ knowledge_service_query_repository: Repository for knowledge
73
+ service queries
74
+ assembly_specification_repository: Repository for assembly
75
+ specifications
76
+ """
77
+ self.config_repo = knowledge_service_config_repository
78
+ self.document_repo = document_repository
79
+ self.query_repo = knowledge_service_query_repository
80
+ self.assembly_spec_repo = assembly_specification_repository
81
+ self.logger = logging.getLogger("InitializeSystemDataUseCase")
82
+
83
async def execute(self) -> None:
    """
    Run every system data initialization step in order.

    The steps are awaited one after another: knowledge service configs,
    knowledge service queries, example documents, then assembly
    specifications. The first step that fails stops the sequence.

    Raises:
        Exception: Whatever a step raised; it is logged here and then
            re-raised unchanged.
    """
    self.logger.info("Starting system data initialization")

    try:
        steps = (
            self._ensure_knowledge_service_configs_exist,
            self._ensure_knowledge_service_queries_exist,
            self._ensure_example_documents_exist,
            self._ensure_assembly_specifications_exist,
        )
        for step in steps:
            await step()

        self.logger.info("System data initialization completed successfully")

    except Exception as failure:
        details = {
            "error_type": type(failure).__name__,
            "error_message": str(failure),
        }
        self.logger.error(
            "Failed to initialize system data",
            exc_info=True,
            extra=details,
        )
        raise
113
+
114
+ def _get_demo_fixture_path(self, filename: str) -> Path:
115
+ """
116
+ Get path to a demo fixture file.
117
+
118
+ Args:
119
+ filename: Name of the fixture file
120
+
121
+ Returns:
122
+ Path to the fixture file
123
+ """
124
+ current_file = Path(__file__)
125
+ julee_dir = current_file.parent.parent.parent
126
+ return julee_dir / "fixtures" / filename
127
+
128
+ async def _ensure_knowledge_service_configs_exist(self) -> None:
129
+ """
130
+ Ensure all knowledge service configurations from fixture exist.
131
+
132
+ This loads configurations from the YAML fixture file and creates
133
+ any that don't already exist in the repository. The operation is
134
+ idempotent - existing configurations are not modified.
135
+ """
136
+ self.logger.info("Loading knowledge service configurations from fixture")
137
+
138
+ try:
139
+ # Load configurations from YAML fixture
140
+ fixture_configs = self._load_fixture_configurations()
141
+
142
+ created_count = 0
143
+ skipped_count = 0
144
+
145
+ for config_data in fixture_configs:
146
+ config_id = config_data["knowledge_service_id"]
147
+
148
+ # Check if configuration already exists
149
+ existing_config = await self.config_repo.get(config_id)
150
+ if existing_config:
151
+ self.logger.debug(
152
+ "Knowledge service config already exists, skipping",
153
+ extra={
154
+ "config_id": config_id,
155
+ "config_name": existing_config.name,
156
+ },
157
+ )
158
+ skipped_count += 1
159
+ continue
160
+
161
+ # Create new configuration from fixture data
162
+ config = self._create_config_from_fixture_data(config_data)
163
+ await self.config_repo.save(config)
164
+
165
+ self.logger.info(
166
+ "Knowledge service config created successfully",
167
+ extra={
168
+ "config_id": config.knowledge_service_id,
169
+ "config_name": config.name,
170
+ "service_api": config.service_api.value,
171
+ },
172
+ )
173
+ created_count += 1
174
+
175
+ self.logger.info(
176
+ "Knowledge service configurations processed",
177
+ extra={
178
+ "created_count": created_count,
179
+ "skipped_count": skipped_count,
180
+ "total_count": len(fixture_configs),
181
+ },
182
+ )
183
+
184
+ except Exception as e:
185
+ self.logger.error(
186
+ "Failed to ensure knowledge service configurations exist",
187
+ exc_info=True,
188
+ extra={
189
+ "error_type": type(e).__name__,
190
+ "error_message": str(e),
191
+ },
192
+ )
193
+ raise
194
+
195
+ def _load_fixture_configurations(self) -> List[Dict[str, Any]]:
196
+ """
197
+ Load knowledge service configurations from the YAML fixture file.
198
+
199
+ Returns:
200
+ List of configuration dictionaries from the fixture file
201
+
202
+ Raises:
203
+ FileNotFoundError: If the fixture file doesn't exist
204
+ yaml.YAMLError: If the fixture file is invalid YAML
205
+ KeyError: If required fields are missing from the fixture
206
+ """
207
+ fixture_path = self._get_demo_fixture_path("knowledge_service_configs.yaml")
208
+
209
+ self.logger.debug(
210
+ "Loading fixture file",
211
+ extra={"fixture_path": str(fixture_path)},
212
+ )
213
+
214
+ if not fixture_path.exists():
215
+ raise FileNotFoundError(
216
+ f"Knowledge services fixture file not found: {fixture_path}"
217
+ )
218
+
219
+ try:
220
+ with open(fixture_path, "r", encoding="utf-8") as f:
221
+ fixture_data = yaml.safe_load(f)
222
+
223
+ if not fixture_data or "knowledge_services" not in fixture_data:
224
+ raise KeyError("Fixture file must contain 'knowledge_services' key")
225
+
226
+ configs = fixture_data["knowledge_services"]
227
+ if not isinstance(configs, list):
228
+ raise ValueError(
229
+ "'knowledge_services' must be a list of configurations"
230
+ )
231
+
232
+ self.logger.debug(
233
+ "Loaded fixture configurations",
234
+ extra={"count": len(configs)},
235
+ )
236
+
237
+ return configs
238
+
239
+ except yaml.YAMLError as e:
240
+ raise yaml.YAMLError(f"Invalid YAML in fixture file: {e}")
241
+
242
+ def _create_config_from_fixture_data(
243
+ self, config_data: Dict[str, Any]
244
+ ) -> KnowledgeServiceConfig:
245
+ """
246
+ Create a KnowledgeServiceConfig from fixture data.
247
+
248
+ Args:
249
+ config_data: Dictionary containing configuration data from fixture
250
+
251
+ Returns:
252
+ KnowledgeServiceConfig instance
253
+
254
+ Raises:
255
+ KeyError: If required fields are missing
256
+ ValueError: If field values are invalid
257
+ """
258
+ required_fields = [
259
+ "knowledge_service_id",
260
+ "name",
261
+ "description",
262
+ "service_api",
263
+ ]
264
+
265
+ # Validate required fields
266
+ for field in required_fields:
267
+ if field not in config_data:
268
+ raise KeyError(f"Required field '{field}' missing from config")
269
+
270
+ # Parse service API enum
271
+ try:
272
+ service_api = ServiceApi(config_data["service_api"])
273
+ except ValueError:
274
+ raise ValueError(
275
+ f"Invalid service_api '{config_data['service_api']}'. "
276
+ f"Must be one of: {[api.value for api in ServiceApi]}"
277
+ )
278
+
279
+ # Create configuration
280
+ config = KnowledgeServiceConfig(
281
+ knowledge_service_id=config_data["knowledge_service_id"],
282
+ name=config_data["name"],
283
+ description=config_data["description"],
284
+ service_api=service_api,
285
+ created_at=datetime.now(timezone.utc),
286
+ updated_at=datetime.now(timezone.utc),
287
+ )
288
+
289
+ self.logger.debug(
290
+ "Created config from fixture data",
291
+ extra={
292
+ "config_id": config.knowledge_service_id,
293
+ "config_name": config.name,
294
+ },
295
+ )
296
+
297
+ return config
298
+
299
+ async def _ensure_knowledge_service_queries_exist(self) -> None:
300
+ """
301
+ Ensure all knowledge service queries from fixture exist.
302
+
303
+ This loads queries from the YAML fixture file and creates
304
+ any that don't already exist in the repository. The operation is
305
+ idempotent - existing queries are not modified.
306
+ """
307
+ self.logger.info("Loading knowledge service queries from fixture")
308
+
309
+ try:
310
+ # Load queries from YAML fixture
311
+ fixture_queries = self._load_fixture_queries()
312
+
313
+ created_count = 0
314
+ skipped_count = 0
315
+
316
+ for query_data in fixture_queries:
317
+ query_id = query_data["query_id"]
318
+
319
+ # Check if query already exists
320
+ existing_query = await self.query_repo.get(query_id)
321
+ if existing_query:
322
+ self.logger.debug(
323
+ "Knowledge service query already exists, skipping",
324
+ extra={
325
+ "query_id": query_id,
326
+ "query_name": existing_query.name,
327
+ },
328
+ )
329
+ skipped_count += 1
330
+ continue
331
+
332
+ # Create new query from fixture data
333
+ query = self._create_query_from_fixture_data(query_data)
334
+ await self.query_repo.save(query)
335
+
336
+ self.logger.info(
337
+ "Knowledge service query created successfully",
338
+ extra={
339
+ "query_id": query.query_id,
340
+ "query_name": query.name,
341
+ "knowledge_service_id": query.knowledge_service_id,
342
+ },
343
+ )
344
+ created_count += 1
345
+
346
+ self.logger.info(
347
+ "Knowledge service queries processed",
348
+ extra={
349
+ "created_count": created_count,
350
+ "skipped_count": skipped_count,
351
+ "total_count": len(fixture_queries),
352
+ },
353
+ )
354
+
355
+ except Exception as e:
356
+ self.logger.error(
357
+ "Failed to ensure knowledge service queries exist",
358
+ exc_info=True,
359
+ extra={
360
+ "error_type": type(e).__name__,
361
+ "error_message": str(e),
362
+ },
363
+ )
364
+ raise
365
+
366
+ def _load_fixture_queries(self) -> List[Dict[str, Any]]:
367
+ """
368
+ Load knowledge service queries from the YAML fixture file.
369
+
370
+ Returns:
371
+ List of query dictionaries from the fixture file
372
+
373
+ Raises:
374
+ FileNotFoundError: If the fixture file doesn't exist
375
+ yaml.YAMLError: If the fixture file is invalid YAML
376
+ KeyError: If required fields are missing from the fixture
377
+ """
378
+ fixture_path = self._get_demo_fixture_path("knowledge_service_queries.yaml")
379
+
380
+ self.logger.debug(
381
+ "Loading queries fixture file",
382
+ extra={"fixture_path": str(fixture_path)},
383
+ )
384
+
385
+ if not fixture_path.exists():
386
+ raise FileNotFoundError(
387
+ f"Knowledge service queries fixture file not found: {fixture_path}"
388
+ )
389
+
390
+ try:
391
+ with open(fixture_path, "r", encoding="utf-8") as f:
392
+ fixture_data = yaml.safe_load(f)
393
+
394
+ if not fixture_data or "knowledge_service_queries" not in fixture_data:
395
+ raise KeyError(
396
+ "Fixture file must contain 'knowledge_service_queries' key"
397
+ )
398
+
399
+ queries = fixture_data["knowledge_service_queries"]
400
+ if not isinstance(queries, list):
401
+ raise ValueError(
402
+ "'knowledge_service_queries' must be a list of query configurations"
403
+ )
404
+
405
+ self.logger.debug(
406
+ "Loaded fixture queries",
407
+ extra={"count": len(queries)},
408
+ )
409
+
410
+ return queries
411
+
412
+ except yaml.YAMLError as e:
413
+ raise yaml.YAMLError(f"Invalid YAML in queries fixture file: {e}")
414
+
415
+ def _create_query_from_fixture_data(
416
+ self, query_data: Dict[str, Any]
417
+ ) -> KnowledgeServiceQuery:
418
+ """
419
+ Create a KnowledgeServiceQuery from fixture data.
420
+
421
+ Args:
422
+ query_data: Dictionary containing query data from fixture
423
+
424
+ Returns:
425
+ KnowledgeServiceQuery instance
426
+
427
+ Raises:
428
+ KeyError: If required fields are missing
429
+ ValueError: If field values are invalid
430
+ """
431
+ required_fields = [
432
+ "query_id",
433
+ "name",
434
+ "knowledge_service_id",
435
+ "prompt",
436
+ "assistant_prompt",
437
+ ]
438
+
439
+ # Validate required fields
440
+ for field in required_fields:
441
+ if field not in query_data:
442
+ raise KeyError(f"Required field '{field}' missing from query")
443
+
444
+ # Get optional fields
445
+ query_metadata = query_data.get("query_metadata", {})
446
+
447
+ # Create query
448
+ query = KnowledgeServiceQuery(
449
+ query_id=query_data["query_id"],
450
+ name=query_data["name"],
451
+ knowledge_service_id=query_data["knowledge_service_id"],
452
+ prompt=query_data["prompt"],
453
+ assistant_prompt=query_data["assistant_prompt"],
454
+ query_metadata=query_metadata,
455
+ created_at=datetime.now(timezone.utc),
456
+ updated_at=datetime.now(timezone.utc),
457
+ )
458
+
459
+ self.logger.debug(
460
+ "Created query from fixture data",
461
+ extra={
462
+ "query_id": query.query_id,
463
+ "query_name": query.name,
464
+ },
465
+ )
466
+
467
+ return query
468
+
469
+ async def _ensure_assembly_specifications_exist(self) -> None:
470
+ """
471
+ Ensure all assembly specifications from fixture exist.
472
+
473
+ This loads specifications from the YAML fixture file and creates
474
+ any that don't already exist in the repository. The operation is
475
+ idempotent - existing specifications are not modified.
476
+ """
477
+ self.logger.info("Loading assembly specifications from fixture")
478
+
479
+ try:
480
+ # Load specifications from YAML fixture
481
+ fixture_specs = self._load_fixture_assembly_specifications()
482
+
483
+ created_count = 0
484
+ skipped_count = 0
485
+
486
+ for spec_data in fixture_specs:
487
+ spec_id = spec_data["assembly_specification_id"]
488
+
489
+ # Check if specification already exists
490
+ existing_spec = await self.assembly_spec_repo.get(spec_id)
491
+ if existing_spec:
492
+ self.logger.debug(
493
+ "Assembly specification already exists, skipping",
494
+ extra={
495
+ "spec_id": spec_id,
496
+ "spec_name": existing_spec.name,
497
+ },
498
+ )
499
+ skipped_count += 1
500
+ continue
501
+
502
+ # Create new specification from fixture data
503
+ spec = self._create_assembly_spec_from_fixture_data(spec_data)
504
+ await self.assembly_spec_repo.save(spec)
505
+
506
+ self.logger.info(
507
+ "Assembly specification created successfully",
508
+ extra={
509
+ "spec_id": spec.assembly_specification_id,
510
+ "spec_name": spec.name,
511
+ "status": spec.status.value,
512
+ },
513
+ )
514
+ created_count += 1
515
+
516
+ self.logger.info(
517
+ "Assembly specifications processed",
518
+ extra={
519
+ "created_count": created_count,
520
+ "skipped_count": skipped_count,
521
+ "total_count": len(fixture_specs),
522
+ },
523
+ )
524
+
525
+ except Exception as e:
526
+ self.logger.error(
527
+ "Failed to ensure assembly specifications exist",
528
+ exc_info=True,
529
+ extra={
530
+ "error_type": type(e).__name__,
531
+ "error_message": str(e),
532
+ },
533
+ )
534
+ raise
535
+
536
+ def _load_fixture_assembly_specifications(self) -> List[Dict[str, Any]]:
537
+ """
538
+ Load assembly specifications from the YAML fixture file.
539
+
540
+ Returns:
541
+ List of specification dictionaries from the fixture file
542
+
543
+ Raises:
544
+ FileNotFoundError: If the fixture file doesn't exist
545
+ yaml.YAMLError: If the fixture file is invalid YAML
546
+ KeyError: If required fields are missing from the fixture
547
+ """
548
+ fixture_path = self._get_demo_fixture_path("assembly_specifications.yaml")
549
+
550
+ self.logger.debug(
551
+ "Loading assembly specifications fixture file",
552
+ extra={"fixture_path": str(fixture_path)},
553
+ )
554
+
555
+ if not fixture_path.exists():
556
+ raise FileNotFoundError(
557
+ f"Assembly specifications fixture file not found: {fixture_path}"
558
+ )
559
+
560
+ try:
561
+ with open(fixture_path, "r", encoding="utf-8") as f:
562
+ fixture_data = yaml.safe_load(f)
563
+
564
+ if not fixture_data or "assembly_specifications" not in fixture_data:
565
+ raise KeyError(
566
+ "Fixture file must contain 'assembly_specifications' key"
567
+ )
568
+
569
+ specs = fixture_data["assembly_specifications"]
570
+ if not isinstance(specs, list):
571
+ raise ValueError(
572
+ "'assembly_specifications' must be a list of "
573
+ "specification configurations"
574
+ )
575
+
576
+ self.logger.debug(
577
+ "Loaded fixture assembly specifications",
578
+ extra={"count": len(specs)},
579
+ )
580
+
581
+ return specs
582
+
583
+ except yaml.YAMLError as e:
584
+ raise yaml.YAMLError(
585
+ f"Invalid YAML in assembly specifications fixture file: {e}"
586
+ )
587
+
588
+ def _create_assembly_spec_from_fixture_data(
589
+ self, spec_data: Dict[str, Any]
590
+ ) -> AssemblySpecification:
591
+ """
592
+ Create an AssemblySpecification from fixture data.
593
+
594
+ Args:
595
+ spec_data: Dictionary containing specification data from fixture
596
+
597
+ Returns:
598
+ AssemblySpecification instance
599
+
600
+ Raises:
601
+ KeyError: If required fields are missing
602
+ ValueError: If field values are invalid
603
+ """
604
+ required_fields = [
605
+ "assembly_specification_id",
606
+ "name",
607
+ "applicability",
608
+ "jsonschema",
609
+ ]
610
+
611
+ # Validate required fields
612
+ for field in required_fields:
613
+ if field not in spec_data:
614
+ raise KeyError(
615
+ f"Required field '{field}' missing from assembly specification"
616
+ )
617
+
618
+ # Parse status
619
+ status = AssemblySpecificationStatus.ACTIVE
620
+ if "status" in spec_data:
621
+ try:
622
+ status = AssemblySpecificationStatus(spec_data["status"])
623
+ except ValueError:
624
+ self.logger.warning(
625
+ f"Invalid status '{spec_data['status']}', using default 'active'"
626
+ )
627
+
628
+ # Get optional fields
629
+ version = spec_data.get("version", "1.0")
630
+ knowledge_service_queries = spec_data.get("knowledge_service_queries", {})
631
+
632
+ # Create specification
633
+ spec = AssemblySpecification(
634
+ assembly_specification_id=spec_data["assembly_specification_id"],
635
+ name=spec_data["name"],
636
+ applicability=spec_data["applicability"],
637
+ jsonschema=spec_data["jsonschema"],
638
+ knowledge_service_queries=knowledge_service_queries,
639
+ status=status,
640
+ version=version,
641
+ created_at=datetime.now(timezone.utc),
642
+ updated_at=datetime.now(timezone.utc),
643
+ )
644
+
645
+ self.logger.debug(
646
+ "Created assembly specification from fixture data",
647
+ extra={
648
+ "spec_id": spec.assembly_specification_id,
649
+ "spec_name": spec.name,
650
+ },
651
+ )
652
+
653
+ return spec
654
+
655
+ async def _ensure_example_documents_exist(self) -> None:
656
+ """
657
+ Ensure all example documents from fixture exist.
658
+
659
+ This loads documents from the YAML fixture file and creates
660
+ any that don't already exist in the repository. The operation is
661
+ idempotent - existing documents are not modified.
662
+ """
663
+ self.logger.info("Loading example documents from fixture")
664
+
665
+ try:
666
+ # Load documents from YAML fixture
667
+ fixture_documents = self._load_fixture_documents()
668
+
669
+ created_count = 0
670
+ skipped_count = 0
671
+
672
+ for doc_data in fixture_documents:
673
+ doc_id = doc_data["document_id"]
674
+
675
+ # Check if document already exists
676
+ existing_doc = await self.document_repo.get(doc_id)
677
+ if existing_doc:
678
+ self.logger.debug(
679
+ "Document already exists, skipping",
680
+ extra={
681
+ "document_id": doc_id,
682
+ "original_filename": (existing_doc.original_filename),
683
+ },
684
+ )
685
+ skipped_count += 1
686
+ continue
687
+
688
+ # Create new document from fixture data
689
+ document = self._create_document_from_fixture_data(doc_data)
690
+ await self.document_repo.save(document)
691
+
692
+ self.logger.info(
693
+ "Example document created successfully",
694
+ extra={
695
+ "document_id": document.document_id,
696
+ "original_filename": document.original_filename,
697
+ "status": document.status.value,
698
+ },
699
+ )
700
+ created_count += 1
701
+
702
+ self.logger.info(
703
+ "Example documents processed",
704
+ extra={
705
+ "created_count": created_count,
706
+ "skipped_count": skipped_count,
707
+ "total_count": len(fixture_documents),
708
+ },
709
+ )
710
+
711
+ except Exception as e:
712
+ self.logger.error(
713
+ "Failed to ensure example documents exist",
714
+ exc_info=True,
715
+ extra={
716
+ "error_type": type(e).__name__,
717
+ "error_message": str(e),
718
+ },
719
+ )
720
+ raise
721
+
722
+ def _load_fixture_documents(self) -> List[Dict[str, Any]]:
723
+ """
724
+ Load documents from the YAML fixture file.
725
+
726
+ Returns:
727
+ List of document dictionaries from the fixture file
728
+
729
+ Raises:
730
+ FileNotFoundError: If the fixture file doesn't exist
731
+ yaml.YAMLError: If the fixture file is invalid YAML
732
+ KeyError: If required fields are missing from the fixture
733
+ """
734
+ fixture_path = self._get_demo_fixture_path("documents.yaml")
735
+
736
+ self.logger.debug(
737
+ "Loading documents fixture file",
738
+ extra={"fixture_path": str(fixture_path)},
739
+ )
740
+
741
+ if not fixture_path.exists():
742
+ raise FileNotFoundError(f"Documents fixture file not found: {fixture_path}")
743
+
744
+ try:
745
+ with open(fixture_path, "r", encoding="utf-8") as f:
746
+ fixture_data = yaml.safe_load(f)
747
+
748
+ if not fixture_data or "documents" not in fixture_data:
749
+ raise KeyError("Fixture file must contain 'documents' key")
750
+
751
+ documents = fixture_data["documents"]
752
+ if not isinstance(documents, list):
753
+ raise ValueError(
754
+ "'documents' must be a list of document configurations"
755
+ )
756
+
757
+ self.logger.debug(
758
+ "Loaded fixture documents",
759
+ extra={"count": len(documents)},
760
+ )
761
+
762
+ return documents
763
+
764
+ except yaml.YAMLError as e:
765
+ raise yaml.YAMLError(f"Invalid YAML in documents fixture file: {e}")
766
+
767
+ def _create_document_from_fixture_data(self, doc_data: Dict[str, Any]) -> Document:
768
+ """
769
+ Create a Document from fixture data.
770
+
771
+ Args:
772
+ doc_data: Dictionary containing document data from fixture
773
+
774
+ Returns:
775
+ Document instance
776
+
777
+ Raises:
778
+ KeyError: If required fields are missing
779
+ ValueError: If field values are invalid
780
+ """
781
+ required_fields = [
782
+ "document_id",
783
+ "original_filename",
784
+ "content_type",
785
+ "content",
786
+ ]
787
+
788
+ # Validate required fields
789
+ for field in required_fields:
790
+ if field not in doc_data:
791
+ raise KeyError(f"Required field '{field}' missing from document")
792
+
793
+ # Get content and calculate hash
794
+ content = doc_data["content"]
795
+ content_bytes = content.encode("utf-8")
796
+ size_bytes = len(content_bytes)
797
+
798
+ # Create multihash (using SHA-256)
799
+ sha256_hash = hashlib.sha256(content_bytes).hexdigest()
800
+ content_multihash = f"sha256-{sha256_hash}"
801
+
802
+ # Parse status
803
+ status = DocumentStatus.CAPTURED
804
+ if "status" in doc_data:
805
+ try:
806
+ status = DocumentStatus(doc_data["status"])
807
+ except ValueError:
808
+ self.logger.warning(
809
+ f"Invalid status '{doc_data['status']}', using default 'captured'"
810
+ )
811
+
812
+ # Get optional fields
813
+ knowledge_service_id = doc_data.get("knowledge_service_id")
814
+ assembly_types = doc_data.get("assembly_types", [])
815
+ additional_metadata = doc_data.get("additional_metadata", {})
816
+
817
+ # Create document
818
+ document = Document(
819
+ document_id=doc_data["document_id"],
820
+ original_filename=doc_data["original_filename"],
821
+ content_type=doc_data["content_type"],
822
+ size_bytes=size_bytes,
823
+ content_multihash=content_multihash,
824
+ status=status,
825
+ knowledge_service_id=knowledge_service_id,
826
+ assembly_types=assembly_types,
827
+ created_at=datetime.now(timezone.utc),
828
+ updated_at=datetime.now(timezone.utc),
829
+ additional_metadata=additional_metadata,
830
+ content_string=content, # Store content as string for fixtures
831
+ )
832
+
833
+ self.logger.debug(
834
+ "Created document from fixture data",
835
+ extra={
836
+ "document_id": document.document_id,
837
+ "original_filename": document.original_filename,
838
+ "size_bytes": size_bytes,
839
+ },
840
+ )
841
+
842
+ return document