julee 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. julee/__init__.py +3 -0
  2. julee/api/__init__.py +20 -0
  3. julee/api/app.py +180 -0
  4. julee/api/dependencies.py +257 -0
  5. julee/api/requests.py +175 -0
  6. julee/api/responses.py +43 -0
  7. julee/api/routers/__init__.py +43 -0
  8. julee/api/routers/assembly_specifications.py +212 -0
  9. julee/api/routers/documents.py +182 -0
  10. julee/api/routers/knowledge_service_configs.py +79 -0
  11. julee/api/routers/knowledge_service_queries.py +293 -0
  12. julee/api/routers/system.py +137 -0
  13. julee/api/routers/workflows.py +234 -0
  14. julee/api/services/__init__.py +20 -0
  15. julee/api/services/system_initialization.py +214 -0
  16. julee/api/tests/__init__.py +14 -0
  17. julee/api/tests/routers/__init__.py +17 -0
  18. julee/api/tests/routers/test_assembly_specifications.py +749 -0
  19. julee/api/tests/routers/test_documents.py +301 -0
  20. julee/api/tests/routers/test_knowledge_service_configs.py +234 -0
  21. julee/api/tests/routers/test_knowledge_service_queries.py +738 -0
  22. julee/api/tests/routers/test_system.py +179 -0
  23. julee/api/tests/routers/test_workflows.py +393 -0
  24. julee/api/tests/test_app.py +285 -0
  25. julee/api/tests/test_dependencies.py +245 -0
  26. julee/api/tests/test_requests.py +250 -0
  27. julee/domain/__init__.py +22 -0
  28. julee/domain/models/__init__.py +49 -0
  29. julee/domain/models/assembly/__init__.py +17 -0
  30. julee/domain/models/assembly/assembly.py +103 -0
  31. julee/domain/models/assembly/tests/__init__.py +0 -0
  32. julee/domain/models/assembly/tests/factories.py +37 -0
  33. julee/domain/models/assembly/tests/test_assembly.py +430 -0
  34. julee/domain/models/assembly_specification/__init__.py +24 -0
  35. julee/domain/models/assembly_specification/assembly_specification.py +172 -0
  36. julee/domain/models/assembly_specification/knowledge_service_query.py +123 -0
  37. julee/domain/models/assembly_specification/tests/__init__.py +0 -0
  38. julee/domain/models/assembly_specification/tests/factories.py +78 -0
  39. julee/domain/models/assembly_specification/tests/test_assembly_specification.py +490 -0
  40. julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +310 -0
  41. julee/domain/models/custom_fields/__init__.py +0 -0
  42. julee/domain/models/custom_fields/content_stream.py +68 -0
  43. julee/domain/models/custom_fields/tests/__init__.py +0 -0
  44. julee/domain/models/custom_fields/tests/test_custom_fields.py +53 -0
  45. julee/domain/models/document/__init__.py +17 -0
  46. julee/domain/models/document/document.py +150 -0
  47. julee/domain/models/document/tests/__init__.py +0 -0
  48. julee/domain/models/document/tests/factories.py +76 -0
  49. julee/domain/models/document/tests/test_document.py +297 -0
  50. julee/domain/models/knowledge_service_config/__init__.py +17 -0
  51. julee/domain/models/knowledge_service_config/knowledge_service_config.py +86 -0
  52. julee/domain/models/policy/__init__.py +15 -0
  53. julee/domain/models/policy/document_policy_validation.py +220 -0
  54. julee/domain/models/policy/policy.py +203 -0
  55. julee/domain/models/policy/tests/__init__.py +0 -0
  56. julee/domain/models/policy/tests/factories.py +47 -0
  57. julee/domain/models/policy/tests/test_document_policy_validation.py +420 -0
  58. julee/domain/models/policy/tests/test_policy.py +546 -0
  59. julee/domain/repositories/__init__.py +27 -0
  60. julee/domain/repositories/assembly.py +45 -0
  61. julee/domain/repositories/assembly_specification.py +52 -0
  62. julee/domain/repositories/base.py +146 -0
  63. julee/domain/repositories/document.py +49 -0
  64. julee/domain/repositories/document_policy_validation.py +52 -0
  65. julee/domain/repositories/knowledge_service_config.py +54 -0
  66. julee/domain/repositories/knowledge_service_query.py +44 -0
  67. julee/domain/repositories/policy.py +49 -0
  68. julee/domain/use_cases/__init__.py +17 -0
  69. julee/domain/use_cases/decorators.py +107 -0
  70. julee/domain/use_cases/extract_assemble_data.py +649 -0
  71. julee/domain/use_cases/initialize_system_data.py +842 -0
  72. julee/domain/use_cases/tests/__init__.py +7 -0
  73. julee/domain/use_cases/tests/test_extract_assemble_data.py +548 -0
  74. julee/domain/use_cases/tests/test_initialize_system_data.py +455 -0
  75. julee/domain/use_cases/tests/test_validate_document.py +1228 -0
  76. julee/domain/use_cases/validate_document.py +736 -0
  77. julee/fixtures/assembly_specifications.yaml +70 -0
  78. julee/fixtures/documents.yaml +178 -0
  79. julee/fixtures/knowledge_service_configs.yaml +37 -0
  80. julee/fixtures/knowledge_service_queries.yaml +27 -0
  81. julee/repositories/__init__.py +17 -0
  82. julee/repositories/memory/__init__.py +31 -0
  83. julee/repositories/memory/assembly.py +84 -0
  84. julee/repositories/memory/assembly_specification.py +125 -0
  85. julee/repositories/memory/base.py +227 -0
  86. julee/repositories/memory/document.py +149 -0
  87. julee/repositories/memory/document_policy_validation.py +104 -0
  88. julee/repositories/memory/knowledge_service_config.py +123 -0
  89. julee/repositories/memory/knowledge_service_query.py +120 -0
  90. julee/repositories/memory/policy.py +87 -0
  91. julee/repositories/memory/tests/__init__.py +0 -0
  92. julee/repositories/memory/tests/test_document.py +212 -0
  93. julee/repositories/memory/tests/test_document_policy_validation.py +161 -0
  94. julee/repositories/memory/tests/test_policy.py +443 -0
  95. julee/repositories/minio/__init__.py +31 -0
  96. julee/repositories/minio/assembly.py +103 -0
  97. julee/repositories/minio/assembly_specification.py +170 -0
  98. julee/repositories/minio/client.py +570 -0
  99. julee/repositories/minio/document.py +530 -0
  100. julee/repositories/minio/document_policy_validation.py +120 -0
  101. julee/repositories/minio/knowledge_service_config.py +187 -0
  102. julee/repositories/minio/knowledge_service_query.py +211 -0
  103. julee/repositories/minio/policy.py +106 -0
  104. julee/repositories/minio/tests/__init__.py +0 -0
  105. julee/repositories/minio/tests/fake_client.py +213 -0
  106. julee/repositories/minio/tests/test_assembly.py +374 -0
  107. julee/repositories/minio/tests/test_assembly_specification.py +391 -0
  108. julee/repositories/minio/tests/test_client_protocol.py +57 -0
  109. julee/repositories/minio/tests/test_document.py +591 -0
  110. julee/repositories/minio/tests/test_document_policy_validation.py +192 -0
  111. julee/repositories/minio/tests/test_knowledge_service_config.py +374 -0
  112. julee/repositories/minio/tests/test_knowledge_service_query.py +438 -0
  113. julee/repositories/minio/tests/test_policy.py +559 -0
  114. julee/repositories/temporal/__init__.py +38 -0
  115. julee/repositories/temporal/activities.py +114 -0
  116. julee/repositories/temporal/activity_names.py +34 -0
  117. julee/repositories/temporal/proxies.py +159 -0
  118. julee/services/__init__.py +18 -0
  119. julee/services/knowledge_service/__init__.py +48 -0
  120. julee/services/knowledge_service/anthropic/__init__.py +12 -0
  121. julee/services/knowledge_service/anthropic/knowledge_service.py +331 -0
  122. julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +318 -0
  123. julee/services/knowledge_service/factory.py +138 -0
  124. julee/services/knowledge_service/knowledge_service.py +160 -0
  125. julee/services/knowledge_service/memory/__init__.py +13 -0
  126. julee/services/knowledge_service/memory/knowledge_service.py +278 -0
  127. julee/services/knowledge_service/memory/test_knowledge_service.py +345 -0
  128. julee/services/knowledge_service/test_factory.py +112 -0
  129. julee/services/temporal/__init__.py +38 -0
  130. julee/services/temporal/activities.py +86 -0
  131. julee/services/temporal/activity_names.py +22 -0
  132. julee/services/temporal/proxies.py +41 -0
  133. julee/util/__init__.py +0 -0
  134. julee/util/domain.py +119 -0
  135. julee/util/repos/__init__.py +0 -0
  136. julee/util/repos/minio/__init__.py +0 -0
  137. julee/util/repos/minio/file_storage.py +213 -0
  138. julee/util/repos/temporal/__init__.py +11 -0
  139. julee/util/repos/temporal/client_proxies/file_storage.py +68 -0
  140. julee/util/repos/temporal/data_converter.py +123 -0
  141. julee/util/repos/temporal/minio_file_storage.py +12 -0
  142. julee/util/repos/temporal/proxies/__init__.py +0 -0
  143. julee/util/repos/temporal/proxies/file_storage.py +58 -0
  144. julee/util/repositories.py +55 -0
  145. julee/util/temporal/__init__.py +22 -0
  146. julee/util/temporal/activities.py +123 -0
  147. julee/util/temporal/decorators.py +473 -0
  148. julee/util/tests/__init__.py +1 -0
  149. julee/util/tests/test_decorators.py +770 -0
  150. julee/util/validation/__init__.py +29 -0
  151. julee/util/validation/repository.py +100 -0
  152. julee/util/validation/type_guards.py +369 -0
  153. julee/worker.py +211 -0
  154. julee/workflows/__init__.py +26 -0
  155. julee/workflows/extract_assemble.py +215 -0
  156. julee/workflows/validate_document.py +228 -0
  157. julee-0.1.0.dist-info/METADATA +195 -0
  158. julee-0.1.0.dist-info/RECORD +161 -0
  159. julee-0.1.0.dist-info/WHEEL +5 -0
  160. julee-0.1.0.dist-info/licenses/LICENSE +674 -0
  161. julee-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,345 @@
1
+ """
2
+ Tests for MemoryKnowledgeService implementation.
3
+
4
+ This module contains tests for the in-memory implementation of the
5
+ KnowledgeService protocol, verifying file registration storage and
6
+ canned query response functionality.
7
+ """
8
+
9
+ import pytest
10
+ from datetime import datetime, timezone
11
+ from julee.domain.models.knowledge_service_config import (
12
+ KnowledgeServiceConfig,
13
+ )
14
+ from julee.domain.models.document import Document, DocumentStatus
15
+ from julee.domain.models.knowledge_service_config import ServiceApi
16
+ from julee.domain.models.custom_fields.content_stream import (
17
+ ContentStream,
18
+ )
19
+ from ..knowledge_service import QueryResult
20
+ from .knowledge_service import MemoryKnowledgeService
21
+ import io
22
+
23
+
@pytest.fixture
def test_document() -> Document:
    """Provide a CAPTURED plain-text Document whose content lives in memory."""
    raw_bytes = "This is test document content for knowledge service testing.".encode(
        "utf-8"
    )
    # Wrap the bytes in a BytesIO-backed ContentStream before building the model.
    stream = ContentStream(io.BytesIO(raw_bytes))

    document = Document(
        document_id="test-doc-123",
        original_filename="test_document.txt",
        content_type="text/plain",
        size_bytes=len(raw_bytes),
        content_multihash="test-hash-123",
        status=DocumentStatus.CAPTURED,
        content=stream,
        # Two separate clock reads, as in the original fixture.
        created_at=datetime.now(timezone.utc),
        updated_at=datetime.now(timezone.utc),
    )
    return document
42
+
43
+
@pytest.fixture
def knowledge_service_config() -> KnowledgeServiceConfig:
    """Provide the KnowledgeServiceConfig shared by the memory-service tests."""
    config = KnowledgeServiceConfig(
        knowledge_service_id="ks-memory-test",
        name="Test Memory Service",
        description="Memory service for testing",
        service_api=ServiceApi.ANTHROPIC,
    )
    return config
53
+
54
+
@pytest.fixture
def memory_service(
    knowledge_service_config: KnowledgeServiceConfig,
) -> MemoryKnowledgeService:
    """Provide a fresh MemoryKnowledgeService built from the test config."""
    service = MemoryKnowledgeService(knowledge_service_config)
    return service
61
+
62
+
@pytest.fixture
def sample_query_result() -> QueryResult:
    """Provide a QueryResult suitable for queuing as a canned response."""
    canned_payload = {
        "response": "The main topic is testing",
        "confidence": 0.95,
    }
    return QueryResult(
        query_id="test-query-123",
        query_text="What is the main topic?",
        result_data=canned_payload,
        execution_time_ms=150,
    )
75
+
76
+
class TestMemoryKnowledgeService:
    """Behavioural tests for the in-memory KnowledgeService implementation."""

    async def test_register_file_creates_new_registration(
        self,
        memory_service: MemoryKnowledgeService,
        knowledge_service_config: KnowledgeServiceConfig,
        test_document: Document,
    ) -> None:
        """A first registration reports the document id, file id and metadata."""
        registration = await memory_service.register_file(
            knowledge_service_config, test_document
        )
        expected_prefix = f"memory_{test_document.document_id}_"

        assert registration.document_id == test_document.document_id
        assert registration.knowledge_service_file_id.startswith(expected_prefix)

        metadata = registration.registration_metadata
        assert metadata["service"] == "memory"
        assert metadata["registered_via"] == "in_memory_storage"
        assert (
            metadata["knowledge_service_id"]
            == knowledge_service_config.knowledge_service_id
        )
        assert isinstance(registration.created_at, datetime)

    async def test_register_file_idempotent(
        self,
        memory_service: MemoryKnowledgeService,
        knowledge_service_config: KnowledgeServiceConfig,
        test_document: Document,
    ) -> None:
        """Registering one document twice yields equal results."""
        first = await memory_service.register_file(
            knowledge_service_config, test_document
        )
        second = await memory_service.register_file(
            knowledge_service_config, test_document
        )

        # Both the whole result and the file id must match.
        assert first == second
        assert first.knowledge_service_file_id == second.knowledge_service_file_id

    async def test_register_file_stores_in_memory(
        self,
        memory_service: MemoryKnowledgeService,
        knowledge_service_config: KnowledgeServiceConfig,
        test_document: Document,
    ) -> None:
        """A registration can be looked up again by its file id."""
        registration = await memory_service.register_file(
            knowledge_service_config, test_document
        )

        stored = memory_service.get_registered_file(
            registration.knowledge_service_file_id
        )
        assert stored == registration

    def test_get_registered_file_nonexistent(
        self, memory_service: MemoryKnowledgeService
    ) -> None:
        """Looking up an unknown file id returns None."""
        assert memory_service.get_registered_file("nonexistent-file-id") is None

    def test_get_all_registered_files_empty_initially(
        self, memory_service: MemoryKnowledgeService
    ) -> None:
        """A new service reports an empty registration dict."""
        assert memory_service.get_all_registered_files() == {}

    async def test_get_all_registered_files_after_registration(
        self,
        memory_service: MemoryKnowledgeService,
        knowledge_service_config: KnowledgeServiceConfig,
        test_document: Document,
    ) -> None:
        """Every registered file appears in get_all_registered_files."""
        # A second, distinct document to register alongside the fixture one.
        second_bytes = "Second test document content.".encode("utf-8")
        second_document = Document(
            document_id="test-doc-2",
            original_filename="test_document_2.txt",
            content_type="text/plain",
            size_bytes=len(second_bytes),
            content_multihash="test-hash-2",
            status=DocumentStatus.CAPTURED,
            content=ContentStream(io.BytesIO(second_bytes)),
            created_at=datetime.now(timezone.utc),
            updated_at=datetime.now(timezone.utc),
        )

        registrations = [
            await memory_service.register_file(knowledge_service_config, document)
            for document in (test_document, second_document)
        ]

        stored = memory_service.get_all_registered_files()

        # Both file ids are present ...
        assert len(stored) == 2
        for registration in registrations:
            assert registration.knowledge_service_file_id in stored

        # ... and each maps to the result that register_file returned.
        for registration in registrations:
            assert stored[registration.knowledge_service_file_id] == registration

    def test_add_canned_query_result(
        self,
        memory_service: MemoryKnowledgeService,
        sample_query_result: QueryResult,
    ) -> None:
        """Adding one canned result leaves exactly one queued."""
        memory_service.add_canned_query_result(sample_query_result)

        queued = len(memory_service._canned_query_results)
        assert queued == 1

    def test_clear_canned_query_results(
        self,
        memory_service: MemoryKnowledgeService,
        sample_query_result: QueryResult,
    ) -> None:
        """Clearing removes every queued canned result."""
        for _ in range(2):
            memory_service.add_canned_query_result(sample_query_result)
        assert len(memory_service._canned_query_results) == 2

        memory_service.clear_canned_query_results()
        assert len(memory_service._canned_query_results) == 0

    async def test_execute_query_no_canned_results_raises_error(
        self,
        memory_service: MemoryKnowledgeService,
        knowledge_service_config: KnowledgeServiceConfig,
    ) -> None:
        """execute_query raises ValueError when nothing is queued."""
        with pytest.raises(ValueError, match="No canned query results available"):
            await memory_service.execute_query(
                knowledge_service_config, "What is this?"
            )

    async def test_execute_query_returns_canned_result(
        self,
        memory_service: MemoryKnowledgeService,
        sample_query_result: QueryResult,
        knowledge_service_config: KnowledgeServiceConfig,
    ) -> None:
        """execute_query hands back the queued result and removes it."""
        memory_service.add_canned_query_result(sample_query_result)
        asked_text = "Custom query text"
        asked_documents = ["doc-1", "doc-2"]

        answer = await memory_service.execute_query(
            knowledge_service_config, asked_text, asked_documents
        )

        # Identity and timing come from the canned result; the query text
        # reflects what was actually asked.
        assert answer.query_id == sample_query_result.query_id
        assert answer.query_text == asked_text
        assert answer.execution_time_ms == sample_query_result.execution_time_ms

        # Keys describing this call are present in result_data.
        payload = answer.result_data
        assert payload["queried_documents"] == asked_documents
        assert payload["service"] == "memory"
        assert (
            payload["knowledge_service_id"]
            == knowledge_service_config.knowledge_service_id
        )

        # The canned payload itself is still there.
        assert payload["response"] == "The main topic is testing"
        assert payload["confidence"] == 0.95

        # The queue has been drained.
        assert len(memory_service._canned_query_results) == 0

    async def test_execute_query_fifo_order(
        self,
        memory_service: MemoryKnowledgeService,
        knowledge_service_config: KnowledgeServiceConfig,
    ) -> None:
        """Canned results come back in the order they were added."""
        queued_results = [
            QueryResult(
                query_id="query-1",
                query_text="First query",
                result_data={"response": "First response"},
            ),
            QueryResult(
                query_id="query-2",
                query_text="Second query",
                result_data={"response": "Second response"},
            ),
        ]
        for queued in queued_results:
            memory_service.add_canned_query_result(queued)

        # (query text sent, expected query_id, expected response) per call.
        expectations = (
            ("test query 1", "query-1", "First response"),
            ("test query 2", "query-2", "Second response"),
        )
        for sent_text, expected_id, expected_response in expectations:
            returned = await memory_service.execute_query(
                knowledge_service_config, sent_text
            )
            assert returned.query_id == expected_id
            assert returned.result_data["response"] == expected_response

        # Nothing is left once both have been served.
        assert len(memory_service._canned_query_results) == 0

    async def test_execute_query_with_none_document_ids(
        self,
        memory_service: MemoryKnowledgeService,
        knowledge_service_config: KnowledgeServiceConfig,
        sample_query_result: QueryResult,
    ) -> None:
        """Passing None for document_ids is reported as an empty list."""
        memory_service.add_canned_query_result(sample_query_result)

        answer = await memory_service.execute_query(
            knowledge_service_config, "test query", None
        )

        assert answer.result_data["queried_documents"] == []

    async def test_execute_query_updates_created_at(
        self,
        memory_service: MemoryKnowledgeService,
        knowledge_service_config: KnowledgeServiceConfig,
        sample_query_result: QueryResult,
    ) -> None:
        """The returned result carries a newer created_at than the canned one."""
        created_before = sample_query_result.created_at
        memory_service.add_canned_query_result(sample_query_result)

        answer = await memory_service.execute_query(
            knowledge_service_config, "test query"
        )

        assert answer.created_at is not None
        assert created_before is not None
        assert answer.created_at > created_before
        # The new timestamp must be within the last few seconds.
        age_seconds = (datetime.now(timezone.utc) - answer.created_at).total_seconds()
        assert age_seconds < 5

    def test_initialization_with_config(
        self,
        knowledge_service_config: KnowledgeServiceConfig,
    ) -> None:
        """A new service keeps its config and starts with empty stores."""
        fresh_service = MemoryKnowledgeService(knowledge_service_config)

        assert fresh_service.config == knowledge_service_config
        assert fresh_service._registered_files == {}
        assert len(fresh_service._canned_query_results) == 0
@@ -0,0 +1,112 @@
1
+ """
2
+ Tests for knowledge_service_factory function.
3
+
4
+ This module contains tests for the factory function that creates
5
+ KnowledgeService implementations based on configuration.
6
+ """
7
+
8
+ import pytest
9
+
10
+ from julee.domain.models.knowledge_service_config import (
11
+ KnowledgeServiceConfig,
12
+ )
13
+ from julee.domain.models.document import Document, DocumentStatus
14
+ from julee.domain.models.knowledge_service_config import ServiceApi
15
+ from julee.domain.models.custom_fields.content_stream import (
16
+ ContentStream,
17
+ )
18
+ from julee.services.knowledge_service import ensure_knowledge_service
19
+ from julee.services.knowledge_service.factory import (
20
+ knowledge_service_factory,
21
+ )
22
+ from julee.services.knowledge_service.anthropic import (
23
+ AnthropicKnowledgeService,
24
+ )
25
+ import io
26
+ from datetime import datetime, timezone
27
+
28
+
@pytest.fixture
def test_document() -> Document:
    """Provide a CAPTURED plain-text Document whose content lives in memory."""
    raw_bytes = "This is test document content for knowledge service testing.".encode(
        "utf-8"
    )
    # Wrap the bytes in a BytesIO-backed ContentStream before building the model.
    stream = ContentStream(io.BytesIO(raw_bytes))

    document = Document(
        document_id="test-doc-123",
        original_filename="test_document.txt",
        content_type="text/plain",
        size_bytes=len(raw_bytes),
        content_multihash="test-hash-123",
        status=DocumentStatus.CAPTURED,
        content=stream,
        # Two separate clock reads, as in the original fixture.
        created_at=datetime.now(timezone.utc),
        updated_at=datetime.now(timezone.utc),
    )
    return document
47
+
48
+
@pytest.fixture
def anthropic_config() -> KnowledgeServiceConfig:
    """Provide a KnowledgeServiceConfig whose service_api is ANTHROPIC."""
    config = KnowledgeServiceConfig(
        knowledge_service_id="ks-anthropic-test",
        name="Test Anthropic Service",
        description="Anthropic service for testing",
        service_api=ServiceApi.ANTHROPIC,
    )
    return config
58
+
59
+
class TestKnowledgeServiceFactory:
    """Tests for the knowledge_service_factory function."""

    def test_factory_creates_anthropic_service(
        self,
        anthropic_config: KnowledgeServiceConfig,
    ) -> None:
        """An ANTHROPIC config produces an AnthropicKnowledgeService."""
        with pytest.MonkeyPatch.context() as env_patch:
            # Placeholder key, set only for the duration of this block.
            env_patch.setenv("ANTHROPIC_API_KEY", "test-key")
            built = knowledge_service_factory(anthropic_config)

            assert isinstance(built, AnthropicKnowledgeService)

    def test_factory_returns_validated_service(
        self,
        anthropic_config: KnowledgeServiceConfig,
    ) -> None:
        """The factory's product is accepted by ensure_knowledge_service."""
        with pytest.MonkeyPatch.context() as env_patch:
            env_patch.setenv("ANTHROPIC_API_KEY", "test-key")
            built = knowledge_service_factory(anthropic_config)

            # Validation must pass and hand back an equal service.
            checked = ensure_knowledge_service(built)
            assert checked == built
86
+
87
+
class TestEnsureKnowledgeService:
    """Tests for the ensure_knowledge_service function."""

    def test_ensure_knowledge_service_accepts_valid_service(
        self,
        anthropic_config: KnowledgeServiceConfig,
    ) -> None:
        """A real AnthropicKnowledgeService instance passes validation."""
        # A placeholder API key is set while the service is built; presumably
        # the service reads it at construction time - TODO confirm.
        with pytest.MonkeyPatch.context() as env_patch:
            env_patch.setenv("ANTHROPIC_API_KEY", "test-key")
            candidate = AnthropicKnowledgeService()

            checked = ensure_knowledge_service(candidate)
            assert checked == candidate

    def test_ensure_knowledge_service_rejects_invalid_service(self) -> None:
        """A plain string is rejected with a TypeError naming its type."""
        not_a_service = "not a knowledge service"
        expected_message = "Service str does not satisfy KnowledgeService protocol"

        with pytest.raises(TypeError, match=expected_message):
            ensure_knowledge_service(not_a_service)
@@ -0,0 +1,38 @@
1
+ """
2
+ Temporal integration for the julee knowledge service domain.
3
+
4
+ This package contains Temporal activity and proxy implementations for
5
+ knowledge service operations, following the established patterns from
6
+ systemPatterns.org.
7
+
8
+ The package is organized into separate modules to respect Temporal's workflow
9
+ sandbox restrictions:
10
+
11
+ - activities.py: All temporal activity registrations (for worker use only)
12
+ Contains imports from backend service implementations - NOT SANDBOX SAFE
13
+
14
+ - proxies.py: All workflow-safe proxy classes (for workflow use only)
15
+ Contains no backend imports - SANDBOX SAFE
16
+
17
+ - activity_names.py: Shared activity name constants - SANDBOX SAFE
18
+
19
+ IMPORTANT: Do not import everything from __init__.py as this would mix
20
+ sandbox-safe and non-sandbox-safe imports. Import directly from the
21
+ specific module you need:
22
+
23
+ - Workers should import from activities.py
24
+ - Workflows should import from proxies.py
25
+ - Both can import constants from activity_names.py
26
+ """
27
+
28
+ # This __init__.py intentionally does NOT re-export classes to avoid
29
+ # mixing sandbox-safe (proxies) and non-sandbox-safe (activities) imports.
30
+ # Import directly from the specific modules instead.
31
+
# Deliberately empty: nothing is re-exported from this package, to avoid
# sandbox violations. Import directly from:
# - .activities for worker use
# - .proxies for workflow use
# - .activity_names for constants
__all__: list[str] = []
@@ -0,0 +1,86 @@
1
+ """
2
+ Temporal activity wrapper classes for the julee knowledge service
3
+ domain.
4
+
5
+ This module contains the @temporal_activity_registration decorated class
6
+ that wraps knowledge service operations as Temporal activities. This class is
7
+ imported by the worker to register activities with Temporal.
8
+
9
+ The class follows the naming pattern documented in systemPatterns.org:
10
+ - Activity names: {domain}.{service_name}.{method}
11
+ - The knowledge service gets its own activity prefix
12
+ """
13
+
14
+ import io
15
+ import logging
16
+ from typing_extensions import override
17
+
18
+ from julee.util.temporal.decorators import temporal_activity_registration
19
+ from julee.services.knowledge_service.factory import (
20
+ ConfigurableKnowledgeService,
21
+ )
22
+ from julee.domain.repositories.document import DocumentRepository
23
+ from julee.domain.models.knowledge_service_config import (
24
+ KnowledgeServiceConfig,
25
+ )
26
+ from julee.domain.models.document import Document
27
+ from ..knowledge_service import FileRegistrationResult
28
+
29
+ # Import activity name bases from shared module
30
+ from julee.services.temporal.activity_names import (
31
+ KNOWLEDGE_SERVICE_ACTIVITY_BASE,
32
+ )
33
+
34
+
@temporal_activity_registration(KNOWLEDGE_SERVICE_ACTIVITY_BASE)
class TemporalKnowledgeService(ConfigurableKnowledgeService):
    """KnowledgeService whose operations are registered as Temporal activities.

    A Document handed to an activity may arrive with ``content`` set to None
    (attributed by the original author to ContentStream not surviving
    Temporal's serialization). ``register_file`` restores the content from
    the injected DocumentRepository before delegating to the parent class.
    """

    def __init__(self, document_repo: DocumentRepository) -> None:
        super().__init__()
        self.document_repo: DocumentRepository = document_repo
        self.logger: logging.Logger = logging.getLogger(__name__)

    @override
    async def register_file(
        self, config: KnowledgeServiceConfig, document: Document
    ) -> FileRegistrationResult:
        """Register a document, restoring its content first when it is missing.

        Args:
            config: Knowledge service configuration passed through to the parent.
            document: Document to register; its ``content`` may be None.

        Returns:
            The result of the parent class's ``register_file``.

        Raises:
            ValueError: If the content is missing and the repository returns
                no document, or a document without content.
        """
        if document.content is None:
            self.logger.info(
                f"Document {document.document_id} has no content stream, "
                f"re-fetching from repo"
            )
            refetched = await self.document_repo.get(document.document_id)
            if not (refetched and refetched.content):
                raise ValueError(
                    f"Could not re-fetch document {document.document_id} "
                    f"from repository"
                )

            # Copy the fetched content into a BytesIO so the stream is seekable
            # (the original comment: keeps it from being consumed during upload).
            buffered = io.BytesIO(refetched.content.read())
            refetched.content._stream = buffered
            document = refetched

        # Delegate with a document that now carries content.
        return await super().register_file(config, document)
80
+
81
+
# Names exported for use in worker.py
__all__ = ["TemporalKnowledgeService", "KNOWLEDGE_SERVICE_ACTIVITY_BASE"]
@@ -0,0 +1,22 @@
1
+ """
2
+ Shared activity name constants for the julee knowledge service domain.
3
+
4
+ This module contains activity name base constants that are shared between
5
+ activities.py and proxies.py, avoiding the need for either module to import
6
+ from the other, which would create problematic transitive dependencies.
7
+
8
+ By isolating these constants in their own module, we maintain DRY principles
9
+ while preserving Temporal's workflow sandbox restrictions. The proxies module
10
+ can import these constants without transitively importing non-deterministic
11
+ backend code from activities.py.
12
+ """
13
+
# Base prefix for knowledge-service activity names. Defined once here so that
# activity registrations and workflow proxies stay consistent.
KNOWLEDGE_SERVICE_ACTIVITY_BASE = "julee.knowledge_service"


# Public names of this module
__all__ = ["KNOWLEDGE_SERVICE_ACTIVITY_BASE"]
@@ -0,0 +1,41 @@
1
+ """
2
+ Workflow-safe proxy classes for the julee knowledge service domain.
3
+
4
+ This module contains @temporal_workflow_proxy decorated classes that
5
+ delegate to Temporal activities from within workflows. These classes are
6
+ isolated from backend imports to avoid Temporal's workflow sandbox
7
+ restrictions.
8
+
9
+ The proxy classes automatically generate methods that call
10
+ workflow.execute_activity() with the appropriate activity names, timeouts,
11
+ and retry policies.
12
+ """
13
+
14
+ from julee.util.temporal.decorators import temporal_workflow_proxy
15
+ from julee.services.knowledge_service import KnowledgeService
16
+
17
+ # Import activity name bases from shared module
18
+ from julee.services.temporal.activity_names import (
19
+ KNOWLEDGE_SERVICE_ACTIVITY_BASE,
20
+ )
21
+
22
+
@temporal_workflow_proxy(
    retry_methods=["register_file", "execute_query"],
    default_timeout_seconds=300,  # 5 minutes for external service calls
    activity_base=KNOWLEDGE_SERVICE_ACTIVITY_BASE,
)
class WorkflowKnowledgeServiceProxy(KnowledgeService):
    """KnowledgeService for use inside workflows; calls go to activities.

    The body is intentionally empty: every method is generated by the
    @temporal_workflow_proxy decorator.
    """
36
+
37
+
# Public names of this module: the workflow proxy class only
__all__ = ["WorkflowKnowledgeServiceProxy"]
julee/util/__init__.py ADDED
File without changes