julee 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- julee/__init__.py +3 -0
- julee/api/__init__.py +20 -0
- julee/api/app.py +180 -0
- julee/api/dependencies.py +257 -0
- julee/api/requests.py +175 -0
- julee/api/responses.py +43 -0
- julee/api/routers/__init__.py +43 -0
- julee/api/routers/assembly_specifications.py +212 -0
- julee/api/routers/documents.py +182 -0
- julee/api/routers/knowledge_service_configs.py +79 -0
- julee/api/routers/knowledge_service_queries.py +293 -0
- julee/api/routers/system.py +137 -0
- julee/api/routers/workflows.py +234 -0
- julee/api/services/__init__.py +20 -0
- julee/api/services/system_initialization.py +214 -0
- julee/api/tests/__init__.py +14 -0
- julee/api/tests/routers/__init__.py +17 -0
- julee/api/tests/routers/test_assembly_specifications.py +749 -0
- julee/api/tests/routers/test_documents.py +301 -0
- julee/api/tests/routers/test_knowledge_service_configs.py +234 -0
- julee/api/tests/routers/test_knowledge_service_queries.py +738 -0
- julee/api/tests/routers/test_system.py +179 -0
- julee/api/tests/routers/test_workflows.py +393 -0
- julee/api/tests/test_app.py +285 -0
- julee/api/tests/test_dependencies.py +245 -0
- julee/api/tests/test_requests.py +250 -0
- julee/domain/__init__.py +22 -0
- julee/domain/models/__init__.py +49 -0
- julee/domain/models/assembly/__init__.py +17 -0
- julee/domain/models/assembly/assembly.py +103 -0
- julee/domain/models/assembly/tests/__init__.py +0 -0
- julee/domain/models/assembly/tests/factories.py +37 -0
- julee/domain/models/assembly/tests/test_assembly.py +430 -0
- julee/domain/models/assembly_specification/__init__.py +24 -0
- julee/domain/models/assembly_specification/assembly_specification.py +172 -0
- julee/domain/models/assembly_specification/knowledge_service_query.py +123 -0
- julee/domain/models/assembly_specification/tests/__init__.py +0 -0
- julee/domain/models/assembly_specification/tests/factories.py +78 -0
- julee/domain/models/assembly_specification/tests/test_assembly_specification.py +490 -0
- julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +310 -0
- julee/domain/models/custom_fields/__init__.py +0 -0
- julee/domain/models/custom_fields/content_stream.py +68 -0
- julee/domain/models/custom_fields/tests/__init__.py +0 -0
- julee/domain/models/custom_fields/tests/test_custom_fields.py +53 -0
- julee/domain/models/document/__init__.py +17 -0
- julee/domain/models/document/document.py +150 -0
- julee/domain/models/document/tests/__init__.py +0 -0
- julee/domain/models/document/tests/factories.py +76 -0
- julee/domain/models/document/tests/test_document.py +297 -0
- julee/domain/models/knowledge_service_config/__init__.py +17 -0
- julee/domain/models/knowledge_service_config/knowledge_service_config.py +86 -0
- julee/domain/models/policy/__init__.py +15 -0
- julee/domain/models/policy/document_policy_validation.py +220 -0
- julee/domain/models/policy/policy.py +203 -0
- julee/domain/models/policy/tests/__init__.py +0 -0
- julee/domain/models/policy/tests/factories.py +47 -0
- julee/domain/models/policy/tests/test_document_policy_validation.py +420 -0
- julee/domain/models/policy/tests/test_policy.py +546 -0
- julee/domain/repositories/__init__.py +27 -0
- julee/domain/repositories/assembly.py +45 -0
- julee/domain/repositories/assembly_specification.py +52 -0
- julee/domain/repositories/base.py +146 -0
- julee/domain/repositories/document.py +49 -0
- julee/domain/repositories/document_policy_validation.py +52 -0
- julee/domain/repositories/knowledge_service_config.py +54 -0
- julee/domain/repositories/knowledge_service_query.py +44 -0
- julee/domain/repositories/policy.py +49 -0
- julee/domain/use_cases/__init__.py +17 -0
- julee/domain/use_cases/decorators.py +107 -0
- julee/domain/use_cases/extract_assemble_data.py +649 -0
- julee/domain/use_cases/initialize_system_data.py +842 -0
- julee/domain/use_cases/tests/__init__.py +7 -0
- julee/domain/use_cases/tests/test_extract_assemble_data.py +548 -0
- julee/domain/use_cases/tests/test_initialize_system_data.py +455 -0
- julee/domain/use_cases/tests/test_validate_document.py +1228 -0
- julee/domain/use_cases/validate_document.py +736 -0
- julee/fixtures/assembly_specifications.yaml +70 -0
- julee/fixtures/documents.yaml +178 -0
- julee/fixtures/knowledge_service_configs.yaml +37 -0
- julee/fixtures/knowledge_service_queries.yaml +27 -0
- julee/repositories/__init__.py +17 -0
- julee/repositories/memory/__init__.py +31 -0
- julee/repositories/memory/assembly.py +84 -0
- julee/repositories/memory/assembly_specification.py +125 -0
- julee/repositories/memory/base.py +227 -0
- julee/repositories/memory/document.py +149 -0
- julee/repositories/memory/document_policy_validation.py +104 -0
- julee/repositories/memory/knowledge_service_config.py +123 -0
- julee/repositories/memory/knowledge_service_query.py +120 -0
- julee/repositories/memory/policy.py +87 -0
- julee/repositories/memory/tests/__init__.py +0 -0
- julee/repositories/memory/tests/test_document.py +212 -0
- julee/repositories/memory/tests/test_document_policy_validation.py +161 -0
- julee/repositories/memory/tests/test_policy.py +443 -0
- julee/repositories/minio/__init__.py +31 -0
- julee/repositories/minio/assembly.py +103 -0
- julee/repositories/minio/assembly_specification.py +170 -0
- julee/repositories/minio/client.py +570 -0
- julee/repositories/minio/document.py +530 -0
- julee/repositories/minio/document_policy_validation.py +120 -0
- julee/repositories/minio/knowledge_service_config.py +187 -0
- julee/repositories/minio/knowledge_service_query.py +211 -0
- julee/repositories/minio/policy.py +106 -0
- julee/repositories/minio/tests/__init__.py +0 -0
- julee/repositories/minio/tests/fake_client.py +213 -0
- julee/repositories/minio/tests/test_assembly.py +374 -0
- julee/repositories/minio/tests/test_assembly_specification.py +391 -0
- julee/repositories/minio/tests/test_client_protocol.py +57 -0
- julee/repositories/minio/tests/test_document.py +591 -0
- julee/repositories/minio/tests/test_document_policy_validation.py +192 -0
- julee/repositories/minio/tests/test_knowledge_service_config.py +374 -0
- julee/repositories/minio/tests/test_knowledge_service_query.py +438 -0
- julee/repositories/minio/tests/test_policy.py +559 -0
- julee/repositories/temporal/__init__.py +38 -0
- julee/repositories/temporal/activities.py +114 -0
- julee/repositories/temporal/activity_names.py +34 -0
- julee/repositories/temporal/proxies.py +159 -0
- julee/services/__init__.py +18 -0
- julee/services/knowledge_service/__init__.py +48 -0
- julee/services/knowledge_service/anthropic/__init__.py +12 -0
- julee/services/knowledge_service/anthropic/knowledge_service.py +331 -0
- julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +318 -0
- julee/services/knowledge_service/factory.py +138 -0
- julee/services/knowledge_service/knowledge_service.py +160 -0
- julee/services/knowledge_service/memory/__init__.py +13 -0
- julee/services/knowledge_service/memory/knowledge_service.py +278 -0
- julee/services/knowledge_service/memory/test_knowledge_service.py +345 -0
- julee/services/knowledge_service/test_factory.py +112 -0
- julee/services/temporal/__init__.py +38 -0
- julee/services/temporal/activities.py +86 -0
- julee/services/temporal/activity_names.py +22 -0
- julee/services/temporal/proxies.py +41 -0
- julee/util/__init__.py +0 -0
- julee/util/domain.py +119 -0
- julee/util/repos/__init__.py +0 -0
- julee/util/repos/minio/__init__.py +0 -0
- julee/util/repos/minio/file_storage.py +213 -0
- julee/util/repos/temporal/__init__.py +11 -0
- julee/util/repos/temporal/client_proxies/file_storage.py +68 -0
- julee/util/repos/temporal/data_converter.py +123 -0
- julee/util/repos/temporal/minio_file_storage.py +12 -0
- julee/util/repos/temporal/proxies/__init__.py +0 -0
- julee/util/repos/temporal/proxies/file_storage.py +58 -0
- julee/util/repositories.py +55 -0
- julee/util/temporal/__init__.py +22 -0
- julee/util/temporal/activities.py +123 -0
- julee/util/temporal/decorators.py +473 -0
- julee/util/tests/__init__.py +1 -0
- julee/util/tests/test_decorators.py +770 -0
- julee/util/validation/__init__.py +29 -0
- julee/util/validation/repository.py +100 -0
- julee/util/validation/type_guards.py +369 -0
- julee/worker.py +211 -0
- julee/workflows/__init__.py +26 -0
- julee/workflows/extract_assemble.py +215 -0
- julee/workflows/validate_document.py +228 -0
- julee-0.1.0.dist-info/METADATA +195 -0
- julee-0.1.0.dist-info/RECORD +161 -0
- julee-0.1.0.dist-info/WHEEL +5 -0
- julee-0.1.0.dist-info/licenses/LICENSE +674 -0
- julee-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Anthropic implementation of KnowledgeService for the Capture, Extract,
|
|
3
|
+
Assemble, Publish workflow.
|
|
4
|
+
|
|
5
|
+
This module provides the Anthropic-specific implementation of the
|
|
6
|
+
KnowledgeService protocol. It handles interactions with Anthropic's API
|
|
7
|
+
for document registration and query execution.
|
|
8
|
+
|
|
9
|
+
Requirements:
|
|
10
|
+
- ANTHROPIC_API_KEY environment variable must be set
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import os
|
|
14
|
+
import logging
|
|
15
|
+
import time
|
|
16
|
+
import uuid
|
|
17
|
+
from typing import Optional, List, Dict, Any
|
|
18
|
+
from datetime import datetime, timezone
|
|
19
|
+
|
|
20
|
+
from anthropic import AsyncAnthropic
|
|
21
|
+
|
|
22
|
+
from julee.domain.models.knowledge_service_config import (
|
|
23
|
+
KnowledgeServiceConfig,
|
|
24
|
+
)
|
|
25
|
+
from julee.domain.models.document import Document
|
|
26
|
+
from ..knowledge_service import (
|
|
27
|
+
KnowledgeService,
|
|
28
|
+
QueryResult,
|
|
29
|
+
FileRegistrationResult,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
logger = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
# Defaults used by execute_query when query_metadata omits "model" or "max_tokens"
|
|
35
|
+
DEFAULT_MODEL = "claude-sonnet-4-20250514"
|
|
36
|
+
DEFAULT_MAX_TOKENS = 4000
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class AnthropicKnowledgeService(KnowledgeService):
    """Anthropic implementation of the KnowledgeService protocol.

    The service keeps no state of its own: every public method receives the
    KnowledgeServiceConfig it should act on and builds a fresh Anthropic
    client for that call. Documents are uploaded through Anthropic's beta
    Files API and queries are sent through the Messages API, optionally
    referencing previously uploaded files.
    """

    def __init__(self) -> None:
        """Create the service without any configuration.

        Configuration is supplied per method call so the instance stays
        stateless, which keeps it compatible with Temporal workflows.
        """
        # Intentionally empty - everything happens per method call.

    def _get_client(self, config: KnowledgeServiceConfig) -> AsyncAnthropic:
        """Build an Anthropic client from the process environment.

        Args:
            config: KnowledgeServiceConfig for the current operation. It is
                not read at the moment; the parameter exists for future
                extensibility.

        Returns:
            A new AsyncAnthropic client with the Files API beta header set.

        Raises:
            ValueError: If ANTHROPIC_API_KEY is unset or empty.
        """
        api_key = os.environ.get("ANTHROPIC_API_KEY")
        if not api_key:
            raise ValueError(
                "ANTHROPIC_API_KEY environment variable is required for "
                "AnthropicKnowledgeService"
            )

        return AsyncAnthropic(
            api_key=api_key,
            # Opt in to the beta Files API used by register_file and by the
            # file references that execute_query attaches to messages.
            default_headers={"anthropic-beta": "files-api-2025-04-14"},
        )

    async def register_file(
        self, config: KnowledgeServiceConfig, document: Document
    ) -> FileRegistrationResult:
        """Upload a document to Anthropic and return its registration record.

        Args:
            config: KnowledgeServiceConfig for this operation.
            document: Document whose content stream is uploaded.

        Returns:
            FileRegistrationResult carrying the Anthropic file ID together
            with metadata describing the upload.

        Raises:
            ValueError: If ANTHROPIC_API_KEY is not set, or the document has
                no content stream.
            Exception: Any error raised during the upload is logged and
                re-raised unchanged.
        """
        logger.debug(
            "Registering file with Anthropic",
            extra={
                "knowledge_service_id": config.knowledge_service_id,
                "document_id": document.document_id,
            },
        )

        try:
            client = self._get_client(config)

            content = document.content
            if not content:
                raise ValueError("Document content stream is required for upload")
            # The stream may already have been read; rewind so the whole
            # payload is uploaded.
            content.seek(0)

            # Anthropic only supports PDF and plaintext files, so JSON is
            # uploaded as text/plain. The metadata recorded below keeps the
            # document's original content type.
            upload_content_type = document.content_type
            if upload_content_type == "application/json":
                upload_content_type = "text/plain"

            # The Files API takes a (filename, file_stream, media_type) tuple.
            file_response = await client.beta.files.upload(
                file=(
                    document.original_filename,
                    content.stream,  # type: ignore[arg-type]
                    upload_content_type,
                )
            )
            anthropic_file_id = file_response.id

            result = FileRegistrationResult(
                document_id=document.document_id,
                knowledge_service_file_id=anthropic_file_id,
                registration_metadata={
                    "service": "anthropic",
                    "registered_via": "beta_files_api",
                    "filename": document.original_filename,
                    "content_type": document.content_type,
                    "size_bytes": document.size_bytes,
                    "content_multihash": document.content_multihash,
                    "anthropic_file_id": anthropic_file_id,
                },
                created_at=datetime.now(timezone.utc),
            )

            logger.info(
                "File registered with Anthropic beta Files API",
                extra={
                    "knowledge_service_id": config.knowledge_service_id,
                    "document_id": document.document_id,
                    "anthropic_file_id": anthropic_file_id,
                    "original_filename": document.original_filename,
                    "size_bytes": document.size_bytes,
                },
            )

            return result

        except Exception as e:
            logger.error(
                "Failed to register file with Anthropic",
                extra={
                    "knowledge_service_id": config.knowledge_service_id,
                    "document_id": document.document_id,
                    "error": str(e),
                },
                exc_info=True,
            )
            raise

    @staticmethod
    def _build_messages(
        query_text: str,
        service_file_ids: Optional[List[str]],
        assistant_prompt: Optional[str],
    ) -> List[Dict[str, Any]]:
        """Assemble the Messages API payload for a query.

        The user turn lists one document part per file ID followed by the
        text query. When an assistant prompt is given it is appended as an
        assistant turn to constrain or prime the response.
        """
        user_content: List[Dict[str, Any]] = [
            {"type": "document", "source": {"type": "file", "file_id": file_id}}
            for file_id in (service_file_ids or [])
        ]
        user_content.append({"type": "text", "text": query_text})

        messages: List[Dict[str, Any]] = [{"role": "user", "content": user_content}]
        if assistant_prompt:
            messages.append({"role": "assistant", "content": assistant_prompt})
        return messages

    @staticmethod
    def _extract_response_text(response: Any) -> str:
        """Return the text of a response made of exactly one text block.

        Raises:
            ValueError: If the response does not contain exactly one content
                block, the block is not of type 'text', or it has no 'text'
                attribute.
        """
        blocks = response.content
        if len(blocks) != 1:
            raise ValueError(f"Expected exactly 1 content block, got {len(blocks)}")

        block = blocks[0]
        block_type = getattr(block, "type", "unknown")
        if block_type != "text":
            raise ValueError(f"Expected content block type 'text', got '{block_type}'")
        if not hasattr(block, "text"):
            raise ValueError("Text content block missing 'text' attribute")

        return str(block.text)

    async def execute_query(
        self,
        config: KnowledgeServiceConfig,
        query_text: str,
        service_file_ids: Optional[List[str]] = None,
        query_metadata: Optional[Dict[str, Any]] = None,
        assistant_prompt: Optional[str] = None,
    ) -> QueryResult:
        """Execute a query against Anthropic.

        Args:
            config: KnowledgeServiceConfig for this operation.
            query_text: The query to execute.
            service_file_ids: Optional list of Anthropic file IDs to attach
                as context for the query.
            query_metadata: Optional settings; the keys read here are
                "model", "max_tokens" and "temperature". A missing or None
                "model" / "max_tokens" falls back to DEFAULT_MODEL /
                DEFAULT_MAX_TOKENS; temperature is only sent when given.
            assistant_prompt: Optional assistant message content to constrain
                or prime the model's response.

        Returns:
            QueryResult whose result_data holds the response text, model,
            source file IDs, token usage and stop reason.

        Raises:
            ValueError: If ANTHROPIC_API_KEY is not set, or the response is
                not a single text content block.
            Exception: Any error raised by the API call is logged and
                re-raised unchanged.
        """
        file_count = len(service_file_ids) if service_file_ids else 0
        logger.debug(
            "Executing query with Anthropic",
            extra={
                "knowledge_service_id": config.knowledge_service_id,
                "query_text": query_text,
                "document_count": file_count,
                "file_count": file_count,
            },
        )

        # perf_counter is monotonic, so the elapsed time can never come out
        # negative when the system clock is adjusted mid-request.
        started = time.perf_counter()
        query_id = f"anthropic_{uuid.uuid4().hex[:12]}"

        metadata = query_metadata or {}
        model = metadata.get("model")
        if model is None:
            model = DEFAULT_MODEL
        max_tokens = metadata.get("max_tokens")
        if max_tokens is None:
            max_tokens = DEFAULT_MAX_TOKENS
        temperature = metadata.get("temperature")

        try:
            client = self._get_client(config)

            create_params: Dict[str, Any] = {
                "model": model,
                "max_tokens": max_tokens,
                "messages": self._build_messages(
                    query_text, service_file_ids, assistant_prompt
                ),
            }
            # Leave temperature out entirely unless the caller set it, so the
            # API applies its own default.
            if temperature is not None:
                create_params["temperature"] = temperature

            response = await client.messages.create(**create_params)

            # Measured right after the API call, before response validation.
            execution_time_ms = int((time.perf_counter() - started) * 1000)

            response_text = self._extract_response_text(response)

            logger.debug(
                "Single text content block validated and extracted",
                extra={
                    "knowledge_service_id": config.knowledge_service_id,
                    "query_id": query_id,
                    "response_length": len(response_text),
                },
            )

            result = QueryResult(
                query_id=query_id,
                query_text=query_text,
                result_data={
                    "response": response_text,
                    "model": model,
                    "service": "anthropic",
                    "sources": service_file_ids or [],
                    "usage": {
                        "input_tokens": response.usage.input_tokens,
                        "output_tokens": response.usage.output_tokens,
                    },
                    "stop_reason": response.stop_reason,
                },
                execution_time_ms=execution_time_ms,
                created_at=datetime.now(timezone.utc),
            )

            logger.info(
                "Query executed with Anthropic successfully",
                extra={
                    "knowledge_service_id": config.knowledge_service_id,
                    "query_id": query_id,
                    "execution_time_ms": execution_time_ms,
                    "input_tokens": response.usage.input_tokens,
                    "output_tokens": response.usage.output_tokens,
                    "file_count": file_count,
                },
            )

            return result

        except Exception as e:
            execution_time_ms = int((time.perf_counter() - started) * 1000)
            logger.error(
                "Failed to execute query with Anthropic",
                extra={
                    "knowledge_service_id": config.knowledge_service_id,
                    "query_id": query_id,
                    "query_text": query_text,
                    "execution_time_ms": execution_time_ms,
                    "file_count": file_count,
                    "error": str(e),
                },
                exc_info=True,
            )
            raise
|
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests for AnthropicKnowledgeService implementation.
|
|
3
|
+
|
|
4
|
+
This module contains tests for the Anthropic implementation of the
|
|
5
|
+
KnowledgeService protocol, verifying file registration and query
|
|
6
|
+
execution functionality.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import pytest
|
|
10
|
+
from unittest.mock import AsyncMock, MagicMock, patch
|
|
11
|
+
from datetime import datetime, timezone
|
|
12
|
+
import io
|
|
13
|
+
|
|
14
|
+
from julee.domain.models.knowledge_service_config import (
|
|
15
|
+
KnowledgeServiceConfig,
|
|
16
|
+
)
|
|
17
|
+
from julee.domain.models.document import Document, DocumentStatus
|
|
18
|
+
from julee.domain.models.knowledge_service_config import ServiceApi
|
|
19
|
+
from julee.domain.models.custom_fields.content_stream import (
|
|
20
|
+
ContentStream,
|
|
21
|
+
)
|
|
22
|
+
from julee.services.knowledge_service.anthropic import (
|
|
23
|
+
knowledge_service as anthropic_ks,
|
|
24
|
+
)
|
|
25
|
+
from julee.services.knowledge_service.anthropic import (
|
|
26
|
+
knowledge_service as anthropic_ks_module,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@pytest.fixture
def test_document() -> Document:
    """Provide a captured plain-text Document backed by an in-memory stream."""
    raw_bytes = (
        "This is test document content for knowledge service testing."
    ).encode("utf-8")

    return Document(
        document_id="test-doc-123",
        original_filename="test_document.txt",
        content_type="text/plain",
        size_bytes=len(raw_bytes),
        content_multihash="test-hash-123",
        status=DocumentStatus.CAPTURED,
        content=ContentStream(io.BytesIO(raw_bytes)),
        created_at=datetime.now(timezone.utc),
        updated_at=datetime.now(timezone.utc),
    )
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@pytest.fixture
def knowledge_service_config() -> KnowledgeServiceConfig:
    """Provide a KnowledgeServiceConfig that targets the Anthropic API."""
    anthropic_config = KnowledgeServiceConfig(
        knowledge_service_id="ks-anthropic-test",
        name="Test Anthropic Service",
        description="Anthropic service for testing",
        service_api=ServiceApi.ANTHROPIC,
    )
    return anthropic_config
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@pytest.fixture
def mock_anthropic_client() -> MagicMock:
    """Provide a mocked client whose messages.create yields one text block."""
    # The single text block the service is expected to extract.
    text_block = MagicMock()
    text_block.type = "text"
    text_block.text = "This is a test response from Anthropic."

    # Canned Messages API reply, including the usage counters and stop reason.
    reply = MagicMock(content=[text_block], stop_reason="end_turn")
    reply.usage.input_tokens = 150
    reply.usage.output_tokens = 25

    client = MagicMock()
    # messages.create is awaited by the service, so it has to be an AsyncMock.
    client.messages.create = AsyncMock(return_value=reply)
    return client
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class TestAnthropicKnowledgeService:
    """Test cases for AnthropicKnowledgeService.execute_query.

    Every test sets ANTHROPIC_API_KEY through patch.dict and replaces the
    AsyncAnthropic class referenced by the service module with a mock, so no
    network traffic takes place.
    """

    # Dotted path of the client class as looked up by the service module;
    # patching it here is what makes _get_client hand back the mock.
    _CLIENT_TARGET = (
        "julee.services.knowledge_service.anthropic.knowledge_service.AsyncAnthropic"
    )

    @patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"})
    async def test_execute_query_without_files(
        self,
        knowledge_service_config: KnowledgeServiceConfig,
        mock_anthropic_client: MagicMock,
    ) -> None:
        """Test execute_query without service file IDs."""
        with patch(self._CLIENT_TARGET, return_value=mock_anthropic_client):
            service = anthropic_ks.AnthropicKnowledgeService()

            query_text = "What is machine learning?"
            result = await service.execute_query(knowledge_service_config, query_text)

            # Verify the result structure
            assert result.query_text == query_text
            assert (
                result.result_data["response"]
                == "This is a test response from Anthropic."
            )
            assert result.result_data["model"] == anthropic_ks_module.DEFAULT_MODEL
            assert result.result_data["service"] == "anthropic"
            assert result.result_data["sources"] == []
            assert result.result_data["usage"]["input_tokens"] == 150
            assert result.result_data["usage"]["output_tokens"] == 25
            assert result.result_data["stop_reason"] == "end_turn"
            assert result.execution_time_ms is not None
            assert result.execution_time_ms >= 0
            assert isinstance(result.created_at, datetime)

            # Verify the API call was made correctly
            mock_anthropic_client.messages.create.assert_called_once()
            sent = mock_anthropic_client.messages.create.call_args.kwargs
            assert sent["model"] == anthropic_ks_module.DEFAULT_MODEL
            assert sent["max_tokens"] == anthropic_ks_module.DEFAULT_MAX_TOKENS
            assert len(sent["messages"]) == 1
            assert sent["messages"][0]["role"] == "user"

            # Should have only one content part (the text query)
            content_parts = sent["messages"][0]["content"]
            assert len(content_parts) == 1
            assert content_parts[0]["type"] == "text"
            assert content_parts[0]["text"] == query_text

    @patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"})
    async def test_execute_query_with_files(
        self,
        knowledge_service_config: KnowledgeServiceConfig,
        mock_anthropic_client: MagicMock,
    ) -> None:
        """Test execute_query with service file IDs."""
        with patch(self._CLIENT_TARGET, return_value=mock_anthropic_client):
            service = anthropic_ks.AnthropicKnowledgeService()

            query_text = "What is in the document?"
            service_file_ids = ["file_123", "file_456"]
            result = await service.execute_query(
                knowledge_service_config,
                query_text,
                service_file_ids=service_file_ids,
            )

            # Verify the result structure
            assert result.query_text == query_text
            assert result.result_data["sources"] == service_file_ids
            assert result.execution_time_ms is not None
            assert result.execution_time_ms >= 0

            # Verify the API call was made with file attachments
            mock_anthropic_client.messages.create.assert_called_once()
            sent = mock_anthropic_client.messages.create.call_args.kwargs

            # Should have file attachments plus text query
            content_parts = sent["messages"][0]["content"]
            assert len(content_parts) == 3  # 2 files + 1 text query

            # File attachments come first, in the order they were passed
            for part, expected_id in zip(content_parts, service_file_ids):
                assert part["type"] == "document"
                assert part["source"]["type"] == "file"
                assert part["source"]["file_id"] == expected_id

            # The text query is the last part
            assert content_parts[2]["type"] == "text"
            assert content_parts[2]["text"] == query_text

    @patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"})
    async def test_execute_query_handles_api_error(
        self,
        knowledge_service_config: KnowledgeServiceConfig,
    ) -> None:
        """Test execute_query logs and re-raises API errors unchanged."""
        mock_client = MagicMock()
        mock_client.messages.create = AsyncMock(side_effect=Exception("API Error"))

        with patch(self._CLIENT_TARGET, return_value=mock_client):
            service = anthropic_ks.AnthropicKnowledgeService()

            # Match on the injected message so an unrelated failure (for
            # example a mis-wired mock) cannot satisfy this test.
            with pytest.raises(Exception, match="API Error"):
                await service.execute_query(knowledge_service_config, "Test query")

            mock_client.messages.create.assert_awaited_once()

    @patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"})
    async def test_query_id_generation(
        self,
        knowledge_service_config: KnowledgeServiceConfig,
        mock_anthropic_client: MagicMock,
    ) -> None:
        """Test that query IDs are unique and properly formatted."""
        with patch(self._CLIENT_TARGET, return_value=mock_anthropic_client):
            service = anthropic_ks.AnthropicKnowledgeService()

            # Execute two queries
            result1 = await service.execute_query(
                knowledge_service_config, "First query"
            )
            result2 = await service.execute_query(
                knowledge_service_config, "Second query"
            )

            # Query IDs should be unique and follow expected format
            assert result1.query_id != result2.query_id
            expected_length = len("anthropic_") + 12  # UUID hex[:12]
            for query_id in (result1.query_id, result2.query_id):
                assert query_id.startswith("anthropic_")
                assert len(query_id) == expected_length

    @patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"})
    async def test_empty_service_file_ids(
        self,
        knowledge_service_config: KnowledgeServiceConfig,
        mock_anthropic_client: MagicMock,
    ) -> None:
        """Test execute_query with empty service_file_ids list."""
        with patch(self._CLIENT_TARGET, return_value=mock_anthropic_client):
            service = anthropic_ks.AnthropicKnowledgeService()

            query_text = "What is in the document?"
            result = await service.execute_query(
                knowledge_service_config, query_text, service_file_ids=[]
            )

            # Should behave the same as None
            assert result.result_data["sources"] == []

            # Verify API call structure
            sent = mock_anthropic_client.messages.create.call_args.kwargs
            content_parts = sent["messages"][0]["content"]
            assert len(content_parts) == 1  # Only text query, no files
            assert content_parts[0]["type"] == "text"

    @patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"})
    async def test_execute_query_with_metadata(
        self,
        knowledge_service_config: KnowledgeServiceConfig,
        mock_anthropic_client: MagicMock,
    ) -> None:
        """Test execute_query with query_metadata configuration."""
        with patch(self._CLIENT_TARGET, return_value=mock_anthropic_client):
            service = anthropic_ks.AnthropicKnowledgeService()

            metadata = {
                "model": "claude-opus-4-1-20250805",
                "max_tokens": 2000,
                "temperature": 0.7,
            }

            query_text = "Custom query with metadata"
            result = await service.execute_query(
                knowledge_service_config, query_text, query_metadata=metadata
            )

            # Verify the result uses metadata values
            assert result.result_data["model"] == "claude-opus-4-1-20250805"
            assert result.execution_time_ms is not None
            assert result.execution_time_ms >= 0

            # Verify API call used metadata values
            mock_anthropic_client.messages.create.assert_called_once()
            sent = mock_anthropic_client.messages.create.call_args.kwargs
            assert sent["model"] == "claude-opus-4-1-20250805"
            assert sent["max_tokens"] == 2000
            assert sent["temperature"] == 0.7

    @patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"})
    async def test_execute_query_metadata_defaults(
        self,
        knowledge_service_config: KnowledgeServiceConfig,
        mock_anthropic_client: MagicMock,
    ) -> None:
        """Test execute_query uses default values when metadata is None."""
        with patch(self._CLIENT_TARGET, return_value=mock_anthropic_client):
            service = anthropic_ks.AnthropicKnowledgeService()

            result = await service.execute_query(
                knowledge_service_config, "Test query", query_metadata=None
            )

            # Verify defaults are used
            assert result.result_data["model"] == anthropic_ks_module.DEFAULT_MODEL

            # Verify API call used defaults
            sent = mock_anthropic_client.messages.create.call_args.kwargs
            assert sent["model"] == anthropic_ks_module.DEFAULT_MODEL
            assert sent["max_tokens"] == anthropic_ks_module.DEFAULT_MAX_TOKENS
            assert "temperature" not in sent  # Not set by default
|