julee 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- julee/__init__.py +3 -0
- julee/api/__init__.py +20 -0
- julee/api/app.py +180 -0
- julee/api/dependencies.py +257 -0
- julee/api/requests.py +175 -0
- julee/api/responses.py +43 -0
- julee/api/routers/__init__.py +43 -0
- julee/api/routers/assembly_specifications.py +212 -0
- julee/api/routers/documents.py +182 -0
- julee/api/routers/knowledge_service_configs.py +79 -0
- julee/api/routers/knowledge_service_queries.py +293 -0
- julee/api/routers/system.py +137 -0
- julee/api/routers/workflows.py +234 -0
- julee/api/services/__init__.py +20 -0
- julee/api/services/system_initialization.py +214 -0
- julee/api/tests/__init__.py +14 -0
- julee/api/tests/routers/__init__.py +17 -0
- julee/api/tests/routers/test_assembly_specifications.py +749 -0
- julee/api/tests/routers/test_documents.py +301 -0
- julee/api/tests/routers/test_knowledge_service_configs.py +234 -0
- julee/api/tests/routers/test_knowledge_service_queries.py +738 -0
- julee/api/tests/routers/test_system.py +179 -0
- julee/api/tests/routers/test_workflows.py +393 -0
- julee/api/tests/test_app.py +285 -0
- julee/api/tests/test_dependencies.py +245 -0
- julee/api/tests/test_requests.py +250 -0
- julee/domain/__init__.py +22 -0
- julee/domain/models/__init__.py +49 -0
- julee/domain/models/assembly/__init__.py +17 -0
- julee/domain/models/assembly/assembly.py +103 -0
- julee/domain/models/assembly/tests/__init__.py +0 -0
- julee/domain/models/assembly/tests/factories.py +37 -0
- julee/domain/models/assembly/tests/test_assembly.py +430 -0
- julee/domain/models/assembly_specification/__init__.py +24 -0
- julee/domain/models/assembly_specification/assembly_specification.py +172 -0
- julee/domain/models/assembly_specification/knowledge_service_query.py +123 -0
- julee/domain/models/assembly_specification/tests/__init__.py +0 -0
- julee/domain/models/assembly_specification/tests/factories.py +78 -0
- julee/domain/models/assembly_specification/tests/test_assembly_specification.py +490 -0
- julee/domain/models/assembly_specification/tests/test_knowledge_service_query.py +310 -0
- julee/domain/models/custom_fields/__init__.py +0 -0
- julee/domain/models/custom_fields/content_stream.py +68 -0
- julee/domain/models/custom_fields/tests/__init__.py +0 -0
- julee/domain/models/custom_fields/tests/test_custom_fields.py +53 -0
- julee/domain/models/document/__init__.py +17 -0
- julee/domain/models/document/document.py +150 -0
- julee/domain/models/document/tests/__init__.py +0 -0
- julee/domain/models/document/tests/factories.py +76 -0
- julee/domain/models/document/tests/test_document.py +297 -0
- julee/domain/models/knowledge_service_config/__init__.py +17 -0
- julee/domain/models/knowledge_service_config/knowledge_service_config.py +86 -0
- julee/domain/models/policy/__init__.py +15 -0
- julee/domain/models/policy/document_policy_validation.py +220 -0
- julee/domain/models/policy/policy.py +203 -0
- julee/domain/models/policy/tests/__init__.py +0 -0
- julee/domain/models/policy/tests/factories.py +47 -0
- julee/domain/models/policy/tests/test_document_policy_validation.py +420 -0
- julee/domain/models/policy/tests/test_policy.py +546 -0
- julee/domain/repositories/__init__.py +27 -0
- julee/domain/repositories/assembly.py +45 -0
- julee/domain/repositories/assembly_specification.py +52 -0
- julee/domain/repositories/base.py +146 -0
- julee/domain/repositories/document.py +49 -0
- julee/domain/repositories/document_policy_validation.py +52 -0
- julee/domain/repositories/knowledge_service_config.py +54 -0
- julee/domain/repositories/knowledge_service_query.py +44 -0
- julee/domain/repositories/policy.py +49 -0
- julee/domain/use_cases/__init__.py +17 -0
- julee/domain/use_cases/decorators.py +107 -0
- julee/domain/use_cases/extract_assemble_data.py +649 -0
- julee/domain/use_cases/initialize_system_data.py +842 -0
- julee/domain/use_cases/tests/__init__.py +7 -0
- julee/domain/use_cases/tests/test_extract_assemble_data.py +548 -0
- julee/domain/use_cases/tests/test_initialize_system_data.py +455 -0
- julee/domain/use_cases/tests/test_validate_document.py +1228 -0
- julee/domain/use_cases/validate_document.py +736 -0
- julee/fixtures/assembly_specifications.yaml +70 -0
- julee/fixtures/documents.yaml +178 -0
- julee/fixtures/knowledge_service_configs.yaml +37 -0
- julee/fixtures/knowledge_service_queries.yaml +27 -0
- julee/repositories/__init__.py +17 -0
- julee/repositories/memory/__init__.py +31 -0
- julee/repositories/memory/assembly.py +84 -0
- julee/repositories/memory/assembly_specification.py +125 -0
- julee/repositories/memory/base.py +227 -0
- julee/repositories/memory/document.py +149 -0
- julee/repositories/memory/document_policy_validation.py +104 -0
- julee/repositories/memory/knowledge_service_config.py +123 -0
- julee/repositories/memory/knowledge_service_query.py +120 -0
- julee/repositories/memory/policy.py +87 -0
- julee/repositories/memory/tests/__init__.py +0 -0
- julee/repositories/memory/tests/test_document.py +212 -0
- julee/repositories/memory/tests/test_document_policy_validation.py +161 -0
- julee/repositories/memory/tests/test_policy.py +443 -0
- julee/repositories/minio/__init__.py +31 -0
- julee/repositories/minio/assembly.py +103 -0
- julee/repositories/minio/assembly_specification.py +170 -0
- julee/repositories/minio/client.py +570 -0
- julee/repositories/minio/document.py +530 -0
- julee/repositories/minio/document_policy_validation.py +120 -0
- julee/repositories/minio/knowledge_service_config.py +187 -0
- julee/repositories/minio/knowledge_service_query.py +211 -0
- julee/repositories/minio/policy.py +106 -0
- julee/repositories/minio/tests/__init__.py +0 -0
- julee/repositories/minio/tests/fake_client.py +213 -0
- julee/repositories/minio/tests/test_assembly.py +374 -0
- julee/repositories/minio/tests/test_assembly_specification.py +391 -0
- julee/repositories/minio/tests/test_client_protocol.py +57 -0
- julee/repositories/minio/tests/test_document.py +591 -0
- julee/repositories/minio/tests/test_document_policy_validation.py +192 -0
- julee/repositories/minio/tests/test_knowledge_service_config.py +374 -0
- julee/repositories/minio/tests/test_knowledge_service_query.py +438 -0
- julee/repositories/minio/tests/test_policy.py +559 -0
- julee/repositories/temporal/__init__.py +38 -0
- julee/repositories/temporal/activities.py +114 -0
- julee/repositories/temporal/activity_names.py +34 -0
- julee/repositories/temporal/proxies.py +159 -0
- julee/services/__init__.py +18 -0
- julee/services/knowledge_service/__init__.py +48 -0
- julee/services/knowledge_service/anthropic/__init__.py +12 -0
- julee/services/knowledge_service/anthropic/knowledge_service.py +331 -0
- julee/services/knowledge_service/anthropic/tests/test_knowledge_service.py +318 -0
- julee/services/knowledge_service/factory.py +138 -0
- julee/services/knowledge_service/knowledge_service.py +160 -0
- julee/services/knowledge_service/memory/__init__.py +13 -0
- julee/services/knowledge_service/memory/knowledge_service.py +278 -0
- julee/services/knowledge_service/memory/test_knowledge_service.py +345 -0
- julee/services/knowledge_service/test_factory.py +112 -0
- julee/services/temporal/__init__.py +38 -0
- julee/services/temporal/activities.py +86 -0
- julee/services/temporal/activity_names.py +22 -0
- julee/services/temporal/proxies.py +41 -0
- julee/util/__init__.py +0 -0
- julee/util/domain.py +119 -0
- julee/util/repos/__init__.py +0 -0
- julee/util/repos/minio/__init__.py +0 -0
- julee/util/repos/minio/file_storage.py +213 -0
- julee/util/repos/temporal/__init__.py +11 -0
- julee/util/repos/temporal/client_proxies/file_storage.py +68 -0
- julee/util/repos/temporal/data_converter.py +123 -0
- julee/util/repos/temporal/minio_file_storage.py +12 -0
- julee/util/repos/temporal/proxies/__init__.py +0 -0
- julee/util/repos/temporal/proxies/file_storage.py +58 -0
- julee/util/repositories.py +55 -0
- julee/util/temporal/__init__.py +22 -0
- julee/util/temporal/activities.py +123 -0
- julee/util/temporal/decorators.py +473 -0
- julee/util/tests/__init__.py +1 -0
- julee/util/tests/test_decorators.py +770 -0
- julee/util/validation/__init__.py +29 -0
- julee/util/validation/repository.py +100 -0
- julee/util/validation/type_guards.py +369 -0
- julee/worker.py +211 -0
- julee/workflows/__init__.py +26 -0
- julee/workflows/extract_assemble.py +215 -0
- julee/workflows/validate_document.py +228 -0
- julee-0.1.0.dist-info/METADATA +195 -0
- julee-0.1.0.dist-info/RECORD +161 -0
- julee-0.1.0.dist-info/WHEEL +5 -0
- julee-0.1.0.dist-info/licenses/LICENSE +674 -0
- julee-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Factory function for creating KnowledgeService implementations.
|
|
3
|
+
|
|
4
|
+
This module provides the factory function for creating configured
|
|
5
|
+
KnowledgeService instances based on the service API configuration.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
|
|
10
|
+
from julee.domain.models.knowledge_service_config import (
|
|
11
|
+
KnowledgeServiceConfig,
|
|
12
|
+
ServiceApi,
|
|
13
|
+
)
|
|
14
|
+
from .knowledge_service import KnowledgeService
|
|
15
|
+
from .anthropic import AnthropicKnowledgeService
|
|
16
|
+
from julee.domain.models.document import Document
|
|
17
|
+
from julee.services.knowledge_service import (
|
|
18
|
+
QueryResult,
|
|
19
|
+
FileRegistrationResult,
|
|
20
|
+
)
|
|
21
|
+
from typing import Dict, Any, List, Optional
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class ConfigurableKnowledgeService(KnowledgeService):
    """
    Stateless KnowledgeService that picks its backend on every call.

    Each method hands the supplied config to ``knowledge_service_factory``
    and forwards the call to the implementation that comes back. Because
    the backend is resolved per call, the class takes no constructor
    arguments and keeps no per-service state. It is intended to be
    wrappable by temporal decorators while still satisfying the
    KnowledgeService protocol.
    """

    async def register_file(
        self, config: KnowledgeServiceConfig, document: Document
    ) -> FileRegistrationResult:
        """Register ``document`` with the service selected by ``config``.

        Args:
            config: Configuration used to choose the implementation; it is
                also forwarded to that implementation.
            document: Document to register.

        Returns:
            The FileRegistrationResult produced by the delegate.

        Raises:
            ValueError: Propagated from the factory when the config names
                an unsupported service API.
        """
        backend = knowledge_service_factory(config)
        registration = await backend.register_file(config, document)
        return registration

    async def execute_query(
        self,
        config: KnowledgeServiceConfig,
        query_text: str,
        service_file_ids: Optional[List[str]] = None,
        query_metadata: Optional[Dict[str, Any]] = None,
        assistant_prompt: Optional[str] = None,
    ) -> QueryResult:
        """Run a query on the service selected by ``config``.

        Args:
            config: Configuration used to choose the implementation; it is
                also forwarded to that implementation.
            query_text: Text of the query.
            service_file_ids: Optional service-side file IDs to scope the
                query to.
            query_metadata: Optional service-specific options.
            assistant_prompt: Optional assistant message used to prime the
                response.

        Returns:
            The QueryResult produced by the delegate.

        Raises:
            ValueError: Propagated from the factory when the config names
                an unsupported service API.
        """
        backend = knowledge_service_factory(config)
        # Every argument is forwarded by keyword, unchanged.
        forwarded: Dict[str, Any] = {
            "config": config,
            "query_text": query_text,
            "service_file_ids": service_file_ids,
            "query_metadata": query_metadata,
            "assistant_prompt": assistant_prompt,
        }
        return await backend.execute_query(**forwarded)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def knowledge_service_factory(
    knowledge_service_config: "KnowledgeServiceConfig",
) -> KnowledgeService:
    """Create a configured KnowledgeService instance.

    This factory function takes a KnowledgeServiceConfig domain object
    (containing metadata and service_api information) and returns a
    KnowledgeService implementation matching its ``service_api``. The
    instance is passed through ``ensure_knowledge_service`` before it is
    returned.

    Args:
        knowledge_service_config: KnowledgeServiceConfig domain object with
            configuration and API information

    Returns:
        Configured KnowledgeService implementation ready for external
        operations

    Raises:
        ValueError: If the service_api is not supported

    Example:
        >>> from julee.domain import KnowledgeServiceConfig
        >>> from julee.domain.models.knowledge_service_config import (
        ...     ServiceApi
        ... )
        >>> config = KnowledgeServiceConfig(
        ...     knowledge_service_id="ks-123",
        ...     name="My Anthropic Service",
        ...     description="Anthropic-powered document analysis",
        ...     service_api=ServiceApi.ANTHROPIC
        ... )
        >>> service = knowledge_service_factory(config)
        >>> result = await service.register_file(config, document)
    """
    # Built once and shared by both log records below.
    log_context = {
        "knowledge_service_id": knowledge_service_config.knowledge_service_id,
        "service_api": knowledge_service_config.service_api.value,
    }
    logger.debug("Creating KnowledgeService via factory", extra=log_context)

    # Route to appropriate implementation based on service_api
    service: KnowledgeService
    if knowledge_service_config.service_api == ServiceApi.ANTHROPIC:
        service = AnthropicKnowledgeService()
    else:
        raise ValueError(
            f"Unsupported service API: {knowledge_service_config.service_api}"
        )

    # Validate that the service satisfies the protocol.
    # NOTE(review): the import is function-local, presumably to avoid a
    # circular import with the package __init__ - confirm before moving it.
    from . import ensure_knowledge_service

    validated_service = ensure_knowledge_service(service)

    logger.info(
        "KnowledgeService created successfully",
        extra={
            **log_context,
            "implementation": type(validated_service).__name__,
        },
    )

    return validated_service
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
# Export both the factory function and the configurable class
|
|
135
|
+
__all__ = [
|
|
136
|
+
"knowledge_service_factory",
|
|
137
|
+
"ConfigurableKnowledgeService",
|
|
138
|
+
]
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""
|
|
2
|
+
KnowledgeService protocol for external service operations in the Capture,
|
|
3
|
+
Extract, Assemble, Publish workflow.
|
|
4
|
+
|
|
5
|
+
This module defines the KnowledgeService protocol that handles interactions
|
|
6
|
+
with external knowledge services, including document registration and query
|
|
7
|
+
execution. This protocol is separate from the repository layer which only
|
|
8
|
+
handles local metadata persistence.
|
|
9
|
+
|
|
10
|
+
Concrete implementations of this protocol are provided for different external
|
|
11
|
+
services (Anthropic, OpenAI, etc.) and are created via factory functions.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from typing import (
|
|
15
|
+
Protocol,
|
|
16
|
+
Optional,
|
|
17
|
+
List,
|
|
18
|
+
runtime_checkable,
|
|
19
|
+
Dict,
|
|
20
|
+
Any,
|
|
21
|
+
TYPE_CHECKING,
|
|
22
|
+
)
|
|
23
|
+
from datetime import datetime, timezone
|
|
24
|
+
from pydantic import BaseModel, Field
|
|
25
|
+
|
|
26
|
+
if TYPE_CHECKING:
|
|
27
|
+
from julee.domain.models.knowledge_service_config import (
|
|
28
|
+
KnowledgeServiceConfig,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
from julee.domain.models.document import Document
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class QueryResult(BaseModel):
    """Outcome of running one query against a knowledge service.

    Only ``query_id`` and ``query_text`` are required; the remaining
    fields fall back to the defaults declared below.
    """

    # Required identification of the execution and the text that was run.
    query_id: str = Field(description="Unique identifier for this query execution")
    query_text: str = Field(description="The original query text that was executed")

    # Structured payload; each instance gets its own empty dict by default.
    result_data: Dict[str, Any] = Field(
        description="The structured result data from the query",
        default_factory=dict,
    )

    # Duration in milliseconds; None when no duration is supplied.
    execution_time_ms: Optional[int] = Field(
        description="Time taken to execute the query in milliseconds",
        default=None,
    )

    # Defaults to the current UTC time (timezone-aware) at model creation.
    created_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class FileRegistrationResult(BaseModel):
    """Outcome of registering one document with a knowledge service.

    Pairs our own document ID with the file ID handed back by the service;
    the metadata and creation time fall back to the defaults declared below.
    """

    # Our identifier for the document that was registered.
    document_id: str = Field(description="The original document ID from our system")

    # The service-side identifier, used when scoping later queries.
    knowledge_service_file_id: str = Field(
        description="The file identifier assigned by the knowledge service"
    )

    # Free-form details; each instance gets its own empty dict by default.
    registration_metadata: Dict[str, Any] = Field(
        description="Additional metadata from the registration process",
        default_factory=dict,
    )

    # Defaults to the current UTC time (timezone-aware) at model creation.
    created_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@runtime_checkable
class KnowledgeService(Protocol):
    """
    Structural interface for talking to an external knowledge service.

    Covers the external operations that were moved out of the repository
    layer: registering documents and executing queries. Each concrete
    implementation deals with the details of one service API (Anthropic,
    OpenAI, etc.).
    """

    async def register_file(
        self, config: "KnowledgeServiceConfig", document: Document
    ) -> FileRegistrationResult:
        """Make a document known to the external knowledge service.

        Registration lets the service analyze and index the document's
        content so that later queries can refer to it.

        Args:
            config: KnowledgeServiceConfig identifying the service to use
            document: Document domain object to register

        Returns:
            FileRegistrationResult carrying the registration details,
            including the file identifier assigned by the service

        Implementation Notes:
            - Must be idempotent: registering the same document again
              returns the same result
            - Should cope gracefully when the service is unavailable
            - Must return the service's internal file ID, which later
              queries need
            - Reads the document content directly from the Document object
            - Should handle a variety of document formats and sizes

        Workflow Context:
            In Temporal workflows this method is implemented as an
            activity, so registration results are durably stored and stay
            consistent across workflow replays.
        """
        ...

    async def execute_query(
        self,
        config: "KnowledgeServiceConfig",
        query_text: str,
        service_file_ids: Optional[List[str]] = None,
        query_metadata: Optional[Dict[str, Any]] = None,
        assistant_prompt: Optional[str] = None,
    ) -> QueryResult:
        """Run a text query against the external knowledge service.

        The query may optionally be scoped to specific documents that were
        registered with the service beforehand.

        Args:
            config: KnowledgeServiceConfig identifying the service to use
            query_text: The query to run (natural language or structured)
            service_file_ids: Optional list of service file IDs supplied
                              as context. These are the IDs returned by
                              register_file and give the service access
                              to those specific documents.
            query_metadata: Optional service-specific metadata and
                            configuration, such as model selection,
                            temperature, max_tokens, etc. Its structure
                            depends on the knowledge service in use.
            assistant_prompt: Optional assistant message content that
                              constrains or primes the response. It is
                              added as the final assistant message before
                              the model generates its answer, which gives
                              control over response format and structure.

        Returns:
            QueryResult holding the query results and execution metadata

        Implementation Notes:
            - Must be idempotent: the same query returns consistent
              results
            - Service file IDs are context that enriches the response
            - Should cope gracefully when the service is unavailable
            - Results should be structured as domain objects
            - Should record execution time and metadata
            - Must accept various query formats (natural language,
              structured, etc.)
            - Should check that service_file_ids exist in the service
              before adding them to the query context

        Workflow Context:
            In Temporal workflows this method is implemented as an
            activity, so query results are durably stored and replay
            consistently.
        """
        ...
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Memory-based implementation of KnowledgeService for testing and development.
|
|
3
|
+
|
|
4
|
+
This module provides an in-memory implementation of the KnowledgeService
|
|
5
|
+
protocol that stores file registrations in memory and returns configurable
|
|
6
|
+
canned responses for queries.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from .knowledge_service import MemoryKnowledgeService
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"MemoryKnowledgeService",
|
|
13
|
+
]
|
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Memory implementation of KnowledgeService for testing and development.
|
|
3
|
+
|
|
4
|
+
This module provides an in-memory implementation of the KnowledgeService
|
|
5
|
+
protocol that stores file registrations in a dictionary and returns
|
|
6
|
+
configurable canned query responses. Useful for testing and development
|
|
7
|
+
scenarios where external service dependencies should be avoided.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from typing import Optional, List, Dict, Deque, Any
|
|
12
|
+
from datetime import datetime, timezone
|
|
13
|
+
from collections import deque
|
|
14
|
+
|
|
15
|
+
from julee.domain.models.knowledge_service_config import (
|
|
16
|
+
KnowledgeServiceConfig,
|
|
17
|
+
)
|
|
18
|
+
from julee.domain.models.document import Document
|
|
19
|
+
from ..knowledge_service import (
|
|
20
|
+
KnowledgeService,
|
|
21
|
+
QueryResult,
|
|
22
|
+
FileRegistrationResult,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class MemoryKnowledgeService(KnowledgeService):
    """
    In-memory implementation of the KnowledgeService protocol.

    File registrations are kept in a dictionary keyed by
    knowledge_service_file_id. Query results are served, first in first
    out, from a queue of canned responses that the caller loads in advance.

    Useful for testing and development scenarios where you want to avoid
    external service dependencies while still exercising the full
    knowledge service workflow.
    """

    def __init__(
        self,
        config: KnowledgeServiceConfig,
    ) -> None:
        """Initialize memory knowledge service with configuration.

        Args:
            config: KnowledgeServiceConfig domain object containing metadata
                and service configuration
        """
        logger.debug(
            "Initializing MemoryKnowledgeService",
            extra={
                "knowledge_service_id": config.knowledge_service_id,
                "service_name": config.name,
            },
        )

        self.config = config

        # Storage for file registrations, keyed by knowledge_service_file_id
        self._registered_files: Dict[str, FileRegistrationResult] = {}

        # Queue of canned query results to return
        self._canned_query_results: Deque[QueryResult] = deque()

    def add_canned_query_result(self, query_result: QueryResult) -> None:
        """Add a canned query result to be returned by execute_query.

        Args:
            query_result: QueryResult to return from a future execute_query
                call; it is appended to the end of the queue
        """
        logger.debug(
            "Adding canned query result",
            extra={
                "knowledge_service_id": self.config.knowledge_service_id,
                "query_id": query_result.query_id,
            },
        )
        self._canned_query_results.append(query_result)

    def add_canned_query_results(self, query_results: List[QueryResult]) -> None:
        """Add multiple canned query results to be returned by execute_query.

        Args:
            query_results: List of QueryResult objects to return from future
                execute_query calls; they are queued in list order
        """
        logger.debug(
            "Adding multiple canned query results",
            extra={
                "knowledge_service_id": self.config.knowledge_service_id,
                "count": len(query_results),
            },
        )
        self._canned_query_results.extend(query_results)

    def clear_canned_query_results(self) -> None:
        """Clear all canned query results."""
        logger.debug(
            "Clearing canned query results",
            extra={
                "knowledge_service_id": self.config.knowledge_service_id,
                # Logged before clearing, so this is the number discarded.
                "count": len(self._canned_query_results),
            },
        )
        self._canned_query_results.clear()

    def get_registered_file(
        self, knowledge_service_file_id: str
    ) -> Optional[FileRegistrationResult]:
        """Get a registered file by its knowledge service file ID.

        Args:
            knowledge_service_file_id: The file ID assigned by this service

        Returns:
            FileRegistrationResult if found, None otherwise
        """
        return self._registered_files.get(knowledge_service_file_id)

    def get_all_registered_files(self) -> Dict[str, FileRegistrationResult]:
        """Get all registered files.

        Returns:
            A shallow copy of the dictionary mapping
            knowledge_service_file_id to FileRegistrationResult, so adding
            or removing keys on the returned dict does not affect the
            service's own storage
        """
        return self._registered_files.copy()

    async def register_file(
        self, config: KnowledgeServiceConfig, document: Document
    ) -> FileRegistrationResult:
        """Register a document file by storing metadata in memory.

        A document whose document_id is already registered gets the stored
        result back unchanged, so repeated registration is idempotent.

        Args:
            config: KnowledgeServiceConfig for this operation
            document: Document domain object to register

        Returns:
            FileRegistrationResult with memory-specific details
        """
        logger.debug(
            "Registering file with memory service",
            extra={
                "knowledge_service_id": config.knowledge_service_id,
                "document_id": document.document_id,
            },
        )

        # Check if already registered. Storage is keyed by file ID, so
        # finding a document ID means scanning the stored results.
        existing_result = next(
            (
                stored
                for stored in self._registered_files.values()
                if stored.document_id == document.document_id
            ),
            None,
        )
        if existing_result is not None:
            logger.debug(
                "Document already registered, returning existing result",
                extra={
                    "knowledge_service_id": (config.knowledge_service_id),
                    "document_id": document.document_id,
                    "knowledge_service_file_id": (
                        existing_result.knowledge_service_file_id
                    ),
                },
            )
            return existing_result

        # One timezone-aware clock reading feeds the file ID, the metadata
        # timestamp and created_at. A naive local datetime would make the
        # epoch conversion ambiguous around DST transitions, and separate
        # readings could disagree with each other.
        registered_at = datetime.now(timezone.utc)
        timestamp = int(registered_at.timestamp())

        # Generate a unique file ID for this service
        memory_file_id = f"memory_{document.document_id}_{timestamp}"

        # Create registration result
        result = FileRegistrationResult(
            document_id=document.document_id,
            knowledge_service_file_id=memory_file_id,
            registration_metadata={
                "service": "memory",
                "registered_via": "in_memory_storage",
                "timestamp": timestamp,
                "knowledge_service_id": config.knowledge_service_id,
                "filename": document.original_filename,
                "content_type": document.content_type,
                "size_bytes": document.size_bytes,
            },
            created_at=registered_at,
        )

        # Store in memory dictionary keyed by knowledge_service_file_id
        self._registered_files[memory_file_id] = result

        logger.info(
            "File registered with MemoryKnowledgeService",
            extra={
                "knowledge_service_id": config.knowledge_service_id,
                "document_id": document.document_id,
                "knowledge_service_file_id": memory_file_id,
                "total_registered": len(self._registered_files),
            },
        )

        return result

    async def execute_query(
        self,
        config: KnowledgeServiceConfig,
        query_text: str,
        service_file_ids: Optional[List[str]] = None,
        query_metadata: Optional[Dict[str, Any]] = None,
        assistant_prompt: Optional[str] = None,
    ) -> QueryResult:
        """Execute a query by returning a canned response.

        The oldest queued canned result is consumed and returned as a new
        QueryResult that keeps its query_id, execution_time_ms and
        result_data, but carries the actual query_text, a fresh created_at,
        and three extra result_data keys: "queried_documents", "service"
        and "knowledge_service_id".

        Args:
            config: KnowledgeServiceConfig for this operation
            query_text: The query to execute
            service_file_ids: Optional list of service file IDs for query;
                echoed back under "queried_documents"
            query_metadata: Optional service-specific metadata (ignored in
                memory implementation)
            assistant_prompt: Optional assistant message content (ignored in
                memory implementation)

        Returns:
            QueryResult from the queue of canned responses

        Raises:
            ValueError: If no canned query results are available
        """
        logger.debug(
            "Executing query with MemoryKnowledgeService",
            extra={
                "knowledge_service_id": config.knowledge_service_id,
                "query_text": query_text,
                "document_count": (len(service_file_ids) if service_file_ids else 0),
            },
        )

        # Check if we have canned results available
        if not self._canned_query_results:
            error_msg = (
                "No canned query results available. Use "
                "add_canned_query_result() to configure responses."
            )
            logger.error(
                error_msg,
                extra={
                    "knowledge_service_id": config.knowledge_service_id,
                    "query_text": query_text,
                },
            )
            raise ValueError(error_msg)

        # Pop and return the next canned result
        result = self._canned_query_results.popleft()

        # Update the result to reflect the actual query parameters. The
        # keys added here override same-named keys in the canned data.
        updated_result = QueryResult(
            query_id=result.query_id,
            query_text=query_text,  # Use actual query text
            result_data={
                **result.result_data,
                "queried_documents": service_file_ids or [],
                "service": "memory",
                "knowledge_service_id": config.knowledge_service_id,
            },
            execution_time_ms=result.execution_time_ms,
            created_at=datetime.now(timezone.utc),
        )

        logger.info(
            "Query executed with MemoryKnowledgeService",
            extra={
                "knowledge_service_id": config.knowledge_service_id,
                "query_id": updated_result.query_id,
                "execution_time_ms": updated_result.execution_time_ms,
                "remaining_canned_results": len(self._canned_query_results),
            },
        )

        return updated_result
|