cognee 0.5.0__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- cognee/api/client.py +5 -1
- cognee/api/v1/add/add.py +1 -2
- cognee/api/v1/cognify/code_graph_pipeline.py +119 -0
- cognee/api/v1/cognify/cognify.py +16 -24
- cognee/api/v1/cognify/routers/__init__.py +1 -0
- cognee/api/v1/cognify/routers/get_code_pipeline_router.py +90 -0
- cognee/api/v1/cognify/routers/get_cognify_router.py +1 -3
- cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
- cognee/api/v1/ontologies/ontologies.py +37 -12
- cognee/api/v1/ontologies/routers/get_ontology_router.py +25 -27
- cognee/api/v1/search/search.py +0 -4
- cognee/api/v1/ui/ui.py +68 -38
- cognee/context_global_variables.py +16 -61
- cognee/eval_framework/answer_generation/answer_generation_executor.py +0 -10
- cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
- cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +2 -0
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
- cognee/eval_framework/eval_config.py +2 -2
- cognee/eval_framework/modal_run_eval.py +28 -16
- cognee/infrastructure/databases/graph/config.py +0 -3
- cognee/infrastructure/databases/graph/get_graph_engine.py +0 -1
- cognee/infrastructure/databases/graph/graph_db_interface.py +0 -15
- cognee/infrastructure/databases/graph/kuzu/adapter.py +0 -228
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +1 -80
- cognee/infrastructure/databases/utils/__init__.py +0 -3
- cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +48 -62
- cognee/infrastructure/databases/vector/config.py +0 -2
- cognee/infrastructure/databases/vector/create_vector_engine.py +0 -1
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +6 -8
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +7 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +10 -11
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +544 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +0 -35
- cognee/infrastructure/files/storage/s3_config.py +0 -2
- cognee/infrastructure/llm/LLMGateway.py +2 -5
- cognee/infrastructure/llm/config.py +0 -35
- cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +8 -23
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +16 -17
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +37 -40
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +36 -39
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +1 -19
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +9 -11
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +21 -23
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +34 -42
- cognee/modules/cognify/config.py +0 -2
- cognee/modules/data/deletion/prune_system.py +2 -52
- cognee/modules/data/methods/delete_dataset.py +0 -26
- cognee/modules/engine/models/__init__.py +0 -1
- cognee/modules/graph/cognee_graph/CogneeGraph.py +37 -85
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +3 -8
- cognee/modules/memify/memify.py +7 -1
- cognee/modules/pipelines/operations/pipeline.py +2 -18
- cognee/modules/retrieval/__init__.py +1 -1
- cognee/modules/retrieval/code_retriever.py +232 -0
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -4
- cognee/modules/retrieval/graph_completion_cot_retriever.py +0 -4
- cognee/modules/retrieval/graph_completion_retriever.py +0 -10
- cognee/modules/retrieval/graph_summary_completion_retriever.py +0 -4
- cognee/modules/retrieval/temporal_retriever.py +0 -4
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +10 -42
- cognee/modules/run_custom_pipeline/run_custom_pipeline.py +1 -8
- cognee/modules/search/methods/get_search_type_tools.py +8 -54
- cognee/modules/search/methods/no_access_control_search.py +0 -4
- cognee/modules/search/methods/search.py +0 -21
- cognee/modules/search/types/SearchType.py +1 -1
- cognee/modules/settings/get_settings.py +0 -19
- cognee/modules/users/methods/get_authenticated_user.py +2 -2
- cognee/modules/users/models/DatasetDatabase.py +3 -15
- cognee/shared/logging_utils.py +0 -4
- cognee/tasks/code/enrich_dependency_graph_checker.py +35 -0
- cognee/tasks/code/get_local_dependencies_checker.py +20 -0
- cognee/tasks/code/get_repo_dependency_graph_checker.py +35 -0
- cognee/tasks/documents/__init__.py +1 -0
- cognee/tasks/documents/check_permissions_on_dataset.py +26 -0
- cognee/tasks/graph/extract_graph_from_data.py +10 -9
- cognee/tasks/repo_processor/__init__.py +2 -0
- cognee/tasks/repo_processor/get_local_dependencies.py +335 -0
- cognee/tasks/repo_processor/get_non_code_files.py +158 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +243 -0
- cognee/tasks/storage/add_data_points.py +2 -142
- cognee/tests/test_cognee_server_start.py +4 -2
- cognee/tests/test_conversation_history.py +1 -23
- cognee/tests/test_delete_bmw_example.py +60 -0
- cognee/tests/test_search_db.py +1 -37
- cognee/tests/unit/api/test_ontology_endpoint.py +89 -77
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +7 -3
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +0 -406
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/METADATA +89 -76
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/RECORD +97 -118
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/WHEEL +1 -1
- cognee/api/v1/ui/node_setup.py +0 -360
- cognee/api/v1/ui/npm_utils.py +0 -50
- cognee/eval_framework/Dockerfile +0 -29
- cognee/infrastructure/databases/dataset_database_handler/__init__.py +0 -3
- cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +0 -80
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +0 -18
- cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +0 -81
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +0 -168
- cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +0 -30
- cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +0 -50
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +0 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +0 -153
- cognee/memify_pipelines/create_triplet_embeddings.py +0 -53
- cognee/modules/engine/models/Triplet.py +0 -9
- cognee/modules/retrieval/register_retriever.py +0 -10
- cognee/modules/retrieval/registered_community_retrievers.py +0 -1
- cognee/modules/retrieval/triplet_retriever.py +0 -182
- cognee/shared/rate_limiting.py +0 -30
- cognee/tasks/memify/get_triplet_datapoints.py +0 -289
- cognee/tests/integration/retrieval/test_triplet_retriever.py +0 -84
- cognee/tests/integration/tasks/test_add_data_points.py +0 -139
- cognee/tests/integration/tasks/test_get_triplet_datapoints.py +0 -69
- cognee/tests/test_dataset_database_handler.py +0 -137
- cognee/tests/test_dataset_delete.py +0 -76
- cognee/tests/test_edge_centered_payload.py +0 -170
- cognee/tests/test_pipeline_cache.py +0 -164
- cognee/tests/unit/infrastructure/llm/test_llm_config.py +0 -46
- cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +0 -214
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +0 -608
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +0 -83
- cognee/tests/unit/tasks/storage/test_add_data_points.py +0 -288
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py
DELETED

@@ -1,168 +0,0 @@
-import os
-import asyncio
-import requests
-import base64
-import hashlib
-from uuid import UUID
-from typing import Optional
-from cryptography.fernet import Fernet
-
-from cognee.infrastructure.databases.graph import get_graph_config
-from cognee.modules.users.models import User, DatasetDatabase
-from cognee.infrastructure.databases.dataset_database_handler import DatasetDatabaseHandlerInterface
-
-
-class Neo4jAuraDevDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
-    """
-    Handler for a quick development PoC integration of Cognee multi-user and permission mode with Neo4j Aura databases.
-    This handler creates a new Neo4j Aura instance for each Cognee dataset created.
-
-    Improvements needed to be production ready:
-    - Secret management for client credentials, currently secrets are encrypted and stored in the Cognee relational database,
-      a secret manager or a similar system should be used instead.
-
-    Quality of life improvements:
-    - Allow configuration of different Neo4j Aura plans and regions.
-    - Requests should be made async, currently a blocking requests library is used.
-    """
-
-    @classmethod
-    async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict:
-        """
-        Create a new Neo4j Aura instance for the dataset. Return connection info that will be mapped to the dataset.
-
-        Args:
-            dataset_id: Dataset UUID
-            user: User object who owns the dataset and is making the request
-
-        Returns:
-            dict: Connection details for the created Neo4j instance
-
-        """
-        graph_config = get_graph_config()
-
-        if graph_config.graph_database_provider != "neo4j":
-            raise ValueError(
-                "Neo4jAuraDevDatasetDatabaseHandler can only be used with Neo4j graph database provider."
-            )
-
-        graph_db_name = f"{dataset_id}"
-
-        # Client credentials and encryption
-        client_id = os.environ.get("NEO4J_CLIENT_ID", None)
-        client_secret = os.environ.get("NEO4J_CLIENT_SECRET", None)
-        tenant_id = os.environ.get("NEO4J_TENANT_ID", None)
-        encryption_env_key = os.environ.get("NEO4J_ENCRYPTION_KEY", "test_key")
-        encryption_key = base64.urlsafe_b64encode(
-            hashlib.sha256(encryption_env_key.encode()).digest()
-        )
-        cipher = Fernet(encryption_key)
-
-        if client_id is None or client_secret is None or tenant_id is None:
-            raise ValueError(
-                "NEO4J_CLIENT_ID, NEO4J_CLIENT_SECRET, and NEO4J_TENANT_ID environment variables must be set to use Neo4j Aura DatasetDatabase Handling."
-            )
-
-        # Make the request with HTTP Basic Auth
-        def get_aura_token(client_id: str, client_secret: str) -> dict:
-            url = "https://api.neo4j.io/oauth/token"
-            data = {"grant_type": "client_credentials"}  # sent as application/x-www-form-urlencoded
-
-            resp = requests.post(url, data=data, auth=(client_id, client_secret))
-            resp.raise_for_status()  # raises if the request failed
-            return resp.json()
-
-        resp = get_aura_token(client_id, client_secret)
-
-        url = "https://api.neo4j.io/v1/instances"
-
-        headers = {
-            "accept": "application/json",
-            "Authorization": f"Bearer {resp['access_token']}",
-            "Content-Type": "application/json",
-        }
-
-        # TODO: Maybe we can allow **kwargs parameter forwarding for cases like these
-        # Too allow different configurations between datasets
-        payload = {
-            "version": "5",
-            "region": "europe-west1",
-            "memory": "1GB",
-            "name": graph_db_name[
-                0:29
-            ],  # TODO: Find better name to name Neo4j instance within 30 character limit
-            "type": "professional-db",
-            "tenant_id": tenant_id,
-            "cloud_provider": "gcp",
-        }
-
-        response = requests.post(url, headers=headers, json=payload)
-
-        graph_db_name = "neo4j"  # Has to be 'neo4j' for Aura
-        graph_db_url = response.json()["data"]["connection_url"]
-        graph_db_key = resp["access_token"]
-        graph_db_username = response.json()["data"]["username"]
-        graph_db_password = response.json()["data"]["password"]
-
-        async def _wait_for_neo4j_instance_provisioning(instance_id: str, headers: dict):
-            # Poll until the instance is running
-            status_url = f"https://api.neo4j.io/v1/instances/{instance_id}"
-            status = ""
-            for attempt in range(30):  # Try for up to ~5 minutes
-                status_resp = requests.get(
-                    status_url, headers=headers
-                )  # TODO: Use async requests with httpx
-                status = status_resp.json()["data"]["status"]
-                if status.lower() == "running":
-                    return
-                await asyncio.sleep(10)
-            raise TimeoutError(
-                f"Neo4j instance '{graph_db_name}' did not become ready within 5 minutes. Status: {status}"
-            )
-
-        instance_id = response.json()["data"]["id"]
-        await _wait_for_neo4j_instance_provisioning(instance_id, headers)
-
-        encrypted_db_password_bytes = cipher.encrypt(graph_db_password.encode())
-        encrypted_db_password_string = encrypted_db_password_bytes.decode()
-
-        return {
-            "graph_database_name": graph_db_name,
-            "graph_database_url": graph_db_url,
-            "graph_database_provider": "neo4j",
-            "graph_database_key": graph_db_key,
-            "graph_dataset_database_handler": "neo4j_aura_dev",
-            "graph_database_connection_info": {
-                "graph_database_username": graph_db_username,
-                "graph_database_password": encrypted_db_password_string,
-            },
-        }
-
-    @classmethod
-    async def resolve_dataset_connection_info(
-        cls, dataset_database: DatasetDatabase
-    ) -> DatasetDatabase:
-        """
-        Resolve and decrypt connection info for the Neo4j dataset database.
-        In this case, decrypt the password stored in the database.
-
-        Args:
-            dataset_database: DatasetDatabase instance containing encrypted connection info.
-        """
-        encryption_env_key = os.environ.get("NEO4J_ENCRYPTION_KEY", "test_key")
-        encryption_key = base64.urlsafe_b64encode(
-            hashlib.sha256(encryption_env_key.encode()).digest()
-        )
-        cipher = Fernet(encryption_key)
-        graph_db_password = cipher.decrypt(
-            dataset_database.graph_database_connection_info["graph_database_password"].encode()
-        ).decode()
-
-        dataset_database.graph_database_connection_info["graph_database_password"] = (
-            graph_db_password
-        )
-        return dataset_database
-
-    @classmethod
-    async def delete_dataset(cls, dataset_database: DatasetDatabase):
-        pass
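For reference, the encryption scheme the deleted handler relies on is a standard pattern: derive a 32-byte Fernet key by SHA-256-hashing a secret string and URL-safe base64-encoding the digest. A minimal self-contained sketch of the pattern follows; the secret literal is illustrative (the handler read NEO4J_ENCRYPTION_KEY from the environment):

import base64
import hashlib

from cryptography.fernet import Fernet

# Fernet requires a 32-byte key in URL-safe base64 form;
# sha256 guarantees the 32-byte length for any input secret.
secret = "example-secret"  # illustrative stand-in for NEO4J_ENCRYPTION_KEY
key = base64.urlsafe_b64encode(hashlib.sha256(secret.encode()).digest())
cipher = Fernet(key)

token = cipher.encrypt(b"database-password")  # stored form
assert cipher.decrypt(token) == b"database-password"  # recovered on read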
cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py
DELETED

@@ -1,10 +0,0 @@
-from cognee.modules.users.models.DatasetDatabase import DatasetDatabase
-
-
-def get_graph_dataset_database_handler(dataset_database: DatasetDatabase) -> dict:
-    from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
-        supported_dataset_database_handlers,
-    )
-
-    handler = supported_dataset_database_handlers[dataset_database.graph_dataset_database_handler]
-    return handler
cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py
DELETED

@@ -1,10 +0,0 @@
-from cognee.modules.users.models.DatasetDatabase import DatasetDatabase
-
-
-def get_vector_dataset_database_handler(dataset_database: DatasetDatabase) -> dict:
-    from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
-        supported_dataset_database_handlers,
-    )
-
-    handler = supported_dataset_database_handlers[dataset_database.vector_dataset_database_handler]
-    return handler
cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py
DELETED

@@ -1,30 +0,0 @@
-from cognee.infrastructure.databases.utils.get_graph_dataset_database_handler import (
-    get_graph_dataset_database_handler,
-)
-from cognee.infrastructure.databases.utils.get_vector_dataset_database_handler import (
-    get_vector_dataset_database_handler,
-)
-from cognee.modules.users.models.DatasetDatabase import DatasetDatabase
-
-
-async def resolve_dataset_database_connection_info(
-    dataset_database: DatasetDatabase,
-) -> DatasetDatabase:
-    """
-    Resolve the connection info for the given DatasetDatabase instance.
-    Resolve both vector and graph database connection info and return the updated DatasetDatabase instance.
-
-    Args:
-        dataset_database: DatasetDatabase instance
-    Returns:
-        DatasetDatabase instance with resolved connection info
-    """
-    vector_dataset_database_handler = get_vector_dataset_database_handler(dataset_database)
-    graph_dataset_database_handler = get_graph_dataset_database_handler(dataset_database)
-    dataset_database = await vector_dataset_database_handler[
-        "handler_instance"
-    ].resolve_dataset_connection_info(dataset_database)
-    dataset_database = await graph_dataset_database_handler[
-        "handler_instance"
-    ].resolve_dataset_connection_info(dataset_database)
-    return dataset_database
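The two lookup helpers above, together with the handler names returned by the handlers in this diff ("lancedb", "neo4j_aura_dev"), imply the shape of the also-removed supported_dataset_database_handlers registry: a dict keyed by handler name whose values expose a handler_instance. A hedged, self-contained reconstruction for illustration only (the stub class is hypothetical):

class _StubHandler:
    """Hypothetical stand-in for a DatasetDatabaseHandlerInterface implementation."""

    @classmethod
    async def resolve_dataset_connection_info(cls, dataset_database):
        return dataset_database


# Inferred shape only; the real mapping lived in
# supported_dataset_database_handlers.py, also deleted in this diff.
supported_dataset_database_handlers = {
    "lancedb": {"handler_instance": _StubHandler},
    "neo4j_aura_dev": {"handler_instance": _StubHandler},
}

handler = supported_dataset_database_handlers["lancedb"]
# resolve_dataset_database_connection_info then awaited:
#   await handler["handler_instance"].resolve_dataset_connection_info(dataset_database)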
cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py
DELETED

@@ -1,50 +0,0 @@
-import os
-from uuid import UUID
-from typing import Optional
-
-from cognee.infrastructure.databases.vector.create_vector_engine import create_vector_engine
-from cognee.modules.users.models import User
-from cognee.modules.users.models import DatasetDatabase
-from cognee.base_config import get_base_config
-from cognee.infrastructure.databases.vector import get_vectordb_config
-from cognee.infrastructure.databases.dataset_database_handler import DatasetDatabaseHandlerInterface
-
-
-class LanceDBDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
-    """
-    Handler for interacting with LanceDB Dataset databases.
-    """
-
-    @classmethod
-    async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict:
-        vector_config = get_vectordb_config()
-        base_config = get_base_config()
-
-        if vector_config.vector_db_provider != "lancedb":
-            raise ValueError(
-                "LanceDBDatasetDatabaseHandler can only be used with LanceDB vector database provider."
-            )
-
-        databases_directory_path = os.path.join(
-            base_config.system_root_directory, "databases", str(user.id)
-        )
-
-        vector_db_name = f"{dataset_id}.lance.db"
-
-        return {
-            "vector_database_provider": vector_config.vector_db_provider,
-            "vector_database_url": os.path.join(databases_directory_path, vector_db_name),
-            "vector_database_key": vector_config.vector_db_key,
-            "vector_database_name": vector_db_name,
-            "vector_dataset_database_handler": "lancedb",
-        }
-
-    @classmethod
-    async def delete_dataset(cls, dataset_database: DatasetDatabase):
-        vector_engine = create_vector_engine(
-            vector_db_provider=dataset_database.vector_database_provider,
-            vector_db_url=dataset_database.vector_database_url,
-            vector_db_key=dataset_database.vector_database_key,
-            vector_db_name=dataset_database.vector_database_name,
-        )
-        await vector_engine.prune()
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py
DELETED

@@ -1,153 +0,0 @@
-import litellm
-import instructor
-from typing import Type
-from pydantic import BaseModel
-from litellm.exceptions import ContentPolicyViolationError
-from instructor.exceptions import InstructorRetryException
-
-from cognee.infrastructure.llm.LLMGateway import LLMGateway
-from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
-    LLMInterface,
-)
-from cognee.infrastructure.llm.exceptions import (
-    ContentPolicyFilterError,
-    MissingSystemPromptPathError,
-)
-from cognee.infrastructure.files.storage.s3_config import get_s3_config
-from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
-    rate_limit_async,
-    rate_limit_sync,
-    sleep_and_retry_async,
-    sleep_and_retry_sync,
-)
-from cognee.modules.observability.get_observe import get_observe
-
-observe = get_observe()
-
-
-class BedrockAdapter(LLMInterface):
-    """
-    Adapter for AWS Bedrock API with support for three authentication methods:
-    1. API Key (Bearer Token)
-    2. AWS Credentials (access key + secret key)
-    3. AWS Profile (boto3 credential chain)
-    """
-
-    name = "Bedrock"
-    model: str
-    api_key: str
-    default_instructor_mode = "json_schema_mode"
-
-    MAX_RETRIES = 5
-
-    def __init__(
-        self,
-        model: str,
-        api_key: str = None,
-        max_completion_tokens: int = 16384,
-        streaming: bool = False,
-        instructor_mode: str = None,
-    ):
-        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
-
-        self.aclient = instructor.from_litellm(
-            litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
-        )
-        self.client = instructor.from_litellm(litellm.completion)
-        self.model = model
-        self.api_key = api_key
-        self.max_completion_tokens = max_completion_tokens
-        self.streaming = streaming
-
-    def _create_bedrock_request(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
-    ) -> dict:
-        """Create Bedrock request with authentication."""
-
-        request_params = {
-            "model": self.model,
-            "custom_llm_provider": "bedrock",
-            "drop_params": True,
-            "messages": [
-                {"role": "user", "content": text_input},
-                {"role": "system", "content": system_prompt},
-            ],
-            "response_model": response_model,
-            "max_retries": self.MAX_RETRIES,
-            "max_completion_tokens": self.max_completion_tokens,
-            "stream": self.streaming,
-        }
-
-        s3_config = get_s3_config()
-
-        # Add authentication parameters
-        if self.api_key:
-            request_params["api_key"] = self.api_key
-        elif s3_config.aws_access_key_id and s3_config.aws_secret_access_key:
-            request_params["aws_access_key_id"] = s3_config.aws_access_key_id
-            request_params["aws_secret_access_key"] = s3_config.aws_secret_access_key
-            if s3_config.aws_session_token:
-                request_params["aws_session_token"] = s3_config.aws_session_token
-        elif s3_config.aws_profile_name:
-            request_params["aws_profile_name"] = s3_config.aws_profile_name
-
-        if s3_config.aws_region:
-            request_params["aws_region_name"] = s3_config.aws_region
-
-        # Add optional parameters
-        if s3_config.aws_bedrock_runtime_endpoint:
-            request_params["aws_bedrock_runtime_endpoint"] = s3_config.aws_bedrock_runtime_endpoint
-
-        return request_params
-
-    @observe(as_type="generation")
-    @sleep_and_retry_async()
-    @rate_limit_async
-    async def acreate_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
-    ) -> BaseModel:
-        """Generate structured output from AWS Bedrock API."""
-
-        try:
-            request_params = self._create_bedrock_request(text_input, system_prompt, response_model)
-            return await self.aclient.chat.completions.create(**request_params)
-
-        except (
-            ContentPolicyViolationError,
-            InstructorRetryException,
-        ) as error:
-            if (
-                isinstance(error, InstructorRetryException)
-                and "content management policy" not in str(error).lower()
-            ):
-                raise error
-
-            raise ContentPolicyFilterError(
-                f"The provided input contains content that is not aligned with our content policy: {text_input}"
-            )
-
-    @observe
-    @sleep_and_retry_sync()
-    @rate_limit_sync
-    def create_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
-    ) -> BaseModel:
-        """Generate structured output from AWS Bedrock API (synchronous)."""
-
-        request_params = self._create_bedrock_request(text_input, system_prompt, response_model)
-        return self.client.chat.completions.create(**request_params)
-
-    def show_prompt(self, text_input: str, system_prompt: str) -> str:
-        """Format and display the prompt for a user query."""
-        if not text_input:
-            text_input = "No user input provided."
-        if not system_prompt:
-            raise MissingSystemPromptPathError()
-        system_prompt = LLMGateway.read_query_prompt(system_prompt)
-
-        formatted_prompt = (
-            f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""
-            if system_prompt
-            else None
-        )
-        return formatted_prompt
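As removed, BedrockAdapter was exercised through the instructor-patched litellm client shown above. A hedged usage sketch, runnable only against cognee 0.5.0 where this module still existed; the response model and Bedrock model id below are illustrative:

from pydantic import BaseModel

from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.bedrock.adapter import (
    BedrockAdapter,
)


class Summary(BaseModel):  # illustrative response model
    text: str


# Model id is an example; any Bedrock-hosted model litellm supports would do.
adapter = BedrockAdapter(model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0")
result = adapter.create_structured_output(
    text_input="Summarize: cognee builds knowledge graphs from documents.",
    system_prompt="You are a terse summarizer.",
    response_model=Summary,
)
print(result.text)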
cognee/memify_pipelines/create_triplet_embeddings.py
DELETED

@@ -1,53 +0,0 @@
-from typing import Any
-
-from cognee import memify
-from cognee.context_global_variables import (
-    set_database_global_context_variables,
-)
-from cognee.exceptions import CogneeValidationError
-from cognee.modules.data.methods import get_authorized_existing_datasets
-from cognee.shared.logging_utils import get_logger
-from cognee.modules.pipelines.tasks.task import Task
-from cognee.modules.users.models import User
-from cognee.tasks.memify.get_triplet_datapoints import get_triplet_datapoints
-from cognee.tasks.storage import index_data_points
-
-logger = get_logger("create_triplet_embeddings")
-
-
-async def create_triplet_embeddings(
-    user: User,
-    dataset: str = "main_dataset",
-    run_in_background: bool = False,
-    triplets_batch_size: int = 100,
-) -> dict[str, Any]:
-    dataset_to_write = await get_authorized_existing_datasets(
-        user=user, datasets=[dataset], permission_type="write"
-    )
-
-    if not dataset_to_write:
-        raise CogneeValidationError(
-            message=f"User does not have write access to dataset: {dataset}",
-            log=False,
-        )
-
-    await set_database_global_context_variables(
-        dataset_to_write[0].id, dataset_to_write[0].owner_id
-    )
-
-    extraction_tasks = [Task(get_triplet_datapoints, triplets_batch_size=triplets_batch_size)]
-
-    enrichment_tasks = [
-        Task(index_data_points, task_config={"batch_size": triplets_batch_size}),
-    ]
-
-    result = await memify(
-        extraction_tasks=extraction_tasks,
-        enrichment_tasks=enrichment_tasks,
-        dataset=dataset_to_write[0].id,
-        data=[{}],
-        user=user,
-        run_in_background=run_in_background,
-    )
-
-    return result
cognee/modules/retrieval/register_retriever.py
DELETED

@@ -1,10 +0,0 @@
-from typing import Type
-
-from .base_retriever import BaseRetriever
-from .registered_community_retrievers import registered_community_retrievers
-from ..search.types import SearchType
-
-
-def use_retriever(search_type: SearchType, retriever: Type[BaseRetriever]):
-    """Register a retriever class for a given search type."""
-    registered_community_retrievers[search_type] = retriever
cognee/modules/retrieval/registered_community_retrievers.py
DELETED

@@ -1 +0,0 @@
-registered_community_retrievers = {}
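The last two removals take the community retriever registration hook with them. Under 0.5.0 a plugin could register a custom retriever for a search type roughly as follows; this is a hedged sketch assuming BaseRetriever exposes get_context/get_completion, and MyRetriever is hypothetical:

from cognee.modules.retrieval.base_retriever import BaseRetriever
from cognee.modules.retrieval.register_retriever import use_retriever
from cognee.modules.search.types import SearchType


class MyRetriever(BaseRetriever):  # hypothetical community retriever
    async def get_context(self, query: str):
        return f"context for {query}"

    async def get_completion(self, query: str, context=None):
        return [await self.get_context(query)]


# Route an existing search type to the custom retriever.
use_retriever(SearchType.GRAPH_COMPLETION, MyRetriever)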