cognee 0.4.1__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +1 -0
- cognee/api/client.py +8 -0
- cognee/api/v1/add/routers/get_add_router.py +3 -1
- cognee/api/v1/cognify/routers/get_cognify_router.py +28 -1
- cognee/api/v1/ontologies/__init__.py +4 -0
- cognee/api/v1/ontologies/ontologies.py +183 -0
- cognee/api/v1/ontologies/routers/__init__.py +0 -0
- cognee/api/v1/ontologies/routers/get_ontology_router.py +107 -0
- cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
- cognee/cli/commands/cognify_command.py +8 -1
- cognee/cli/config.py +1 -1
- cognee/context_global_variables.py +41 -9
- cognee/infrastructure/databases/cache/config.py +3 -1
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
- cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
- cognee/infrastructure/databases/graph/config.py +4 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +2 -0
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
- cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +37 -3
- cognee/infrastructure/databases/vector/config.py +3 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +5 -1
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +1 -4
- cognee/infrastructure/engine/models/Edge.py +13 -1
- cognee/infrastructure/files/utils/guess_file_type.py +4 -0
- cognee/infrastructure/llm/config.py +2 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +5 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +7 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +7 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +8 -16
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +12 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +13 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +5 -2
- cognee/infrastructure/loaders/LoaderEngine.py +1 -0
- cognee/infrastructure/loaders/core/__init__.py +2 -1
- cognee/infrastructure/loaders/core/csv_loader.py +93 -0
- cognee/infrastructure/loaders/core/text_loader.py +1 -2
- cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
- cognee/infrastructure/loaders/supported_loaders.py +2 -1
- cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
- cognee/modules/chunking/CsvChunker.py +35 -0
- cognee/modules/chunking/models/DocumentChunk.py +2 -1
- cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
- cognee/modules/data/methods/__init__.py +1 -0
- cognee/modules/data/methods/create_dataset.py +4 -2
- cognee/modules/data/methods/get_dataset_ids.py +5 -1
- cognee/modules/data/methods/get_unique_data_id.py +68 -0
- cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
- cognee/modules/data/models/Dataset.py +2 -0
- cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
- cognee/modules/data/processing/document_types/__init__.py +1 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +4 -2
- cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
- cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
- cognee/modules/ingestion/identify.py +4 -4
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
- cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
- cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
- cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
- cognee/modules/retrieval/base_graph_retriever.py +7 -3
- cognee/modules/retrieval/base_retriever.py +7 -3
- cognee/modules/retrieval/completion_retriever.py +11 -4
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +6 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +14 -51
- cognee/modules/retrieval/graph_completion_retriever.py +4 -1
- cognee/modules/retrieval/temporal_retriever.py +9 -2
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +1 -1
- cognee/modules/retrieval/utils/completion.py +2 -22
- cognee/modules/run_custom_pipeline/__init__.py +1 -0
- cognee/modules/run_custom_pipeline/run_custom_pipeline.py +69 -0
- cognee/modules/search/methods/search.py +5 -3
- cognee/modules/users/methods/create_user.py +12 -27
- cognee/modules/users/methods/get_authenticated_user.py +2 -1
- cognee/modules/users/methods/get_default_user.py +4 -2
- cognee/modules/users/methods/get_user.py +1 -1
- cognee/modules/users/methods/get_user_by_email.py +1 -1
- cognee/modules/users/models/DatasetDatabase.py +9 -0
- cognee/modules/users/models/Tenant.py +6 -7
- cognee/modules/users/models/User.py +6 -5
- cognee/modules/users/models/UserTenant.py +12 -0
- cognee/modules/users/models/__init__.py +1 -0
- cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
- cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
- cognee/modules/users/tenants/methods/__init__.py +1 -0
- cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
- cognee/modules/users/tenants/methods/create_tenant.py +22 -8
- cognee/modules/users/tenants/methods/select_tenant.py +62 -0
- cognee/shared/logging_utils.py +2 -0
- cognee/tasks/chunks/__init__.py +1 -0
- cognee/tasks/chunks/chunk_by_row.py +94 -0
- cognee/tasks/documents/classify_documents.py +2 -0
- cognee/tasks/feedback/generate_improved_answers.py +3 -3
- cognee/tasks/ingestion/ingest_data.py +1 -1
- cognee/tasks/memify/__init__.py +2 -0
- cognee/tasks/memify/cognify_session.py +41 -0
- cognee/tasks/memify/extract_user_sessions.py +73 -0
- cognee/tasks/storage/index_data_points.py +33 -22
- cognee/tasks/storage/index_graph_edges.py +37 -57
- cognee/tests/integration/documents/CsvDocument_test.py +70 -0
- cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
- cognee/tests/test_add_docling_document.py +2 -2
- cognee/tests/test_cognee_server_start.py +84 -1
- cognee/tests/test_conversation_history.py +45 -4
- cognee/tests/test_data/example_with_header.csv +3 -0
- cognee/tests/test_delete_bmw_example.py +60 -0
- cognee/tests/test_edge_ingestion.py +27 -0
- cognee/tests/test_feedback_enrichment.py +1 -1
- cognee/tests/test_library.py +6 -4
- cognee/tests/test_load.py +62 -0
- cognee/tests/test_multi_tenancy.py +165 -0
- cognee/tests/test_parallel_databases.py +2 -0
- cognee/tests/test_relational_db_migration.py +54 -2
- cognee/tests/test_search_db.py +7 -1
- cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
- cognee/tests/unit/api/test_ontology_endpoint.py +264 -0
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
- cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
- cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
- cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
- cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
- cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
- cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
- cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
- cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
- cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
- {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/METADATA +88 -71
- {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/RECORD +135 -104
- {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/WHEEL +1 -1
- {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/entry_points.txt +0 -1
- {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py
ADDED
@@ -0,0 +1,151 @@
+import asyncio
+import json
+import os
+from datetime import datetime
+import time
+import threading
+import diskcache as dc
+
+from cognee.infrastructure.databases.cache.cache_db_interface import CacheDBInterface
+from cognee.infrastructure.databases.exceptions.exceptions import (
+    CacheConnectionError,
+    SharedKuzuLockRequiresRedisError,
+)
+from cognee.infrastructure.files.storage.get_storage_config import get_storage_config
+from cognee.shared.logging_utils import get_logger
+
+logger = get_logger("FSCacheAdapter")
+
+
+class FSCacheAdapter(CacheDBInterface):
+    def __init__(self):
+        default_key = "sessions_db"
+
+        storage_config = get_storage_config()
+        data_root_directory = storage_config["data_root_directory"]
+        cache_directory = os.path.join(data_root_directory, ".cognee_fs_cache", default_key)
+        os.makedirs(cache_directory, exist_ok=True)
+        self.cache = dc.Cache(directory=cache_directory)
+        self.cache.expire()
+
+        logger.debug(f"FSCacheAdapter initialized with cache directory: {cache_directory}")
+
+    def acquire_lock(self):
+        """Lock acquisition is not available for filesystem cache backend."""
+        message = "Shared Kuzu lock requires Redis cache backend."
+        logger.error(message)
+        raise SharedKuzuLockRequiresRedisError()
+
+    def release_lock(self):
+        """Lock release is not available for filesystem cache backend."""
+        message = "Shared Kuzu lock requires Redis cache backend."
+        logger.error(message)
+        raise SharedKuzuLockRequiresRedisError()
+
+    async def add_qa(
+        self,
+        user_id: str,
+        session_id: str,
+        question: str,
+        context: str,
+        answer: str,
+        ttl: int | None = 86400,
+    ):
+        try:
+            session_key = f"agent_sessions:{user_id}:{session_id}"
+
+            qa_entry = {
+                "time": datetime.utcnow().isoformat(),
+                "question": question,
+                "context": context,
+                "answer": answer,
+            }
+
+            existing_value = self.cache.get(session_key)
+            if existing_value is not None:
+                value: list = json.loads(existing_value)
+                value.append(qa_entry)
+            else:
+                value = [qa_entry]
+
+            self.cache.set(session_key, json.dumps(value), expire=ttl)
+        except Exception as e:
+            error_msg = f"Unexpected error while adding Q&A to diskcache: {str(e)}"
+            logger.error(error_msg)
+            raise CacheConnectionError(error_msg) from e
+
+    async def get_latest_qa(self, user_id: str, session_id: str, last_n: int = 5):
+        session_key = f"agent_sessions:{user_id}:{session_id}"
+        value = self.cache.get(session_key)
+        if value is None:
+            return None
+        entries = json.loads(value)
+        return entries[-last_n:] if len(entries) > last_n else entries
+
+    async def get_all_qas(self, user_id: str, session_id: str):
+        session_key = f"agent_sessions:{user_id}:{session_id}"
+        value = self.cache.get(session_key)
+        if value is None:
+            return None
+        return json.loads(value)
+
+    async def close(self):
+        if self.cache is not None:
+            self.cache.expire()
+            self.cache.close()
+
+
+async def main():
+    adapter = FSCacheAdapter()
+    session_id = "demo_session"
+    user_id = "demo_user_id"
+
+    print("\nAdding sample Q/A pairs...")
+    await adapter.add_qa(
+        user_id,
+        session_id,
+        "What is Redis?",
+        "Basic DB context",
+        "Redis is an in-memory data store.",
+    )
+    await adapter.add_qa(
+        user_id,
+        session_id,
+        "Who created Redis?",
+        "Historical context",
+        "Salvatore Sanfilippo (antirez).",
+    )
+
+    print("\nLatest QA:")
+    latest = await adapter.get_latest_qa(user_id, session_id)
+    print(json.dumps(latest, indent=2))
+
+    print("\nLast 2 QAs:")
+    last_two = await adapter.get_latest_qa(user_id, session_id, last_n=2)
+    print(json.dumps(last_two, indent=2))
+
+    session_id = "session_expire_demo"
+
+    await adapter.add_qa(
+        user_id,
+        session_id,
+        "What is Redis?",
+        "Database context",
+        "Redis is an in-memory data store.",
+    )
+
+    await adapter.add_qa(
+        user_id,
+        session_id,
+        "Who created Redis?",
+        "History context",
+        "Salvatore Sanfilippo (antirez).",
+    )
+
+    print(await adapter.get_all_qas(user_id, session_id))
+
+    await adapter.close()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
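The adapter keeps each session's history as a single JSON-encoded list under one diskcache key, so every add_qa is a read-modify-write of the whole blob and restarts that session's TTL. A minimal standalone sketch of the same pattern using only the public diskcache API (the key and entry shapes below are illustrative, not cognee's):

    import json
    import tempfile

    import diskcache as dc

    cache = dc.Cache(directory=tempfile.mkdtemp())

    def append_qa(session_key: str, qa_entry: dict, ttl: int = 86400) -> None:
        # Read-modify-write: the whole session history is one JSON blob.
        raw = cache.get(session_key)
        history = json.loads(raw) if raw is not None else []
        history.append(qa_entry)
        # `expire` restarts the TTL for the entire session on every write.
        cache.set(session_key, json.dumps(history), expire=ttl)

    append_qa("agent_sessions:u1:s1", {"question": "hi", "answer": "hello"})
    print(json.loads(cache.get("agent_sessions:u1:s1")))
    cache.close()

Note that the TTL is per session rather than per entry: one late write keeps the oldest entries alive as well.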
cognee/infrastructure/databases/cache/get_cache_engine.py
CHANGED
@@ -1,9 +1,11 @@
 """Factory to get the appropriate cache coordination engine (e.g., Redis)."""

 from functools import lru_cache
+import os
 from typing import Optional
 from cognee.infrastructure.databases.cache.config import get_cache_config
 from cognee.infrastructure.databases.cache.cache_db_interface import CacheDBInterface
+from cognee.infrastructure.databases.cache.fscache.FsCacheAdapter import FSCacheAdapter

 config = get_cache_config()

@@ -33,20 +35,28 @@ def create_cache_engine(

     Returns:
     --------
-    - CacheDBInterface: An instance of the appropriate cache adapter.
+        - CacheDBInterface: An instance of the appropriate cache adapter.
     """
     if config.caching:
         from cognee.infrastructure.databases.cache.redis.RedisAdapter import RedisAdapter

-        return RedisAdapter(
-            host=cache_host,
-            port=cache_port,
-            username=cache_username,
-            password=cache_password,
-            lock_name=lock_key,
-            timeout=agentic_lock_expire,
-            blocking_timeout=agentic_lock_timeout,
-        )
+        if config.cache_backend == "redis":
+            return RedisAdapter(
+                host=cache_host,
+                port=cache_port,
+                username=cache_username,
+                password=cache_password,
+                lock_name=lock_key,
+                timeout=agentic_lock_expire,
+                blocking_timeout=agentic_lock_timeout,
+            )
+        elif config.cache_backend == "fs":
+            return FSCacheAdapter()
+        else:
+            raise ValueError(
+                f"Unsupported cache backend: '{config.cache_backend}'. "
+                f"Supported backends are: 'redis', 'fs'"
+            )
    else:
        return None
cognee/infrastructure/databases/exceptions/exceptions.py
CHANGED
@@ -148,3 +148,19 @@ class CacheConnectionError(CogneeConfigurationError):
         status_code: int = status.HTTP_503_SERVICE_UNAVAILABLE,
     ):
         super().__init__(message, name, status_code)
+
+
+class SharedKuzuLockRequiresRedisError(CogneeConfigurationError):
+    """
+    Raised when shared Kuzu locking is requested without configuring the Redis backend.
+    """
+
+    def __init__(
+        self,
+        message: str = (
+            "Shared Kuzu lock requires Redis cache backend. Configure Redis to enable shared Kuzu locking."
+        ),
+        name: str = "SharedKuzuLockRequiresRedisError",
+        status_code: int = status.HTTP_400_BAD_REQUEST,
+    ):
+        super().__init__(message, name, status_code)
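The filesystem backend implements the cache interface but deliberately refuses lock operations, so code that may run against either backend should treat the new exception as a signal rather than a crash. A hedged sketch, assuming a configured cognee environment (both imports match the hunks above):

    from cognee.infrastructure.databases.cache.fscache.FsCacheAdapter import FSCacheAdapter
    from cognee.infrastructure.databases.exceptions.exceptions import (
        SharedKuzuLockRequiresRedisError,
    )

    adapter = FSCacheAdapter()
    try:
        adapter.acquire_lock()
    except SharedKuzuLockRequiresRedisError as error:
        # Expected on the "fs" backend: shared Kuzu locking needs Redis.
        print(f"Proceeding without a shared lock: {error}")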
cognee/infrastructure/databases/graph/config.py
CHANGED
@@ -26,6 +26,7 @@ class GraphConfig(BaseSettings):
     - graph_database_username
     - graph_database_password
     - graph_database_port
+    - graph_database_key
     - graph_file_path
     - graph_model
     - graph_topology
@@ -41,6 +42,7 @@ class GraphConfig(BaseSettings):
     graph_database_username: str = ""
     graph_database_password: str = ""
     graph_database_port: int = 123
+    graph_database_key: str = ""
     graph_file_path: str = ""
     graph_filename: str = ""
     graph_model: object = KnowledgeGraph
@@ -90,6 +92,7 @@ class GraphConfig(BaseSettings):
             "graph_database_username": self.graph_database_username,
             "graph_database_password": self.graph_database_password,
             "graph_database_port": self.graph_database_port,
+            "graph_database_key": self.graph_database_key,
             "graph_file_path": self.graph_file_path,
             "graph_model": self.graph_model,
             "graph_topology": self.graph_topology,
@@ -116,6 +119,7 @@ class GraphConfig(BaseSettings):
             "graph_database_username": self.graph_database_username,
             "graph_database_password": self.graph_database_password,
             "graph_database_port": self.graph_database_port,
+            "graph_database_key": self.graph_database_key,
             "graph_file_path": self.graph_file_path,
         }
cognee/infrastructure/databases/graph/get_graph_engine.py
CHANGED
@@ -33,6 +33,7 @@ def create_graph_engine(
     graph_database_username="",
     graph_database_password="",
     graph_database_port="",
+    graph_database_key="",
 ):
     """
     Create a graph engine based on the specified provider type.
@@ -69,6 +70,7 @@ def create_graph_engine(
             graph_database_url=graph_database_url,
             graph_database_username=graph_database_username,
             graph_database_password=graph_database_password,
+            database_name=graph_database_name,
         )

     if graph_database_provider == "neo4j":
cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py
CHANGED
@@ -416,6 +416,15 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
         self._client.query(f"MATCH (n :{self._VECTOR_NODE_LABEL}) DETACH DELETE n")
         pass

+    async def is_empty(self) -> bool:
+        query = """
+        MATCH (n)
+        RETURN true
+        LIMIT 1;
+        """
+        query_result = await self._client.query(query)
+        return len(query_result) == 0
+
     @staticmethod
     def _get_scored_result(
         item: dict, with_vector: bool = False, with_score: bool = False
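The emptiness check leans on `LIMIT 1`: the query returns one row if any node exists and zero rows otherwise, so an empty result set means an empty graph without a full scan. A hedged caller sketch (the adapter variable and its setup are assumed, not shown in the diff):

    async def ensure_graph_has_data(adapter) -> None:
        # `adapter` is assumed to be a connected NeptuneAnalyticsAdapter.
        if await adapter.is_empty():
            print("Graph has no nodes yet; run cognify before searching.")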
cognee/infrastructure/databases/utils/get_or_create_dataset_database.py
CHANGED
@@ -1,11 +1,15 @@
+import os
 from uuid import UUID
 from typing import Union

 from sqlalchemy import select
 from sqlalchemy.exc import IntegrityError
-from cognee.modules.data.methods import create_dataset

+from cognee.base_config import get_base_config
+from cognee.modules.data.methods import create_dataset
 from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.infrastructure.databases.vector import get_vectordb_config
+from cognee.infrastructure.databases.graph.config import get_graph_config
 from cognee.modules.data.methods import get_unique_dataset_id
 from cognee.modules.users.models import DatasetDatabase
 from cognee.modules.users.models import User
@@ -32,8 +36,32 @@ async def get_or_create_dataset_database(

     dataset_id = await get_unique_dataset_id(dataset, user)

-    graph_db_name = f"{dataset_id}.pkl"
-    vector_db_name = f"{dataset_id}.lance.db"
+    vector_config = get_vectordb_config()
+    graph_config = get_graph_config()
+
+    # Note: for hybrid databases both graph and vector DB name have to be the same
+    if graph_config.graph_database_provider == "kuzu":
+        graph_db_name = f"{dataset_id}.pkl"
+    else:
+        graph_db_name = f"{dataset_id}"
+
+    if vector_config.vector_db_provider == "lancedb":
+        vector_db_name = f"{dataset_id}.lance.db"
+    else:
+        vector_db_name = f"{dataset_id}"
+
+    base_config = get_base_config()
+    databases_directory_path = os.path.join(
+        base_config.system_root_directory, "databases", str(user.id)
+    )
+
+    # Determine vector database URL
+    if vector_config.vector_db_provider == "lancedb":
+        vector_db_url = os.path.join(databases_directory_path, vector_config.vector_db_name)
+    else:
+        vector_db_url = vector_config.vector_database_url
+
+    # Determine graph database URL

     async with db_engine.get_async_session() as session:
         # Create dataset if it doesn't exist
@@ -55,6 +83,12 @@ async def get_or_create_dataset_database(
             dataset_id=dataset_id,
             vector_database_name=vector_db_name,
             graph_database_name=graph_db_name,
+            vector_database_provider=vector_config.vector_db_provider,
+            graph_database_provider=graph_config.graph_database_provider,
+            vector_database_url=vector_db_url,
+            graph_database_url=graph_config.graph_database_url,
+            vector_database_key=vector_config.vector_db_key,
+            graph_database_key=graph_config.graph_database_key,
         )

         try:
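A worked example of the naming rules above, pulled out into a standalone helper (illustrative only, not part of cognee's API): a Kuzu graph file gets a `.pkl` suffix, a LanceDB store gets `.lance.db`, and any other provider uses the bare dataset id.

    def dataset_database_names(dataset_id: str, graph_provider: str, vector_provider: str) -> dict:
        # Mirrors the branching in the hunk above.
        graph_db_name = f"{dataset_id}.pkl" if graph_provider == "kuzu" else f"{dataset_id}"
        vector_db_name = f"{dataset_id}.lance.db" if vector_provider == "lancedb" else f"{dataset_id}"
        return {"graph": graph_db_name, "vector": vector_db_name}

    print(dataset_database_names("1b2c3d", "kuzu", "lancedb"))
    # {'graph': '1b2c3d.pkl', 'vector': '1b2c3d.lance.db'}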
cognee/infrastructure/databases/vector/config.py
CHANGED
@@ -18,12 +18,14 @@ class VectorConfig(BaseSettings):
     Instance variables:
     - vector_db_url: The URL of the vector database.
     - vector_db_port: The port for the vector database.
+    - vector_db_name: The name of the vector database.
     - vector_db_key: The key for accessing the vector database.
     - vector_db_provider: The provider for the vector database.
     """

     vector_db_url: str = ""
     vector_db_port: int = 1234
+    vector_db_name: str = ""
     vector_db_key: str = ""
     vector_db_provider: str = "lancedb"

@@ -58,6 +60,7 @@ class VectorConfig(BaseSettings):
         return {
             "vector_db_url": self.vector_db_url,
             "vector_db_port": self.vector_db_port,
+            "vector_db_name": self.vector_db_name,
             "vector_db_key": self.vector_db_key,
             "vector_db_provider": self.vector_db_provider,
         }
cognee/infrastructure/databases/vector/create_vector_engine.py
CHANGED
@@ -1,5 +1,6 @@
 from .supported_databases import supported_databases
 from .embeddings import get_embedding_engine
+from cognee.infrastructure.databases.graph.config import get_graph_context_config

 from functools import lru_cache

@@ -8,6 +9,7 @@ from functools import lru_cache
 def create_vector_engine(
     vector_db_provider: str,
     vector_db_url: str,
+    vector_db_name: str,
     vector_db_port: str = "",
     vector_db_key: str = "",
 ):
@@ -27,6 +29,7 @@ def create_vector_engine(
     - vector_db_url (str): The URL for the vector database instance.
     - vector_db_port (str): The port for the vector database instance. Required for some
     providers.
+    - vector_db_name (str): The name of the vector database instance.
     - vector_db_key (str): The API key or access token for the vector database instance.
     - vector_db_provider (str): The name of the vector database provider to use (e.g.,
     'pgvector').
@@ -45,6 +48,7 @@ def create_vector_engine(
             url=vector_db_url,
             api_key=vector_db_key,
             embedding_engine=embedding_engine,
+            database_name=vector_db_name,
         )

     if vector_db_provider.lower() == "pgvector":
@@ -133,6 +137,6 @@ def create_vector_engine(

     else:
         raise EnvironmentError(
-            f"Unsupported
+            f"Unsupported vector database provider: {vector_db_provider}. "
             f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['LanceDB', 'PGVector', 'neptune_analytics', 'ChromaDB'])}"
         )
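A hedged call sketch against the updated signature; `vector_db_name` is now a required parameter, so existing callers must be updated. Argument values here are placeholders:

    from cognee.infrastructure.databases.vector.create_vector_engine import (
        create_vector_engine,
    )

    # Placeholder values; in cognee these come from VectorConfig and
    # get_or_create_dataset_database rather than literals.
    engine = create_vector_engine(
        vector_db_provider="lancedb",
        vector_db_url="/data/databases/<user-id>/<dataset-id>.lance.db",
        vector_db_name="<dataset-id>.lance.db",
    )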
cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py
CHANGED
@@ -124,10 +124,7 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
                 self.endpoint, json=payload, headers=headers, timeout=60.0
             ) as response:
                 data = await response.json()
-
-                return data["embeddings"][0]
-            else:
-                return data["data"][0]["embedding"]
+                return data["embeddings"][0]

     def get_vector_size(self) -> int:
         """
cognee/infrastructure/engine/models/Edge.py
CHANGED
@@ -1,4 +1,4 @@
-from pydantic import BaseModel
+from pydantic import BaseModel, field_validator
 from typing import Optional, Any, Dict


@@ -18,9 +18,21 @@ class Edge(BaseModel):

     # Mixed usage
     has_items: (Edge(weight=0.5, weights={"confidence": 0.9}), list[Item])
+
+    # With edge_text for rich embedding representation
+    contains: (Edge(relationship_type="contains", edge_text="relationship_name: contains; entity_description: Alice"), Entity)
     """

     weight: Optional[float] = None
     weights: Optional[Dict[str, float]] = None
     relationship_type: Optional[str] = None
     properties: Optional[Dict[str, Any]] = None
+    edge_text: Optional[str] = None
+
+    @field_validator("edge_text", mode="before")
+    @classmethod
+    def ensure_edge_text(cls, v, info):
+        """Auto-populate edge_text from relationship_type if not explicitly provided."""
+        if v is None and info.data.get("relationship_type"):
+            return info.data["relationship_type"]
+        return v
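Usage sketch for the new field (import path inferred from the file location). Passing `edge_text=None` explicitly guarantees the before-validator runs; whether the fallback also fires when the field is omitted entirely depends on pydantic's `validate_default` setting, which this hunk does not show:

    from cognee.infrastructure.engine.models.Edge import Edge

    # Fallback: edge_text inherits relationship_type.
    edge = Edge(relationship_type="contains", edge_text=None)
    print(edge.edge_text)  # -> "contains"

    # An explicit value wins over the fallback.
    rich = Edge(
        relationship_type="contains",
        edge_text="relationship_name: contains; entity_description: Alice",
    )
    print(rich.edge_text)  # -> "relationship_name: contains; entity_description: Alice"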
cognee/infrastructure/files/utils/guess_file_type.py
CHANGED
@@ -55,6 +55,10 @@ def guess_file_type(file: BinaryIO, name: Optional[str] = None) -> filetype.Type
         file_type = Type("text/plain", "txt")
         return file_type

+    if ext in [".csv"]:
+        file_type = Type("text/csv", "csv")
+        return file_type
+
     file_type = filetype.guess(file)

     # If file type could not be determined consider it a plain text file as they don't have magic number encoding
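A hedged sketch of the effect, assuming (as the surrounding code suggests) that `ext` is derived from the optional `name` argument: a `.csv` name now short-circuits magic-number detection, which would otherwise classify header-only CSV bytes as plain text.

    import io

    from cognee.infrastructure.files.utils.guess_file_type import guess_file_type

    csv_bytes = io.BytesIO(b"name,age\nAlice,30\n")
    file_type = guess_file_type(csv_bytes, name="example_with_header.csv")
    print(file_type.mime, file_type.extension)  # expected: text/csv csv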
cognee/infrastructure/llm/config.py
CHANGED
@@ -38,6 +38,7 @@ class LLMConfig(BaseSettings):
     """

     structured_output_framework: str = "instructor"
+    llm_instructor_mode: str = ""
     llm_provider: str = "openai"
     llm_model: str = "openai/gpt-5-mini"
     llm_endpoint: str = ""
@@ -181,6 +182,7 @@ class LLMConfig(BaseSettings):
         instance.
         """
         return {
+            "llm_instructor_mode": self.llm_instructor_mode.lower(),
             "provider": self.llm_provider,
             "model": self.llm_model,
             "endpoint": self.llm_endpoint,
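Since `LLMConfig` is a pydantic `BaseSettings`, the new field should be settable from the environment. A hedged sketch: the `LLM_INSTRUCTOR_MODE` variable name follows pydantic-settings' default field-to-env mapping and is an assumption, as is the import path of the `get_llm_config` accessor (the accessor itself is visible in the adapter hunks below); set the variable before the likely cached config first loads.

    import os

    # Assumed env-var name; set before the config is first loaded.
    os.environ["LLM_INSTRUCTOR_MODE"] = "json_mode"

    from cognee.infrastructure.llm.config import get_llm_config

    print(get_llm_config().llm_instructor_mode)  # -> "json_mode"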
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py
CHANGED
@@ -28,13 +28,16 @@ class AnthropicAdapter(LLMInterface):

     name = "Anthropic"
     model: str
+    default_instructor_mode = "anthropic_tools"

-    def __init__(self, max_completion_tokens: int, model: str = None):
+    def __init__(self, max_completion_tokens: int, model: str = None, instructor_mode: str = None):
         import anthropic

+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
+
         self.aclient = instructor.patch(
             create=anthropic.AsyncAnthropic(api_key=get_llm_config().llm_api_key).messages.create,
-            mode=instructor.Mode.
+            mode=instructor.Mode(self.instructor_mode),
         )

         self.model = model
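All the adapter changes funnel the configured string through `instructor.Mode(...)`. In the instructor library, `Mode` is an `Enum` whose values are strings, so lookup by value works for the defaults used here (a quick check, based on instructor versions known at time of writing):

    import instructor

    assert instructor.Mode("anthropic_tools") is instructor.Mode.ANTHROPIC_TOOLS
    assert instructor.Mode("json_mode") is instructor.Mode.JSON
    assert instructor.Mode("mistral_tools") is instructor.Mode.MISTRAL_TOOLS

An unrecognized string raises ValueError at adapter construction, so a typo in llm_instructor_mode fails fast rather than at request time.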
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py
CHANGED
@@ -41,6 +41,7 @@ class GeminiAdapter(LLMInterface):
     name: str
     model: str
     api_key: str
+    default_instructor_mode = "json_mode"

     def __init__(
         self,
@@ -49,6 +50,7 @@ class GeminiAdapter(LLMInterface):
         model: str,
         api_version: str,
         max_completion_tokens: int,
+        instructor_mode: str = None,
         fallback_model: str = None,
         fallback_api_key: str = None,
         fallback_endpoint: str = None,
@@ -63,7 +65,11 @@ class GeminiAdapter(LLMInterface):
         self.fallback_api_key = fallback_api_key
         self.fallback_endpoint = fallback_endpoint

-        self.
+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
+
+        self.aclient = instructor.from_litellm(
+            litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
+        )

     @retry(
         stop=stop_after_delay(128),
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py
CHANGED
@@ -41,6 +41,7 @@ class GenericAPIAdapter(LLMInterface):
     name: str
     model: str
     api_key: str
+    default_instructor_mode = "json_mode"

     def __init__(
         self,
@@ -49,6 +50,7 @@ class GenericAPIAdapter(LLMInterface):
         model: str,
         name: str,
         max_completion_tokens: int,
+        instructor_mode: str = None,
         fallback_model: str = None,
         fallback_api_key: str = None,
         fallback_endpoint: str = None,
@@ -63,7 +65,11 @@ class GenericAPIAdapter(LLMInterface):
         self.fallback_api_key = fallback_api_key
         self.fallback_endpoint = fallback_endpoint

-        self.
+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
+
+        self.aclient = instructor.from_litellm(
+            litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
+        )

     @retry(
         stop=stop_after_delay(128),
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py
CHANGED
@@ -81,6 +81,7 @@ def get_llm_client(raise_api_key_error: bool = True):
             model=llm_config.llm_model,
             transcription_model=llm_config.transcription_model,
             max_completion_tokens=max_completion_tokens,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
             streaming=llm_config.llm_streaming,
             fallback_api_key=llm_config.fallback_api_key,
             fallback_endpoint=llm_config.fallback_endpoint,
@@ -101,6 +102,7 @@ def get_llm_client(raise_api_key_error: bool = True):
             llm_config.llm_model,
             "Ollama",
             max_completion_tokens=max_completion_tokens,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
         )

     elif provider == LLMProvider.ANTHROPIC:
@@ -109,7 +111,9 @@ def get_llm_client(raise_api_key_error: bool = True):
         )

         return AnthropicAdapter(
-            max_completion_tokens=max_completion_tokens,
+            max_completion_tokens=max_completion_tokens,
+            model=llm_config.llm_model,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
         )

     elif provider == LLMProvider.CUSTOM:
@@ -126,6 +130,7 @@ def get_llm_client(raise_api_key_error: bool = True):
             llm_config.llm_model,
             "Custom",
             max_completion_tokens=max_completion_tokens,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
             fallback_api_key=llm_config.fallback_api_key,
             fallback_endpoint=llm_config.fallback_endpoint,
             fallback_model=llm_config.fallback_model,
@@ -145,6 +150,7 @@ def get_llm_client(raise_api_key_error: bool = True):
             max_completion_tokens=max_completion_tokens,
             endpoint=llm_config.llm_endpoint,
             api_version=llm_config.llm_api_version,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
         )

     elif provider == LLMProvider.MISTRAL:
@@ -160,21 +166,7 @@ def get_llm_client(raise_api_key_error: bool = True):
             model=llm_config.llm_model,
             max_completion_tokens=max_completion_tokens,
             endpoint=llm_config.llm_endpoint,
-        )
-
-    elif provider == LLMProvider.MISTRAL:
-        if llm_config.llm_api_key is None:
-            raise LLMAPIKeyNotSetError()
-
-        from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.mistral.adapter import (
-            MistralAdapter,
-        )
-
-        return MistralAdapter(
-            api_key=llm_config.llm_api_key,
-            model=llm_config.llm_model,
-            max_completion_tokens=max_completion_tokens,
-            endpoint=llm_config.llm_endpoint,
+            instructor_mode=llm_config.llm_instructor_mode.lower(),
         )

     else:
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py
CHANGED
@@ -37,16 +37,26 @@ class MistralAdapter(LLMInterface):
     model: str
     api_key: str
     max_completion_tokens: int
+    default_instructor_mode = "mistral_tools"

-    def __init__(
+    def __init__(
+        self,
+        api_key: str,
+        model: str,
+        max_completion_tokens: int,
+        endpoint: str = None,
+        instructor_mode: str = None,
+    ):
         from mistralai import Mistral

         self.model = model
         self.max_completion_tokens = max_completion_tokens

+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
+
         self.aclient = instructor.from_litellm(
             litellm.acompletion,
-            mode=instructor.Mode.
+            mode=instructor.Mode(self.instructor_mode),
             api_key=get_llm_config().llm_api_key,
         )