cognee 0.4.1__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. cognee/__init__.py +1 -0
  2. cognee/api/client.py +8 -0
  3. cognee/api/v1/add/routers/get_add_router.py +3 -1
  4. cognee/api/v1/cognify/routers/get_cognify_router.py +28 -1
  5. cognee/api/v1/ontologies/__init__.py +4 -0
  6. cognee/api/v1/ontologies/ontologies.py +183 -0
  7. cognee/api/v1/ontologies/routers/__init__.py +0 -0
  8. cognee/api/v1/ontologies/routers/get_ontology_router.py +107 -0
  9. cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
  10. cognee/cli/commands/cognify_command.py +8 -1
  11. cognee/cli/config.py +1 -1
  12. cognee/context_global_variables.py +41 -9
  13. cognee/infrastructure/databases/cache/config.py +3 -1
  14. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
  15. cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
  16. cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
  17. cognee/infrastructure/databases/graph/config.py +4 -0
  18. cognee/infrastructure/databases/graph/get_graph_engine.py +2 -0
  19. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
  20. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +37 -3
  21. cognee/infrastructure/databases/vector/config.py +3 -0
  22. cognee/infrastructure/databases/vector/create_vector_engine.py +5 -1
  23. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +1 -4
  24. cognee/infrastructure/engine/models/Edge.py +13 -1
  25. cognee/infrastructure/files/utils/guess_file_type.py +4 -0
  26. cognee/infrastructure/llm/config.py +2 -0
  27. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +5 -2
  28. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +7 -1
  29. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +7 -1
  30. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +8 -16
  31. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +12 -2
  32. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +13 -2
  33. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +5 -2
  34. cognee/infrastructure/loaders/LoaderEngine.py +1 -0
  35. cognee/infrastructure/loaders/core/__init__.py +2 -1
  36. cognee/infrastructure/loaders/core/csv_loader.py +93 -0
  37. cognee/infrastructure/loaders/core/text_loader.py +1 -2
  38. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
  39. cognee/infrastructure/loaders/supported_loaders.py +2 -1
  40. cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
  41. cognee/modules/chunking/CsvChunker.py +35 -0
  42. cognee/modules/chunking/models/DocumentChunk.py +2 -1
  43. cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
  44. cognee/modules/data/methods/__init__.py +1 -0
  45. cognee/modules/data/methods/create_dataset.py +4 -2
  46. cognee/modules/data/methods/get_dataset_ids.py +5 -1
  47. cognee/modules/data/methods/get_unique_data_id.py +68 -0
  48. cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
  49. cognee/modules/data/models/Dataset.py +2 -0
  50. cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
  51. cognee/modules/data/processing/document_types/__init__.py +1 -0
  52. cognee/modules/graph/cognee_graph/CogneeGraph.py +4 -2
  53. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
  54. cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
  55. cognee/modules/ingestion/identify.py +4 -4
  56. cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
  57. cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
  58. cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
  59. cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
  60. cognee/modules/retrieval/base_graph_retriever.py +7 -3
  61. cognee/modules/retrieval/base_retriever.py +7 -3
  62. cognee/modules/retrieval/completion_retriever.py +11 -4
  63. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +6 -2
  64. cognee/modules/retrieval/graph_completion_cot_retriever.py +14 -51
  65. cognee/modules/retrieval/graph_completion_retriever.py +4 -1
  66. cognee/modules/retrieval/temporal_retriever.py +9 -2
  67. cognee/modules/retrieval/utils/brute_force_triplet_search.py +1 -1
  68. cognee/modules/retrieval/utils/completion.py +2 -22
  69. cognee/modules/run_custom_pipeline/__init__.py +1 -0
  70. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +69 -0
  71. cognee/modules/search/methods/search.py +5 -3
  72. cognee/modules/users/methods/create_user.py +12 -27
  73. cognee/modules/users/methods/get_authenticated_user.py +2 -1
  74. cognee/modules/users/methods/get_default_user.py +4 -2
  75. cognee/modules/users/methods/get_user.py +1 -1
  76. cognee/modules/users/methods/get_user_by_email.py +1 -1
  77. cognee/modules/users/models/DatasetDatabase.py +9 -0
  78. cognee/modules/users/models/Tenant.py +6 -7
  79. cognee/modules/users/models/User.py +6 -5
  80. cognee/modules/users/models/UserTenant.py +12 -0
  81. cognee/modules/users/models/__init__.py +1 -0
  82. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
  83. cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
  84. cognee/modules/users/tenants/methods/__init__.py +1 -0
  85. cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
  86. cognee/modules/users/tenants/methods/create_tenant.py +22 -8
  87. cognee/modules/users/tenants/methods/select_tenant.py +62 -0
  88. cognee/shared/logging_utils.py +2 -0
  89. cognee/tasks/chunks/__init__.py +1 -0
  90. cognee/tasks/chunks/chunk_by_row.py +94 -0
  91. cognee/tasks/documents/classify_documents.py +2 -0
  92. cognee/tasks/feedback/generate_improved_answers.py +3 -3
  93. cognee/tasks/ingestion/ingest_data.py +1 -1
  94. cognee/tasks/memify/__init__.py +2 -0
  95. cognee/tasks/memify/cognify_session.py +41 -0
  96. cognee/tasks/memify/extract_user_sessions.py +73 -0
  97. cognee/tasks/storage/index_data_points.py +33 -22
  98. cognee/tasks/storage/index_graph_edges.py +37 -57
  99. cognee/tests/integration/documents/CsvDocument_test.py +70 -0
  100. cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
  101. cognee/tests/test_add_docling_document.py +2 -2
  102. cognee/tests/test_cognee_server_start.py +84 -1
  103. cognee/tests/test_conversation_history.py +45 -4
  104. cognee/tests/test_data/example_with_header.csv +3 -0
  105. cognee/tests/test_delete_bmw_example.py +60 -0
  106. cognee/tests/test_edge_ingestion.py +27 -0
  107. cognee/tests/test_feedback_enrichment.py +1 -1
  108. cognee/tests/test_library.py +6 -4
  109. cognee/tests/test_load.py +62 -0
  110. cognee/tests/test_multi_tenancy.py +165 -0
  111. cognee/tests/test_parallel_databases.py +2 -0
  112. cognee/tests/test_relational_db_migration.py +54 -2
  113. cognee/tests/test_search_db.py +7 -1
  114. cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
  115. cognee/tests/unit/api/test_ontology_endpoint.py +264 -0
  116. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
  117. cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
  118. cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
  119. cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
  120. cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
  121. cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
  122. cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
  123. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
  124. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
  125. cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
  126. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
  127. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
  128. cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
  129. cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
  130. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/METADATA +88 -71
  131. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/RECORD +135 -104
  132. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/WHEEL +1 -1
  133. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/entry_points.txt +0 -1
  134. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/LICENSE +0 -0
  135. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,151 @@
1
+ import asyncio
2
+ import json
3
+ import os
4
+ from datetime import datetime
5
+ import time
6
+ import threading
7
+ import diskcache as dc
8
+
9
+ from cognee.infrastructure.databases.cache.cache_db_interface import CacheDBInterface
10
+ from cognee.infrastructure.databases.exceptions.exceptions import (
11
+ CacheConnectionError,
12
+ SharedKuzuLockRequiresRedisError,
13
+ )
14
+ from cognee.infrastructure.files.storage.get_storage_config import get_storage_config
15
+ from cognee.shared.logging_utils import get_logger
16
+
17
+ logger = get_logger("FSCacheAdapter")
18
+
19
+
20
+ class FSCacheAdapter(CacheDBInterface):
21
+ def __init__(self):
22
+ default_key = "sessions_db"
23
+
24
+ storage_config = get_storage_config()
25
+ data_root_directory = storage_config["data_root_directory"]
26
+ cache_directory = os.path.join(data_root_directory, ".cognee_fs_cache", default_key)
27
+ os.makedirs(cache_directory, exist_ok=True)
28
+ self.cache = dc.Cache(directory=cache_directory)
29
+ self.cache.expire()
30
+
31
+ logger.debug(f"FSCacheAdapter initialized with cache directory: {cache_directory}")
32
+
33
+ def acquire_lock(self):
34
+ """Lock acquisition is not available for filesystem cache backend."""
35
+ message = "Shared Kuzu lock requires Redis cache backend."
36
+ logger.error(message)
37
+ raise SharedKuzuLockRequiresRedisError()
38
+
39
+ def release_lock(self):
40
+ """Lock release is not available for filesystem cache backend."""
41
+ message = "Shared Kuzu lock requires Redis cache backend."
42
+ logger.error(message)
43
+ raise SharedKuzuLockRequiresRedisError()
44
+
45
+ async def add_qa(
46
+ self,
47
+ user_id: str,
48
+ session_id: str,
49
+ question: str,
50
+ context: str,
51
+ answer: str,
52
+ ttl: int | None = 86400,
53
+ ):
54
+ try:
55
+ session_key = f"agent_sessions:{user_id}:{session_id}"
56
+
57
+ qa_entry = {
58
+ "time": datetime.utcnow().isoformat(),
59
+ "question": question,
60
+ "context": context,
61
+ "answer": answer,
62
+ }
63
+
64
+ existing_value = self.cache.get(session_key)
65
+ if existing_value is not None:
66
+ value: list = json.loads(existing_value)
67
+ value.append(qa_entry)
68
+ else:
69
+ value = [qa_entry]
70
+
71
+ self.cache.set(session_key, json.dumps(value), expire=ttl)
72
+ except Exception as e:
73
+ error_msg = f"Unexpected error while adding Q&A to diskcache: {str(e)}"
74
+ logger.error(error_msg)
75
+ raise CacheConnectionError(error_msg) from e
76
+
77
+ async def get_latest_qa(self, user_id: str, session_id: str, last_n: int = 5):
78
+ session_key = f"agent_sessions:{user_id}:{session_id}"
79
+ value = self.cache.get(session_key)
80
+ if value is None:
81
+ return None
82
+ entries = json.loads(value)
83
+ return entries[-last_n:] if len(entries) > last_n else entries
84
+
85
+ async def get_all_qas(self, user_id: str, session_id: str):
86
+ session_key = f"agent_sessions:{user_id}:{session_id}"
87
+ value = self.cache.get(session_key)
88
+ if value is None:
89
+ return None
90
+ return json.loads(value)
91
+
92
+ async def close(self):
93
+ if self.cache is not None:
94
+ self.cache.expire()
95
+ self.cache.close()
96
+
97
+
98
+ async def main():
99
+ adapter = FSCacheAdapter()
100
+ session_id = "demo_session"
101
+ user_id = "demo_user_id"
102
+
103
+ print("\nAdding sample Q/A pairs...")
104
+ await adapter.add_qa(
105
+ user_id,
106
+ session_id,
107
+ "What is Redis?",
108
+ "Basic DB context",
109
+ "Redis is an in-memory data store.",
110
+ )
111
+ await adapter.add_qa(
112
+ user_id,
113
+ session_id,
114
+ "Who created Redis?",
115
+ "Historical context",
116
+ "Salvatore Sanfilippo (antirez).",
117
+ )
118
+
119
+ print("\nLatest QA:")
120
+ latest = await adapter.get_latest_qa(user_id, session_id)
121
+ print(json.dumps(latest, indent=2))
122
+
123
+ print("\nLast 2 QAs:")
124
+ last_two = await adapter.get_latest_qa(user_id, session_id, last_n=2)
125
+ print(json.dumps(last_two, indent=2))
126
+
127
+ session_id = "session_expire_demo"
128
+
129
+ await adapter.add_qa(
130
+ user_id,
131
+ session_id,
132
+ "What is Redis?",
133
+ "Database context",
134
+ "Redis is an in-memory data store.",
135
+ )
136
+
137
+ await adapter.add_qa(
138
+ user_id,
139
+ session_id,
140
+ "Who created Redis?",
141
+ "History context",
142
+ "Salvatore Sanfilippo (antirez).",
143
+ )
144
+
145
+ print(await adapter.get_all_qas(user_id, session_id))
146
+
147
+ await adapter.close()
148
+
149
+
150
+ if __name__ == "__main__":
151
+ asyncio.run(main())
@@ -1,9 +1,11 @@
1
1
  """Factory to get the appropriate cache coordination engine (e.g., Redis)."""
2
2
 
3
3
  from functools import lru_cache
4
+ import os
4
5
  from typing import Optional
5
6
  from cognee.infrastructure.databases.cache.config import get_cache_config
6
7
  from cognee.infrastructure.databases.cache.cache_db_interface import CacheDBInterface
8
+ from cognee.infrastructure.databases.cache.fscache.FsCacheAdapter import FSCacheAdapter
7
9
 
8
10
  config = get_cache_config()
9
11
 
@@ -33,20 +35,28 @@ def create_cache_engine(
33
35
 
34
36
  Returns:
35
37
  --------
36
- - CacheDBInterface: An instance of the appropriate cache adapter. :TODO: Now we support only Redis. later if we add more here we can split the logic
38
+ - CacheDBInterface: An instance of the appropriate cache adapter.
37
39
  """
38
40
  if config.caching:
39
41
  from cognee.infrastructure.databases.cache.redis.RedisAdapter import RedisAdapter
40
42
 
41
- return RedisAdapter(
42
- host=cache_host,
43
- port=cache_port,
44
- username=cache_username,
45
- password=cache_password,
46
- lock_name=lock_key,
47
- timeout=agentic_lock_expire,
48
- blocking_timeout=agentic_lock_timeout,
49
- )
43
+ if config.cache_backend == "redis":
44
+ return RedisAdapter(
45
+ host=cache_host,
46
+ port=cache_port,
47
+ username=cache_username,
48
+ password=cache_password,
49
+ lock_name=lock_key,
50
+ timeout=agentic_lock_expire,
51
+ blocking_timeout=agentic_lock_timeout,
52
+ )
53
+ elif config.cache_backend == "fs":
54
+ return FSCacheAdapter()
55
+ else:
56
+ raise ValueError(
57
+ f"Unsupported cache backend: '{config.cache_backend}'. "
58
+ f"Supported backends are: 'redis', 'fs'"
59
+ )
50
60
  else:
51
61
  return None
52
62
 
@@ -148,3 +148,19 @@ class CacheConnectionError(CogneeConfigurationError):
148
148
  status_code: int = status.HTTP_503_SERVICE_UNAVAILABLE,
149
149
  ):
150
150
  super().__init__(message, name, status_code)
151
+
152
+
153
+ class SharedKuzuLockRequiresRedisError(CogneeConfigurationError):
154
+ """
155
+ Raised when shared Kuzu locking is requested without configuring the Redis backend.
156
+ """
157
+
158
+ def __init__(
159
+ self,
160
+ message: str = (
161
+ "Shared Kuzu lock requires Redis cache backend. Configure Redis to enable shared Kuzu locking."
162
+ ),
163
+ name: str = "SharedKuzuLockRequiresRedisError",
164
+ status_code: int = status.HTTP_400_BAD_REQUEST,
165
+ ):
166
+ super().__init__(message, name, status_code)
@@ -26,6 +26,7 @@ class GraphConfig(BaseSettings):
26
26
  - graph_database_username
27
27
  - graph_database_password
28
28
  - graph_database_port
29
+ - graph_database_key
29
30
  - graph_file_path
30
31
  - graph_model
31
32
  - graph_topology
@@ -41,6 +42,7 @@ class GraphConfig(BaseSettings):
41
42
  graph_database_username: str = ""
42
43
  graph_database_password: str = ""
43
44
  graph_database_port: int = 123
45
+ graph_database_key: str = ""
44
46
  graph_file_path: str = ""
45
47
  graph_filename: str = ""
46
48
  graph_model: object = KnowledgeGraph
@@ -90,6 +92,7 @@ class GraphConfig(BaseSettings):
90
92
  "graph_database_username": self.graph_database_username,
91
93
  "graph_database_password": self.graph_database_password,
92
94
  "graph_database_port": self.graph_database_port,
95
+ "graph_database_key": self.graph_database_key,
93
96
  "graph_file_path": self.graph_file_path,
94
97
  "graph_model": self.graph_model,
95
98
  "graph_topology": self.graph_topology,
@@ -116,6 +119,7 @@ class GraphConfig(BaseSettings):
116
119
  "graph_database_username": self.graph_database_username,
117
120
  "graph_database_password": self.graph_database_password,
118
121
  "graph_database_port": self.graph_database_port,
122
+ "graph_database_key": self.graph_database_key,
119
123
  "graph_file_path": self.graph_file_path,
120
124
  }
121
125
 
@@ -33,6 +33,7 @@ def create_graph_engine(
33
33
  graph_database_username="",
34
34
  graph_database_password="",
35
35
  graph_database_port="",
36
+ graph_database_key="",
36
37
  ):
37
38
  """
38
39
  Create a graph engine based on the specified provider type.
@@ -69,6 +70,7 @@ def create_graph_engine(
69
70
  graph_database_url=graph_database_url,
70
71
  graph_database_username=graph_database_username,
71
72
  graph_database_password=graph_database_password,
73
+ database_name=graph_database_name,
72
74
  )
73
75
 
74
76
  if graph_database_provider == "neo4j":
@@ -416,6 +416,15 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
416
416
  self._client.query(f"MATCH (n :{self._VECTOR_NODE_LABEL}) DETACH DELETE n")
417
417
  pass
418
418
 
419
+ async def is_empty(self) -> bool:
420
+ query = """
421
+ MATCH (n)
422
+ RETURN true
423
+ LIMIT 1;
424
+ """
425
+ query_result = await self._client.query(query)
426
+ return len(query_result) == 0
427
+
419
428
  @staticmethod
420
429
  def _get_scored_result(
421
430
  item: dict, with_vector: bool = False, with_score: bool = False
@@ -1,11 +1,15 @@
1
+ import os
1
2
  from uuid import UUID
2
3
  from typing import Union
3
4
 
4
5
  from sqlalchemy import select
5
6
  from sqlalchemy.exc import IntegrityError
6
- from cognee.modules.data.methods import create_dataset
7
7
 
8
+ from cognee.base_config import get_base_config
9
+ from cognee.modules.data.methods import create_dataset
8
10
  from cognee.infrastructure.databases.relational import get_relational_engine
11
+ from cognee.infrastructure.databases.vector import get_vectordb_config
12
+ from cognee.infrastructure.databases.graph.config import get_graph_config
9
13
  from cognee.modules.data.methods import get_unique_dataset_id
10
14
  from cognee.modules.users.models import DatasetDatabase
11
15
  from cognee.modules.users.models import User
@@ -32,8 +36,32 @@ async def get_or_create_dataset_database(
32
36
 
33
37
  dataset_id = await get_unique_dataset_id(dataset, user)
34
38
 
35
- vector_db_name = f"{dataset_id}.lance.db"
36
- graph_db_name = f"{dataset_id}.pkl"
39
+ vector_config = get_vectordb_config()
40
+ graph_config = get_graph_config()
41
+
42
+ # Note: for hybrid databases both graph and vector DB name have to be the same
43
+ if graph_config.graph_database_provider == "kuzu":
44
+ graph_db_name = f"{dataset_id}.pkl"
45
+ else:
46
+ graph_db_name = f"{dataset_id}"
47
+
48
+ if vector_config.vector_db_provider == "lancedb":
49
+ vector_db_name = f"{dataset_id}.lance.db"
50
+ else:
51
+ vector_db_name = f"{dataset_id}"
52
+
53
+ base_config = get_base_config()
54
+ databases_directory_path = os.path.join(
55
+ base_config.system_root_directory, "databases", str(user.id)
56
+ )
57
+
58
+ # Determine vector database URL
59
+ if vector_config.vector_db_provider == "lancedb":
60
+ vector_db_url = os.path.join(databases_directory_path, vector_config.vector_db_name)
61
+ else:
62
+ vector_db_url = vector_config.vector_database_url
63
+
64
+ # Determine graph database URL
37
65
 
38
66
  async with db_engine.get_async_session() as session:
39
67
  # Create dataset if it doesn't exist
@@ -55,6 +83,12 @@ async def get_or_create_dataset_database(
55
83
  dataset_id=dataset_id,
56
84
  vector_database_name=vector_db_name,
57
85
  graph_database_name=graph_db_name,
86
+ vector_database_provider=vector_config.vector_db_provider,
87
+ graph_database_provider=graph_config.graph_database_provider,
88
+ vector_database_url=vector_db_url,
89
+ graph_database_url=graph_config.graph_database_url,
90
+ vector_database_key=vector_config.vector_db_key,
91
+ graph_database_key=graph_config.graph_database_key,
58
92
  )
59
93
 
60
94
  try:
@@ -18,12 +18,14 @@ class VectorConfig(BaseSettings):
18
18
  Instance variables:
19
19
  - vector_db_url: The URL of the vector database.
20
20
  - vector_db_port: The port for the vector database.
21
+ - vector_db_name: The name of the vector database.
21
22
  - vector_db_key: The key for accessing the vector database.
22
23
  - vector_db_provider: The provider for the vector database.
23
24
  """
24
25
 
25
26
  vector_db_url: str = ""
26
27
  vector_db_port: int = 1234
28
+ vector_db_name: str = ""
27
29
  vector_db_key: str = ""
28
30
  vector_db_provider: str = "lancedb"
29
31
 
@@ -58,6 +60,7 @@ class VectorConfig(BaseSettings):
58
60
  return {
59
61
  "vector_db_url": self.vector_db_url,
60
62
  "vector_db_port": self.vector_db_port,
63
+ "vector_db_name": self.vector_db_name,
61
64
  "vector_db_key": self.vector_db_key,
62
65
  "vector_db_provider": self.vector_db_provider,
63
66
  }
@@ -1,5 +1,6 @@
1
1
  from .supported_databases import supported_databases
2
2
  from .embeddings import get_embedding_engine
3
+ from cognee.infrastructure.databases.graph.config import get_graph_context_config
3
4
 
4
5
  from functools import lru_cache
5
6
 
@@ -8,6 +9,7 @@ from functools import lru_cache
8
9
  def create_vector_engine(
9
10
  vector_db_provider: str,
10
11
  vector_db_url: str,
12
+ vector_db_name: str,
11
13
  vector_db_port: str = "",
12
14
  vector_db_key: str = "",
13
15
  ):
@@ -27,6 +29,7 @@ def create_vector_engine(
27
29
  - vector_db_url (str): The URL for the vector database instance.
28
30
  - vector_db_port (str): The port for the vector database instance. Required for some
29
31
  providers.
32
+ - vector_db_name (str): The name of the vector database instance.
30
33
  - vector_db_key (str): The API key or access token for the vector database instance.
31
34
  - vector_db_provider (str): The name of the vector database provider to use (e.g.,
32
35
  'pgvector').
@@ -45,6 +48,7 @@ def create_vector_engine(
45
48
  url=vector_db_url,
46
49
  api_key=vector_db_key,
47
50
  embedding_engine=embedding_engine,
51
+ database_name=vector_db_name,
48
52
  )
49
53
 
50
54
  if vector_db_provider.lower() == "pgvector":
@@ -133,6 +137,6 @@ def create_vector_engine(
133
137
 
134
138
  else:
135
139
  raise EnvironmentError(
136
- f"Unsupported graph database provider: {vector_db_provider}. "
140
+ f"Unsupported vector database provider: {vector_db_provider}. "
137
141
  f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['LanceDB', 'PGVector', 'neptune_analytics', 'ChromaDB'])}"
138
142
  )
@@ -124,10 +124,7 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
124
124
  self.endpoint, json=payload, headers=headers, timeout=60.0
125
125
  ) as response:
126
126
  data = await response.json()
127
- if "embeddings" in data:
128
- return data["embeddings"][0]
129
- else:
130
- return data["data"][0]["embedding"]
127
+ return data["embeddings"][0]
131
128
 
132
129
  def get_vector_size(self) -> int:
133
130
  """
@@ -1,4 +1,4 @@
1
- from pydantic import BaseModel
1
+ from pydantic import BaseModel, field_validator
2
2
  from typing import Optional, Any, Dict
3
3
 
4
4
 
@@ -18,9 +18,21 @@ class Edge(BaseModel):
18
18
 
19
19
  # Mixed usage
20
20
  has_items: (Edge(weight=0.5, weights={"confidence": 0.9}), list[Item])
21
+
22
+ # With edge_text for rich embedding representation
23
+ contains: (Edge(relationship_type="contains", edge_text="relationship_name: contains; entity_description: Alice"), Entity)
21
24
  """
22
25
 
23
26
  weight: Optional[float] = None
24
27
  weights: Optional[Dict[str, float]] = None
25
28
  relationship_type: Optional[str] = None
26
29
  properties: Optional[Dict[str, Any]] = None
30
+ edge_text: Optional[str] = None
31
+
32
+ @field_validator("edge_text", mode="before")
33
+ @classmethod
34
+ def ensure_edge_text(cls, v, info):
35
+ """Auto-populate edge_text from relationship_type if not explicitly provided."""
36
+ if v is None and info.data.get("relationship_type"):
37
+ return info.data["relationship_type"]
38
+ return v
@@ -55,6 +55,10 @@ def guess_file_type(file: BinaryIO, name: Optional[str] = None) -> filetype.Type
55
55
  file_type = Type("text/plain", "txt")
56
56
  return file_type
57
57
 
58
+ if ext in [".csv"]:
59
+ file_type = Type("text/csv", "csv")
60
+ return file_type
61
+
58
62
  file_type = filetype.guess(file)
59
63
 
60
64
  # If file type could not be determined consider it a plain text file as they don't have magic number encoding
@@ -38,6 +38,7 @@ class LLMConfig(BaseSettings):
38
38
  """
39
39
 
40
40
  structured_output_framework: str = "instructor"
41
+ llm_instructor_mode: str = ""
41
42
  llm_provider: str = "openai"
42
43
  llm_model: str = "openai/gpt-5-mini"
43
44
  llm_endpoint: str = ""
@@ -181,6 +182,7 @@ class LLMConfig(BaseSettings):
181
182
  instance.
182
183
  """
183
184
  return {
185
+ "llm_instructor_mode": self.llm_instructor_mode.lower(),
184
186
  "provider": self.llm_provider,
185
187
  "model": self.llm_model,
186
188
  "endpoint": self.llm_endpoint,
@@ -28,13 +28,16 @@ class AnthropicAdapter(LLMInterface):
28
28
 
29
29
  name = "Anthropic"
30
30
  model: str
31
+ default_instructor_mode = "anthropic_tools"
31
32
 
32
- def __init__(self, max_completion_tokens: int, model: str = None):
33
+ def __init__(self, max_completion_tokens: int, model: str = None, instructor_mode: str = None):
33
34
  import anthropic
34
35
 
36
+ self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
37
+
35
38
  self.aclient = instructor.patch(
36
39
  create=anthropic.AsyncAnthropic(api_key=get_llm_config().llm_api_key).messages.create,
37
- mode=instructor.Mode.ANTHROPIC_TOOLS,
40
+ mode=instructor.Mode(self.instructor_mode),
38
41
  )
39
42
 
40
43
  self.model = model
@@ -41,6 +41,7 @@ class GeminiAdapter(LLMInterface):
41
41
  name: str
42
42
  model: str
43
43
  api_key: str
44
+ default_instructor_mode = "json_mode"
44
45
 
45
46
  def __init__(
46
47
  self,
@@ -49,6 +50,7 @@ class GeminiAdapter(LLMInterface):
49
50
  model: str,
50
51
  api_version: str,
51
52
  max_completion_tokens: int,
53
+ instructor_mode: str = None,
52
54
  fallback_model: str = None,
53
55
  fallback_api_key: str = None,
54
56
  fallback_endpoint: str = None,
@@ -63,7 +65,11 @@ class GeminiAdapter(LLMInterface):
63
65
  self.fallback_api_key = fallback_api_key
64
66
  self.fallback_endpoint = fallback_endpoint
65
67
 
66
- self.aclient = instructor.from_litellm(litellm.acompletion, mode=instructor.Mode.JSON)
68
+ self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
69
+
70
+ self.aclient = instructor.from_litellm(
71
+ litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
72
+ )
67
73
 
68
74
  @retry(
69
75
  stop=stop_after_delay(128),
@@ -41,6 +41,7 @@ class GenericAPIAdapter(LLMInterface):
41
41
  name: str
42
42
  model: str
43
43
  api_key: str
44
+ default_instructor_mode = "json_mode"
44
45
 
45
46
  def __init__(
46
47
  self,
@@ -49,6 +50,7 @@ class GenericAPIAdapter(LLMInterface):
49
50
  model: str,
50
51
  name: str,
51
52
  max_completion_tokens: int,
53
+ instructor_mode: str = None,
52
54
  fallback_model: str = None,
53
55
  fallback_api_key: str = None,
54
56
  fallback_endpoint: str = None,
@@ -63,7 +65,11 @@ class GenericAPIAdapter(LLMInterface):
63
65
  self.fallback_api_key = fallback_api_key
64
66
  self.fallback_endpoint = fallback_endpoint
65
67
 
66
- self.aclient = instructor.from_litellm(litellm.acompletion, mode=instructor.Mode.JSON)
68
+ self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
69
+
70
+ self.aclient = instructor.from_litellm(
71
+ litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
72
+ )
67
73
 
68
74
  @retry(
69
75
  stop=stop_after_delay(128),
@@ -81,6 +81,7 @@ def get_llm_client(raise_api_key_error: bool = True):
81
81
  model=llm_config.llm_model,
82
82
  transcription_model=llm_config.transcription_model,
83
83
  max_completion_tokens=max_completion_tokens,
84
+ instructor_mode=llm_config.llm_instructor_mode.lower(),
84
85
  streaming=llm_config.llm_streaming,
85
86
  fallback_api_key=llm_config.fallback_api_key,
86
87
  fallback_endpoint=llm_config.fallback_endpoint,
@@ -101,6 +102,7 @@ def get_llm_client(raise_api_key_error: bool = True):
101
102
  llm_config.llm_model,
102
103
  "Ollama",
103
104
  max_completion_tokens=max_completion_tokens,
105
+ instructor_mode=llm_config.llm_instructor_mode.lower(),
104
106
  )
105
107
 
106
108
  elif provider == LLMProvider.ANTHROPIC:
@@ -109,7 +111,9 @@ def get_llm_client(raise_api_key_error: bool = True):
109
111
  )
110
112
 
111
113
  return AnthropicAdapter(
112
- max_completion_tokens=max_completion_tokens, model=llm_config.llm_model
114
+ max_completion_tokens=max_completion_tokens,
115
+ model=llm_config.llm_model,
116
+ instructor_mode=llm_config.llm_instructor_mode.lower(),
113
117
  )
114
118
 
115
119
  elif provider == LLMProvider.CUSTOM:
@@ -126,6 +130,7 @@ def get_llm_client(raise_api_key_error: bool = True):
126
130
  llm_config.llm_model,
127
131
  "Custom",
128
132
  max_completion_tokens=max_completion_tokens,
133
+ instructor_mode=llm_config.llm_instructor_mode.lower(),
129
134
  fallback_api_key=llm_config.fallback_api_key,
130
135
  fallback_endpoint=llm_config.fallback_endpoint,
131
136
  fallback_model=llm_config.fallback_model,
@@ -145,6 +150,7 @@ def get_llm_client(raise_api_key_error: bool = True):
145
150
  max_completion_tokens=max_completion_tokens,
146
151
  endpoint=llm_config.llm_endpoint,
147
152
  api_version=llm_config.llm_api_version,
153
+ instructor_mode=llm_config.llm_instructor_mode.lower(),
148
154
  )
149
155
 
150
156
  elif provider == LLMProvider.MISTRAL:
@@ -160,21 +166,7 @@ def get_llm_client(raise_api_key_error: bool = True):
160
166
  model=llm_config.llm_model,
161
167
  max_completion_tokens=max_completion_tokens,
162
168
  endpoint=llm_config.llm_endpoint,
163
- )
164
-
165
- elif provider == LLMProvider.MISTRAL:
166
- if llm_config.llm_api_key is None:
167
- raise LLMAPIKeyNotSetError()
168
-
169
- from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.mistral.adapter import (
170
- MistralAdapter,
171
- )
172
-
173
- return MistralAdapter(
174
- api_key=llm_config.llm_api_key,
175
- model=llm_config.llm_model,
176
- max_completion_tokens=max_completion_tokens,
177
- endpoint=llm_config.llm_endpoint,
169
+ instructor_mode=llm_config.llm_instructor_mode.lower(),
178
170
  )
179
171
 
180
172
  else:
@@ -37,16 +37,26 @@ class MistralAdapter(LLMInterface):
37
37
  model: str
38
38
  api_key: str
39
39
  max_completion_tokens: int
40
+ default_instructor_mode = "mistral_tools"
40
41
 
41
- def __init__(self, api_key: str, model: str, max_completion_tokens: int, endpoint: str = None):
42
+ def __init__(
43
+ self,
44
+ api_key: str,
45
+ model: str,
46
+ max_completion_tokens: int,
47
+ endpoint: str = None,
48
+ instructor_mode: str = None,
49
+ ):
42
50
  from mistralai import Mistral
43
51
 
44
52
  self.model = model
45
53
  self.max_completion_tokens = max_completion_tokens
46
54
 
55
+ self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
56
+
47
57
  self.aclient = instructor.from_litellm(
48
58
  litellm.acompletion,
49
- mode=instructor.Mode.MISTRAL_TOOLS,
59
+ mode=instructor.Mode(self.instructor_mode),
50
60
  api_key=get_llm_config().llm_api_key,
51
61
  )
52
62