cognee 0.3.6__py3-none-any.whl → 0.3.7.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. cognee/__init__.py +1 -0
  2. cognee/api/health.py +2 -12
  3. cognee/api/v1/add/add.py +46 -6
  4. cognee/api/v1/add/routers/get_add_router.py +11 -2
  5. cognee/api/v1/cognify/cognify.py +29 -9
  6. cognee/api/v1/cognify/routers/get_cognify_router.py +2 -1
  7. cognee/api/v1/datasets/datasets.py +11 -0
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +8 -0
  9. cognee/api/v1/delete/routers/get_delete_router.py +2 -0
  10. cognee/api/v1/memify/routers/get_memify_router.py +2 -1
  11. cognee/api/v1/permissions/routers/get_permissions_router.py +6 -0
  12. cognee/api/v1/responses/default_tools.py +0 -1
  13. cognee/api/v1/responses/dispatch_function.py +1 -1
  14. cognee/api/v1/responses/routers/default_tools.py +0 -1
  15. cognee/api/v1/search/routers/get_search_router.py +3 -3
  16. cognee/api/v1/search/search.py +11 -9
  17. cognee/api/v1/settings/routers/get_settings_router.py +7 -1
  18. cognee/api/v1/sync/routers/get_sync_router.py +3 -0
  19. cognee/api/v1/ui/ui.py +45 -16
  20. cognee/api/v1/update/routers/get_update_router.py +3 -1
  21. cognee/api/v1/update/update.py +3 -3
  22. cognee/api/v1/users/routers/get_visualize_router.py +2 -0
  23. cognee/cli/_cognee.py +61 -10
  24. cognee/cli/commands/add_command.py +3 -3
  25. cognee/cli/commands/cognify_command.py +3 -3
  26. cognee/cli/commands/config_command.py +9 -7
  27. cognee/cli/commands/delete_command.py +3 -3
  28. cognee/cli/commands/search_command.py +3 -7
  29. cognee/cli/config.py +0 -1
  30. cognee/context_global_variables.py +5 -0
  31. cognee/exceptions/exceptions.py +1 -1
  32. cognee/infrastructure/databases/cache/__init__.py +2 -0
  33. cognee/infrastructure/databases/cache/cache_db_interface.py +79 -0
  34. cognee/infrastructure/databases/cache/config.py +44 -0
  35. cognee/infrastructure/databases/cache/get_cache_engine.py +67 -0
  36. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +243 -0
  37. cognee/infrastructure/databases/exceptions/__init__.py +1 -0
  38. cognee/infrastructure/databases/exceptions/exceptions.py +18 -2
  39. cognee/infrastructure/databases/graph/get_graph_engine.py +1 -1
  40. cognee/infrastructure/databases/graph/graph_db_interface.py +5 -0
  41. cognee/infrastructure/databases/graph/kuzu/adapter.py +76 -47
  42. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +13 -3
  43. cognee/infrastructure/databases/graph/neo4j_driver/deadlock_retry.py +1 -1
  44. cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +1 -1
  45. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +1 -1
  46. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +21 -3
  47. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +17 -10
  48. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +17 -4
  49. cognee/infrastructure/databases/vector/embeddings/config.py +2 -3
  50. cognee/infrastructure/databases/vector/exceptions/exceptions.py +1 -1
  51. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -1
  52. cognee/infrastructure/files/exceptions.py +1 -1
  53. cognee/infrastructure/files/storage/LocalFileStorage.py +9 -9
  54. cognee/infrastructure/files/storage/S3FileStorage.py +11 -11
  55. cognee/infrastructure/files/utils/guess_file_type.py +6 -0
  56. cognee/infrastructure/llm/prompts/feedback_reaction_prompt.txt +14 -0
  57. cognee/infrastructure/llm/prompts/feedback_report_prompt.txt +13 -0
  58. cognee/infrastructure/llm/prompts/feedback_user_context_prompt.txt +5 -0
  59. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +0 -5
  60. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +19 -9
  61. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +17 -5
  62. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +17 -5
  63. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +32 -0
  64. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/__init__.py +0 -0
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +109 -0
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +33 -8
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +40 -18
  68. cognee/infrastructure/loaders/LoaderEngine.py +27 -7
  69. cognee/infrastructure/loaders/external/__init__.py +7 -0
  70. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +2 -8
  71. cognee/infrastructure/loaders/external/beautiful_soup_loader.py +310 -0
  72. cognee/infrastructure/loaders/supported_loaders.py +7 -0
  73. cognee/modules/data/exceptions/exceptions.py +1 -1
  74. cognee/modules/data/methods/__init__.py +3 -0
  75. cognee/modules/data/methods/get_dataset_data.py +4 -1
  76. cognee/modules/data/methods/has_dataset_data.py +21 -0
  77. cognee/modules/engine/models/TableRow.py +0 -1
  78. cognee/modules/ingestion/save_data_to_file.py +9 -2
  79. cognee/modules/pipelines/exceptions/exceptions.py +1 -1
  80. cognee/modules/pipelines/operations/pipeline.py +12 -1
  81. cognee/modules/pipelines/operations/run_tasks.py +25 -197
  82. cognee/modules/pipelines/operations/run_tasks_base.py +7 -0
  83. cognee/modules/pipelines/operations/run_tasks_data_item.py +260 -0
  84. cognee/modules/pipelines/operations/run_tasks_distributed.py +121 -38
  85. cognee/modules/pipelines/operations/run_tasks_with_telemetry.py +9 -1
  86. cognee/modules/retrieval/EntityCompletionRetriever.py +48 -8
  87. cognee/modules/retrieval/base_graph_retriever.py +3 -1
  88. cognee/modules/retrieval/base_retriever.py +3 -1
  89. cognee/modules/retrieval/chunks_retriever.py +5 -1
  90. cognee/modules/retrieval/code_retriever.py +20 -2
  91. cognee/modules/retrieval/completion_retriever.py +50 -9
  92. cognee/modules/retrieval/cypher_search_retriever.py +11 -1
  93. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +47 -8
  94. cognee/modules/retrieval/graph_completion_cot_retriever.py +152 -22
  95. cognee/modules/retrieval/graph_completion_retriever.py +54 -10
  96. cognee/modules/retrieval/lexical_retriever.py +20 -2
  97. cognee/modules/retrieval/natural_language_retriever.py +10 -1
  98. cognee/modules/retrieval/summaries_retriever.py +5 -1
  99. cognee/modules/retrieval/temporal_retriever.py +62 -10
  100. cognee/modules/retrieval/user_qa_feedback.py +3 -2
  101. cognee/modules/retrieval/utils/completion.py +30 -4
  102. cognee/modules/retrieval/utils/description_to_codepart_search.py +1 -1
  103. cognee/modules/retrieval/utils/session_cache.py +156 -0
  104. cognee/modules/search/methods/get_search_type_tools.py +0 -5
  105. cognee/modules/search/methods/no_access_control_search.py +12 -1
  106. cognee/modules/search/methods/search.py +51 -5
  107. cognee/modules/search/types/SearchType.py +0 -1
  108. cognee/modules/settings/get_settings.py +23 -0
  109. cognee/modules/users/methods/get_authenticated_user.py +3 -1
  110. cognee/modules/users/methods/get_default_user.py +1 -6
  111. cognee/modules/users/roles/methods/create_role.py +2 -2
  112. cognee/modules/users/tenants/methods/create_tenant.py +2 -2
  113. cognee/shared/exceptions/exceptions.py +1 -1
  114. cognee/shared/logging_utils.py +18 -11
  115. cognee/shared/utils.py +24 -2
  116. cognee/tasks/codingagents/coding_rule_associations.py +1 -2
  117. cognee/tasks/documents/exceptions/exceptions.py +1 -1
  118. cognee/tasks/feedback/__init__.py +13 -0
  119. cognee/tasks/feedback/create_enrichments.py +84 -0
  120. cognee/tasks/feedback/extract_feedback_interactions.py +230 -0
  121. cognee/tasks/feedback/generate_improved_answers.py +130 -0
  122. cognee/tasks/feedback/link_enrichments_to_feedback.py +67 -0
  123. cognee/tasks/feedback/models.py +26 -0
  124. cognee/tasks/graph/extract_graph_from_data.py +2 -0
  125. cognee/tasks/ingestion/data_item_to_text_file.py +3 -3
  126. cognee/tasks/ingestion/ingest_data.py +11 -5
  127. cognee/tasks/ingestion/save_data_item_to_storage.py +12 -1
  128. cognee/tasks/storage/add_data_points.py +3 -10
  129. cognee/tasks/storage/index_data_points.py +19 -14
  130. cognee/tasks/storage/index_graph_edges.py +25 -11
  131. cognee/tasks/web_scraper/__init__.py +34 -0
  132. cognee/tasks/web_scraper/config.py +26 -0
  133. cognee/tasks/web_scraper/default_url_crawler.py +446 -0
  134. cognee/tasks/web_scraper/models.py +46 -0
  135. cognee/tasks/web_scraper/types.py +4 -0
  136. cognee/tasks/web_scraper/utils.py +142 -0
  137. cognee/tasks/web_scraper/web_scraper_task.py +396 -0
  138. cognee/tests/cli_tests/cli_unit_tests/test_cli_utils.py +0 -1
  139. cognee/tests/integration/web_url_crawler/test_default_url_crawler.py +13 -0
  140. cognee/tests/integration/web_url_crawler/test_tavily_crawler.py +19 -0
  141. cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py +344 -0
  142. cognee/tests/subprocesses/reader.py +25 -0
  143. cognee/tests/subprocesses/simple_cognify_1.py +31 -0
  144. cognee/tests/subprocesses/simple_cognify_2.py +31 -0
  145. cognee/tests/subprocesses/writer.py +32 -0
  146. cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py +0 -2
  147. cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py +8 -3
  148. cognee/tests/tasks/entity_extraction/entity_extraction_test.py +89 -0
  149. cognee/tests/tasks/web_scraping/web_scraping_test.py +172 -0
  150. cognee/tests/test_add_docling_document.py +56 -0
  151. cognee/tests/test_chromadb.py +7 -11
  152. cognee/tests/test_concurrent_subprocess_access.py +76 -0
  153. cognee/tests/test_conversation_history.py +240 -0
  154. cognee/tests/test_feedback_enrichment.py +174 -0
  155. cognee/tests/test_kuzu.py +27 -15
  156. cognee/tests/test_lancedb.py +7 -11
  157. cognee/tests/test_library.py +32 -2
  158. cognee/tests/test_neo4j.py +24 -16
  159. cognee/tests/test_neptune_analytics_vector.py +7 -11
  160. cognee/tests/test_permissions.py +9 -13
  161. cognee/tests/test_pgvector.py +4 -4
  162. cognee/tests/test_remote_kuzu.py +8 -11
  163. cognee/tests/test_s3_file_storage.py +1 -1
  164. cognee/tests/test_search_db.py +6 -8
  165. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +89 -0
  166. cognee/tests/unit/modules/retrieval/conversation_history_test.py +154 -0
  167. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +51 -0
  168. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/METADATA +21 -6
  169. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/RECORD +178 -139
  170. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/entry_points.txt +1 -0
  171. distributed/Dockerfile +0 -3
  172. distributed/entrypoint.py +21 -9
  173. distributed/signal.py +5 -0
  174. distributed/workers/data_point_saving_worker.py +64 -34
  175. distributed/workers/graph_saving_worker.py +71 -47
  176. cognee/infrastructure/databases/graph/memgraph/memgraph_adapter.py +0 -1116
  177. cognee/modules/retrieval/insights_retriever.py +0 -133
  178. cognee/tests/test_memgraph.py +0 -109
  179. cognee/tests/unit/modules/retrieval/insights_retriever_test.py +0 -251
  180. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/WHEEL +0 -0
  181. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/licenses/LICENSE +0 -0
  182. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,156 @@
1
+ from typing import Optional, List, Dict, Any
2
+ from cognee.context_global_variables import session_user
3
+ from cognee.infrastructure.databases.cache.config import CacheConfig
4
+ from cognee.infrastructure.databases.exceptions import CacheConnectionError
5
+ from cognee.shared.logging_utils import get_logger
6
+
7
+ logger = get_logger("session_cache")
8
+
9
+
10
async def save_conversation_history(
    query: str,
    context_summary: str,
    answer: str,
    session_id: Optional[str] = None,
) -> bool:
    """
    Store one question/answer exchange in the session cache.

    The exchange is only stored when the current session user has an id and
    caching is enabled in the cache configuration. Any failure (cache
    connection problems or unexpected errors) is logged and reported through
    the return value instead of being raised to the caller.

    Parameters:
    -----------

        - query (str): The user's query/question.
        - context_summary (str): Summarized context used for generating the answer.
        - answer (str): The generated answer/completion.
        - session_id (Optional[str]): Session identifier. 'default_session' is used
          when None is given.

    Returns:
    --------

        - bool: True if the exchange was handed to the cache engine, False otherwise.
    """
    try:
        config = CacheConfig()
        user_id = getattr(session_user.get(), "id", None)

        # Guard clause: both an identified user and enabled caching are required.
        if not user_id or not config.caching:
            logger.debug("Session caching disabled or user not authenticated")
            return False

        # Imported lazily, as in the original implementation.
        from cognee.infrastructure.databases.cache.get_cache_engine import get_cache_engine

        engine = get_cache_engine()
        if engine is None:
            logger.warning("Cache engine not available, skipping session save")
            return False

        effective_session_id = "default_session" if session_id is None else session_id

        await engine.add_qa(
            str(user_id),
            session_id=effective_session_id,
            question=query,
            context=context_summary,
            answer=answer,
        )
        logger.info(
            f"Successfully saved Q&A to session cache: user_id={user_id}, session_id={effective_session_id}"
        )
        return True

    except CacheConnectionError as error:
        logger.warning(f"Cache unavailable, continuing without session save: {error.message}")
        return False

    except Exception as error:
        # Best effort by design: caching problems must not break the caller.
        logger.error(
            f"Unexpected error saving to session cache: {type(error).__name__}: {str(error)}. Continuing without caching."
        )
        return False
76
+
77
+
78
async def get_conversation_history(
    session_id: Optional[str] = None,
) -> str:
    """
    Retrieves conversation history from cache and formats it as text.

    Returns formatted conversation history with time, question, context, and answer
    for the entries returned by the cache engine's `get_latest_qa` call.

    An empty string is returned when the session user has no id, caching is
    disabled, no cache engine is available, no history exists, or any error occurs.

    Parameters:
    -----------

        - session_id (Optional[str]): Session identifier. Defaults to 'default_session' if None.

    Returns:
    --------

        - str: Formatted conversation history string, or empty string if no history or error.

    Format:
    -------

        Previous conversation:

        [2024-01-15 10:30:45]
        QUESTION: What is X?
        CONTEXT: X is a concept...
        ANSWER: X is...

        [2024-01-15 10:31:20]
        QUESTION: How does Y work?
        CONTEXT: Y is related to...
        ANSWER: Y works by...
    """
    try:
        cache_config = CacheConfig()
        user = session_user.get()
        user_id = getattr(user, "id", None)

        if not (user_id and cache_config.caching):
            logger.debug("Session caching disabled or user not authenticated")
            return ""

        if session_id is None:
            session_id = "default_session"

        from cognee.infrastructure.databases.cache.get_cache_engine import get_cache_engine

        cache_engine = get_cache_engine()

        if cache_engine is None:
            logger.warning("Cache engine not available, skipping conversation history retrieval")
            return ""

        history_entries = await cache_engine.get_latest_qa(str(user_id), session_id)

        if not history_entries:
            logger.debug("No conversation history found")
            return ""

        # Build each entry as one block and join once, instead of growing the
        # string with repeated `+=` inside the loop.
        entry_blocks = [
            f"[{entry.get('time', 'Unknown time')}]\n"
            f"QUESTION: {entry.get('question', '')}\n"
            f"CONTEXT: {entry.get('context', '')}\n"
            f"ANSWER: {entry.get('answer', '')}\n\n"
            for entry in history_entries
        ]
        history_text = "Previous conversation:\n\n" + "".join(entry_blocks)

        logger.debug(f"Retrieved {len(history_entries)} conversation history entries")
        return history_text

    except CacheConnectionError as e:
        logger.warning(f"Cache unavailable, continuing without conversation history: {e.message}")
        return ""

    except Exception as e:
        # Best effort by design: missing history must not break the caller.
        logger.warning(
            f"Unexpected error retrieving conversation history: {type(e).__name__}: {str(e)}"
        )
        return ""
@@ -9,7 +9,6 @@ from cognee.modules.search.exceptions import UnsupportedSearchTypeError
9
9
  # Retrievers
10
10
  from cognee.modules.retrieval.user_qa_feedback import UserQAFeedback
11
11
  from cognee.modules.retrieval.chunks_retriever import ChunksRetriever
12
- from cognee.modules.retrieval.insights_retriever import InsightsRetriever
13
12
  from cognee.modules.retrieval.summaries_retriever import SummariesRetriever
14
13
  from cognee.modules.retrieval.completion_retriever import CompletionRetriever
15
14
  from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
@@ -44,10 +43,6 @@ async def get_search_type_tools(
44
43
  SummariesRetriever(top_k=top_k).get_completion,
45
44
  SummariesRetriever(top_k=top_k).get_context,
46
45
  ],
47
- SearchType.INSIGHTS: [
48
- InsightsRetriever(top_k=top_k).get_completion,
49
- InsightsRetriever(top_k=top_k).get_context,
50
- ],
51
46
  SearchType.CHUNKS: [
52
47
  ChunksRetriever(top_k=top_k).get_completion,
53
48
  ChunksRetriever(top_k=top_k).get_context,
@@ -1,12 +1,16 @@
1
1
  from typing import Any, List, Optional, Tuple, Type, Union
2
2
 
3
+ from cognee.infrastructure.databases.graph import get_graph_engine
3
4
  from cognee.modules.data.models.Dataset import Dataset
4
5
  from cognee.modules.engine.models.node_set import NodeSet
5
6
  from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
6
7
  from cognee.modules.search.types import SearchType
8
+ from cognee.shared.logging_utils import get_logger
7
9
 
8
10
  from .get_search_type_tools import get_search_type_tools
9
11
 
12
+ logger = get_logger()
13
+
10
14
 
11
15
  async def no_access_control_search(
12
16
  query_type: SearchType,
@@ -19,6 +23,7 @@ async def no_access_control_search(
19
23
  save_interaction: bool = False,
20
24
  last_k: Optional[int] = None,
21
25
  only_context: bool = False,
26
+ session_id: Optional[str] = None,
22
27
  ) -> Tuple[Any, Union[str, List[Edge]], List[Dataset]]:
23
28
  search_tools = await get_search_type_tools(
24
29
  query_type=query_type,
@@ -31,6 +36,12 @@ async def no_access_control_search(
31
36
  save_interaction=save_interaction,
32
37
  last_k=last_k,
33
38
  )
39
+ graph_engine = await get_graph_engine()
40
+ is_empty = await graph_engine.is_empty()
41
+
42
+ if is_empty:
43
+ # TODO: we can log here, but not all search types use graph. Still keeping this here for reviewer input
44
+ logger.warning("Search attempt on an empty knowledge graph")
34
45
  if len(search_tools) == 2:
35
46
  [get_completion, get_context] = search_tools
36
47
 
@@ -38,7 +49,7 @@ async def no_access_control_search(
38
49
  return None, await get_context(query_text), []
39
50
 
40
51
  context = await get_context(query_text)
41
- result = await get_completion(query_text, context)
52
+ result = await get_completion(query_text, context, session_id=session_id)
42
53
  else:
43
54
  unknown_tool = search_tools[0]
44
55
  result = await unknown_tool(query_text)
@@ -5,6 +5,8 @@ from uuid import UUID
5
5
  from fastapi.encoders import jsonable_encoder
6
6
  from typing import Any, List, Optional, Tuple, Type, Union
7
7
 
8
+ from cognee.infrastructure.databases.graph import get_graph_engine
9
+ from cognee.shared.logging_utils import get_logger
8
10
  from cognee.shared.utils import send_telemetry
9
11
  from cognee.context_global_variables import set_database_global_context_variables
10
12
 
@@ -22,11 +24,13 @@ from cognee.modules.data.models import Dataset
22
24
  from cognee.modules.data.methods.get_authorized_existing_datasets import (
23
25
  get_authorized_existing_datasets,
24
26
  )
25
-
27
+ from cognee import __version__ as cognee_version
26
28
  from .get_search_type_tools import get_search_type_tools
27
29
  from .no_access_control_search import no_access_control_search
28
30
  from ..utils.prepare_search_result import prepare_search_result
29
31
 
32
+ logger = get_logger()
33
+
30
34
 
31
35
  async def search(
32
36
  query_text: str,
@@ -42,6 +46,7 @@ async def search(
42
46
  last_k: Optional[int] = None,
43
47
  only_context: bool = False,
44
48
  use_combined_context: bool = False,
49
+ session_id: Optional[str] = None,
45
50
  ) -> Union[CombinedSearchResult, List[SearchResult]]:
46
51
  """
47
52
 
@@ -59,7 +64,14 @@ async def search(
59
64
  Searching by dataset is only available in ENABLE_BACKEND_ACCESS_CONTROL mode
60
65
  """
61
66
  query = await log_query(query_text, query_type.value, user.id)
62
- send_telemetry("cognee.search EXECUTION STARTED", user.id)
67
+ send_telemetry(
68
+ "cognee.search EXECUTION STARTED",
69
+ user.id,
70
+ additional_properties={
71
+ "cognee_version": cognee_version,
72
+ "tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant",
73
+ },
74
+ )
63
75
 
64
76
  # Use search function filtered by permissions if access control is enabled
65
77
  if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
@@ -77,6 +89,7 @@ async def search(
77
89
  last_k=last_k,
78
90
  only_context=only_context,
79
91
  use_combined_context=use_combined_context,
92
+ session_id=session_id,
80
93
  )
81
94
  else:
82
95
  search_results = [
@@ -91,10 +104,18 @@ async def search(
91
104
  save_interaction=save_interaction,
92
105
  last_k=last_k,
93
106
  only_context=only_context,
107
+ session_id=session_id,
94
108
  )
95
109
  ]
96
110
 
97
- send_telemetry("cognee.search EXECUTION COMPLETED", user.id)
111
+ send_telemetry(
112
+ "cognee.search EXECUTION COMPLETED",
113
+ user.id,
114
+ additional_properties={
115
+ "cognee_version": cognee_version,
116
+ "tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant",
117
+ },
118
+ )
98
119
 
99
120
  await log_result(
100
121
  query.id,
@@ -195,6 +216,7 @@ async def authorized_search(
195
216
  last_k: Optional[int] = None,
196
217
  only_context: bool = False,
197
218
  use_combined_context: bool = False,
219
+ session_id: Optional[str] = None,
198
220
  ) -> Union[
199
221
  Tuple[Any, Union[List[Edge], str], List[Dataset]],
200
222
  List[Tuple[Any, Union[List[Edge], str], List[Dataset]]],
@@ -221,6 +243,7 @@ async def authorized_search(
221
243
  save_interaction=save_interaction,
222
244
  last_k=last_k,
223
245
  only_context=True,
246
+ session_id=session_id,
224
247
  )
225
248
 
226
249
  context = {}
@@ -263,7 +286,7 @@ async def authorized_search(
263
286
  return combined_context
264
287
 
265
288
  combined_context = prepare_combined_context(context)
266
- completion = await get_completion(query_text, combined_context)
289
+ completion = await get_completion(query_text, combined_context, session_id=session_id)
267
290
 
268
291
  return completion, combined_context, datasets
269
292
 
@@ -280,6 +303,7 @@ async def authorized_search(
280
303
  save_interaction=save_interaction,
281
304
  last_k=last_k,
282
305
  only_context=only_context,
306
+ session_id=session_id,
283
307
  )
284
308
 
285
309
  return search_results
@@ -298,6 +322,7 @@ async def search_in_datasets_context(
298
322
  last_k: Optional[int] = None,
299
323
  only_context: bool = False,
300
324
  context: Optional[Any] = None,
325
+ session_id: Optional[str] = None,
301
326
  ) -> List[Tuple[Any, Union[str, List[Edge]], List[Dataset]]]:
302
327
  """
303
328
  Searches all provided datasets and handles setting up of appropriate database context based on permissions.
@@ -317,10 +342,30 @@ async def search_in_datasets_context(
317
342
  last_k: Optional[int] = None,
318
343
  only_context: bool = False,
319
344
  context: Optional[Any] = None,
345
+ session_id: Optional[str] = None,
320
346
  ) -> Tuple[Any, Union[str, List[Edge]], List[Dataset]]:
321
347
  # Set database configuration in async context for each dataset user has access for
322
348
  await set_database_global_context_variables(dataset.id, dataset.owner_id)
323
349
 
350
+ graph_engine = await get_graph_engine()
351
+ is_empty = await graph_engine.is_empty()
352
+
353
+ if is_empty:
354
+ # TODO: we can log here, but not all search types use graph. Still keeping this here for reviewer input
355
+ from cognee.modules.data.methods import get_dataset_data
356
+
357
+ dataset_data = await get_dataset_data(dataset.id)
358
+
359
+ if len(dataset_data) > 0:
360
+ logger.warning(
361
+ f"Dataset '{dataset.name}' has {len(dataset_data)} data item(s) but the knowledge graph is empty. "
362
+ "Please run cognify to process the data before searching."
363
+ )
364
+ else:
365
+ logger.warning(
366
+ "Search attempt on an empty knowledge graph - no data has been added to this dataset"
367
+ )
368
+
324
369
  specific_search_tools = await get_search_type_tools(
325
370
  query_type=query_type,
326
371
  query_text=query_text,
@@ -340,7 +385,7 @@ async def search_in_datasets_context(
340
385
  return None, await get_context(query_text), [dataset]
341
386
 
342
387
  search_context = context or await get_context(query_text)
343
- search_result = await get_completion(query_text, search_context)
388
+ search_result = await get_completion(query_text, search_context, session_id=session_id)
344
389
 
345
390
  return search_result, search_context, [dataset]
346
391
  else:
@@ -365,6 +410,7 @@ async def search_in_datasets_context(
365
410
  last_k=last_k,
366
411
  only_context=only_context,
367
412
  context=context,
413
+ session_id=session_id,
368
414
  )
369
415
  )
370
416
 
@@ -3,7 +3,6 @@ from enum import Enum
3
3
 
4
4
  class SearchType(Enum):
5
5
  SUMMARIES = "SUMMARIES"
6
- INSIGHTS = "INSIGHTS"
7
6
  CHUNKS = "CHUNKS"
8
7
  RAG_COMPLETION = "RAG_COMPLETION"
9
8
  GRAPH_COMPLETION = "GRAPH_COMPLETION"
@@ -15,6 +15,7 @@ class ModelName(Enum):
15
15
  ollama = "ollama"
16
16
  anthropic = "anthropic"
17
17
  gemini = "gemini"
18
+ mistral = "mistral"
18
19
 
19
20
 
20
21
  class LLMConfig(BaseModel):
@@ -72,6 +73,10 @@ def get_settings() -> SettingsDict:
72
73
  "value": "gemini",
73
74
  "label": "Gemini",
74
75
  },
76
+ {
77
+ "value": "mistral",
78
+ "label": "Mistral",
79
+ },
75
80
  ]
76
81
 
77
82
  return SettingsDict.model_validate(
@@ -134,6 +139,24 @@ def get_settings() -> SettingsDict:
134
139
  "label": "Gemini 2.0 Flash",
135
140
  },
136
141
  ],
142
+ "mistral": [
143
+ {
144
+ "value": "mistral-medium-2508",
145
+ "label": "Mistral Medium 3.1",
146
+ },
147
+ {
148
+ "value": "magistral-medium-2509",
149
+ "label": "Magistral Medium 1.2",
150
+ },
151
+ {
152
+ "value": "magistral-medium-2507",
153
+ "label": "Magistral Medium 1.1",
154
+ },
155
+ {
156
+ "value": "mistral-large-2411",
157
+ "label": "Mistral Large 2.1",
158
+ },
159
+ ],
137
160
  },
138
161
  },
139
162
  vector_db={
@@ -37,6 +37,8 @@ async def get_authenticated_user(
37
37
  except Exception as e:
38
38
  # Convert any get_default_user failure into a proper HTTP 500 error
39
39
  logger.error(f"Failed to create default user: {str(e)}")
40
- raise HTTPException(status_code=500, detail=f"Failed to create default user: {str(e)}")
40
+ raise HTTPException(
41
+ status_code=500, detail=f"Failed to create default user: {str(e)}"
42
+ ) from e
41
43
 
42
44
  return user
@@ -27,12 +27,7 @@ async def get_default_user() -> SimpleNamespace:
27
27
  if user is None:
28
28
  return await create_default_user()
29
29
 
30
- # We return a SimpleNamespace to have the same user type as our SaaS
31
- # SimpleNamespace is just a dictionary which can be accessed through attributes
32
- auth_data = SimpleNamespace(
33
- id=user.id, email=user.email, tenant_id=user.tenant_id, roles=[]
34
- )
35
- return auth_data
30
+ return user
36
31
  except Exception as error:
37
32
  if "principals" in str(error.args):
38
33
  raise DatabaseNotCreatedError() from error
@@ -40,8 +40,8 @@ async def create_role(
40
40
  # Add association directly to the association table
41
41
  role = Role(name=role_name, tenant_id=tenant.id)
42
42
  session.add(role)
43
- except IntegrityError:
44
- raise EntityAlreadyExistsError(message="Role already exists for tenant.")
43
+ except IntegrityError as e:
44
+ raise EntityAlreadyExistsError(message="Role already exists for tenant.") from e
45
45
 
46
46
  await session.commit()
47
47
  await session.refresh(role)
@@ -35,5 +35,5 @@ async def create_tenant(tenant_name: str, user_id: UUID) -> UUID:
35
35
  await session.merge(user)
36
36
  await session.commit()
37
37
  return tenant.id
38
- except IntegrityError:
39
- raise EntityAlreadyExistsError(message="Tenant already exists.")
38
+ except IntegrityError as e:
39
+ raise EntityAlreadyExistsError(message="Tenant already exists.") from e
@@ -7,6 +7,6 @@ class IngestionError(CogneeValidationError):
7
7
  self,
8
8
  message: str = "Failed to load data.",
9
9
  name: str = "IngestionError",
10
- status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
10
+ status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
11
11
  ):
12
12
  super().__init__(message, name, status_code)
@@ -430,6 +430,15 @@ def setup_logging(log_level=None, name=None):
430
430
  stream_handler.setFormatter(console_formatter)
431
431
  stream_handler.setLevel(log_level)
432
432
 
433
+ root_logger = logging.getLogger()
434
+ if root_logger.hasHandlers():
435
+ root_logger.handlers.clear()
436
+ root_logger.addHandler(stream_handler)
437
+
438
+ # Note: root logger needs to be set at NOTSET to allow all messages through and specific stream and file handlers
439
+ # can define their own levels.
440
+ root_logger.setLevel(logging.NOTSET)
441
+
433
442
  # Check if we already have a log file path from the environment
434
443
  # NOTE: environment variable must be used here as it allows us to
435
444
  # log to a single file with a name based on a timestamp in a multiprocess setting.
@@ -441,17 +450,15 @@ def setup_logging(log_level=None, name=None):
441
450
  log_file_path = os.path.join(LOGS_DIR, f"{start_time}.log")
442
451
  os.environ["LOG_FILE_NAME"] = log_file_path
443
452
 
444
- # Create a file handler that uses our custom PlainFileHandler
445
- file_handler = PlainFileHandler(log_file_path, encoding="utf-8")
446
- file_handler.setLevel(DEBUG)
447
-
448
- # Configure root logger
449
- root_logger = logging.getLogger()
450
- if root_logger.hasHandlers():
451
- root_logger.handlers.clear()
452
- root_logger.addHandler(stream_handler)
453
- root_logger.addHandler(file_handler)
454
- root_logger.setLevel(log_level)
453
+ try:
454
+ # Create a file handler that uses our custom PlainFileHandler
455
+ file_handler = PlainFileHandler(log_file_path, encoding="utf-8")
456
+ file_handler.setLevel(DEBUG)
457
+ root_logger.addHandler(file_handler)
458
+ except Exception as e:
459
+ # Note: Exceptions happen in case of read only file systems or log file path pointing to location where it does
460
+ # not have write permission. Logging to file is not mandatory so we just log a warning to console.
461
+ root_logger.warning(f"Warning: Could not create log file handler at {log_file_path}: {e}")
455
462
 
456
463
  if log_level > logging.DEBUG:
457
464
  import warnings
cognee/shared/utils.py CHANGED
@@ -8,7 +8,7 @@ import http.server
8
8
  import socketserver
9
9
  from threading import Thread
10
10
  import pathlib
11
- from uuid import uuid4
11
+ from uuid import uuid4, uuid5, NAMESPACE_OID
12
12
 
13
13
  from cognee.base_config import get_base_config
14
14
  from cognee.infrastructure.databases.graph import get_graph_engine
@@ -51,6 +51,26 @@ def get_anonymous_id():
51
51
  return anonymous_id
52
52
 
53
53
 
54
+ def _sanitize_nested_properties(obj, property_names: list[str]):
55
+ """
56
+ Recursively replaces any property whose key matches one of `property_names`
57
+ (e.g., ['url', 'path']) in a nested dict or list with a uuid5 hash
58
+ of its string value. Returns a new sanitized copy.
59
+ """
60
+ if isinstance(obj, dict):
61
+ new_obj = {}
62
+ for k, v in obj.items():
63
+ if k in property_names and isinstance(v, str):
64
+ new_obj[k] = str(uuid5(NAMESPACE_OID, v))
65
+ else:
66
+ new_obj[k] = _sanitize_nested_properties(v, property_names)
67
+ return new_obj
68
+ elif isinstance(obj, list):
69
+ return [_sanitize_nested_properties(item, property_names) for item in obj]
70
+ else:
71
+ return obj
72
+
73
+
54
74
  def send_telemetry(event_name: str, user_id, additional_properties: dict = {}):
55
75
  if os.getenv("TELEMETRY_DISABLED"):
56
76
  return
@@ -58,7 +78,9 @@ def send_telemetry(event_name: str, user_id, additional_properties: dict = {}):
58
78
  env = os.getenv("ENV")
59
79
  if env in ["test", "dev"]:
60
80
  return
61
-
81
+ additional_properties = _sanitize_nested_properties(
82
+ obj=additional_properties, property_names=["url"]
83
+ )
62
84
  current_time = datetime.now(timezone.utc)
63
85
  payload = {
64
86
  "anonymous_id": str(get_anonymous_id()),
@@ -124,5 +124,4 @@ async def add_rule_associations(
124
124
 
125
125
  if len(edges_to_save) > 0:
126
126
  await graph_engine.add_edges(edges_to_save)
127
-
128
- await index_graph_edges()
127
+ await index_graph_edges(edges_to_save)
@@ -12,7 +12,7 @@ class WrongDataDocumentInputError(CogneeValidationError):
12
12
  self,
13
13
  field: str,
14
14
  name: str = "WrongDataDocumentInputError",
15
- status_code: int = status.HTTP_422_UNPROCESSABLE_ENTITY,
15
+ status_code: int = status.HTTP_422_UNPROCESSABLE_CONTENT,
16
16
  ):
17
17
  message = f"Missing of invalid parameter: '{field}'."
18
18
  super().__init__(message, name, status_code)
@@ -0,0 +1,13 @@
1
+ from .extract_feedback_interactions import extract_feedback_interactions
2
+ from .generate_improved_answers import generate_improved_answers
3
+ from .create_enrichments import create_enrichments
4
+ from .link_enrichments_to_feedback import link_enrichments_to_feedback
5
+ from .models import FeedbackEnrichment
6
+
7
+ __all__ = [
8
+ "extract_feedback_interactions",
9
+ "generate_improved_answers",
10
+ "create_enrichments",
11
+ "link_enrichments_to_feedback",
12
+ "FeedbackEnrichment",
13
+ ]