cognee 0.2.3.dev1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (252) hide show
  1. cognee/__init__.py +2 -0
  2. cognee/__main__.py +4 -0
  3. cognee/api/client.py +28 -3
  4. cognee/api/health.py +10 -13
  5. cognee/api/v1/add/add.py +20 -6
  6. cognee/api/v1/add/routers/get_add_router.py +12 -37
  7. cognee/api/v1/cloud/routers/__init__.py +1 -0
  8. cognee/api/v1/cloud/routers/get_checks_router.py +23 -0
  9. cognee/api/v1/cognify/code_graph_pipeline.py +14 -3
  10. cognee/api/v1/cognify/cognify.py +67 -105
  11. cognee/api/v1/cognify/routers/get_cognify_router.py +11 -3
  12. cognee/api/v1/datasets/routers/get_datasets_router.py +16 -5
  13. cognee/api/v1/memify/routers/__init__.py +1 -0
  14. cognee/api/v1/memify/routers/get_memify_router.py +100 -0
  15. cognee/api/v1/notebooks/routers/__init__.py +1 -0
  16. cognee/api/v1/notebooks/routers/get_notebooks_router.py +96 -0
  17. cognee/api/v1/responses/default_tools.py +4 -0
  18. cognee/api/v1/responses/dispatch_function.py +6 -1
  19. cognee/api/v1/responses/models.py +1 -1
  20. cognee/api/v1/search/routers/get_search_router.py +20 -1
  21. cognee/api/v1/search/search.py +17 -4
  22. cognee/api/v1/sync/__init__.py +17 -0
  23. cognee/api/v1/sync/routers/__init__.py +3 -0
  24. cognee/api/v1/sync/routers/get_sync_router.py +241 -0
  25. cognee/api/v1/sync/sync.py +877 -0
  26. cognee/api/v1/ui/__init__.py +1 -0
  27. cognee/api/v1/ui/ui.py +529 -0
  28. cognee/api/v1/users/routers/get_auth_router.py +13 -1
  29. cognee/base_config.py +10 -1
  30. cognee/cli/__init__.py +10 -0
  31. cognee/cli/_cognee.py +273 -0
  32. cognee/cli/commands/__init__.py +1 -0
  33. cognee/cli/commands/add_command.py +80 -0
  34. cognee/cli/commands/cognify_command.py +128 -0
  35. cognee/cli/commands/config_command.py +225 -0
  36. cognee/cli/commands/delete_command.py +80 -0
  37. cognee/cli/commands/search_command.py +149 -0
  38. cognee/cli/config.py +33 -0
  39. cognee/cli/debug.py +21 -0
  40. cognee/cli/echo.py +45 -0
  41. cognee/cli/exceptions.py +23 -0
  42. cognee/cli/minimal_cli.py +97 -0
  43. cognee/cli/reference.py +26 -0
  44. cognee/cli/suppress_logging.py +12 -0
  45. cognee/eval_framework/corpus_builder/corpus_builder_executor.py +2 -2
  46. cognee/eval_framework/eval_config.py +1 -1
  47. cognee/infrastructure/databases/graph/config.py +10 -4
  48. cognee/infrastructure/databases/graph/get_graph_engine.py +4 -9
  49. cognee/infrastructure/databases/graph/kuzu/adapter.py +199 -2
  50. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +138 -0
  51. cognee/infrastructure/databases/relational/__init__.py +2 -0
  52. cognee/infrastructure/databases/relational/get_async_session.py +15 -0
  53. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +6 -1
  54. cognee/infrastructure/databases/relational/with_async_session.py +25 -0
  55. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +1 -1
  56. cognee/infrastructure/databases/vector/config.py +13 -6
  57. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +6 -4
  58. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +16 -7
  59. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +5 -5
  60. cognee/infrastructure/databases/vector/embeddings/config.py +2 -2
  61. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +2 -6
  62. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +10 -7
  63. cognee/infrastructure/files/storage/LocalFileStorage.py +9 -0
  64. cognee/infrastructure/files/storage/S3FileStorage.py +5 -0
  65. cognee/infrastructure/files/storage/StorageManager.py +7 -1
  66. cognee/infrastructure/files/storage/storage.py +16 -0
  67. cognee/infrastructure/files/utils/get_data_file_path.py +14 -9
  68. cognee/infrastructure/files/utils/get_file_metadata.py +2 -1
  69. cognee/infrastructure/llm/LLMGateway.py +32 -5
  70. cognee/infrastructure/llm/config.py +6 -4
  71. cognee/infrastructure/llm/prompts/extract_query_time.txt +15 -0
  72. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +25 -0
  73. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +30 -0
  74. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +16 -5
  75. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +2 -0
  76. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py +44 -0
  77. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py +1 -0
  78. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_content_graph.py +19 -15
  79. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py +46 -0
  80. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +3 -3
  81. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +3 -3
  82. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +2 -2
  83. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +14 -8
  84. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +6 -4
  85. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +28 -4
  86. cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +2 -2
  87. cognee/infrastructure/llm/tokenizer/HuggingFace/adapter.py +3 -3
  88. cognee/infrastructure/llm/tokenizer/Mistral/adapter.py +3 -3
  89. cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +6 -6
  90. cognee/infrastructure/llm/utils.py +7 -7
  91. cognee/infrastructure/utils/run_sync.py +8 -1
  92. cognee/modules/chunking/models/DocumentChunk.py +4 -3
  93. cognee/modules/cloud/exceptions/CloudApiKeyMissingError.py +15 -0
  94. cognee/modules/cloud/exceptions/CloudConnectionError.py +15 -0
  95. cognee/modules/cloud/exceptions/__init__.py +2 -0
  96. cognee/modules/cloud/operations/__init__.py +1 -0
  97. cognee/modules/cloud/operations/check_api_key.py +25 -0
  98. cognee/modules/data/deletion/prune_system.py +1 -1
  99. cognee/modules/data/methods/__init__.py +2 -0
  100. cognee/modules/data/methods/check_dataset_name.py +1 -1
  101. cognee/modules/data/methods/create_authorized_dataset.py +19 -0
  102. cognee/modules/data/methods/get_authorized_dataset.py +11 -5
  103. cognee/modules/data/methods/get_authorized_dataset_by_name.py +16 -0
  104. cognee/modules/data/methods/get_dataset_data.py +1 -1
  105. cognee/modules/data/methods/load_or_create_datasets.py +2 -20
  106. cognee/modules/engine/models/Event.py +16 -0
  107. cognee/modules/engine/models/Interval.py +8 -0
  108. cognee/modules/engine/models/Timestamp.py +13 -0
  109. cognee/modules/engine/models/__init__.py +3 -0
  110. cognee/modules/engine/utils/__init__.py +2 -0
  111. cognee/modules/engine/utils/generate_event_datapoint.py +46 -0
  112. cognee/modules/engine/utils/generate_timestamp_datapoint.py +51 -0
  113. cognee/modules/graph/cognee_graph/CogneeGraph.py +2 -2
  114. cognee/modules/graph/methods/get_formatted_graph_data.py +3 -2
  115. cognee/modules/graph/utils/__init__.py +1 -0
  116. cognee/modules/graph/utils/resolve_edges_to_text.py +71 -0
  117. cognee/modules/memify/__init__.py +1 -0
  118. cognee/modules/memify/memify.py +118 -0
  119. cognee/modules/notebooks/methods/__init__.py +5 -0
  120. cognee/modules/notebooks/methods/create_notebook.py +26 -0
  121. cognee/modules/notebooks/methods/delete_notebook.py +13 -0
  122. cognee/modules/notebooks/methods/get_notebook.py +21 -0
  123. cognee/modules/notebooks/methods/get_notebooks.py +18 -0
  124. cognee/modules/notebooks/methods/update_notebook.py +17 -0
  125. cognee/modules/notebooks/models/Notebook.py +53 -0
  126. cognee/modules/notebooks/models/__init__.py +1 -0
  127. cognee/modules/notebooks/operations/__init__.py +1 -0
  128. cognee/modules/notebooks/operations/run_in_local_sandbox.py +55 -0
  129. cognee/modules/pipelines/__init__.py +1 -1
  130. cognee/modules/pipelines/exceptions/tasks.py +18 -0
  131. cognee/modules/pipelines/layers/__init__.py +1 -0
  132. cognee/modules/pipelines/layers/check_pipeline_run_qualification.py +59 -0
  133. cognee/modules/pipelines/layers/pipeline_execution_mode.py +127 -0
  134. cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +28 -0
  135. cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +34 -0
  136. cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +55 -0
  137. cognee/modules/pipelines/layers/setup_and_check_environment.py +41 -0
  138. cognee/modules/pipelines/layers/validate_pipeline_tasks.py +20 -0
  139. cognee/modules/pipelines/methods/__init__.py +2 -0
  140. cognee/modules/pipelines/methods/get_pipeline_runs_by_dataset.py +34 -0
  141. cognee/modules/pipelines/methods/reset_pipeline_run_status.py +16 -0
  142. cognee/modules/pipelines/operations/__init__.py +0 -1
  143. cognee/modules/pipelines/operations/log_pipeline_run_initiated.py +1 -1
  144. cognee/modules/pipelines/operations/pipeline.py +24 -138
  145. cognee/modules/pipelines/operations/run_tasks.py +17 -41
  146. cognee/modules/retrieval/base_feedback.py +11 -0
  147. cognee/modules/retrieval/base_graph_retriever.py +18 -0
  148. cognee/modules/retrieval/base_retriever.py +1 -1
  149. cognee/modules/retrieval/code_retriever.py +8 -0
  150. cognee/modules/retrieval/coding_rules_retriever.py +31 -0
  151. cognee/modules/retrieval/completion_retriever.py +9 -3
  152. cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -0
  153. cognee/modules/retrieval/cypher_search_retriever.py +1 -9
  154. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +29 -13
  155. cognee/modules/retrieval/graph_completion_cot_retriever.py +30 -13
  156. cognee/modules/retrieval/graph_completion_retriever.py +107 -56
  157. cognee/modules/retrieval/graph_summary_completion_retriever.py +5 -1
  158. cognee/modules/retrieval/insights_retriever.py +14 -3
  159. cognee/modules/retrieval/natural_language_retriever.py +0 -4
  160. cognee/modules/retrieval/summaries_retriever.py +1 -1
  161. cognee/modules/retrieval/temporal_retriever.py +152 -0
  162. cognee/modules/retrieval/user_qa_feedback.py +83 -0
  163. cognee/modules/retrieval/utils/brute_force_triplet_search.py +7 -32
  164. cognee/modules/retrieval/utils/completion.py +10 -3
  165. cognee/modules/retrieval/utils/extract_uuid_from_node.py +18 -0
  166. cognee/modules/retrieval/utils/models.py +40 -0
  167. cognee/modules/search/methods/get_search_type_tools.py +168 -0
  168. cognee/modules/search/methods/no_access_control_search.py +47 -0
  169. cognee/modules/search/methods/search.py +239 -118
  170. cognee/modules/search/types/SearchResult.py +21 -0
  171. cognee/modules/search/types/SearchType.py +3 -0
  172. cognee/modules/search/types/__init__.py +1 -0
  173. cognee/modules/search/utils/__init__.py +2 -0
  174. cognee/modules/search/utils/prepare_search_result.py +41 -0
  175. cognee/modules/search/utils/transform_context_to_graph.py +38 -0
  176. cognee/modules/settings/get_settings.py +2 -2
  177. cognee/modules/sync/__init__.py +1 -0
  178. cognee/modules/sync/methods/__init__.py +23 -0
  179. cognee/modules/sync/methods/create_sync_operation.py +53 -0
  180. cognee/modules/sync/methods/get_sync_operation.py +107 -0
  181. cognee/modules/sync/methods/update_sync_operation.py +248 -0
  182. cognee/modules/sync/models/SyncOperation.py +142 -0
  183. cognee/modules/sync/models/__init__.py +3 -0
  184. cognee/modules/users/__init__.py +0 -1
  185. cognee/modules/users/methods/__init__.py +4 -1
  186. cognee/modules/users/methods/create_user.py +26 -1
  187. cognee/modules/users/methods/get_authenticated_user.py +36 -42
  188. cognee/modules/users/methods/get_default_user.py +3 -1
  189. cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +2 -1
  190. cognee/root_dir.py +19 -0
  191. cognee/shared/CodeGraphEntities.py +1 -0
  192. cognee/shared/logging_utils.py +143 -32
  193. cognee/shared/utils.py +0 -1
  194. cognee/tasks/codingagents/coding_rule_associations.py +127 -0
  195. cognee/tasks/graph/extract_graph_from_data.py +6 -2
  196. cognee/tasks/ingestion/save_data_item_to_storage.py +23 -0
  197. cognee/tasks/memify/__init__.py +2 -0
  198. cognee/tasks/memify/extract_subgraph.py +7 -0
  199. cognee/tasks/memify/extract_subgraph_chunks.py +11 -0
  200. cognee/tasks/repo_processor/get_local_dependencies.py +2 -0
  201. cognee/tasks/repo_processor/get_repo_file_dependencies.py +144 -47
  202. cognee/tasks/storage/add_data_points.py +33 -3
  203. cognee/tasks/temporal_graph/__init__.py +1 -0
  204. cognee/tasks/temporal_graph/add_entities_to_event.py +85 -0
  205. cognee/tasks/temporal_graph/enrich_events.py +34 -0
  206. cognee/tasks/temporal_graph/extract_events_and_entities.py +32 -0
  207. cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py +41 -0
  208. cognee/tasks/temporal_graph/models.py +49 -0
  209. cognee/tests/integration/cli/__init__.py +3 -0
  210. cognee/tests/integration/cli/test_cli_integration.py +331 -0
  211. cognee/tests/integration/documents/PdfDocument_test.py +2 -2
  212. cognee/tests/integration/documents/TextDocument_test.py +2 -4
  213. cognee/tests/integration/documents/UnstructuredDocument_test.py +5 -8
  214. cognee/tests/{test_deletion.py → test_delete_hard.py} +0 -37
  215. cognee/tests/test_delete_soft.py +85 -0
  216. cognee/tests/test_kuzu.py +2 -2
  217. cognee/tests/test_neo4j.py +2 -2
  218. cognee/tests/test_permissions.py +3 -3
  219. cognee/tests/test_relational_db_migration.py +7 -5
  220. cognee/tests/test_search_db.py +136 -23
  221. cognee/tests/test_temporal_graph.py +167 -0
  222. cognee/tests/unit/api/__init__.py +1 -0
  223. cognee/tests/unit/api/test_conditional_authentication_endpoints.py +246 -0
  224. cognee/tests/unit/cli/__init__.py +3 -0
  225. cognee/tests/unit/cli/test_cli_commands.py +483 -0
  226. cognee/tests/unit/cli/test_cli_edge_cases.py +625 -0
  227. cognee/tests/unit/cli/test_cli_main.py +173 -0
  228. cognee/tests/unit/cli/test_cli_runner.py +62 -0
  229. cognee/tests/unit/cli/test_cli_utils.py +127 -0
  230. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +18 -2
  231. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +12 -15
  232. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +10 -15
  233. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +4 -3
  234. cognee/tests/unit/modules/retrieval/insights_retriever_test.py +4 -2
  235. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +18 -2
  236. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +225 -0
  237. cognee/tests/unit/modules/users/__init__.py +1 -0
  238. cognee/tests/unit/modules/users/test_conditional_authentication.py +277 -0
  239. cognee/tests/unit/processing/utils/utils_test.py +20 -1
  240. {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/METADATA +13 -9
  241. {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/RECORD +247 -135
  242. cognee-0.3.0.dist-info/entry_points.txt +2 -0
  243. cognee/infrastructure/databases/graph/networkx/adapter.py +0 -1017
  244. cognee/infrastructure/pipeline/models/Operation.py +0 -60
  245. cognee/notebooks/github_analysis_step_by_step.ipynb +0 -37
  246. cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py +0 -7
  247. cognee/tests/unit/modules/search/search_methods_test.py +0 -223
  248. /cognee/{infrastructure/databases/graph/networkx → api/v1/memify}/__init__.py +0 -0
  249. /cognee/{infrastructure/pipeline/models → tasks/codingagents}/__init__.py +0 -0
  250. {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/WHEEL +0 -0
  251. {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/licenses/LICENSE +0 -0
  252. {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/licenses/NOTICE.md +0 -0
@@ -2,31 +2,28 @@ import os
2
2
  import json
3
3
  import asyncio
4
4
  from uuid import UUID
5
- from typing import Callable, List, Optional, Type, Union
6
- from cognee.modules.search.exceptions import UnsupportedSearchTypeError
5
+ from fastapi.encoders import jsonable_encoder
6
+ from typing import Any, List, Optional, Tuple, Type, Union
7
+
8
+ from cognee.shared.utils import send_telemetry
7
9
  from cognee.context_global_variables import set_database_global_context_variables
8
- from cognee.modules.retrieval.chunks_retriever import ChunksRetriever
9
- from cognee.modules.retrieval.insights_retriever import InsightsRetriever
10
- from cognee.modules.retrieval.summaries_retriever import SummariesRetriever
11
- from cognee.modules.retrieval.completion_retriever import CompletionRetriever
12
- from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
13
- from cognee.modules.retrieval.graph_summary_completion_retriever import (
14
- GraphSummaryCompletionRetriever,
15
- )
16
- from cognee.modules.retrieval.graph_completion_cot_retriever import GraphCompletionCotRetriever
17
- from cognee.modules.retrieval.graph_completion_context_extension_retriever import (
18
- GraphCompletionContextExtensionRetriever,
10
+
11
+ from cognee.modules.engine.models.node_set import NodeSet
12
+ from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
13
+ from cognee.modules.search.types import (
14
+ SearchResult,
15
+ CombinedSearchResult,
16
+ SearchResultDataset,
17
+ SearchType,
19
18
  )
20
- from cognee.modules.retrieval.code_retriever import CodeRetriever
21
- from cognee.modules.retrieval.cypher_search_retriever import CypherSearchRetriever
22
- from cognee.modules.retrieval.natural_language_retriever import NaturalLanguageRetriever
23
- from cognee.modules.search.types import SearchType
24
- from cognee.modules.storage.utils import JSONEncoder
19
+ from cognee.modules.search.operations import log_query, log_result
25
20
  from cognee.modules.users.models import User
26
21
  from cognee.modules.data.models import Dataset
27
- from cognee.shared.utils import send_telemetry
28
22
  from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
29
- from cognee.modules.search.operations import log_query, log_result, select_search_type
23
+
24
+ from .get_search_type_tools import get_search_type_tools
25
+ from .no_access_control_search import no_access_control_search
26
+ from ..utils.prepare_search_result import prepare_search_result
30
27
 
31
28
 
32
29
  async def search(
@@ -35,10 +32,15 @@ async def search(
35
32
  dataset_ids: Union[list[UUID], None],
36
33
  user: User,
37
34
  system_prompt_path="answer_simple_question.txt",
35
+ system_prompt: Optional[str] = None,
38
36
  top_k: int = 10,
39
- node_type: Optional[Type] = None,
37
+ node_type: Optional[Type] = NodeSet,
40
38
  node_name: Optional[List[str]] = None,
41
- ):
39
+ save_interaction: bool = False,
40
+ last_k: Optional[int] = None,
41
+ only_context: bool = False,
42
+ use_combined_context: bool = False,
43
+ ) -> Union[CombinedSearchResult, List[SearchResult]]:
42
44
  """
43
45
 
44
46
  Args:
@@ -54,156 +56,275 @@ async def search(
54
56
  Notes:
55
57
  Searching by dataset is only available in ENABLE_BACKEND_ACCESS_CONTROL mode
56
58
  """
59
+ query = await log_query(query_text, query_type.value, user.id)
60
+ send_telemetry("cognee.search EXECUTION STARTED", user.id)
61
+
57
62
  # Use search function filtered by permissions if access control is enabled
58
63
  if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
59
- return await authorized_search(
60
- query_text, query_type, user, dataset_ids, system_prompt_path, top_k
64
+ search_results = await authorized_search(
65
+ query_type=query_type,
66
+ query_text=query_text,
67
+ user=user,
68
+ dataset_ids=dataset_ids,
69
+ system_prompt_path=system_prompt_path,
70
+ system_prompt=system_prompt,
71
+ top_k=top_k,
72
+ node_type=node_type,
73
+ node_name=node_name,
74
+ save_interaction=save_interaction,
75
+ last_k=last_k,
76
+ only_context=only_context,
77
+ use_combined_context=use_combined_context,
61
78
  )
79
+ else:
80
+ search_results = [
81
+ await no_access_control_search(
82
+ query_type=query_type,
83
+ query_text=query_text,
84
+ system_prompt_path=system_prompt_path,
85
+ system_prompt=system_prompt,
86
+ top_k=top_k,
87
+ node_type=node_type,
88
+ node_name=node_name,
89
+ save_interaction=save_interaction,
90
+ last_k=last_k,
91
+ only_context=only_context,
92
+ )
93
+ ]
62
94
 
63
- query = await log_query(query_text, query_type.value, user.id)
64
-
65
- search_results = await specific_search(
66
- query_type,
67
- query_text,
68
- user,
69
- system_prompt_path=system_prompt_path,
70
- top_k=top_k,
71
- node_type=node_type,
72
- node_name=node_name,
73
- )
95
+ send_telemetry("cognee.search EXECUTION COMPLETED", user.id)
74
96
 
75
97
  await log_result(
76
98
  query.id,
77
99
  json.dumps(
78
- search_results if len(search_results) > 1 else search_results[0], cls=JSONEncoder
100
+ jsonable_encoder(
101
+ await prepare_search_result(
102
+ search_results[0] if isinstance(search_results, list) else search_results
103
+ )
104
+ if use_combined_context
105
+ else [
106
+ await prepare_search_result(search_result) for search_result in search_results
107
+ ]
108
+ )
79
109
  ),
80
110
  user.id,
81
111
  )
82
112
 
83
- return search_results
113
+ if use_combined_context:
114
+ prepared_search_results = await prepare_search_result(
115
+ search_results[0] if isinstance(search_results, list) else search_results
116
+ )
117
+ result = prepared_search_results["result"]
118
+ graphs = prepared_search_results["graphs"]
119
+ context = prepared_search_results["context"]
120
+ datasets = prepared_search_results["datasets"]
121
+
122
+ return CombinedSearchResult(
123
+ result=result,
124
+ graphs=graphs,
125
+ context=context,
126
+ datasets=[
127
+ SearchResultDataset(
128
+ id=dataset.id,
129
+ name=dataset.name,
130
+ )
131
+ for dataset in datasets
132
+ ],
133
+ )
134
+ else:
135
+ return [
136
+ SearchResult(
137
+ search_result=result,
138
+ dataset_id=datasets[min(index, len(datasets) - 1)].id if datasets else None,
139
+ dataset_name=datasets[min(index, len(datasets) - 1)].name if datasets else None,
140
+ )
141
+ for index, (result, _, datasets) in enumerate(search_results)
142
+ ]
84
143
 
85
144
 
86
- async def specific_search(
145
+ async def authorized_search(
87
146
  query_type: SearchType,
88
- query: str,
147
+ query_text: str,
89
148
  user: User,
90
- system_prompt_path="answer_simple_question.txt",
149
+ dataset_ids: Optional[list[UUID]] = None,
150
+ system_prompt_path: str = "answer_simple_question.txt",
151
+ system_prompt: Optional[str] = None,
91
152
  top_k: int = 10,
92
- node_type: Optional[Type] = None,
153
+ node_type: Optional[Type] = NodeSet,
93
154
  node_name: Optional[List[str]] = None,
94
- ) -> list:
95
- search_tasks: dict[SearchType, Callable] = {
96
- SearchType.SUMMARIES: SummariesRetriever(top_k=top_k).get_completion,
97
- SearchType.INSIGHTS: InsightsRetriever(top_k=top_k).get_completion,
98
- SearchType.CHUNKS: ChunksRetriever(top_k=top_k).get_completion,
99
- SearchType.RAG_COMPLETION: CompletionRetriever(
100
- system_prompt_path=system_prompt_path, top_k=top_k
101
- ).get_completion,
102
- SearchType.GRAPH_COMPLETION: GraphCompletionRetriever(
103
- system_prompt_path=system_prompt_path,
104
- top_k=top_k,
105
- node_type=node_type,
106
- node_name=node_name,
107
- ).get_completion,
108
- SearchType.GRAPH_COMPLETION_COT: GraphCompletionCotRetriever(
109
- system_prompt_path=system_prompt_path,
110
- top_k=top_k,
111
- node_type=node_type,
112
- node_name=node_name,
113
- ).get_completion,
114
- SearchType.GRAPH_COMPLETION_CONTEXT_EXTENSION: GraphCompletionContextExtensionRetriever(
115
- system_prompt_path=system_prompt_path,
116
- top_k=top_k,
117
- node_type=node_type,
118
- node_name=node_name,
119
- ).get_completion,
120
- SearchType.GRAPH_SUMMARY_COMPLETION: GraphSummaryCompletionRetriever(
155
+ save_interaction: bool = False,
156
+ last_k: Optional[int] = None,
157
+ only_context: bool = False,
158
+ use_combined_context: bool = False,
159
+ ) -> Union[
160
+ Tuple[Any, Union[List[Edge], str], List[Dataset]],
161
+ List[Tuple[Any, Union[List[Edge], str], List[Dataset]]],
162
+ ]:
163
+ """
164
+ Verifies access for provided datasets or uses all datasets user has read access for and performs search per dataset.
165
+ Not to be used outside of active access control mode.
166
+ """
167
+ # Find datasets user has read access for (if datasets are provided only return them. Provided user has read access)
168
+ search_datasets = await get_specific_user_permission_datasets(user.id, "read", dataset_ids)
169
+
170
+ if use_combined_context:
171
+ search_responses = await search_in_datasets_context(
172
+ search_datasets=search_datasets,
173
+ query_type=query_type,
174
+ query_text=query_text,
121
175
  system_prompt_path=system_prompt_path,
176
+ system_prompt=system_prompt,
122
177
  top_k=top_k,
123
178
  node_type=node_type,
124
179
  node_name=node_name,
125
- ).get_completion,
126
- SearchType.CODE: CodeRetriever(top_k=top_k).get_completion,
127
- SearchType.CYPHER: CypherSearchRetriever().get_completion,
128
- SearchType.NATURAL_LANGUAGE: NaturalLanguageRetriever().get_completion,
129
- }
180
+ save_interaction=save_interaction,
181
+ last_k=last_k,
182
+ only_context=True,
183
+ )
130
184
 
131
- # If the query type is FEELING_LUCKY, select the search type intelligently
132
- if query_type is SearchType.FEELING_LUCKY:
133
- query_type = await select_search_type(query)
185
+ context = {}
186
+ datasets: List[Dataset] = []
134
187
 
135
- search_task = search_tasks.get(query_type)
188
+ for _, search_context, datasets in search_responses:
189
+ for dataset in datasets:
190
+ context[str(dataset.id)] = search_context
136
191
 
137
- if search_task is None:
138
- raise UnsupportedSearchTypeError(str(query_type))
192
+ datasets.extend(datasets)
139
193
 
140
- send_telemetry("cognee.search EXECUTION STARTED", user.id)
194
+ specific_search_tools = await get_search_type_tools(
195
+ query_type=query_type,
196
+ query_text=query_text,
197
+ system_prompt_path=system_prompt_path,
198
+ system_prompt=system_prompt,
199
+ top_k=top_k,
200
+ node_type=node_type,
201
+ node_name=node_name,
202
+ save_interaction=save_interaction,
203
+ last_k=last_k,
204
+ )
205
+ search_tools = specific_search_tools
206
+ if len(search_tools) == 2:
207
+ [get_completion, _] = search_tools
208
+ else:
209
+ get_completion = search_tools[0]
141
210
 
142
- results = await search_task(query)
211
+ def prepare_combined_context(
212
+ context,
213
+ ) -> Union[List[Edge], str]:
214
+ combined_context = []
143
215
 
144
- send_telemetry("cognee.search EXECUTION COMPLETED", user.id)
216
+ for dataset_context in context.values():
217
+ combined_context += dataset_context
145
218
 
146
- return results
219
+ if combined_context and isinstance(combined_context[0], str):
220
+ return "\n".join(combined_context)
147
221
 
222
+ return combined_context
148
223
 
149
- async def authorized_search(
150
- query_text: str,
151
- query_type: SearchType,
152
- user: User = None,
153
- dataset_ids: Optional[list[UUID]] = None,
154
- system_prompt_path: str = "answer_simple_question.txt",
155
- top_k: int = 10,
156
- ) -> list:
157
- """
158
- Verifies access for provided datasets or uses all datasets user has read access for and performs search per dataset.
159
- Not to be used outside of active access control mode.
160
- """
224
+ combined_context = prepare_combined_context(context)
225
+ completion = await get_completion(query_text, combined_context)
161
226
 
162
- query = await log_query(query_text, query_type.value, user.id)
163
-
164
- # Find datasets user has read access for (if datasets are provided only return them. Provided user has read access)
165
- search_datasets = await get_specific_user_permission_datasets(user.id, "read", dataset_ids)
227
+ return completion, combined_context, datasets
166
228
 
167
229
  # Searches all provided datasets and handles setting up of appropriate database context based on permissions
168
- search_results = await specific_search_by_context(
169
- search_datasets, query_text, query_type, user, system_prompt_path, top_k
230
+ search_results = await search_in_datasets_context(
231
+ search_datasets=search_datasets,
232
+ query_type=query_type,
233
+ query_text=query_text,
234
+ system_prompt_path=system_prompt_path,
235
+ system_prompt=system_prompt,
236
+ top_k=top_k,
237
+ node_type=node_type,
238
+ node_name=node_name,
239
+ save_interaction=save_interaction,
240
+ last_k=last_k,
241
+ only_context=only_context,
170
242
  )
171
243
 
172
- await log_result(query.id, json.dumps(search_results, cls=JSONEncoder), user.id)
173
-
174
244
  return search_results
175
245
 
176
246
 
177
- async def specific_search_by_context(
247
+ async def search_in_datasets_context(
178
248
  search_datasets: list[Dataset],
179
- query_text: str,
180
249
  query_type: SearchType,
181
- user: User,
182
- system_prompt_path: str,
183
- top_k: int,
184
- ):
250
+ query_text: str,
251
+ system_prompt_path: str = "answer_simple_question.txt",
252
+ system_prompt: Optional[str] = None,
253
+ top_k: int = 10,
254
+ node_type: Optional[Type] = NodeSet,
255
+ node_name: Optional[List[str]] = None,
256
+ save_interaction: bool = False,
257
+ last_k: Optional[int] = None,
258
+ only_context: bool = False,
259
+ context: Optional[Any] = None,
260
+ ) -> List[Tuple[Any, Union[str, List[Edge]], List[Dataset]]]:
185
261
  """
186
262
  Searches all provided datasets and handles setting up of appropriate database context based on permissions.
187
263
  Not to be used outside of active access control mode.
188
264
  """
189
265
 
190
- async def _search_by_context(dataset, user, query_type, query_text, system_prompt_path, top_k):
266
+ async def _search_in_dataset_context(
267
+ dataset: Dataset,
268
+ query_type: SearchType,
269
+ query_text: str,
270
+ system_prompt_path: str = "answer_simple_question.txt",
271
+ system_prompt: Optional[str] = None,
272
+ top_k: int = 10,
273
+ node_type: Optional[Type] = NodeSet,
274
+ node_name: Optional[List[str]] = None,
275
+ save_interaction: bool = False,
276
+ last_k: Optional[int] = None,
277
+ only_context: bool = False,
278
+ context: Optional[Any] = None,
279
+ ) -> Tuple[Any, Union[str, List[Edge]], List[Dataset]]:
191
280
  # Set database configuration in async context for each dataset user has access for
192
281
  await set_database_global_context_variables(dataset.id, dataset.owner_id)
193
- search_results = await specific_search(
194
- query_type, query_text, user, system_prompt_path=system_prompt_path, top_k=top_k
282
+
283
+ specific_search_tools = await get_search_type_tools(
284
+ query_type=query_type,
285
+ query_text=query_text,
286
+ system_prompt_path=system_prompt_path,
287
+ system_prompt=system_prompt,
288
+ top_k=top_k,
289
+ node_type=node_type,
290
+ node_name=node_name,
291
+ save_interaction=save_interaction,
292
+ last_k=last_k,
195
293
  )
196
- return {
197
- "search_result": search_results,
198
- "dataset_id": dataset.id,
199
- "dataset_name": dataset.name,
200
- }
294
+ search_tools = specific_search_tools
295
+ if len(search_tools) == 2:
296
+ [get_completion, get_context] = search_tools
297
+
298
+ if only_context:
299
+ return None, await get_context(query_text), [dataset]
300
+
301
+ search_context = context or await get_context(query_text)
302
+ search_result = await get_completion(query_text, search_context)
303
+
304
+ return search_result, search_context, [dataset]
305
+ else:
306
+ unknown_tool = search_tools[0]
307
+
308
+ return await unknown_tool(query_text), "", [dataset]
201
309
 
202
310
  # Search every dataset async based on query and appropriate database configuration
203
311
  tasks = []
204
312
  for dataset in search_datasets:
205
313
  tasks.append(
206
- _search_by_context(dataset, user, query_type, query_text, system_prompt_path, top_k)
314
+ _search_in_dataset_context(
315
+ dataset=dataset,
316
+ query_type=query_type,
317
+ query_text=query_text,
318
+ system_prompt_path=system_prompt_path,
319
+ system_prompt=system_prompt,
320
+ top_k=top_k,
321
+ node_type=node_type,
322
+ node_name=node_name,
323
+ save_interaction=save_interaction,
324
+ last_k=last_k,
325
+ only_context=only_context,
326
+ context=context,
327
+ )
207
328
  )
208
329
 
209
330
  return await asyncio.gather(*tasks)
@@ -0,0 +1,21 @@
1
+ from uuid import UUID
2
+ from pydantic import BaseModel
3
+ from typing import Any, Dict, List, Optional
4
+
5
+
6
# Minimal reference to a dataset: just its identifier and its name.
# Presumably used to label which dataset(s) a search result came from -- confirm
# against callers.
class SearchResultDataset(BaseModel):
    # Unique identifier of the dataset.
    id: UUID
    # Name of the dataset.
    name: str
9
+
10
+
11
# Single merged search response: the answer, the context it was derived from,
# optional graph payloads and the datasets involved.
# NOTE(review): the field names match the keys of the dict returned by
# prepare_search_result -- confirm that is the intended producer.
class CombinedSearchResult(BaseModel):
    # The search answer; any type, may be None. No default is declared, so
    # whether the field may be omitted depends on the pydantic major version.
    result: Optional[Any]
    # Context keyed by string; must be supplied (an empty dict is allowed).
    context: Dict[str, Any]
    # Optional graph payloads keyed by string; defaults to an empty dict.
    graphs: Optional[Dict[str, Any]] = {}
    # Optional list of datasets associated with the result; defaults to None.
    datasets: Optional[List[SearchResultDataset]] = None
16
+
17
+
18
# One search result together with the id and name of a dataset.
# Presumably the dataset is the one the result was produced from -- confirm.
class SearchResult(BaseModel):
    # The raw search output; any type is accepted.
    search_result: Any
    # Identifier of the dataset, or None. No default is declared, so whether
    # the field may be omitted depends on the pydantic major version.
    dataset_id: Optional[UUID]
    # Name of the dataset, or None. Same note on defaults as dataset_id.
    dataset_name: Optional[str]
@@ -14,3 +14,6 @@ class SearchType(Enum):
14
14
  GRAPH_COMPLETION_COT = "GRAPH_COMPLETION_COT"
15
15
  GRAPH_COMPLETION_CONTEXT_EXTENSION = "GRAPH_COMPLETION_CONTEXT_EXTENSION"
16
16
  FEELING_LUCKY = "FEELING_LUCKY"
17
+ FEEDBACK = "FEEDBACK"
18
+ TEMPORAL = "TEMPORAL"
19
+ CODING_RULES = "CODING_RULES"
@@ -1 +1,2 @@
1
1
  from .SearchType import SearchType
2
+ from .SearchResult import SearchResult, SearchResultDataset, CombinedSearchResult
@@ -0,0 +1,2 @@
1
+ from .prepare_search_result import prepare_search_result
2
+ from .transform_context_to_graph import transform_context_to_graph
@@ -0,0 +1,41 @@
1
+ from typing import List, cast
2
+
3
+ from cognee.modules.graph.utils import resolve_edges_to_text
4
+ from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
5
+ from cognee.modules.search.utils.transform_context_to_graph import transform_context_to_graph
6
+
7
+
8
async def prepare_search_result(search_result):
    """
    Turn a raw ``(result, context, datasets)`` triple into a response dict.

    The context is normalised depending on its shape:

    * a non-empty list whose first item is an ``Edge`` is converted to a graph
      (stored under ``graphs["*"]``) and to text (stored under ``context["*"]``);
    * a plain string is stored as-is under ``context["*"]``;
    * a non-empty list whose first item is a string is joined with newlines
      and stored under ``context["*"]``;
    * anything else leaves ``context`` as an empty dict.

    A result that is a non-empty list starting with an ``Edge`` is replaced by
    its graph representation; every other result is passed through unchanged.

    Args:
        search_result: A 3-item iterable ``(result, context, datasets)``.

    Returns:
        dict with the keys ``"result"``, ``"graphs"`` (``None`` unless the
        context was a list of edges), ``"context"`` and ``"datasets"``.
    """
    answer, context, datasets = search_result

    graphs = None
    context_texts = {}

    if isinstance(context, str):
        context_texts = {"*": context}
    elif isinstance(context, List) and len(context) > 0:
        # Only the first element is inspected to decide how to treat the list.
        first_item = context[0]

        if isinstance(first_item, Edge):
            graphs = {"*": transform_context_to_graph(context)}
            context_texts = {"*": await resolve_edges_to_text(context)}
        elif isinstance(first_item, str):
            context_texts = {"*": "\n".join(cast(List[str], context))}

    answer_is_edge_list = (
        isinstance(answer, List) and len(answer) > 0 and isinstance(answer[0], Edge)
    )
    answer_graph = transform_context_to_graph(answer) if answer_is_edge_list else None

    return {
        "result": answer_graph or answer,
        "graphs": graphs,
        "context": context_texts,
        "datasets": datasets,
    }
@@ -0,0 +1,38 @@
1
+ from typing import List
2
+
3
+ from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
4
+
5
+
6
def transform_context_to_graph(context: "List[Edge]"):
    """
    Convert a list of edge triplets into a de-duplicated node/edge graph dict.

    Each triplet contributes its two endpoint nodes and one edge. Nodes are
    keyed by their ``id`` and edges by ``"<source id>_<relationship>_<target id>"``,
    so repeated nodes or identical edges appear only once in the output (the
    last occurrence wins) while first-seen order is preserved.

    Args:
        context: Edge triplets exposing ``node1``, ``node2`` and an
            ``attributes`` mapping that contains ``"relationship_name"``.
            Each node exposes ``id`` and an ``attributes`` mapping that
            contains ``"type"`` and optionally ``"name"``.

    Returns:
        dict: ``{"nodes": [...], "edges": [...]}`` where every node is
        ``{"id", "label", "type", "attributes"}`` and every edge is
        ``{"source", "target", "label"}``.

    Raises:
        KeyError: If a node has no ``"type"`` attribute or a triplet has no
            ``"relationship_name"`` attribute.
    """
    nodes = {}
    edges = {}

    for triplet in context:
        source, target = triplet.node1, triplet.node2

        # Both endpoints go through the same helper so each node is always
        # described by its *own* attributes (the source node previously
        # received the target node's attributes by mistake).
        nodes[source.id] = _node_to_graph_entry(source)
        nodes[target.id] = _node_to_graph_entry(target)

        relationship = triplet.attributes["relationship_name"]
        edges[f"{source.id}_{relationship}_{target.id}"] = {
            "source": source.id,
            "target": target.id,
            "label": relationship,
        }

    return {
        "nodes": list(nodes.values()),
        "edges": list(edges.values()),
    }


def _node_to_graph_entry(node):
    """Build the output dict for one node, labelling it by name or, failing that, by id."""
    attributes = node.attributes

    return {
        "id": node.id,
        "label": attributes["name"] if "name" in attributes else node.id,
        "type": attributes["type"],
        "attributes": attributes,
    }
@@ -88,8 +88,8 @@ def get_settings() -> SettingsDict:
88
88
  "models": {
89
89
  "openai": [
90
90
  {
91
- "value": "gpt-4o-mini",
92
- "label": "gpt-4o-mini",
91
+ "value": "gpt-5-mini",
92
+ "label": "gpt-5-mini",
93
93
  },
94
94
  {
95
95
  "value": "gpt-4o",
@@ -0,0 +1 @@
1
+ # Sync module for tracking sync operations
@@ -0,0 +1,23 @@
1
+ from .create_sync_operation import create_sync_operation
2
+ from .get_sync_operation import (
3
+ get_sync_operation,
4
+ get_user_sync_operations,
5
+ get_running_sync_operations_for_user,
6
+ )
7
+ from .update_sync_operation import (
8
+ update_sync_operation,
9
+ mark_sync_started,
10
+ mark_sync_completed,
11
+ mark_sync_failed,
12
+ )
13
+
14
+ __all__ = [
15
+ "create_sync_operation",
16
+ "get_sync_operation",
17
+ "get_user_sync_operations",
18
+ "get_running_sync_operations_for_user",
19
+ "update_sync_operation",
20
+ "mark_sync_started",
21
+ "mark_sync_completed",
22
+ "mark_sync_failed",
23
+ ]
@@ -0,0 +1,53 @@
1
+ from uuid import UUID
2
+ from typing import Optional, List
3
+ from datetime import datetime, timezone
4
+ from cognee.modules.sync.models import SyncOperation, SyncStatus
5
+ from cognee.infrastructure.databases.relational import get_relational_engine
6
+
7
+
8
async def create_sync_operation(
    run_id: str,
    dataset_ids: List[UUID],
    dataset_names: List[str],
    user_id: UUID,
    total_records_to_sync: Optional[int] = None,
    total_records_to_download: Optional[int] = None,
    total_records_to_upload: Optional[int] = None,
) -> SyncOperation:
    """
    Persist a new sync operation record and return it.

    The record is created with status ``SyncStatus.STARTED`` and a
    timezone-aware UTC ``created_at`` timestamp, then added to a session,
    committed and refreshed before being returned.

    Args:
        run_id: Unique public identifier for this sync operation.
        dataset_ids: UUIDs of the datasets being synced.
        dataset_names: Names of the datasets being synced.
        user_id: UUID of the user who initiated the sync.
        total_records_to_sync: Total number of records to sync, if known.
        total_records_to_download: Total number of records to download, if known.
        total_records_to_upload: Total number of records to upload, if known.

    Returns:
        SyncOperation: The newly created, refreshed sync operation record.
    """
    engine = get_relational_engine()

    # Dataset ids are kept as strings because they are stored as JSON.
    serialised_dataset_ids = list(map(str, dataset_ids))

    new_operation = SyncOperation(
        run_id=run_id,
        dataset_ids=serialised_dataset_ids,
        dataset_names=dataset_names,
        user_id=user_id,
        status=SyncStatus.STARTED,
        total_records_to_sync=total_records_to_sync,
        total_records_to_download=total_records_to_download,
        total_records_to_upload=total_records_to_upload,
        created_at=datetime.now(timezone.utc),
    )

    async with engine.get_async_session() as session:
        session.add(new_operation)
        await session.commit()
        # Reload the instance's state from the database after the commit.
        await session.refresh(new_operation)

    return new_operation