cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238) hide show
  1. cognee/__init__.py +2 -0
  2. cognee/alembic/README +1 -0
  3. cognee/alembic/env.py +107 -0
  4. cognee/alembic/script.py.mako +26 -0
  5. cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
  6. cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
  7. cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
  8. cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
  9. cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
  10. cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
  11. cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
  12. cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
  13. cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
  14. cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
  15. cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
  16. cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
  17. cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
  18. cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
  19. cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
  20. cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
  21. cognee/alembic.ini +117 -0
  22. cognee/api/v1/add/routers/get_add_router.py +2 -0
  23. cognee/api/v1/cognify/cognify.py +11 -6
  24. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
  25. cognee/api/v1/config/config.py +60 -0
  26. cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
  27. cognee/api/v1/memify/routers/get_memify_router.py +2 -0
  28. cognee/api/v1/search/routers/get_search_router.py +21 -6
  29. cognee/api/v1/search/search.py +25 -5
  30. cognee/api/v1/sync/routers/get_sync_router.py +3 -3
  31. cognee/cli/commands/add_command.py +1 -1
  32. cognee/cli/commands/cognify_command.py +6 -0
  33. cognee/cli/commands/config_command.py +1 -1
  34. cognee/context_global_variables.py +5 -1
  35. cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
  36. cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
  37. cognee/infrastructure/databases/cache/config.py +6 -0
  38. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
  39. cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
  40. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
  41. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
  42. cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
  43. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
  44. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
  45. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
  46. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
  47. cognee/infrastructure/databases/vector/config.py +6 -0
  48. cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
  49. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
  50. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
  51. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
  52. cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
  53. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
  54. cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
  55. cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
  57. cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
  58. cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
  59. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
  60. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
  61. cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
  62. cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
  63. cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
  64. cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
  65. cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
  66. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
  67. cognee/infrastructure/llm/prompts/test.txt +1 -1
  68. cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
  71. cognee/modules/chunking/models/DocumentChunk.py +0 -1
  72. cognee/modules/cognify/config.py +2 -0
  73. cognee/modules/data/models/Data.py +1 -0
  74. cognee/modules/engine/models/Entity.py +0 -1
  75. cognee/modules/engine/operations/setup.py +6 -0
  76. cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
  77. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
  78. cognee/modules/graph/utils/__init__.py +1 -0
  79. cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
  80. cognee/modules/notebooks/methods/__init__.py +1 -0
  81. cognee/modules/notebooks/methods/create_notebook.py +0 -34
  82. cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
  83. cognee/modules/notebooks/methods/get_notebooks.py +12 -8
  84. cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
  85. cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
  86. cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
  87. cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
  88. cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
  89. cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
  90. cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
  91. cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
  92. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
  93. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
  94. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
  95. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
  96. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
  97. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
  98. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
  99. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
  100. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
  101. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
  102. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
  103. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
  104. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
  105. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
  106. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
  107. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
  108. cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
  109. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
  110. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
  111. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
  112. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
  113. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
  114. cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
  115. cognee/modules/retrieval/__init__.py +0 -1
  116. cognee/modules/retrieval/base_retriever.py +66 -10
  117. cognee/modules/retrieval/chunks_retriever.py +57 -49
  118. cognee/modules/retrieval/coding_rules_retriever.py +12 -5
  119. cognee/modules/retrieval/completion_retriever.py +29 -28
  120. cognee/modules/retrieval/cypher_search_retriever.py +25 -20
  121. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
  122. cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
  123. cognee/modules/retrieval/graph_completion_retriever.py +78 -63
  124. cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
  125. cognee/modules/retrieval/lexical_retriever.py +34 -12
  126. cognee/modules/retrieval/natural_language_retriever.py +18 -15
  127. cognee/modules/retrieval/summaries_retriever.py +51 -34
  128. cognee/modules/retrieval/temporal_retriever.py +59 -49
  129. cognee/modules/retrieval/triplet_retriever.py +31 -32
  130. cognee/modules/retrieval/utils/access_tracking.py +88 -0
  131. cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
  132. cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
  133. cognee/modules/search/methods/__init__.py +1 -0
  134. cognee/modules/search/methods/get_retriever_output.py +53 -0
  135. cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
  136. cognee/modules/search/methods/search.py +90 -215
  137. cognee/modules/search/models/SearchResultPayload.py +67 -0
  138. cognee/modules/search/types/SearchResult.py +1 -8
  139. cognee/modules/search/types/SearchType.py +1 -2
  140. cognee/modules/search/types/__init__.py +1 -1
  141. cognee/modules/search/utils/__init__.py +1 -2
  142. cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
  143. cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
  144. cognee/modules/users/authentication/default/default_transport.py +11 -1
  145. cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
  146. cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
  147. cognee/modules/users/methods/create_user.py +0 -9
  148. cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
  149. cognee/modules/visualization/cognee_network_visualization.py +1 -1
  150. cognee/run_migrations.py +48 -0
  151. cognee/shared/exceptions/__init__.py +1 -3
  152. cognee/shared/exceptions/exceptions.py +11 -1
  153. cognee/shared/usage_logger.py +332 -0
  154. cognee/shared/utils.py +12 -5
  155. cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
  156. cognee/tasks/memify/extract_usage_frequency.py +613 -0
  157. cognee/tasks/summarization/models.py +0 -2
  158. cognee/tasks/temporal_graph/__init__.py +0 -1
  159. cognee/tasks/translation/__init__.py +96 -0
  160. cognee/tasks/translation/config.py +110 -0
  161. cognee/tasks/translation/detect_language.py +190 -0
  162. cognee/tasks/translation/exceptions.py +62 -0
  163. cognee/tasks/translation/models.py +72 -0
  164. cognee/tasks/translation/providers/__init__.py +44 -0
  165. cognee/tasks/translation/providers/azure_provider.py +192 -0
  166. cognee/tasks/translation/providers/base.py +85 -0
  167. cognee/tasks/translation/providers/google_provider.py +158 -0
  168. cognee/tasks/translation/providers/llm_provider.py +143 -0
  169. cognee/tasks/translation/translate_content.py +282 -0
  170. cognee/tasks/web_scraper/default_url_crawler.py +6 -2
  171. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
  172. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
  173. cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
  174. cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
  175. cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
  176. cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
  177. cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
  178. cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
  179. cognee/tests/integration/retrieval/test_structured_output.py +62 -18
  180. cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
  181. cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
  182. cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
  183. cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
  184. cognee/tests/tasks/translation/README.md +147 -0
  185. cognee/tests/tasks/translation/__init__.py +1 -0
  186. cognee/tests/tasks/translation/config_test.py +93 -0
  187. cognee/tests/tasks/translation/detect_language_test.py +118 -0
  188. cognee/tests/tasks/translation/providers_test.py +151 -0
  189. cognee/tests/tasks/translation/translate_content_test.py +213 -0
  190. cognee/tests/test_chromadb.py +1 -1
  191. cognee/tests/test_cleanup_unused_data.py +165 -0
  192. cognee/tests/test_delete_by_id.py +6 -6
  193. cognee/tests/test_extract_usage_frequency.py +308 -0
  194. cognee/tests/test_kuzu.py +17 -7
  195. cognee/tests/test_lancedb.py +3 -1
  196. cognee/tests/test_library.py +1 -1
  197. cognee/tests/test_neo4j.py +17 -7
  198. cognee/tests/test_neptune_analytics_vector.py +3 -1
  199. cognee/tests/test_permissions.py +172 -187
  200. cognee/tests/test_pgvector.py +3 -1
  201. cognee/tests/test_relational_db_migration.py +15 -1
  202. cognee/tests/test_remote_kuzu.py +3 -1
  203. cognee/tests/test_s3_file_storage.py +1 -1
  204. cognee/tests/test_search_db.py +97 -110
  205. cognee/tests/test_usage_logger_e2e.py +268 -0
  206. cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
  207. cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
  208. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
  209. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
  210. cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
  211. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
  212. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
  213. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
  214. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
  215. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
  216. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
  217. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
  218. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
  219. cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
  220. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
  221. cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
  222. cognee/tests/unit/modules/search/test_search.py +176 -0
  223. cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
  224. cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
  225. cognee/tests/unit/shared/test_usage_logger.py +241 -0
  226. cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
  227. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/METADATA +17 -10
  228. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/RECORD +232 -144
  229. cognee/api/.env.example +0 -5
  230. cognee/modules/retrieval/base_graph_retriever.py +0 -24
  231. cognee/modules/search/methods/get_search_type_tools.py +0 -223
  232. cognee/modules/search/methods/no_access_control_search.py +0 -62
  233. cognee/modules/search/utils/prepare_search_result.py +0 -63
  234. cognee/tests/test_feedback_enrichment.py +0 -174
  235. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/WHEEL +0 -0
  236. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/entry_points.txt +0 -0
  237. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/licenses/LICENSE +0 -0
  238. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,252 @@
1
+ import os
2
+ from typing import Callable, List, Optional, Type, Tuple
3
+
4
+ from cognee.modules.retrieval.base_retriever import BaseRetriever
5
+
6
+ from cognee.modules.engine.models.node_set import NodeSet
7
+ from cognee.modules.retrieval.triplet_retriever import TripletRetriever
8
+ from cognee.modules.search.types import SearchType
9
+ from cognee.modules.search.operations import select_search_type
10
+ from cognee.modules.search.exceptions import UnsupportedSearchTypeError
11
+
12
+ # Retrievers
13
+ from cognee.modules.retrieval.user_qa_feedback import UserQAFeedback
14
+ from cognee.modules.retrieval.chunks_retriever import ChunksRetriever
15
+ from cognee.modules.retrieval.summaries_retriever import SummariesRetriever
16
+ from cognee.modules.retrieval.completion_retriever import CompletionRetriever
17
+ from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
18
+ from cognee.modules.retrieval.temporal_retriever import TemporalRetriever
19
+ from cognee.modules.retrieval.coding_rules_retriever import CodingRulesRetriever
20
+ from cognee.modules.retrieval.jaccard_retrival import JaccardChunksRetriever
21
+ from cognee.modules.retrieval.graph_summary_completion_retriever import (
22
+ GraphSummaryCompletionRetriever,
23
+ )
24
+ from cognee.modules.retrieval.graph_completion_cot_retriever import GraphCompletionCotRetriever
25
+ from cognee.modules.retrieval.graph_completion_context_extension_retriever import (
26
+ GraphCompletionContextExtensionRetriever,
27
+ )
28
+ from cognee.modules.retrieval.cypher_search_retriever import CypherSearchRetriever
29
+ from cognee.modules.retrieval.natural_language_retriever import NaturalLanguageRetriever
30
+
31
+
32
async def get_search_type_retriever_instance(
    query_type: SearchType,
    query_text: str,
    **kwargs,
) -> BaseRetriever:
    """
    Factory method to get the appropriate retriever instance based on the search type.

    Args:
        query_type: SearchType enum indicating the type of search.
            ``SearchType.FEELING_LUCKY`` is first resolved to a concrete search type
            by calling ``select_search_type(query_text)``.
        query_text: query string. Only used here to resolve ``SearchType.FEELING_LUCKY``.
        **kwargs: General keyword arguments for retriever initialization. Keys read for
            the core retrievers (value used when the key is absent in parentheses):
            ``top_k`` (10), ``system_prompt_path`` ("answer_simple_question.txt"),
            ``system_prompt`` (None), ``node_type`` (NodeSet), ``node_name`` (None),
            ``save_interaction`` (False), ``wide_search_top_k`` (100),
            ``triplet_distance_penalty`` (3.5), ``session_id`` (None) and
            ``retriever_specific_config`` (None, treated as an empty dict) - a
            dictionary with retriever specific options such as ``response_model``,
            ``max_iter`` or the various ``*_prompt_path`` overrides.

    Returns:
        A new retriever instance. Community retrievers are constructed with the
        unmodified ``kwargs``; core retrievers are constructed with the argument
        set registered for the resolved search type.

    Raises:
        UnsupportedSearchTypeError: If the resolved search type is CYPHER or
            NATURAL_LANGUAGE while the ``ALLOW_CYPHER_QUERY`` environment variable is
            set to "false" (case-insensitive), or if no retriever is registered for
            the resolved search type.
    """
    # Transform retriever specific config if empty to avoid None checks later
    retriever_specific_config = kwargs.get("retriever_specific_config")
    if retriever_specific_config is None:
        retriever_specific_config = {}

    # Extract common defaults with fallback values from kwargs
    top_k = kwargs.get("top_k", 10)
    system_prompt_path = kwargs.get("system_prompt_path", "answer_simple_question.txt")
    system_prompt = kwargs.get("system_prompt")
    node_type = kwargs.get("node_type", NodeSet)
    node_name = kwargs.get("node_name")
    save_interaction = kwargs.get("save_interaction", False)
    wide_search_top_k = kwargs.get("wide_search_top_k", 100)
    triplet_distance_penalty = kwargs.get("triplet_distance_penalty", 3.5)
    session_id = kwargs.get("session_id")

    # Structured output model shared by every completion-style retriever
    response_model = retriever_specific_config.get("response_model", str)

    # Arguments shared by the plain (non-graph) completion retrievers
    completion_args = {
        "system_prompt_path": system_prompt_path,
        "top_k": top_k,
        "system_prompt": system_prompt,
        "session_id": session_id,
        "response_model": response_model,
    }

    # Arguments shared by every graph completion retriever variant.
    # Each registry entry below spreads this into a fresh dict, so entries never
    # share a mutable argument mapping.
    graph_completion_args = {
        "system_prompt_path": system_prompt_path,
        "top_k": top_k,
        "node_type": node_type,
        "node_name": node_name,
        "save_interaction": save_interaction,
        "system_prompt": system_prompt,
        "wide_search_top_k": wide_search_top_k,
        "triplet_distance_penalty": triplet_distance_penalty,
        "session_id": session_id,
    }

    # Registry mapping search types to their corresponding retriever classes and input parameters.
    # Values hold the retriever *class* (not an instance); it is instantiated at the end.
    search_core_registry: dict[SearchType, Tuple[Type[BaseRetriever], dict]] = {
        SearchType.SUMMARIES: (SummariesRetriever, {"top_k": top_k, "session_id": session_id}),
        SearchType.CHUNKS: (ChunksRetriever, {"top_k": top_k}),
        SearchType.RAG_COMPLETION: (CompletionRetriever, {**completion_args}),
        SearchType.TRIPLET_COMPLETION: (TripletRetriever, {**completion_args}),
        SearchType.GRAPH_COMPLETION: (
            GraphCompletionRetriever,
            {**graph_completion_args, "response_model": response_model},
        ),
        SearchType.GRAPH_COMPLETION_COT: (
            GraphCompletionCotRetriever,
            {
                **graph_completion_args,
                "max_iter": retriever_specific_config.get("max_iter", 4),
                "validation_system_prompt_path": retriever_specific_config.get(
                    "validation_system_prompt_path", "cot_validation_system_prompt.txt"
                ),
                "validation_user_prompt_path": retriever_specific_config.get(
                    "validation_user_prompt_path", "cot_validation_user_prompt.txt"
                ),
                "followup_system_prompt_path": retriever_specific_config.get(
                    "followup_system_prompt_path", "cot_followup_system_prompt.txt"
                ),
                "followup_user_prompt_path": retriever_specific_config.get(
                    "followup_user_prompt_path", "cot_followup_user_prompt.txt"
                ),
                "response_model": response_model,
            },
        ),
        SearchType.GRAPH_COMPLETION_CONTEXT_EXTENSION: (
            GraphCompletionContextExtensionRetriever,
            {
                **graph_completion_args,
                "context_extension_rounds": retriever_specific_config.get(
                    "context_extension_rounds", 4
                ),
                "response_model": response_model,
            },
        ),
        SearchType.GRAPH_SUMMARY_COMPLETION: (
            GraphSummaryCompletionRetriever,
            {
                **graph_completion_args,
                "summarize_prompt_path": retriever_specific_config.get(
                    "summarize_prompt_path", "summarize_search_results.txt"
                ),
            },
        ),
        SearchType.CYPHER: (
            CypherSearchRetriever,
            {
                "user_prompt_path": retriever_specific_config.get(
                    "user_prompt_path", "context_for_question.txt"
                ),
                "system_prompt_path": retriever_specific_config.get(
                    "system_prompt_path", "answer_simple_question.txt"
                ),
                "session_id": session_id,
            },
        ),
        SearchType.NATURAL_LANGUAGE: (
            NaturalLanguageRetriever,
            {
                "session_id": session_id,
                "system_prompt_path": retriever_specific_config.get(
                    "system_prompt_path", "natural_language_retriever_system.txt"
                ),
                "max_attempts": retriever_specific_config.get("max_attempts", 3),
            },
        ),
        SearchType.TEMPORAL: (
            TemporalRetriever,
            {
                "top_k": top_k,
                "wide_search_top_k": wide_search_top_k,
                "triplet_distance_penalty": triplet_distance_penalty,
                "session_id": session_id,
                "response_model": response_model,
                "user_prompt_path": retriever_specific_config.get(
                    "user_prompt_path", "graph_context_for_question.txt"
                ),
                "system_prompt_path": retriever_specific_config.get(
                    "system_prompt_path", "answer_simple_question.txt"
                ),
                "time_extraction_prompt_path": retriever_specific_config.get(
                    "time_extraction_prompt_path", "extract_query_time.txt"
                ),
                "node_type": node_type,
                "node_name": node_name,
            },
        ),
        SearchType.CHUNKS_LEXICAL: (JaccardChunksRetriever, {"top_k": top_k}),
        SearchType.CODING_RULES: (
            CodingRulesRetriever,
            {"rules_nodeset_name": node_name},
        ),
    }

    # If the query type is FEELING_LUCKY, select the search type intelligently
    if query_type is SearchType.FEELING_LUCKY:
        query_type = await select_search_type(query_text)

    # Checked after FEELING_LUCKY resolution so an automatically selected
    # Cypher-based search type is rejected as well.
    if (
        query_type in (SearchType.CYPHER, SearchType.NATURAL_LANGUAGE)
        and os.getenv("ALLOW_CYPHER_QUERY", "true").lower() == "false"
    ):
        raise UnsupportedSearchTypeError("Cypher query search types are disabled.")

    # Imported at call time, as in the original implementation
    from cognee.modules.retrieval.registered_community_retrievers import (
        registered_community_retrievers,
    )

    # Community retrievers take precedence over the core registry
    if query_type in registered_community_retrievers:
        retriever = registered_community_retrievers.get(query_type)

        if not retriever:
            raise UnsupportedSearchTypeError(str(query_type))
        # TODO: Fix community retrievers on the community side so they get all input parameters properly
        retriever_instance = retriever(**kwargs)
    else:
        retriever_info = search_core_registry.get(query_type)
        # Check if retriever info is found for the given query type
        if not retriever_info:
            raise UnsupportedSearchTypeError(str(query_type))

        # If it exists unpack the retriever class and its initialization arguments
        retriever_cls, retriever_args = retriever_info

        retriever_instance = retriever_cls(**retriever_args)

    return retriever_instance
@@ -14,8 +14,6 @@ from cognee.modules.engine.models.node_set import NodeSet
14
14
  from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
15
15
  from cognee.modules.search.types import (
16
16
  SearchResult,
17
- CombinedSearchResult,
18
- SearchResultDataset,
19
17
  SearchType,
20
18
  )
21
19
  from cognee.modules.search.operations import log_query, log_result
@@ -25,9 +23,7 @@ from cognee.modules.data.methods.get_authorized_existing_datasets import (
25
23
  get_authorized_existing_datasets,
26
24
  )
27
25
  from cognee import __version__ as cognee_version
28
- from .get_search_type_tools import get_search_type_tools
29
- from .no_access_control_search import no_access_control_search
30
- from ..utils.prepare_search_result import prepare_search_result
26
+ from cognee.modules.search.methods.get_retriever_output import get_retriever_output
31
27
 
32
28
  logger = get_logger()
33
29
 
@@ -45,11 +41,12 @@ async def search(
45
41
  save_interaction: bool = False,
46
42
  last_k: Optional[int] = None,
47
43
  only_context: bool = False,
48
- use_combined_context: bool = False,
49
44
  session_id: Optional[str] = None,
50
45
  wide_search_top_k: Optional[int] = 100,
51
46
  triplet_distance_penalty: Optional[float] = 3.5,
52
- ) -> Union[CombinedSearchResult, List[SearchResult]]:
47
+ verbose=False,
48
+ retriever_specific_config: Optional[dict] = None,
49
+ ) -> List[SearchResult]:
53
50
  """
54
51
 
55
52
  Args:
@@ -75,44 +72,24 @@ async def search(
75
72
  },
76
73
  )
77
74
 
78
- # Use search function filtered by permissions if access control is enabled
79
- if backend_access_control_enabled():
80
- search_results = await authorized_search(
81
- query_type=query_type,
82
- query_text=query_text,
83
- user=user,
84
- dataset_ids=dataset_ids,
85
- system_prompt_path=system_prompt_path,
86
- system_prompt=system_prompt,
87
- top_k=top_k,
88
- node_type=node_type,
89
- node_name=node_name,
90
- save_interaction=save_interaction,
91
- last_k=last_k,
92
- only_context=only_context,
93
- use_combined_context=use_combined_context,
94
- session_id=session_id,
95
- wide_search_top_k=wide_search_top_k,
96
- triplet_distance_penalty=triplet_distance_penalty,
97
- )
98
- else:
99
- search_results = [
100
- await no_access_control_search(
101
- query_type=query_type,
102
- query_text=query_text,
103
- system_prompt_path=system_prompt_path,
104
- system_prompt=system_prompt,
105
- top_k=top_k,
106
- node_type=node_type,
107
- node_name=node_name,
108
- save_interaction=save_interaction,
109
- last_k=last_k,
110
- only_context=only_context,
111
- session_id=session_id,
112
- wide_search_top_k=wide_search_top_k,
113
- triplet_distance_penalty=triplet_distance_penalty,
114
- )
115
- ]
75
+ search_results = await authorized_search(
76
+ query_type=query_type,
77
+ query_text=query_text,
78
+ user=user,
79
+ dataset_ids=dataset_ids,
80
+ system_prompt_path=system_prompt_path,
81
+ system_prompt=system_prompt,
82
+ top_k=top_k,
83
+ node_type=node_type,
84
+ node_name=node_name,
85
+ save_interaction=save_interaction,
86
+ last_k=last_k,
87
+ only_context=only_context,
88
+ session_id=session_id,
89
+ wide_search_top_k=wide_search_top_k,
90
+ triplet_distance_penalty=triplet_distance_penalty,
91
+ retriever_specific_config=retriever_specific_config,
92
+ )
116
93
 
117
94
  send_telemetry(
118
95
  "cognee.search EXECUTION COMPLETED",
@@ -125,89 +102,11 @@ async def search(
125
102
 
126
103
  await log_result(
127
104
  query.id,
128
- json.dumps(
129
- jsonable_encoder(
130
- await prepare_search_result(
131
- search_results[0] if isinstance(search_results, list) else search_results
132
- )
133
- if use_combined_context
134
- else [
135
- await prepare_search_result(search_result) for search_result in search_results
136
- ]
137
- )
138
- ),
105
+ json.dumps(jsonable_encoder(search_results)),
139
106
  user.id,
140
107
  )
141
108
 
142
- if use_combined_context:
143
- prepared_search_results = await prepare_search_result(
144
- search_results[0] if isinstance(search_results, list) else search_results
145
- )
146
- result = prepared_search_results["result"]
147
- graphs = prepared_search_results["graphs"]
148
- context = prepared_search_results["context"]
149
- datasets = prepared_search_results["datasets"]
150
-
151
- return CombinedSearchResult(
152
- result=result,
153
- graphs=graphs,
154
- context=context,
155
- datasets=[
156
- SearchResultDataset(
157
- id=dataset.id,
158
- name=dataset.name,
159
- )
160
- for dataset in datasets
161
- ],
162
- )
163
- else:
164
- # This is for maintaining backwards compatibility
165
- if backend_access_control_enabled():
166
- return_value = []
167
- for search_result in search_results:
168
- prepared_search_results = await prepare_search_result(search_result)
169
-
170
- result = prepared_search_results["result"]
171
- graphs = prepared_search_results["graphs"]
172
- context = prepared_search_results["context"]
173
- datasets = prepared_search_results["datasets"]
174
-
175
- if only_context:
176
- return_value.append(
177
- {
178
- "search_result": [context] if context else None,
179
- "dataset_id": datasets[0].id,
180
- "dataset_name": datasets[0].name,
181
- "dataset_tenant_id": datasets[0].tenant_id,
182
- "graphs": graphs,
183
- }
184
- )
185
- else:
186
- return_value.append(
187
- {
188
- "search_result": [result] if result else None,
189
- "dataset_id": datasets[0].id,
190
- "dataset_name": datasets[0].name,
191
- "dataset_tenant_id": datasets[0].tenant_id,
192
- "graphs": graphs,
193
- }
194
- )
195
- return return_value
196
- else:
197
- return_value = []
198
- if only_context:
199
- for search_result in search_results:
200
- prepared_search_results = await prepare_search_result(search_result)
201
- return_value.append(prepared_search_results["context"])
202
- else:
203
- for search_result in search_results:
204
- result, context, datasets = search_result
205
- return_value.append(result)
206
- # For maintaining backwards compatibility
207
- if len(return_value) == 1 and isinstance(return_value[0], list):
208
- return return_value[0]
209
- else:
210
- return return_value
109
+ return _backwards_compatible_search_results(search_results, verbose)
211
110
 
212
111
 
213
112
  async def authorized_search(
@@ -223,14 +122,11 @@ async def authorized_search(
223
122
  save_interaction: bool = False,
224
123
  last_k: Optional[int] = None,
225
124
  only_context: bool = False,
226
- use_combined_context: bool = False,
227
125
  session_id: Optional[str] = None,
228
126
  wide_search_top_k: Optional[int] = 100,
229
127
  triplet_distance_penalty: Optional[float] = 3.5,
230
- ) -> Union[
231
- Tuple[Any, Union[List[Edge], str], List[Dataset]],
232
- List[Tuple[Any, Union[List[Edge], str], List[Dataset]]],
233
- ]:
128
+ retriever_specific_config: Optional[dict] = None,
129
+ ) -> List[Tuple[Any, Union[List[Edge], str], List[Dataset]]]:
234
130
  """
235
131
  Verifies access for provided datasets or uses all datasets user has read access for and performs search per dataset.
236
132
  Not to be used outside of active access control mode.
@@ -240,70 +136,6 @@ async def authorized_search(
240
136
  datasets=dataset_ids, permission_type="read", user=user
241
137
  )
242
138
 
243
- if use_combined_context:
244
- search_responses = await search_in_datasets_context(
245
- search_datasets=search_datasets,
246
- query_type=query_type,
247
- query_text=query_text,
248
- system_prompt_path=system_prompt_path,
249
- system_prompt=system_prompt,
250
- top_k=top_k,
251
- node_type=node_type,
252
- node_name=node_name,
253
- save_interaction=save_interaction,
254
- last_k=last_k,
255
- only_context=True,
256
- session_id=session_id,
257
- wide_search_top_k=wide_search_top_k,
258
- triplet_distance_penalty=triplet_distance_penalty,
259
- )
260
-
261
- context = {}
262
- datasets: List[Dataset] = []
263
-
264
- for _, search_context, search_datasets in search_responses:
265
- for dataset in search_datasets:
266
- context[str(dataset.id)] = search_context
267
-
268
- datasets.extend(search_datasets)
269
-
270
- specific_search_tools = await get_search_type_tools(
271
- query_type=query_type,
272
- query_text=query_text,
273
- system_prompt_path=system_prompt_path,
274
- system_prompt=system_prompt,
275
- top_k=top_k,
276
- node_type=node_type,
277
- node_name=node_name,
278
- save_interaction=save_interaction,
279
- last_k=last_k,
280
- wide_search_top_k=wide_search_top_k,
281
- triplet_distance_penalty=triplet_distance_penalty,
282
- )
283
- search_tools = specific_search_tools
284
- if len(search_tools) == 2:
285
- [get_completion, _] = search_tools
286
- else:
287
- get_completion = search_tools[0]
288
-
289
- def prepare_combined_context(
290
- context,
291
- ) -> Union[List[Edge], str]:
292
- combined_context = []
293
-
294
- for dataset_context in context.values():
295
- combined_context += dataset_context
296
-
297
- if combined_context and isinstance(combined_context[0], str):
298
- return "\n".join(combined_context)
299
-
300
- return combined_context
301
-
302
- combined_context = prepare_combined_context(context)
303
- completion = await get_completion(query_text, combined_context, session_id=session_id)
304
-
305
- return completion, combined_context, datasets
306
-
307
139
  # Searches all provided datasets and handles setting up of appropriate database context based on permissions
308
140
  search_results = await search_in_datasets_context(
309
141
  search_datasets=search_datasets,
@@ -319,6 +151,8 @@ async def authorized_search(
319
151
  only_context=only_context,
320
152
  session_id=session_id,
321
153
  wide_search_top_k=wide_search_top_k,
154
+ triplet_distance_penalty=triplet_distance_penalty,
155
+ retriever_specific_config=retriever_specific_config,
322
156
  )
323
157
 
324
158
  return search_results
@@ -336,10 +170,10 @@ async def search_in_datasets_context(
336
170
  save_interaction: bool = False,
337
171
  last_k: Optional[int] = None,
338
172
  only_context: bool = False,
339
- context: Optional[Any] = None,
340
173
  session_id: Optional[str] = None,
341
174
  wide_search_top_k: Optional[int] = 100,
342
175
  triplet_distance_penalty: Optional[float] = 3.5,
176
+ retriever_specific_config: Optional[dict] = None,
343
177
  ) -> List[Tuple[Any, Union[str, List[Edge]], List[Dataset]]]:
344
178
  """
345
179
  Searches all provided datasets and handles setting up of appropriate database context based on permissions.
@@ -358,17 +192,17 @@ async def search_in_datasets_context(
358
192
  save_interaction: bool = False,
359
193
  last_k: Optional[int] = None,
360
194
  only_context: bool = False,
361
- context: Optional[Any] = None,
362
195
  session_id: Optional[str] = None,
363
196
  wide_search_top_k: Optional[int] = 100,
364
197
  triplet_distance_penalty: Optional[float] = 3.5,
198
+ retriever_specific_config: Optional[dict] = None,
365
199
  ) -> Tuple[Any, Union[str, List[Edge]], List[Dataset]]:
366
200
  # Set database configuration in async context for each dataset user has access for
367
201
  await set_database_global_context_variables(dataset.id, dataset.owner_id)
368
202
 
203
+ # Check if graph for dataset is empty and log warnings if necessary
369
204
  graph_engine = await get_graph_engine()
370
205
  is_empty = await graph_engine.is_empty()
371
-
372
206
  if is_empty:
373
207
  # TODO: we can log here, but not all search types use graph. Still keeping this here for reviewer input
374
208
  from cognee.modules.data.methods import get_dataset_data
@@ -382,12 +216,14 @@ async def search_in_datasets_context(
382
216
  )
383
217
  else:
384
218
  logger.warning(
385
- "Search attempt on an empty knowledge graph - no data has been added to this dataset"
219
+ f"Search attempt on an empty knowledge graph - no data has been added to this dataset: {dataset.name}"
386
220
  )
387
221
 
388
- specific_search_tools = await get_search_type_tools(
222
+ # Get retriever output in the context of the current dataset
223
+ return await get_retriever_output(
389
224
  query_type=query_type,
390
225
  query_text=query_text,
226
+ dataset=dataset,
391
227
  system_prompt_path=system_prompt_path,
392
228
  system_prompt=system_prompt,
393
229
  top_k=top_k,
@@ -395,24 +231,12 @@ async def search_in_datasets_context(
395
231
  node_name=node_name,
396
232
  save_interaction=save_interaction,
397
233
  last_k=last_k,
234
+ only_context=only_context,
235
+ session_id=session_id,
398
236
  wide_search_top_k=wide_search_top_k,
399
237
  triplet_distance_penalty=triplet_distance_penalty,
238
+ retriever_specific_config=retriever_specific_config,
400
239
  )
401
- search_tools = specific_search_tools
402
- if len(search_tools) == 2:
403
- [get_completion, get_context] = search_tools
404
-
405
- if only_context:
406
- return None, await get_context(query_text), [dataset]
407
-
408
- search_context = context or await get_context(query_text)
409
- search_result = await get_completion(query_text, search_context, session_id=session_id)
410
-
411
- return search_result, search_context, [dataset]
412
- else:
413
- unknown_tool = search_tools[0]
414
-
415
- return await unknown_tool(query_text), "", [dataset]
416
240
 
417
241
  # Search every dataset async based on query and appropriate database configuration
418
242
  tasks = []
@@ -430,11 +254,62 @@ async def search_in_datasets_context(
430
254
  save_interaction=save_interaction,
431
255
  last_k=last_k,
432
256
  only_context=only_context,
433
- context=context,
434
257
  session_id=session_id,
435
258
  wide_search_top_k=wide_search_top_k,
436
259
  triplet_distance_penalty=triplet_distance_penalty,
260
+ retriever_specific_config=retriever_specific_config,
437
261
  )
438
262
  )
439
263
 
440
264
  return await asyncio.gather(*tasks)
265
+
266
+
267
def _backwards_compatible_search_results(search_results, verbose: bool):
    """
    Shape search results the way earlier versions of the API returned them.

    With backend access control enabled, every result becomes a dict holding the
    dataset id, name and tenant id, plus either the three verbose entries
    ("text_result", "context_result", "objects_result") or one "search_result" entry.

    Without access control, verbose mode yields a list of dicts with the three
    verbose entries; otherwise a flat list of each result's `result` attribute is
    returned, unwrapped when it consists of exactly one list.
    """
    if backend_access_control_enabled():
        formatted = []
        for item in search_results:
            # Dataset identification is attached to every entry, verbose or not
            entry = {
                "dataset_id": item.dataset_id,
                "dataset_name": item.dataset_name,
                "dataset_tenant_id": item.dataset_tenant_id,
            }
            if not verbose:
                # `result` is the single value exposed by the result object in non-verbose mode
                entry["search_result"] = item.result
            else:
                # Verbose mode exposes every kind of output separately
                entry["text_result"] = item.completion
                entry["context_result"] = item.context
                entry["objects_result"] = item.result_object
            formatted.append(entry)
        return formatted

    if verbose:
        # No dataset info without access control, only the separate outputs
        return [
            {
                "text_result": item.completion,
                "context_result": item.context,
                "objects_result": item.result_object,
            }
            for item in search_results
        ]

    plain_results = [item.result for item in search_results]

    # Older callers expect a lone list result to be returned directly, not nested
    if len(plain_results) == 1 and isinstance(plain_results[0], list):
        return plain_results[0]
    return plain_results