cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238)
  1. cognee/__init__.py +2 -0
  2. cognee/alembic/README +1 -0
  3. cognee/alembic/env.py +107 -0
  4. cognee/alembic/script.py.mako +26 -0
  5. cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
  6. cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
  7. cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
  8. cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
  9. cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
  10. cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
  11. cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
  12. cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
  13. cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
  14. cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
  15. cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
  16. cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
  17. cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
  18. cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
  19. cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
  20. cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
  21. cognee/alembic.ini +117 -0
  22. cognee/api/v1/add/routers/get_add_router.py +2 -0
  23. cognee/api/v1/cognify/cognify.py +11 -6
  24. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
  25. cognee/api/v1/config/config.py +60 -0
  26. cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
  27. cognee/api/v1/memify/routers/get_memify_router.py +2 -0
  28. cognee/api/v1/search/routers/get_search_router.py +21 -6
  29. cognee/api/v1/search/search.py +25 -5
  30. cognee/api/v1/sync/routers/get_sync_router.py +3 -3
  31. cognee/cli/commands/add_command.py +1 -1
  32. cognee/cli/commands/cognify_command.py +6 -0
  33. cognee/cli/commands/config_command.py +1 -1
  34. cognee/context_global_variables.py +5 -1
  35. cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
  36. cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
  37. cognee/infrastructure/databases/cache/config.py +6 -0
  38. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
  39. cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
  40. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
  41. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
  42. cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
  43. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
  44. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
  45. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
  46. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
  47. cognee/infrastructure/databases/vector/config.py +6 -0
  48. cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
  49. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
  50. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
  51. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
  52. cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
  53. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
  54. cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
  55. cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
  57. cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
  58. cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
  59. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
  60. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
  61. cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
  62. cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
  63. cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
  64. cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
  65. cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
  66. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
  67. cognee/infrastructure/llm/prompts/test.txt +1 -1
  68. cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
  71. cognee/modules/chunking/models/DocumentChunk.py +0 -1
  72. cognee/modules/cognify/config.py +2 -0
  73. cognee/modules/data/models/Data.py +1 -0
  74. cognee/modules/engine/models/Entity.py +0 -1
  75. cognee/modules/engine/operations/setup.py +6 -0
  76. cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
  77. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
  78. cognee/modules/graph/utils/__init__.py +1 -0
  79. cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
  80. cognee/modules/notebooks/methods/__init__.py +1 -0
  81. cognee/modules/notebooks/methods/create_notebook.py +0 -34
  82. cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
  83. cognee/modules/notebooks/methods/get_notebooks.py +12 -8
  84. cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
  85. cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
  86. cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
  87. cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
  88. cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
  89. cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
  90. cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
  91. cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
  92. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
  93. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
  94. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
  95. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
  96. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
  97. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
  98. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
  99. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
  100. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
  101. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
  102. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
  103. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
  104. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
  105. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
  106. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
  107. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
  108. cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
  109. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
  110. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
  111. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
  112. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
  113. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
  114. cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
  115. cognee/modules/retrieval/__init__.py +0 -1
  116. cognee/modules/retrieval/base_retriever.py +66 -10
  117. cognee/modules/retrieval/chunks_retriever.py +57 -49
  118. cognee/modules/retrieval/coding_rules_retriever.py +12 -5
  119. cognee/modules/retrieval/completion_retriever.py +29 -28
  120. cognee/modules/retrieval/cypher_search_retriever.py +25 -20
  121. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
  122. cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
  123. cognee/modules/retrieval/graph_completion_retriever.py +78 -63
  124. cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
  125. cognee/modules/retrieval/lexical_retriever.py +34 -12
  126. cognee/modules/retrieval/natural_language_retriever.py +18 -15
  127. cognee/modules/retrieval/summaries_retriever.py +51 -34
  128. cognee/modules/retrieval/temporal_retriever.py +59 -49
  129. cognee/modules/retrieval/triplet_retriever.py +31 -32
  130. cognee/modules/retrieval/utils/access_tracking.py +88 -0
  131. cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
  132. cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
  133. cognee/modules/search/methods/__init__.py +1 -0
  134. cognee/modules/search/methods/get_retriever_output.py +53 -0
  135. cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
  136. cognee/modules/search/methods/search.py +90 -215
  137. cognee/modules/search/models/SearchResultPayload.py +67 -0
  138. cognee/modules/search/types/SearchResult.py +1 -8
  139. cognee/modules/search/types/SearchType.py +1 -2
  140. cognee/modules/search/types/__init__.py +1 -1
  141. cognee/modules/search/utils/__init__.py +1 -2
  142. cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
  143. cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
  144. cognee/modules/users/authentication/default/default_transport.py +11 -1
  145. cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
  146. cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
  147. cognee/modules/users/methods/create_user.py +0 -9
  148. cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
  149. cognee/modules/visualization/cognee_network_visualization.py +1 -1
  150. cognee/run_migrations.py +48 -0
  151. cognee/shared/exceptions/__init__.py +1 -3
  152. cognee/shared/exceptions/exceptions.py +11 -1
  153. cognee/shared/usage_logger.py +332 -0
  154. cognee/shared/utils.py +12 -5
  155. cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
  156. cognee/tasks/memify/extract_usage_frequency.py +613 -0
  157. cognee/tasks/summarization/models.py +0 -2
  158. cognee/tasks/temporal_graph/__init__.py +0 -1
  159. cognee/tasks/translation/__init__.py +96 -0
  160. cognee/tasks/translation/config.py +110 -0
  161. cognee/tasks/translation/detect_language.py +190 -0
  162. cognee/tasks/translation/exceptions.py +62 -0
  163. cognee/tasks/translation/models.py +72 -0
  164. cognee/tasks/translation/providers/__init__.py +44 -0
  165. cognee/tasks/translation/providers/azure_provider.py +192 -0
  166. cognee/tasks/translation/providers/base.py +85 -0
  167. cognee/tasks/translation/providers/google_provider.py +158 -0
  168. cognee/tasks/translation/providers/llm_provider.py +143 -0
  169. cognee/tasks/translation/translate_content.py +282 -0
  170. cognee/tasks/web_scraper/default_url_crawler.py +6 -2
  171. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
  172. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
  173. cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
  174. cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
  175. cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
  176. cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
  177. cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
  178. cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
  179. cognee/tests/integration/retrieval/test_structured_output.py +62 -18
  180. cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
  181. cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
  182. cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
  183. cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
  184. cognee/tests/tasks/translation/README.md +147 -0
  185. cognee/tests/tasks/translation/__init__.py +1 -0
  186. cognee/tests/tasks/translation/config_test.py +93 -0
  187. cognee/tests/tasks/translation/detect_language_test.py +118 -0
  188. cognee/tests/tasks/translation/providers_test.py +151 -0
  189. cognee/tests/tasks/translation/translate_content_test.py +213 -0
  190. cognee/tests/test_chromadb.py +1 -1
  191. cognee/tests/test_cleanup_unused_data.py +165 -0
  192. cognee/tests/test_delete_by_id.py +6 -6
  193. cognee/tests/test_extract_usage_frequency.py +308 -0
  194. cognee/tests/test_kuzu.py +17 -7
  195. cognee/tests/test_lancedb.py +3 -1
  196. cognee/tests/test_library.py +1 -1
  197. cognee/tests/test_neo4j.py +17 -7
  198. cognee/tests/test_neptune_analytics_vector.py +3 -1
  199. cognee/tests/test_permissions.py +172 -187
  200. cognee/tests/test_pgvector.py +3 -1
  201. cognee/tests/test_relational_db_migration.py +15 -1
  202. cognee/tests/test_remote_kuzu.py +3 -1
  203. cognee/tests/test_s3_file_storage.py +1 -1
  204. cognee/tests/test_search_db.py +97 -110
  205. cognee/tests/test_usage_logger_e2e.py +268 -0
  206. cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
  207. cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
  208. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
  209. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
  210. cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
  211. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
  212. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
  213. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
  214. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
  215. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
  216. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
  217. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
  218. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
  219. cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
  220. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
  221. cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
  222. cognee/tests/unit/modules/search/test_search.py +176 -0
  223. cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
  224. cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
  225. cognee/tests/unit/shared/test_usage_logger.py +241 -0
  226. cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
  227. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/METADATA +17 -10
  228. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/RECORD +232 -144
  229. cognee/api/.env.example +0 -5
  230. cognee/modules/retrieval/base_graph_retriever.py +0 -24
  231. cognee/modules/search/methods/get_search_type_tools.py +0 -223
  232. cognee/modules/search/methods/no_access_control_search.py +0 -62
  233. cognee/modules/search/utils/prepare_search_result.py +0 -63
  234. cognee/tests/test_feedback_enrichment.py +0 -174
  235. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/WHEEL +0 -0
  236. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/entry_points.txt +0 -0
  237. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/licenses/LICENSE +0 -0
  238. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/licenses/NOTICE.md +0 -0
@@ -7,7 +7,7 @@ from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
7
7
  from cognee.tasks.storage import add_data_points
8
8
  from cognee.modules.graph.utils import resolve_edges_to_text
9
9
  from cognee.modules.graph.utils.convert_node_to_data_point import get_all_subclasses
10
- from cognee.modules.retrieval.base_graph_retriever import BaseGraphRetriever
10
+ from cognee.modules.retrieval.base_retriever import BaseRetriever
11
11
  from cognee.modules.retrieval.utils.brute_force_triplet_search import brute_force_triplet_search
12
12
  from cognee.modules.retrieval.utils.completion import generate_completion, summarize_text
13
13
  from cognee.modules.retrieval.utils.session_cache import (
@@ -16,26 +16,24 @@ from cognee.modules.retrieval.utils.session_cache import (
16
16
  )
17
17
  from cognee.shared.logging_utils import get_logger
18
18
  from cognee.modules.retrieval.utils.extract_uuid_from_node import extract_uuid_from_node
19
+ from cognee.modules.retrieval.utils.access_tracking import update_node_access_timestamps
19
20
  from cognee.modules.retrieval.utils.models import CogneeUserInteraction
20
21
  from cognee.modules.engine.models.node_set import NodeSet
21
22
  from cognee.infrastructure.databases.graph import get_graph_engine
22
23
  from cognee.context_global_variables import session_user
23
24
  from cognee.infrastructure.databases.cache.config import CacheConfig
25
+ from cognee.modules.graph.utils import get_entity_nodes_from_triplets
24
26
 
25
27
  logger = get_logger("GraphCompletionRetriever")
26
28
 
27
29
 
28
- class GraphCompletionRetriever(BaseGraphRetriever):
30
+ class GraphCompletionRetriever(BaseRetriever):
29
31
  """
30
32
  Retriever for handling graph-based completion searches.
31
33
 
32
- This class provides methods to retrieve graph nodes and edges, resolve them into a
33
- human-readable format, and generate completions based on graph context. Public methods
34
- include:
35
- - resolve_edges_to_text
36
- - get_triplets
37
- - get_context
38
- - get_completion
34
+ This class implements the retrieval pipeline by searching for graph triplets (get_retrieved_objects function),
35
+ resolving those triplets into human-readable text context (get_context_from_objects function), and generating
36
+ LLM completions using the retrieved graph data (get_completion_from_context function).
39
37
  """
40
38
 
41
39
  def __init__(
@@ -49,6 +47,8 @@ class GraphCompletionRetriever(BaseGraphRetriever):
49
47
  save_interaction: bool = False,
50
48
  wide_search_top_k: Optional[int] = 100,
51
49
  triplet_distance_penalty: Optional[float] = 3.5,
50
+ session_id: Optional[str] = None,
51
+ response_model: Type = str,
52
52
  ):
53
53
  """Initialize retriever with prompt paths and search parameters."""
54
54
  self.save_interaction = save_interaction
@@ -60,6 +60,39 @@ class GraphCompletionRetriever(BaseGraphRetriever):
60
60
  self.node_type = node_type
61
61
  self.node_name = node_name
62
62
  self.triplet_distance_penalty = triplet_distance_penalty
63
+ # session_id (Optional[str]): Identifier for managing conversation history.
64
+ self.session_id = session_id
65
+ # response_model (Type): The Pydantic model or type for the expected response.
66
+ self.response_model = response_model
67
+
68
+ async def get_retrieved_objects(self, query: str) -> List[Edge]:
69
+ """
70
+ Performs a brute-force triplet search on the graph and updates access timestamps.
71
+
72
+ Args:
73
+ query (str): The search query to find relevant graph triplets.
74
+
75
+ Returns:
76
+ List[Edge]: A list of retrieved Edge objects (triplets).
77
+ Returns an empty list if the graph is empty or no results are found.
78
+ """
79
+ graph_engine = await get_graph_engine()
80
+ is_empty = await graph_engine.is_empty()
81
+
82
+ if is_empty:
83
+ logger.warning("Search attempt on an empty knowledge graph")
84
+ return []
85
+
86
+ triplets = await self.get_triplets(query)
87
+
88
+ if len(triplets) == 0:
89
+ logger.warning("Empty context was provided to the completion")
90
+ return []
91
+ # TODO: Remove when refactor of timestamps tracking is merged
92
+ entity_nodes = get_entity_nodes_from_triplets(triplets)
93
+ await update_node_access_timestamps(entity_nodes)
94
+
95
+ return triplets
63
96
 
64
97
  async def resolve_edges_to_text(self, retrieved_edges: list) -> str:
65
98
  """
@@ -115,72 +148,54 @@ class GraphCompletionRetriever(BaseGraphRetriever):
115
148
 
116
149
  return found_triplets
117
150
 
118
- async def get_context(self, query: str) -> List[Edge]:
151
+ async def get_context_from_objects(self, query, retrieved_objects) -> str:
119
152
  """
120
- Retrieves and resolves graph triplets into context based on a query.
153
+ Transforms raw retrieved graph triplets into a textual context string.
121
154
 
122
- Parameters:
123
- -----------
124
-
125
- - query (str): The query string used to retrieve context from the graph triplets.
155
+ Args:
156
+ query (str): The original search query.
157
+ retrieved_objects (List[Edge]): The raw triplets returned from the search.
158
+ Output of the get_retrieved_objects method.
126
159
 
127
160
  Returns:
128
- --------
161
+ str: A string representing the resolved graph context.
162
+ Returns an empty string if no triplets are provided.
129
163
 
130
- - str: A string representing the resolved context from the retrieved triplets, or an
131
- empty string if no triplets are found.
164
+ Note: To avoid duplicate retrievals, ensure that retrieved_objects
165
+ are provided from get_retrieved_objects method call.
132
166
  """
133
- graph_engine = await get_graph_engine()
134
- is_empty = await graph_engine.is_empty()
135
167
 
136
- if is_empty:
137
- logger.warning("Search attempt on an empty knowledge graph")
138
- return []
139
-
140
- triplets = await self.get_triplets(query)
168
+ triplets = retrieved_objects
141
169
 
142
170
  if len(triplets) == 0:
143
171
  logger.warning("Empty context was provided to the completion")
144
- return []
172
+ return ""
145
173
 
146
- # context = await self.resolve_edges_to_text(triplets)
174
+ return await self.resolve_edges_to_text(triplets)
147
175
 
148
- return triplets
149
-
150
- async def convert_retrieved_objects_to_context(self, triplets: List[Edge]):
151
- context = await self.resolve_edges_to_text(triplets)
152
- return context
153
-
154
- async def get_completion(
176
+ async def get_completion_from_context(
155
177
  self,
156
178
  query: str,
157
- context: Optional[List[Edge]] = None,
158
- session_id: Optional[str] = None,
159
- response_model: Type = str,
179
+ retrieved_objects: Optional[List[Edge]],
180
+ context: str,
160
181
  ) -> List[Any]:
161
182
  """
162
- Generates a completion using graph connections context based on a query.
183
+ Generates an LLM response based on the query, context, and conversation history.
184
+ Optionally saves the interaction and updates the session cache.
163
185
 
164
- Parameters:
165
- -----------
166
-
167
- - query (str): The query string for which a completion is generated.
168
- - context (Optional[Any]): Optional context to use for generating the completion; if
169
- not provided, context is retrieved based on the query. (default None)
170
- - session_id (Optional[str]): Optional session identifier for caching. If None,
171
- defaults to 'default_session'. (default None)
186
+ Args:
187
+ query (str): The user's question or prompt.
188
+ retrieved_objects (Optional[List[Edge]]): Raw triplets used for interaction mapping.
189
+ Output of get_retrieved_objects method.
190
+ context (str): The text-resolved graph context.
191
+ Output of the get_context_from_objects method.
172
192
 
173
193
  Returns:
174
- --------
194
+ List[Any]: A list containing the generated response (completion).
175
195
 
176
- - Any: A generated completion based on the query and context provided.
196
+ Note: To avoid duplicate retrievals, ensure that retrieved_objects and context
197
+ are provided from previous method calls.
177
198
  """
178
- triplets = context
179
-
180
- if triplets is None:
181
- triplets = await self.get_context(query)
182
-
183
- context_text = await resolve_edges_to_text(triplets)
184
199
 
185
200
  cache_config = CacheConfig()
186
201
  user = session_user.get()
@@ -188,33 +203,33 @@ class GraphCompletionRetriever(BaseGraphRetriever):
188
203
  session_save = user_id and cache_config.caching
189
204
 
190
205
  if session_save:
191
- conversation_history = await get_conversation_history(session_id=session_id)
206
+ conversation_history = await get_conversation_history(session_id=self.session_id)
192
207
 
193
208
  context_summary, completion = await asyncio.gather(
194
- summarize_text(context_text),
209
+ summarize_text(context),
195
210
  generate_completion(
196
211
  query=query,
197
- context=context_text,
212
+ context=context,
198
213
  user_prompt_path=self.user_prompt_path,
199
214
  system_prompt_path=self.system_prompt_path,
200
215
  system_prompt=self.system_prompt,
201
216
  conversation_history=conversation_history,
202
- response_model=response_model,
217
+ response_model=self.response_model,
203
218
  ),
204
219
  )
205
220
  else:
206
221
  completion = await generate_completion(
207
222
  query=query,
208
- context=context_text,
223
+ context=context,
209
224
  user_prompt_path=self.user_prompt_path,
210
225
  system_prompt_path=self.system_prompt_path,
211
226
  system_prompt=self.system_prompt,
212
- response_model=response_model,
227
+ response_model=self.response_model,
213
228
  )
214
229
 
215
- if self.save_interaction and context and triplets and completion:
230
+ if self.save_interaction and retrieved_objects and completion:
216
231
  await self.save_qa(
217
- question=query, answer=completion, context=context_text, triplets=triplets
232
+ question=query, answer=completion, context=context, triplets=retrieved_objects
218
233
  )
219
234
 
220
235
  if session_save:
@@ -222,7 +237,7 @@ class GraphCompletionRetriever(BaseGraphRetriever):
222
237
  query=query,
223
238
  context_summary=context_summary,
224
239
  answer=completion,
225
- session_id=session_id,
240
+ session_id=self.session_id,
226
241
  )
227
242
 
228
243
  return [completion]
@@ -28,6 +28,7 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever):
28
28
  save_interaction: bool = False,
29
29
  wide_search_top_k: Optional[int] = 100,
30
30
  triplet_distance_penalty: Optional[float] = 3.5,
31
+ session_id: Optional[str] = None,
31
32
  ):
32
33
  """Initialize retriever with default prompt paths and search parameters."""
33
34
  super().__init__(
@@ -40,6 +41,7 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever):
40
41
  system_prompt=system_prompt,
41
42
  wide_search_top_k=wide_search_top_k,
42
43
  triplet_distance_penalty=triplet_distance_penalty,
44
+ session_id=session_id,
43
45
  )
44
46
  self.summarize_prompt_path = summarize_prompt_path
45
47
 
@@ -1,5 +1,5 @@
1
1
  import asyncio
2
- from typing import Any, Callable, Optional
2
+ from typing import Any, Callable, Optional, List, Union
3
3
  from heapq import nlargest
4
4
 
5
5
  from cognee.infrastructure.databases.graph import get_graph_engine
@@ -72,7 +72,7 @@ class LexicalRetriever(BaseRetriever):
72
72
  self._initialized = True
73
73
  logger.info("Initialized with %d document chunks", len(self.chunks))
74
74
 
75
- async def get_context(self, query: str) -> Any:
75
+ async def get_retrieved_objects(self, query: str) -> Any:
76
76
  """Retrieves relevant chunks for the given query."""
77
77
  if not self._initialized:
78
78
  await self.initialize()
@@ -116,11 +116,36 @@ class LexicalRetriever(BaseRetriever):
116
116
  else:
117
117
  return [self.payloads[chunk_id] for chunk_id, _ in top_results]
118
118
 
119
- async def get_completion(
120
- self, query: str, context: Optional[Any] = None, session_id: Optional[str] = None
121
- ) -> Any:
119
+ async def get_context_from_objects(self, query: str, retrieved_objects: Any) -> str:
122
120
  """
123
- Returns context for the given query (retrieves if not provided).
121
+ Retrieves context from retrieved chunks, in text form.
122
+
123
+ Parameters:
124
+ -----------
125
+
126
+ - query (str): The query string used to search for relevant document chunk payloads.
127
+ - retrieved_objects (Any): The retrieved objects to be used for generating textual context.
128
+
129
+ Returns:
130
+ --------
131
+
132
+ - str: A string containing the combined text of the retrieved chunk payloads, or an
133
+ empty string if none are found.
134
+ """
135
+ if retrieved_objects:
136
+ payload_texts = [payload["text"] for payload in retrieved_objects]
137
+ return "\n".join(payload_texts)
138
+ else:
139
+ return ""
140
+
141
+ async def get_completion_from_context(
142
+ self, query: str, retrieved_objects: Any, context: Any
143
+ ) -> Union[List[str], List[dict]]:
144
+ """
145
+ Returns a completion for the given query.
146
+
147
+ In case of the Lexical Retriever, we do not generate a completion, we just return
148
+ the scored chunk payloads, i.e. the retrieved objects.
124
149
 
125
150
  Parameters:
126
151
  -----------
@@ -128,14 +153,11 @@ class LexicalRetriever(BaseRetriever):
128
153
  - query (str): The query string to retrieve context for.
129
154
  - context (Optional[Any]): Optional pre-fetched context; if None, it retrieves
130
155
  the context for the query. (default None)
131
- - session_id (Optional[str]): Optional session identifier for caching. If None,
132
- defaults to 'default_session'. (default None)
133
156
 
134
157
  Returns:
135
158
  --------
136
159
 
137
- - Any: The context, either provided or retrieved.
160
+ - List[dict]: The retrieved objects, i.e. the scored payloads.
138
161
  """
139
- if context is None:
140
- context = await self.get_context(query)
141
- return context
162
+ # TODO: Do we want to generate a completion using LLM here?
163
+ return retrieved_objects
@@ -4,7 +4,6 @@ from cognee.infrastructure.databases.graph import get_graph_engine
4
4
  from cognee.infrastructure.llm.LLMGateway import LLMGateway
5
5
  from cognee.infrastructure.llm.prompts import render_prompt
6
6
  from cognee.modules.retrieval.base_retriever import BaseRetriever
7
- from cognee.modules.retrieval.exceptions import SearchTypeNotSupported
8
7
  from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface
9
8
 
10
9
  logger = get_logger("NaturalLanguageRetriever")
@@ -25,10 +24,12 @@ class NaturalLanguageRetriever(BaseRetriever):
25
24
  self,
26
25
  system_prompt_path: str = "natural_language_retriever_system.txt",
27
26
  max_attempts: int = 3,
27
+ session_id: Optional[str] = None,
28
28
  ):
29
29
  """Initialize retriever with optional custom prompt paths."""
30
30
  self.system_prompt_path = system_prompt_path
31
31
  self.max_attempts = max_attempts
32
+ self.session_id = session_id
32
33
 
33
34
  async def _get_graph_schema(self, graph_engine) -> tuple:
34
35
  """Retrieve the node and edge schemas from the graph database."""
@@ -102,7 +103,17 @@ class NaturalLanguageRetriever(BaseRetriever):
102
103
  )
103
104
  return []
104
105
 
105
- async def get_context(self, query: str) -> Optional[Any]:
106
+ async def get_retrieved_objects(self, query: str) -> Any:
107
+ graph_engine = await get_graph_engine()
108
+ is_empty = await graph_engine.is_empty()
109
+
110
+ if is_empty:
111
+ logger.warning("Search attempt on an empty knowledge graph")
112
+ return []
113
+
114
+ return await self._execute_cypher_query(query, graph_engine)
115
+
116
+ async def get_context_from_objects(self, query: str, retrieved_objects: Any) -> Optional[Any]:
106
117
  """
107
118
  Retrieves relevant context using a natural language query converted to Cypher.
108
119
 
@@ -121,17 +132,11 @@ class NaturalLanguageRetriever(BaseRetriever):
121
132
  - Optional[Any]: Returns the context retrieved from the graph database based on the
122
133
  query.
123
134
  """
124
- graph_engine = await get_graph_engine()
125
- is_empty = await graph_engine.is_empty()
135
+ # TODO: Do we want to process retrieved_objects into a context string?
136
+ return retrieved_objects
126
137
 
127
- if is_empty:
128
- logger.warning("Search attempt on an empty knowledge graph")
129
- return []
130
-
131
- return await self._execute_cypher_query(query, graph_engine)
132
-
133
- async def get_completion(
134
- self, query: str, context: Optional[Any] = None, session_id: Optional[str] = None
138
+ async def get_completion_from_context(
139
+ self, query: str, retrieved_objects: Any, context: Optional[Any] = None
135
140
  ) -> Any:
136
141
  """
137
142
  Returns a completion based on the query and context.
@@ -154,7 +159,5 @@ class NaturalLanguageRetriever(BaseRetriever):
154
159
 
155
160
  - Any: Returns the completion derived from the given query and context.
156
161
  """
157
- if context is None:
158
- context = await self.get_context(query)
159
-
162
+ # TODO: Do we want to generate a completion using LLM here?
160
163
  return context
@@ -1,9 +1,10 @@
1
- from typing import Any, Optional
1
+ from typing import Any, Optional, List, Union
2
2
 
3
3
  from cognee.shared.logging_utils import get_logger
4
4
  from cognee.infrastructure.databases.vector import get_vector_engine
5
5
  from cognee.modules.retrieval.base_retriever import BaseRetriever
6
6
  from cognee.modules.retrieval.exceptions.exceptions import NoDataError
7
+ from cognee.modules.retrieval.utils.access_tracking import update_node_access_timestamps
7
8
  from cognee.infrastructure.databases.vector.exceptions.exceptions import CollectionNotFoundError
8
9
 
9
10
  logger = get_logger("SummariesRetriever")
@@ -22,13 +23,14 @@ class SummariesRetriever(BaseRetriever):
22
23
  - top_k: int - Number of top summaries to retrieve.
23
24
  """
24
25
 
25
- def __init__(self, top_k: int = 5):
26
+ def __init__(self, top_k: int = 5, session_id: Optional[str] = None):
26
27
  """Initialize retriever with search parameters."""
27
28
  self.top_k = top_k
29
+ self.session_id = session_id
28
30
 
29
- async def get_context(self, query: str) -> Any:
31
+ async def get_retrieved_objects(self, query: str) -> Any:
30
32
  """
31
- Retrieves summary context based on the query.
33
+ Retrieves text summary objects based on the query.
32
34
 
33
35
  On encountering a missing collection, raises NoDataError with a message to add data
34
36
  first.
@@ -41,7 +43,7 @@ class SummariesRetriever(BaseRetriever):
41
43
  Returns:
42
44
  --------
43
45
 
44
- - Any: A list of payloads from the retrieved summaries.
46
+ - Any: A list of text summaries retrieved from the search.
45
47
  """
46
48
  logger.info(
47
49
  f"Starting summary retrieval for query: '{query[:100]}{'...' if len(query) > 100 else ''}'"
@@ -51,51 +53,66 @@ class SummariesRetriever(BaseRetriever):
51
53
 
52
54
  try:
53
55
  summaries_results = await vector_engine.search(
54
- "TextSummary_text", query, limit=self.top_k
56
+ "TextSummary_text", query, limit=self.top_k, include_payload=True
55
57
  )
56
58
  logger.info(f"Found {len(summaries_results)} summaries from vector search")
59
+
60
+ await update_node_access_timestamps(summaries_results)
61
+
62
+ return summaries_results
57
63
  except CollectionNotFoundError as error:
58
64
  logger.error("TextSummary_text collection not found in vector database")
59
65
  raise NoDataError("No data found in the system, please add data first.") from error
60
66
 
61
- summary_payloads = [summary.payload for summary in summaries_results]
62
- logger.info(f"Returning {len(summary_payloads)} summary payloads")
63
- return summary_payloads
64
-
65
- async def get_completion(
66
- self, query: str, context: Optional[Any] = None, session_id: Optional[str] = None, **kwargs
67
- ) -> Any:
67
+ async def get_context_from_objects(self, query: str, retrieved_objects: Any) -> str:
68
68
  """
69
- Generates a completion using summaries context.
69
+ Retrieves relevant summaries as context.
70
70
 
71
- If no context is provided, retrieves context using the query. Returns the provided
72
- context or the retrieved context if none was given.
71
+ Fetches text summaries based on a query from a vector engine and combines their text.
72
+ Returns empty string if no summaries are found. Raises NoDataError if the collection is not
73
+ found.
73
74
 
74
75
  Parameters:
75
76
  -----------
76
77
 
77
- - query (str): The search query for generating the completion.
78
- - context (Optional[Any]): Optional context for the completion; if not provided,
79
- will be retrieved based on the query. (default None)
80
- - session_id (Optional[str]): Optional session identifier for caching. If None,
81
- defaults to 'default_session'. (default None)
78
+ - query (str): The query string used to search for relevant text summaries.
82
79
 
83
80
  Returns:
84
81
  --------
85
82
 
86
- - Any: The generated completion context, which is either provided or retrieved.
83
+ - str: A string containing the combined text of the retrieved summaries, or an
84
+ empty string if none are found.
87
85
  """
88
- logger.info(
89
- f"Starting completion generation for query: '{query[:100]}{'...' if len(query) > 100 else ''}'"
90
- )
91
-
92
- if context is None:
93
- logger.debug("No context provided, retrieving context from vector database")
94
- context = await self.get_context(query)
86
+ if retrieved_objects:
87
+ summary_payload_texts = [summary.payload["text"] for summary in retrieved_objects]
88
+ return "\n".join(summary_payload_texts)
95
89
  else:
96
- logger.debug("Using provided context")
90
+ return ""
97
91
 
98
- logger.info(
99
- f"Returning context with {len(context) if isinstance(context, list) else 1} item(s)"
100
- )
101
- return context
92
+ async def get_completion_from_context(
93
+ self, query: str, retrieved_objects: Any, context: Any
94
+ ) -> Union[List[str], List[dict]]:
95
+ """
96
+ Generates a completion using text summaries.
97
+ In case of the Summaries Retriever, we do not generate a completion, we just return
98
+ the payloads of found summaries.
99
+
100
+ Parameters:
101
+ -----------
102
+
103
+ - query (str): The query string to be used for generating a completion.
104
+ - retrieved_objects (Any): The retrieved objects to be used for generating a completion.
105
+ - context (Any): The context to be used for generating a completion.
106
+
107
+ Returns:
108
+ --------
109
+
110
+ - List[dict]: A list of payloads of found summaries.
111
+ """
112
+ # TODO: Do we want to generate a completion using LLM here?
113
+ if retrieved_objects:
114
+ summary_payloads = [summary.payload for summary in retrieved_objects]
115
+ logger.info(f"Returning {len(summary_payloads)} summary payloads")
116
+ return summary_payloads
117
+ else:
118
+ return []