cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. cognee/__init__.py +2 -0
  2. cognee/alembic/README +1 -0
  3. cognee/alembic/env.py +107 -0
  4. cognee/alembic/script.py.mako +26 -0
  5. cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
  6. cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
  7. cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
  8. cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
  9. cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
  10. cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
  11. cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
  12. cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
  13. cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
  14. cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
  15. cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
  16. cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
  17. cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
  18. cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
  19. cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
  20. cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
  21. cognee/alembic.ini +117 -0
  22. cognee/api/v1/add/routers/get_add_router.py +2 -0
  23. cognee/api/v1/cognify/cognify.py +11 -6
  24. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
  25. cognee/api/v1/config/config.py +60 -0
  26. cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
  27. cognee/api/v1/memify/routers/get_memify_router.py +2 -0
  28. cognee/api/v1/search/routers/get_search_router.py +21 -6
  29. cognee/api/v1/search/search.py +25 -5
  30. cognee/api/v1/sync/routers/get_sync_router.py +3 -3
  31. cognee/cli/commands/add_command.py +1 -1
  32. cognee/cli/commands/cognify_command.py +6 -0
  33. cognee/cli/commands/config_command.py +1 -1
  34. cognee/context_global_variables.py +5 -1
  35. cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
  36. cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
  37. cognee/infrastructure/databases/cache/config.py +6 -0
  38. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
  39. cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
  40. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
  41. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
  42. cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
  43. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
  44. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
  45. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
  46. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
  47. cognee/infrastructure/databases/vector/config.py +6 -0
  48. cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
  49. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
  50. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
  51. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
  52. cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
  53. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
  54. cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
  55. cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
  57. cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
  58. cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
  59. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
  60. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
  61. cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
  62. cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
  63. cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
  64. cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
  65. cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
  66. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
  67. cognee/infrastructure/llm/prompts/test.txt +1 -1
  68. cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
  71. cognee/modules/chunking/models/DocumentChunk.py +0 -1
  72. cognee/modules/cognify/config.py +2 -0
  73. cognee/modules/data/models/Data.py +1 -0
  74. cognee/modules/engine/models/Entity.py +0 -1
  75. cognee/modules/engine/operations/setup.py +6 -0
  76. cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
  77. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
  78. cognee/modules/graph/utils/__init__.py +1 -0
  79. cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
  80. cognee/modules/notebooks/methods/__init__.py +1 -0
  81. cognee/modules/notebooks/methods/create_notebook.py +0 -34
  82. cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
  83. cognee/modules/notebooks/methods/get_notebooks.py +12 -8
  84. cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
  85. cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
  86. cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
  87. cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
  88. cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
  89. cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
  90. cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
  91. cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
  92. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
  93. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
  94. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
  95. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
  96. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
  97. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
  98. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
  99. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
  100. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
  101. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
  102. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
  103. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
  104. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
  105. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
  106. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
  107. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
  108. cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
  109. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
  110. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
  111. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
  112. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
  113. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
  114. cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
  115. cognee/modules/retrieval/__init__.py +0 -1
  116. cognee/modules/retrieval/base_retriever.py +66 -10
  117. cognee/modules/retrieval/chunks_retriever.py +57 -49
  118. cognee/modules/retrieval/coding_rules_retriever.py +12 -5
  119. cognee/modules/retrieval/completion_retriever.py +29 -28
  120. cognee/modules/retrieval/cypher_search_retriever.py +25 -20
  121. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
  122. cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
  123. cognee/modules/retrieval/graph_completion_retriever.py +78 -63
  124. cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
  125. cognee/modules/retrieval/lexical_retriever.py +34 -12
  126. cognee/modules/retrieval/natural_language_retriever.py +18 -15
  127. cognee/modules/retrieval/summaries_retriever.py +51 -34
  128. cognee/modules/retrieval/temporal_retriever.py +59 -49
  129. cognee/modules/retrieval/triplet_retriever.py +31 -32
  130. cognee/modules/retrieval/utils/access_tracking.py +88 -0
  131. cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
  132. cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
  133. cognee/modules/search/methods/__init__.py +1 -0
  134. cognee/modules/search/methods/get_retriever_output.py +53 -0
  135. cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
  136. cognee/modules/search/methods/search.py +90 -215
  137. cognee/modules/search/models/SearchResultPayload.py +67 -0
  138. cognee/modules/search/types/SearchResult.py +1 -8
  139. cognee/modules/search/types/SearchType.py +1 -2
  140. cognee/modules/search/types/__init__.py +1 -1
  141. cognee/modules/search/utils/__init__.py +1 -2
  142. cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
  143. cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
  144. cognee/modules/users/authentication/default/default_transport.py +11 -1
  145. cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
  146. cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
  147. cognee/modules/users/methods/create_user.py +0 -9
  148. cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
  149. cognee/modules/visualization/cognee_network_visualization.py +1 -1
  150. cognee/run_migrations.py +48 -0
  151. cognee/shared/exceptions/__init__.py +1 -3
  152. cognee/shared/exceptions/exceptions.py +11 -1
  153. cognee/shared/usage_logger.py +332 -0
  154. cognee/shared/utils.py +12 -5
  155. cognee/tasks/chunks/__init__.py +9 -0
  156. cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
  157. cognee/tasks/graph/__init__.py +7 -0
  158. cognee/tasks/memify/__init__.py +8 -0
  159. cognee/tasks/memify/extract_usage_frequency.py +613 -0
  160. cognee/tasks/summarization/models.py +0 -2
  161. cognee/tasks/temporal_graph/__init__.py +0 -1
  162. cognee/tasks/translation/__init__.py +96 -0
  163. cognee/tasks/translation/config.py +110 -0
  164. cognee/tasks/translation/detect_language.py +190 -0
  165. cognee/tasks/translation/exceptions.py +62 -0
  166. cognee/tasks/translation/models.py +72 -0
  167. cognee/tasks/translation/providers/__init__.py +44 -0
  168. cognee/tasks/translation/providers/azure_provider.py +192 -0
  169. cognee/tasks/translation/providers/base.py +85 -0
  170. cognee/tasks/translation/providers/google_provider.py +158 -0
  171. cognee/tasks/translation/providers/llm_provider.py +143 -0
  172. cognee/tasks/translation/translate_content.py +282 -0
  173. cognee/tasks/web_scraper/default_url_crawler.py +6 -2
  174. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
  175. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
  176. cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
  177. cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
  178. cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
  179. cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
  180. cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
  181. cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
  182. cognee/tests/integration/retrieval/test_structured_output.py +62 -18
  183. cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
  184. cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
  185. cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
  186. cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
  187. cognee/tests/tasks/translation/README.md +147 -0
  188. cognee/tests/tasks/translation/__init__.py +1 -0
  189. cognee/tests/tasks/translation/config_test.py +93 -0
  190. cognee/tests/tasks/translation/detect_language_test.py +118 -0
  191. cognee/tests/tasks/translation/providers_test.py +151 -0
  192. cognee/tests/tasks/translation/translate_content_test.py +213 -0
  193. cognee/tests/test_chromadb.py +1 -1
  194. cognee/tests/test_cleanup_unused_data.py +165 -0
  195. cognee/tests/test_delete_by_id.py +6 -6
  196. cognee/tests/test_extract_usage_frequency.py +308 -0
  197. cognee/tests/test_kuzu.py +17 -7
  198. cognee/tests/test_lancedb.py +3 -1
  199. cognee/tests/test_library.py +1 -1
  200. cognee/tests/test_neo4j.py +17 -7
  201. cognee/tests/test_neptune_analytics_vector.py +3 -1
  202. cognee/tests/test_permissions.py +172 -187
  203. cognee/tests/test_pgvector.py +3 -1
  204. cognee/tests/test_relational_db_migration.py +15 -1
  205. cognee/tests/test_remote_kuzu.py +3 -1
  206. cognee/tests/test_s3_file_storage.py +1 -1
  207. cognee/tests/test_search_db.py +97 -110
  208. cognee/tests/test_usage_logger_e2e.py +268 -0
  209. cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
  210. cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
  211. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
  212. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
  213. cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
  214. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
  215. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
  216. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
  217. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
  218. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
  219. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
  220. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
  221. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
  222. cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
  223. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
  224. cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
  225. cognee/tests/unit/modules/search/test_search.py +176 -0
  226. cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
  227. cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
  228. cognee/tests/unit/shared/test_usage_logger.py +241 -0
  229. cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
  230. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/METADATA +22 -17
  231. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/RECORD +235 -147
  232. cognee/api/.env.example +0 -5
  233. cognee/modules/retrieval/base_graph_retriever.py +0 -24
  234. cognee/modules/search/methods/get_search_type_tools.py +0 -223
  235. cognee/modules/search/methods/no_access_control_search.py +0 -62
  236. cognee/modules/search/utils/prepare_search_result.py +0 -63
  237. cognee/tests/test_feedback_enrichment.py +0 -174
  238. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/WHEEL +0 -0
  239. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/entry_points.txt +0 -0
  240. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/LICENSE +0 -0
  241. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/NOTICE.md +0 -0
@@ -1,6 +1,7 @@
1
1
  import time
2
2
  from cognee.shared.logging_utils import get_logger
3
- from typing import List, Dict, Union, Optional, Type
3
+ from cognee.modules.engine.utils.generate_edge_id import generate_edge_id
4
+ from typing import List, Dict, Union, Optional, Type, Iterable, Tuple, Callable, Any
4
5
 
5
6
  from cognee.modules.graph.exceptions import (
6
7
  EntityNotFoundError,
@@ -25,12 +26,16 @@ class CogneeGraph(CogneeAbstractGraph):
25
26
 
26
27
  nodes: Dict[str, Node]
27
28
  edges: List[Edge]
29
+ edges_by_distance_key: Dict[str, List[Edge]]
28
30
  directed: bool
31
+ triplet_distance_penalty: float
29
32
 
30
33
  def __init__(self, directed: bool = True):
31
34
  self.nodes = {}
32
35
  self.edges = []
36
+ self.edges_by_distance_key = {}
33
37
  self.directed = directed
38
+ self.triplet_distance_penalty = 3.5
34
39
 
35
40
  def add_node(self, node: Node) -> None:
36
41
  if node.id not in self.nodes:
@@ -40,8 +45,20 @@ class CogneeGraph(CogneeAbstractGraph):
40
45
 
41
46
  def add_edge(self, edge: Edge) -> None:
42
47
  self.edges.append(edge)
48
+
49
+ edge_text = edge.attributes.get("edge_text") or edge.attributes.get("relationship_type")
50
+ edge.attributes["edge_type_id"] = (
51
+ generate_edge_id(edge_id=edge_text) if edge_text else None
52
+ ) # Update edge with generated edge_type_id
53
+
43
54
  edge.node1.add_skeleton_edge(edge)
44
55
  edge.node2.add_skeleton_edge(edge)
56
+ key = edge.get_distance_key()
57
+ if not key:
58
+ return
59
+ if key not in self.edges_by_distance_key:
60
+ self.edges_by_distance_key[key] = []
61
+ self.edges_by_distance_key[key].append(edge)
45
62
 
46
63
  def get_node(self, node_id: str) -> Node:
47
64
  return self.nodes.get(node_id, None)
@@ -56,6 +73,29 @@ class CogneeGraph(CogneeAbstractGraph):
56
73
  def get_edges(self) -> List[Edge]:
57
74
  return self.edges
58
75
 
76
+ def reset_distances(self, collection: Iterable[Union[Node, Edge]], query_count: int) -> None:
77
+ """Reset vector distances for a collection of nodes or edges."""
78
+ for item in collection:
79
+ item.reset_vector_distances(query_count, self.triplet_distance_penalty)
80
+
81
+ def _normalize_query_distance_lists(
82
+ self, distances: List, query_list_length: Optional[int] = None, name: str = "distances"
83
+ ) -> List:
84
+ """Normalize shape: flat list -> single-query; nested list -> multi-query."""
85
+ if not distances:
86
+ return []
87
+ first_item = distances[0]
88
+ if isinstance(first_item, (list, tuple)):
89
+ per_query_lists = distances
90
+ else:
91
+ per_query_lists = [distances]
92
+ if query_list_length is not None and len(per_query_lists) != query_list_length:
93
+ raise ValueError(
94
+ f"{name} has {len(per_query_lists)} query lists, "
95
+ f"but query_list_length is {query_list_length}"
96
+ )
97
+ return per_query_lists
98
+
59
99
  async def _get_nodeset_subgraph(
60
100
  self,
61
101
  adapter,
@@ -148,7 +188,7 @@ class CogneeGraph(CogneeAbstractGraph):
148
188
  adapter, memory_fragment_filter
149
189
  )
150
190
 
151
- import time
191
+ self.triplet_distance_penalty = triplet_distance_penalty
152
192
 
153
193
  start_time = time.time()
154
194
  # Process nodes
@@ -182,9 +222,6 @@ class CogneeGraph(CogneeAbstractGraph):
182
222
  edge_penalty=triplet_distance_penalty,
183
223
  )
184
224
  self.add_edge(edge)
185
-
186
- source_node.add_skeleton_edge(edge)
187
- target_node.add_skeleton_edge(edge)
188
225
  else:
189
226
  raise EntityNotFoundError(
190
227
  message=f"Edge references nonexistent nodes: {source_id} -> {target_id}"
@@ -200,41 +237,117 @@ class CogneeGraph(CogneeAbstractGraph):
200
237
  logger.error(f"Error during graph projection: {str(e)}")
201
238
  raise
202
239
 
203
- async def map_vector_distances_to_graph_nodes(self, node_distances) -> None:
204
- mapped_nodes = 0
205
- for category, scored_results in node_distances.items():
206
- for scored_result in scored_results:
207
- node_id = str(scored_result.id)
208
- score = scored_result.score
209
- node = self.get_node(node_id)
210
- if node:
211
- node.add_attribute("vector_distance", score)
212
- mapped_nodes += 1
213
-
214
- async def map_vector_distances_to_graph_edges(self, edge_distances) -> None:
215
- try:
216
- if edge_distances is None:
217
- return
240
+ async def map_vector_distances_to_graph_nodes(
241
+ self,
242
+ node_distances,
243
+ query_list_length: Optional[int] = None,
244
+ ) -> None:
245
+ """Map vector distances to nodes, supporting single- and multi-query input shapes."""
218
246
 
219
- embedding_map = {result.payload["text"]: result.score for result in edge_distances}
247
+ query_count = query_list_length or 1
220
248
 
221
- for edge in self.edges:
222
- edge_key = edge.attributes.get("edge_text") or edge.attributes.get(
223
- "relationship_type"
224
- )
225
- distance = embedding_map.get(edge_key, None)
226
- if distance is not None:
227
- edge.attributes["vector_distance"] = distance
249
+ self.reset_distances(self.nodes.values(), query_count)
250
+
251
+ for collection_name, scored_results in node_distances.items():
252
+ if not scored_results:
253
+ continue
254
+
255
+ per_query_scored_results = self._normalize_query_distance_lists(
256
+ scored_results, query_list_length, f"Collection '{collection_name}'"
257
+ )
258
+
259
+ for query_index, scored_results in enumerate(per_query_scored_results):
260
+ for result in scored_results:
261
+ node_id = str(getattr(result, "id", None))
262
+ if not node_id:
263
+ continue
264
+ node = self.get_node(node_id)
265
+ if node is None:
266
+ continue
267
+ score = float(getattr(result, "score", self.triplet_distance_penalty))
268
+ node.update_distance_for_query(
269
+ query_index=query_index,
270
+ score=score,
271
+ query_count=query_count,
272
+ default_penalty=self.triplet_distance_penalty,
273
+ )
228
274
 
229
- except Exception as ex:
230
- logger.error(f"Error mapping vector distances to edges: {str(ex)}")
231
- raise ex
275
+ async def map_vector_distances_to_graph_edges(
276
+ self,
277
+ edge_distances,
278
+ query_list_length: Optional[int] = None,
279
+ ) -> None:
280
+ """Map vector distances to graph edges, supporting single- and multi-query input shapes."""
281
+ query_count = query_list_length or 1
282
+
283
+ self.reset_distances(self.edges, query_count)
284
+
285
+ if not edge_distances:
286
+ return None
232
287
 
233
- async def calculate_top_triplet_importances(self, k: int) -> List[Edge]:
234
- def score(edge):
235
- n1 = edge.node1.attributes.get("vector_distance", 1)
236
- n2 = edge.node2.attributes.get("vector_distance", 1)
237
- e = edge.attributes.get("vector_distance", 1)
238
- return n1 + n2 + e
288
+ per_query_scored_results = self._normalize_query_distance_lists(
289
+ edge_distances, query_list_length, "edge_distances"
290
+ )
291
+
292
+ for query_index, scored_results in enumerate(per_query_scored_results):
293
+ for result in scored_results:
294
+ matching_edges = self.edges_by_distance_key.get(str(result.id))
295
+ if not matching_edges:
296
+ continue
297
+ for edge in matching_edges:
298
+ edge.update_distance_for_query(
299
+ query_index=query_index,
300
+ score=float(getattr(result, "score", self.triplet_distance_penalty)),
301
+ query_count=query_count,
302
+ default_penalty=self.triplet_distance_penalty,
303
+ )
304
+
305
+ def _calculate_query_top_triplet_importances(
306
+ self,
307
+ k: int,
308
+ query_index: int = 0,
309
+ ) -> List[Edge]:
310
+ """Calculate top k triplet importances for a specific query index."""
311
+
312
+ def score(edge: Edge) -> float:
313
+ elements = (
314
+ (edge.node1, f"node {edge.node1.id}"),
315
+ (edge.node2, f"node {edge.node2.id}"),
316
+ (edge, f"edge {edge.node1.id}->{edge.node2.id}"),
317
+ )
318
+
319
+ importances = []
320
+ for element, label in elements:
321
+ distances = element.attributes.get("vector_distance")
322
+ if not isinstance(distances, list) or query_index >= len(distances):
323
+ raise ValueError(
324
+ f"{label}: vector_distance must be a list with length > {query_index} "
325
+ f"before scoring (got {type(distances).__name__} with length "
326
+ f"{len(distances) if isinstance(distances, list) else 'n/a'})"
327
+ )
328
+ value = distances[query_index]
329
+ try:
330
+ importances.append(float(value))
331
+ except (TypeError, ValueError):
332
+ raise ValueError(
333
+ f"{label}: vector_distance[{query_index}] must be float-like, "
334
+ f"got {type(value).__name__}"
335
+ )
336
+
337
+ return sum(importances)
239
338
 
240
339
  return heapq.nsmallest(k, self.edges, key=score)
340
+
341
+ async def calculate_top_triplet_importances(
342
+ self, k: int, query_list_length: Optional[int] = None
343
+ ) -> Union[List[Edge], List[List[Edge]]]:
344
+ """Calculate top k triplet importances, supporting both single and multi-query modes."""
345
+ query_count = query_list_length or 1
346
+ results = [
347
+ self._calculate_query_top_triplet_importances(k=k, query_index=i)
348
+ for i in range(query_count)
349
+ ]
350
+
351
+ if query_list_length is None:
352
+ return results[0]
353
+ return results
@@ -30,11 +30,31 @@ class Node:
30
30
  raise InvalidDimensionsError()
31
31
  self.id = node_id
32
32
  self.attributes = attributes if attributes is not None else {}
33
- self.attributes["vector_distance"] = node_penalty
33
+ self.attributes["vector_distance"] = None
34
34
  self.skeleton_neighbours = []
35
35
  self.skeleton_edges = []
36
36
  self.status = np.ones(dimension, dtype=int)
37
37
 
38
+ def reset_vector_distances(self, query_count: int, default_penalty: float) -> None:
39
+ self.attributes["vector_distance"] = [default_penalty] * query_count
40
+
41
+ def ensure_vector_distance_list(self, query_count: int, default_penalty: float) -> List[float]:
42
+ distances = self.attributes.get("vector_distance")
43
+ if not isinstance(distances, list) or len(distances) != query_count:
44
+ distances = [default_penalty] * query_count
45
+ self.attributes["vector_distance"] = distances
46
+ return distances
47
+
48
+ def update_distance_for_query(
49
+ self,
50
+ query_index: int,
51
+ score: float,
52
+ query_count: int,
53
+ default_penalty: float,
54
+ ) -> None:
55
+ distances = self.ensure_vector_distance_list(query_count, default_penalty)
56
+ distances[query_index] = score
57
+
38
58
  def add_skeleton_neighbor(self, neighbor: "Node") -> None:
39
59
  if neighbor not in self.skeleton_neighbours:
40
60
  self.skeleton_neighbours.append(neighbor)
@@ -116,10 +136,36 @@ class Edge:
116
136
  self.node1 = node1
117
137
  self.node2 = node2
118
138
  self.attributes = attributes if attributes is not None else {}
119
- self.attributes["vector_distance"] = edge_penalty
139
+ self.attributes["vector_distance"] = None
120
140
  self.directed = directed
121
141
  self.status = np.ones(dimension, dtype=int)
122
142
 
143
+ def get_distance_key(self) -> Optional[str]:
144
+ key = self.attributes.get("edge_type_id")
145
+ if key is None:
146
+ return None
147
+ return str(key)
148
+
149
+ def reset_vector_distances(self, query_count: int, default_penalty: float) -> None:
150
+ self.attributes["vector_distance"] = [default_penalty] * query_count
151
+
152
+ def ensure_vector_distance_list(self, query_count: int, default_penalty: float) -> List[float]:
153
+ distances = self.attributes.get("vector_distance")
154
+ if not isinstance(distances, list) or len(distances) != query_count:
155
+ distances = [default_penalty] * query_count
156
+ self.attributes["vector_distance"] = distances
157
+ return distances
158
+
159
+ def update_distance_for_query(
160
+ self,
161
+ query_index: int,
162
+ score: float,
163
+ query_count: int,
164
+ default_penalty: float,
165
+ ) -> None:
166
+ distances = self.ensure_vector_distance_list(query_count, default_penalty)
167
+ distances[query_index] = score
168
+
123
169
  def is_edge_alive_in_dimension(self, dimension: int) -> bool:
124
170
  if dimension < 0 or dimension >= len(self.status):
125
171
  raise DimensionOutOfRangeError(dimension=dimension, max_index=len(self.status) - 1)
@@ -5,3 +5,4 @@ from .retrieve_existing_edges import retrieve_existing_edges
5
5
  from .convert_node_to_data_point import convert_node_to_data_point
6
6
  from .deduplicate_nodes_and_edges import deduplicate_nodes_and_edges
7
7
  from .resolve_edges_to_text import resolve_edges_to_text
8
+ from .get_entity_nodes_from_triplets import get_entity_nodes_from_triplets
@@ -0,0 +1,12 @@
1
+ def get_entity_nodes_from_triplets(triplets):
2
+ entity_nodes = []
3
+ seen_ids = set()
4
+ for triplet in triplets:
5
+ if hasattr(triplet, "node1") and triplet.node1 and triplet.node1.id not in seen_ids:
6
+ entity_nodes.append({"id": str(triplet.node1.id)})
7
+ seen_ids.add(triplet.node1.id)
8
+ if hasattr(triplet, "node2") and triplet.node2 and triplet.node2.id not in seen_ids:
9
+ entity_nodes.append({"id": str(triplet.node2.id)})
10
+ seen_ids.add(triplet.node2.id)
11
+
12
+ return entity_nodes
@@ -3,3 +3,4 @@ from .get_notebooks import get_notebooks
3
3
  from .create_notebook import create_notebook
4
4
  from .update_notebook import update_notebook
5
5
  from .delete_notebook import delete_notebook
6
+ from .create_tutorial_notebooks import create_tutorial_notebooks
@@ -6,40 +6,6 @@ from cognee.infrastructure.databases.relational import with_async_session
6
6
 
7
7
  from ..models.Notebook import Notebook, NotebookCell
8
8
 
9
- TUTORIAL_NOTEBOOK_NAME = "Python Development with Cognee Tutorial 🧠"
10
-
11
-
12
- async def _create_tutorial_notebook(
13
- user_id: UUID, session: AsyncSession, force_refresh: bool = False
14
- ) -> None:
15
- """
16
- Create the default tutorial notebook for new users.
17
- Dynamically fetches from: https://github.com/topoteretes/cognee/blob/notebook_tutorial/notebooks/starter_tutorial.zip
18
- """
19
- TUTORIAL_ZIP_URL = (
20
- "https://github.com/topoteretes/cognee/raw/notebook_tutorial/notebooks/starter_tutorial.zip"
21
- )
22
-
23
- try:
24
- # Create notebook from remote zip file (includes notebook + data files)
25
- notebook = await Notebook.from_ipynb_zip_url(
26
- zip_url=TUTORIAL_ZIP_URL,
27
- owner_id=user_id,
28
- notebook_filename="tutorial.ipynb",
29
- name=TUTORIAL_NOTEBOOK_NAME,
30
- deletable=False,
31
- force=force_refresh,
32
- )
33
-
34
- # Add to session and commit
35
- session.add(notebook)
36
- await session.commit()
37
-
38
- except Exception as e:
39
- print(f"Failed to fetch tutorial notebook from {TUTORIAL_ZIP_URL}: {e}")
40
-
41
- raise e
42
-
43
9
 
44
10
  @with_async_session
45
11
  async def create_notebook(
@@ -0,0 +1,191 @@
1
+ from pathlib import Path
2
+ from uuid import NAMESPACE_OID, UUID, uuid5, uuid4
3
+ from typing import List, Optional, Dict, Any
4
+ import re
5
+ import json
6
+ from sqlalchemy.ext.asyncio import AsyncSession
7
+
8
+ from cognee.shared.logging_utils import get_logger
9
+ from cognee.root_dir import ROOT_DIR
10
+
11
+ from ..models.Notebook import Notebook, NotebookCell
12
+
13
+ logger = get_logger()
14
+
15
+
16
def _get_tutorials_directory() -> Path:
    """Return the directory holding the bundled tutorials.

    The path is ``modules/notebooks/tutorials`` resolved against ``ROOT_DIR``.
    """
    notebooks_module_dir = ROOT_DIR / "modules" / "notebooks"
    return notebooks_module_dir / "tutorials"
19
+
20
+
21
+ def _parse_cell_index(filename: str) -> int:
22
+ """Extract cell index from filename like 'cell-0.md' or 'cell-123.py'."""
23
+ match = re.search(r"cell-(\d+)", filename)
24
+ if match:
25
+ return int(match.group(1))
26
+ return -1
27
+
28
+
29
+ def _get_cell_type(file_path: Path) -> str:
30
+ """Determine cell type from file extension."""
31
+ extension = file_path.suffix.lower()
32
+ if extension == ".md":
33
+ return "markdown"
34
+ elif extension == ".py":
35
+ return "code"
36
+ else:
37
+ raise ValueError(f"Unsupported cell file type: {extension}")
38
+
39
+
40
+ def _extract_markdown_heading(content: str) -> str | None:
41
+ """Extract the first markdown heading from content."""
42
+ for line in content.splitlines():
43
+ line = line.strip()
44
+ # Match lines starting with one or more # followed by space and text
45
+ match = re.match(r"^#+\s+(.+)$", line)
46
+ if match:
47
+ return match.group(1).strip()
48
+ return None
49
+
50
+
51
+ def _get_cell_name(cell_file: Path, cell_type: str, content: str) -> str:
52
+ """Get the appropriate name for a cell."""
53
+ if cell_type == "code":
54
+ return "Code Cell"
55
+ elif cell_type == "markdown":
56
+ heading = _extract_markdown_heading(content)
57
+ if heading:
58
+ return heading
59
+ # Fallback to filename stem
60
+ return cell_file.stem
61
+
62
+
63
def _load_tutorial_cells(tutorial_dir: Path) -> List[NotebookCell]:
    """Build the ordered list of notebook cells stored in a tutorial directory.

    Only regular files whose name starts with ``cell-`` and whose suffix is
    ``.md`` or ``.py`` are considered. They are ordered by the index parsed
    from the file name. A cell that fails to load is logged as a warning and
    skipped, so the returned list may be shorter than the number of files.
    """

    def _is_cell_file(candidate: Path) -> bool:
        return (
            candidate.is_file()
            and candidate.name.startswith("cell-")
            and candidate.suffix in [".md", ".py"]
        )

    ordered_files = sorted(
        (entry for entry in tutorial_dir.iterdir() if _is_cell_file(entry)),
        key=lambda entry: _parse_cell_index(entry.name),
    )

    loaded_cells = []
    for source_file in ordered_files:
        try:
            kind = _get_cell_type(source_file)
            text = source_file.read_text(encoding="utf-8")
            label = _get_cell_name(source_file, kind, text)
            new_cell = NotebookCell(
                id=uuid4(),
                type=kind,
                name=label,
                content=text,
            )
            loaded_cells.append(new_cell)
        except Exception as e:
            # Best effort: one unreadable cell must not block the rest.
            logger.warning(f"Failed to load cell {source_file}: {e}")
            continue

    return loaded_cells
96
+
97
+
98
+ def _read_tutorial_config(tutorial_dir: Path) -> Optional[Dict[str, Any]]:
99
+ """Read config.json from a tutorial directory if it exists."""
100
+ config_path = tutorial_dir / "config.json"
101
+ if config_path.exists():
102
+ try:
103
+ with open(config_path, "r", encoding="utf-8") as f:
104
+ return json.load(f)
105
+ except (json.JSONDecodeError, IOError) as e:
106
+ logger.warning(f"Failed to read config.json from {tutorial_dir}: {e}")
107
+ return None
108
+ return None
109
+
110
+
111
+ def _format_tutorial_name(tutorial_dir_name: str) -> str:
112
+ """Format tutorial directory name into a readable notebook name (fallback)."""
113
+
114
+ name = tutorial_dir_name.replace("-", " ").replace("_", " ")
115
+ return f"{name.capitalize()} - tutorial 🧠"
116
+
117
+
118
async def create_tutorial_notebooks(user_id: UUID, session: AsyncSession) -> None:
    """
    Create one notebook per tutorial directory and persist them for a user.

    Each non-hidden sub-directory of the tutorials directory becomes a separate
    notebook owned by ``user_id``. The notebook name and ``deletable`` flag come
    from the directory's ``config.json`` when present; otherwise the name is
    derived from the directory name and ``deletable`` is False. All notebooks
    are added to ``session`` and committed together.

    Failures are logged and never propagated: a tutorial that cannot be built
    is skipped, and any other error ends the operation with an error log.
    """
    try:
        tutorials_root = _get_tutorials_directory()
        if not tutorials_root.exists():
            logger.warning(f"Tutorials directory not found: {tutorials_root}")
            return

        visible_dirs = [
            entry
            for entry in tutorials_root.iterdir()
            if entry.is_dir() and not entry.name.startswith(".")
        ]
        if not visible_dirs:
            logger.warning(f"No tutorial directories found in {tutorials_root}")
            return

        pending_notebooks = []
        for tutorial_dir in visible_dirs:
            try:
                cells = _load_tutorial_cells(tutorial_dir)
                if not cells:
                    logger.warning(f"No cells found in tutorial directory: {tutorial_dir}")
                    continue

                config = _read_tutorial_config(tutorial_dir)

                # Prefer the name from config.json; otherwise derive one from the directory name
                if not (config and "name" in config):
                    notebook_name = _format_tutorial_name(tutorial_dir.name)
                    logger.warning(
                        f"No config.json or 'name' field found in {tutorial_dir}, "
                        f"using fallback name: {notebook_name}"
                    )
                else:
                    notebook_name = config["name"]

                # Tutorials are non-deletable unless config.json says otherwise
                deletable = (
                    bool(config["deletable"]) if config and "deletable" in config else False
                )

                # NOTE(review): the id is derived from the notebook name alone, so it is
                # identical for every user - confirm this cannot collide with the
                # Notebook table's key constraints.
                pending_notebooks.append(
                    Notebook(
                        id=uuid5(NAMESPACE_OID, name=notebook_name),
                        owner_id=user_id,
                        name=notebook_name,
                        cells=cells,
                        deletable=deletable,
                    )
                )
                logger.info(f"Created tutorial notebook: {notebook_name} with {len(cells)} cells")

            except Exception as e:
                logger.error(f"Failed to create tutorial notebook from {tutorial_dir}: {e}")
                continue

        if pending_notebooks:
            for notebook in pending_notebooks:
                session.add(notebook)

            # Single commit so all tutorial notebooks are persisted together
            await session.commit()

    except Exception as e:
        logger.error(f"Failed to create tutorial notebooks for user {user_id}: {e}")
@@ -1,4 +1,4 @@
1
- from uuid import UUID
1
+ from uuid import NAMESPACE_OID, UUID, uuid5
2
2
  from typing import List
3
3
  from sqlalchemy import select, and_
4
4
  from sqlalchemy.ext.asyncio import AsyncSession
@@ -6,7 +6,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
6
6
  from cognee.infrastructure.databases.relational import with_async_session
7
7
 
8
8
  from ..models.Notebook import Notebook
9
- from .create_notebook import _create_tutorial_notebook, TUTORIAL_NOTEBOOK_NAME
9
+ from .create_tutorial_notebooks import create_tutorial_notebooks
10
10
 
11
11
  from cognee.shared.logging_utils import get_logger
12
12
 
@@ -19,21 +19,25 @@ async def get_notebooks(
19
19
  session: AsyncSession,
20
20
  ) -> List[Notebook]:
21
21
  # Check if tutorial notebook already exists for this user
22
+ tutorial_notebook_ids = [
23
+ uuid5(NAMESPACE_OID, name="Cognee Basics - tutorial 🧠"),
24
+ uuid5(NAMESPACE_OID, name="Python Development with Cognee - tutorial 🧠"),
25
+ ]
22
26
  tutorial_query = select(Notebook).where(
23
27
  and_(
24
28
  Notebook.owner_id == user_id,
25
- Notebook.name == TUTORIAL_NOTEBOOK_NAME,
29
+ Notebook.id.in_(tutorial_notebook_ids),
26
30
  ~Notebook.deletable,
27
31
  )
28
32
  )
29
33
  tutorial_result = await session.execute(tutorial_query)
30
- tutorial_notebook = tutorial_result.scalar_one_or_none()
34
+ tutorial_notebooks = tutorial_result.scalars().all()
31
35
 
32
- # If tutorial notebook doesn't exist, create it
33
- if tutorial_notebook is None:
34
- logger.info(f"Tutorial notebook not found for user {user_id}, creating it")
36
+ # If tutorial notebooks don't exist, create them
37
+ if len(tutorial_notebooks) == 0:
38
+ logger.info(f"Tutorial notebooks not found for user {user_id}, creating them")
35
39
  try:
36
- await _create_tutorial_notebook(user_id, session, force_refresh=False)
40
+ await create_tutorial_notebooks(user_id, session)
37
41
  except Exception as e:
38
42
  # Log the error but continue to return existing notebooks
39
43
  logger.error(f"Failed to create tutorial notebook for user {user_id}: {e}")
@@ -0,0 +1,3 @@
1
+ # Use Cognee to Build Your Own Knowledge Graph
2
+
3
+ Cognee is a tool that allows you to build your own knowledge graph from the data you have.
@@ -0,0 +1,10 @@
1
+ # What You'll Learn in This Tutorial
2
+
3
+ In this tutorial, you'll learn how to use Cognee to transform scattered data into an intelligent knowledge system that enhances your workflow.
4
+ By the end, you'll have:
5
+
6
+ - Connected disparate data sources into a unified AI memory graph
7
+ - Built a memory layer that infers knowledge from provided data
8
+ - Learned how to use search capabilities that combine diverse context
9
+
10
+ This tutorial demonstrates the power of knowledge graphs and retrieval-augmented generation (RAG), showing you how to build systems that learn from data and infer knowledge.
@@ -0,0 +1,7 @@
1
+ # Cognee and Its Core Operations
2
+
3
+ Before we dive in, let's understand the core Cognee operations we'll be working with:
4
+
5
+ - `cognee.add()` - Ingests raw data into the system
6
+ - `cognee.cognify()` - Processes and structures data into a knowledge graph using AI
7
+ - `cognee.search()` - Queries the knowledge graph with natural language
@@ -0,0 +1,28 @@
1
+ # Add data one by one, or pass a list to add multiple items at once
2
+
3
+ await cognee.add(
4
+ "Harry Potter is a student at Hogwarts and belongs to Gryffindor house. \
5
+ He is known for defeating Voldemort and his Patronus is a stag.",
6
+ dataset_name="cognee-basics",
7
+ )
8
+
9
+ await cognee.add(
10
+ "Hermione Granger is a student at Hogwarts and also belongs to Gryffindor house. \
11
+ She is known for her intelligence and deep knowledge of spells. Her Patronus is an otter.",
12
+ dataset_name="cognee-basics",
13
+ )
14
+
15
+ await cognee.add(
16
+ "Severus Snape is a professor at Hogwarts who teaches Potions. \
17
+ He belongs to Slytherin house and was secretly loyal to Albus Dumbledore.",
18
+ dataset_name="cognee-basics",
19
+ )
20
+
21
+ await cognee.add(
22
+ [
23
+ "Hogwarts is a magical school located in Scotland. During Harry Potter's time at school, the headmaster was Albus Dumbledore.",
24
+ "A Horcrux is a dark magic object used to store a fragment of a wizard's soul. Voldemort created multiple Horcruxes to achieve immortality.",
25
+ "The Elder Wand is a powerful wand believed to be unbeatable. Its final known owner was Harry Potter.",
26
+ ],
27
+ dataset_name="cognee-basics",
28
+ )