cognee 0.2.3.dev1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (252)
  1. cognee/__init__.py +2 -0
  2. cognee/__main__.py +4 -0
  3. cognee/api/client.py +28 -3
  4. cognee/api/health.py +10 -13
  5. cognee/api/v1/add/add.py +20 -6
  6. cognee/api/v1/add/routers/get_add_router.py +12 -37
  7. cognee/api/v1/cloud/routers/__init__.py +1 -0
  8. cognee/api/v1/cloud/routers/get_checks_router.py +23 -0
  9. cognee/api/v1/cognify/code_graph_pipeline.py +14 -3
  10. cognee/api/v1/cognify/cognify.py +67 -105
  11. cognee/api/v1/cognify/routers/get_cognify_router.py +11 -3
  12. cognee/api/v1/datasets/routers/get_datasets_router.py +16 -5
  13. cognee/api/v1/memify/routers/__init__.py +1 -0
  14. cognee/api/v1/memify/routers/get_memify_router.py +100 -0
  15. cognee/api/v1/notebooks/routers/__init__.py +1 -0
  16. cognee/api/v1/notebooks/routers/get_notebooks_router.py +96 -0
  17. cognee/api/v1/responses/default_tools.py +4 -0
  18. cognee/api/v1/responses/dispatch_function.py +6 -1
  19. cognee/api/v1/responses/models.py +1 -1
  20. cognee/api/v1/search/routers/get_search_router.py +20 -1
  21. cognee/api/v1/search/search.py +17 -4
  22. cognee/api/v1/sync/__init__.py +17 -0
  23. cognee/api/v1/sync/routers/__init__.py +3 -0
  24. cognee/api/v1/sync/routers/get_sync_router.py +241 -0
  25. cognee/api/v1/sync/sync.py +877 -0
  26. cognee/api/v1/ui/__init__.py +1 -0
  27. cognee/api/v1/ui/ui.py +529 -0
  28. cognee/api/v1/users/routers/get_auth_router.py +13 -1
  29. cognee/base_config.py +10 -1
  30. cognee/cli/__init__.py +10 -0
  31. cognee/cli/_cognee.py +273 -0
  32. cognee/cli/commands/__init__.py +1 -0
  33. cognee/cli/commands/add_command.py +80 -0
  34. cognee/cli/commands/cognify_command.py +128 -0
  35. cognee/cli/commands/config_command.py +225 -0
  36. cognee/cli/commands/delete_command.py +80 -0
  37. cognee/cli/commands/search_command.py +149 -0
  38. cognee/cli/config.py +33 -0
  39. cognee/cli/debug.py +21 -0
  40. cognee/cli/echo.py +45 -0
  41. cognee/cli/exceptions.py +23 -0
  42. cognee/cli/minimal_cli.py +97 -0
  43. cognee/cli/reference.py +26 -0
  44. cognee/cli/suppress_logging.py +12 -0
  45. cognee/eval_framework/corpus_builder/corpus_builder_executor.py +2 -2
  46. cognee/eval_framework/eval_config.py +1 -1
  47. cognee/infrastructure/databases/graph/config.py +10 -4
  48. cognee/infrastructure/databases/graph/get_graph_engine.py +4 -9
  49. cognee/infrastructure/databases/graph/kuzu/adapter.py +199 -2
  50. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +138 -0
  51. cognee/infrastructure/databases/relational/__init__.py +2 -0
  52. cognee/infrastructure/databases/relational/get_async_session.py +15 -0
  53. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +6 -1
  54. cognee/infrastructure/databases/relational/with_async_session.py +25 -0
  55. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +1 -1
  56. cognee/infrastructure/databases/vector/config.py +13 -6
  57. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +6 -4
  58. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +16 -7
  59. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +5 -5
  60. cognee/infrastructure/databases/vector/embeddings/config.py +2 -2
  61. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +2 -6
  62. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +10 -7
  63. cognee/infrastructure/files/storage/LocalFileStorage.py +9 -0
  64. cognee/infrastructure/files/storage/S3FileStorage.py +5 -0
  65. cognee/infrastructure/files/storage/StorageManager.py +7 -1
  66. cognee/infrastructure/files/storage/storage.py +16 -0
  67. cognee/infrastructure/files/utils/get_data_file_path.py +14 -9
  68. cognee/infrastructure/files/utils/get_file_metadata.py +2 -1
  69. cognee/infrastructure/llm/LLMGateway.py +32 -5
  70. cognee/infrastructure/llm/config.py +6 -4
  71. cognee/infrastructure/llm/prompts/extract_query_time.txt +15 -0
  72. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +25 -0
  73. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +30 -0
  74. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +16 -5
  75. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +2 -0
  76. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py +44 -0
  77. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py +1 -0
  78. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_content_graph.py +19 -15
  79. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py +46 -0
  80. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +3 -3
  81. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +3 -3
  82. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +2 -2
  83. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +14 -8
  84. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +6 -4
  85. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +28 -4
  86. cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +2 -2
  87. cognee/infrastructure/llm/tokenizer/HuggingFace/adapter.py +3 -3
  88. cognee/infrastructure/llm/tokenizer/Mistral/adapter.py +3 -3
  89. cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +6 -6
  90. cognee/infrastructure/llm/utils.py +7 -7
  91. cognee/infrastructure/utils/run_sync.py +8 -1
  92. cognee/modules/chunking/models/DocumentChunk.py +4 -3
  93. cognee/modules/cloud/exceptions/CloudApiKeyMissingError.py +15 -0
  94. cognee/modules/cloud/exceptions/CloudConnectionError.py +15 -0
  95. cognee/modules/cloud/exceptions/__init__.py +2 -0
  96. cognee/modules/cloud/operations/__init__.py +1 -0
  97. cognee/modules/cloud/operations/check_api_key.py +25 -0
  98. cognee/modules/data/deletion/prune_system.py +1 -1
  99. cognee/modules/data/methods/__init__.py +2 -0
  100. cognee/modules/data/methods/check_dataset_name.py +1 -1
  101. cognee/modules/data/methods/create_authorized_dataset.py +19 -0
  102. cognee/modules/data/methods/get_authorized_dataset.py +11 -5
  103. cognee/modules/data/methods/get_authorized_dataset_by_name.py +16 -0
  104. cognee/modules/data/methods/get_dataset_data.py +1 -1
  105. cognee/modules/data/methods/load_or_create_datasets.py +2 -20
  106. cognee/modules/engine/models/Event.py +16 -0
  107. cognee/modules/engine/models/Interval.py +8 -0
  108. cognee/modules/engine/models/Timestamp.py +13 -0
  109. cognee/modules/engine/models/__init__.py +3 -0
  110. cognee/modules/engine/utils/__init__.py +2 -0
  111. cognee/modules/engine/utils/generate_event_datapoint.py +46 -0
  112. cognee/modules/engine/utils/generate_timestamp_datapoint.py +51 -0
  113. cognee/modules/graph/cognee_graph/CogneeGraph.py +2 -2
  114. cognee/modules/graph/methods/get_formatted_graph_data.py +3 -2
  115. cognee/modules/graph/utils/__init__.py +1 -0
  116. cognee/modules/graph/utils/resolve_edges_to_text.py +71 -0
  117. cognee/modules/memify/__init__.py +1 -0
  118. cognee/modules/memify/memify.py +118 -0
  119. cognee/modules/notebooks/methods/__init__.py +5 -0
  120. cognee/modules/notebooks/methods/create_notebook.py +26 -0
  121. cognee/modules/notebooks/methods/delete_notebook.py +13 -0
  122. cognee/modules/notebooks/methods/get_notebook.py +21 -0
  123. cognee/modules/notebooks/methods/get_notebooks.py +18 -0
  124. cognee/modules/notebooks/methods/update_notebook.py +17 -0
  125. cognee/modules/notebooks/models/Notebook.py +53 -0
  126. cognee/modules/notebooks/models/__init__.py +1 -0
  127. cognee/modules/notebooks/operations/__init__.py +1 -0
  128. cognee/modules/notebooks/operations/run_in_local_sandbox.py +55 -0
  129. cognee/modules/pipelines/__init__.py +1 -1
  130. cognee/modules/pipelines/exceptions/tasks.py +18 -0
  131. cognee/modules/pipelines/layers/__init__.py +1 -0
  132. cognee/modules/pipelines/layers/check_pipeline_run_qualification.py +59 -0
  133. cognee/modules/pipelines/layers/pipeline_execution_mode.py +127 -0
  134. cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +28 -0
  135. cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +34 -0
  136. cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +55 -0
  137. cognee/modules/pipelines/layers/setup_and_check_environment.py +41 -0
  138. cognee/modules/pipelines/layers/validate_pipeline_tasks.py +20 -0
  139. cognee/modules/pipelines/methods/__init__.py +2 -0
  140. cognee/modules/pipelines/methods/get_pipeline_runs_by_dataset.py +34 -0
  141. cognee/modules/pipelines/methods/reset_pipeline_run_status.py +16 -0
  142. cognee/modules/pipelines/operations/__init__.py +0 -1
  143. cognee/modules/pipelines/operations/log_pipeline_run_initiated.py +1 -1
  144. cognee/modules/pipelines/operations/pipeline.py +24 -138
  145. cognee/modules/pipelines/operations/run_tasks.py +17 -41
  146. cognee/modules/retrieval/base_feedback.py +11 -0
  147. cognee/modules/retrieval/base_graph_retriever.py +18 -0
  148. cognee/modules/retrieval/base_retriever.py +1 -1
  149. cognee/modules/retrieval/code_retriever.py +8 -0
  150. cognee/modules/retrieval/coding_rules_retriever.py +31 -0
  151. cognee/modules/retrieval/completion_retriever.py +9 -3
  152. cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -0
  153. cognee/modules/retrieval/cypher_search_retriever.py +1 -9
  154. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +29 -13
  155. cognee/modules/retrieval/graph_completion_cot_retriever.py +30 -13
  156. cognee/modules/retrieval/graph_completion_retriever.py +107 -56
  157. cognee/modules/retrieval/graph_summary_completion_retriever.py +5 -1
  158. cognee/modules/retrieval/insights_retriever.py +14 -3
  159. cognee/modules/retrieval/natural_language_retriever.py +0 -4
  160. cognee/modules/retrieval/summaries_retriever.py +1 -1
  161. cognee/modules/retrieval/temporal_retriever.py +152 -0
  162. cognee/modules/retrieval/user_qa_feedback.py +83 -0
  163. cognee/modules/retrieval/utils/brute_force_triplet_search.py +7 -32
  164. cognee/modules/retrieval/utils/completion.py +10 -3
  165. cognee/modules/retrieval/utils/extract_uuid_from_node.py +18 -0
  166. cognee/modules/retrieval/utils/models.py +40 -0
  167. cognee/modules/search/methods/get_search_type_tools.py +168 -0
  168. cognee/modules/search/methods/no_access_control_search.py +47 -0
  169. cognee/modules/search/methods/search.py +239 -118
  170. cognee/modules/search/types/SearchResult.py +21 -0
  171. cognee/modules/search/types/SearchType.py +3 -0
  172. cognee/modules/search/types/__init__.py +1 -0
  173. cognee/modules/search/utils/__init__.py +2 -0
  174. cognee/modules/search/utils/prepare_search_result.py +41 -0
  175. cognee/modules/search/utils/transform_context_to_graph.py +38 -0
  176. cognee/modules/settings/get_settings.py +2 -2
  177. cognee/modules/sync/__init__.py +1 -0
  178. cognee/modules/sync/methods/__init__.py +23 -0
  179. cognee/modules/sync/methods/create_sync_operation.py +53 -0
  180. cognee/modules/sync/methods/get_sync_operation.py +107 -0
  181. cognee/modules/sync/methods/update_sync_operation.py +248 -0
  182. cognee/modules/sync/models/SyncOperation.py +142 -0
  183. cognee/modules/sync/models/__init__.py +3 -0
  184. cognee/modules/users/__init__.py +0 -1
  185. cognee/modules/users/methods/__init__.py +4 -1
  186. cognee/modules/users/methods/create_user.py +26 -1
  187. cognee/modules/users/methods/get_authenticated_user.py +36 -42
  188. cognee/modules/users/methods/get_default_user.py +3 -1
  189. cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +2 -1
  190. cognee/root_dir.py +19 -0
  191. cognee/shared/CodeGraphEntities.py +1 -0
  192. cognee/shared/logging_utils.py +143 -32
  193. cognee/shared/utils.py +0 -1
  194. cognee/tasks/codingagents/coding_rule_associations.py +127 -0
  195. cognee/tasks/graph/extract_graph_from_data.py +6 -2
  196. cognee/tasks/ingestion/save_data_item_to_storage.py +23 -0
  197. cognee/tasks/memify/__init__.py +2 -0
  198. cognee/tasks/memify/extract_subgraph.py +7 -0
  199. cognee/tasks/memify/extract_subgraph_chunks.py +11 -0
  200. cognee/tasks/repo_processor/get_local_dependencies.py +2 -0
  201. cognee/tasks/repo_processor/get_repo_file_dependencies.py +144 -47
  202. cognee/tasks/storage/add_data_points.py +33 -3
  203. cognee/tasks/temporal_graph/__init__.py +1 -0
  204. cognee/tasks/temporal_graph/add_entities_to_event.py +85 -0
  205. cognee/tasks/temporal_graph/enrich_events.py +34 -0
  206. cognee/tasks/temporal_graph/extract_events_and_entities.py +32 -0
  207. cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py +41 -0
  208. cognee/tasks/temporal_graph/models.py +49 -0
  209. cognee/tests/integration/cli/__init__.py +3 -0
  210. cognee/tests/integration/cli/test_cli_integration.py +331 -0
  211. cognee/tests/integration/documents/PdfDocument_test.py +2 -2
  212. cognee/tests/integration/documents/TextDocument_test.py +2 -4
  213. cognee/tests/integration/documents/UnstructuredDocument_test.py +5 -8
  214. cognee/tests/{test_deletion.py → test_delete_hard.py} +0 -37
  215. cognee/tests/test_delete_soft.py +85 -0
  216. cognee/tests/test_kuzu.py +2 -2
  217. cognee/tests/test_neo4j.py +2 -2
  218. cognee/tests/test_permissions.py +3 -3
  219. cognee/tests/test_relational_db_migration.py +7 -5
  220. cognee/tests/test_search_db.py +136 -23
  221. cognee/tests/test_temporal_graph.py +167 -0
  222. cognee/tests/unit/api/__init__.py +1 -0
  223. cognee/tests/unit/api/test_conditional_authentication_endpoints.py +246 -0
  224. cognee/tests/unit/cli/__init__.py +3 -0
  225. cognee/tests/unit/cli/test_cli_commands.py +483 -0
  226. cognee/tests/unit/cli/test_cli_edge_cases.py +625 -0
  227. cognee/tests/unit/cli/test_cli_main.py +173 -0
  228. cognee/tests/unit/cli/test_cli_runner.py +62 -0
  229. cognee/tests/unit/cli/test_cli_utils.py +127 -0
  230. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +18 -2
  231. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +12 -15
  232. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +10 -15
  233. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +4 -3
  234. cognee/tests/unit/modules/retrieval/insights_retriever_test.py +4 -2
  235. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +18 -2
  236. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +225 -0
  237. cognee/tests/unit/modules/users/__init__.py +1 -0
  238. cognee/tests/unit/modules/users/test_conditional_authentication.py +277 -0
  239. cognee/tests/unit/processing/utils/utils_test.py +20 -1
  240. {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/METADATA +13 -9
  241. {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/RECORD +247 -135
  242. cognee-0.3.0.dist-info/entry_points.txt +2 -0
  243. cognee/infrastructure/databases/graph/networkx/adapter.py +0 -1017
  244. cognee/infrastructure/pipeline/models/Operation.py +0 -60
  245. cognee/notebooks/github_analysis_step_by_step.ipynb +0 -37
  246. cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py +0 -7
  247. cognee/tests/unit/modules/search/search_methods_test.py +0 -223
  248. /cognee/{infrastructure/databases/graph/networkx → api/v1/memify}/__init__.py +0 -0
  249. /cognee/{infrastructure/pipeline/models → tasks/codingagents}/__init__.py +0 -0
  250. {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/WHEEL +0 -0
  251. {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/licenses/LICENSE +0 -0
  252. {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/licenses/NOTICE.md +0 -0
cognee/__init__.py CHANGED
@@ -18,6 +18,7 @@ logger = setup_logging()
18
18
  from .api.v1.add import add
19
19
  from .api.v1.delete import delete
20
20
  from .api.v1.cognify import cognify
21
+ from .modules.memify import memify
21
22
  from .api.v1.config.config import config
22
23
  from .api.v1.datasets.datasets import datasets
23
24
  from .api.v1.prune import prune
@@ -26,6 +27,7 @@ from .api.v1.visualize import visualize_graph, start_visualization_server
26
27
  from cognee.modules.visualization.cognee_network_visualization import (
27
28
  cognee_network_visualization,
28
29
  )
30
+ from .api.v1.ui import start_ui
29
31
 
30
32
  # Pipelines
31
33
  from .modules import pipelines
cognee/__main__.py ADDED
@@ -0,0 +1,4 @@
1
+ from cognee.cli._cognee import main
2
+
3
+ if __name__ == "__main__":
4
+ main()
cognee/api/client.py CHANGED
@@ -9,7 +9,7 @@ from contextlib import asynccontextmanager
9
9
  from fastapi import Request
10
10
  from fastapi import FastAPI, status
11
11
  from fastapi.encoders import jsonable_encoder
12
- from fastapi.responses import JSONResponse, Response
12
+ from fastapi.responses import JSONResponse
13
13
  from fastapi.middleware.cors import CORSMiddleware
14
14
  from fastapi.exceptions import RequestValidationError
15
15
  from fastapi.openapi.utils import get_openapi
@@ -17,14 +17,18 @@ from fastapi.openapi.utils import get_openapi
17
17
  from cognee.exceptions import CogneeApiError
18
18
  from cognee.shared.logging_utils import get_logger, setup_logging
19
19
  from cognee.api.health import health_checker, HealthStatus
20
+ from cognee.api.v1.cloud.routers import get_checks_router
21
+ from cognee.api.v1.notebooks.routers import get_notebooks_router
20
22
  from cognee.api.v1.permissions.routers import get_permissions_router
21
23
  from cognee.api.v1.settings.routers import get_settings_router
22
24
  from cognee.api.v1.datasets.routers import get_datasets_router
23
25
  from cognee.api.v1.cognify.routers import get_code_pipeline_router, get_cognify_router
24
26
  from cognee.api.v1.search.routers import get_search_router
27
+ from cognee.api.v1.memify.routers import get_memify_router
25
28
  from cognee.api.v1.add.routers import get_add_router
26
29
  from cognee.api.v1.delete.routers import get_delete_router
27
30
  from cognee.api.v1.responses.routers import get_responses_router
31
+ from cognee.api.v1.sync.routers import get_sync_router
28
32
  from cognee.api.v1.users.routers import (
29
33
  get_auth_router,
30
34
  get_register_router,
@@ -33,6 +37,7 @@ from cognee.api.v1.users.routers import (
33
37
  get_users_router,
34
38
  get_visualize_router,
35
39
  )
40
+ from cognee.modules.users.methods.get_authenticated_user import REQUIRE_AUTHENTICATION
36
41
 
37
42
  logger = get_logger()
38
43
 
@@ -83,7 +88,7 @@ app.add_middleware(
83
88
  CORSMiddleware,
84
89
  allow_origins=allowed_origins, # Now controlled by env var
85
90
  allow_credentials=True,
86
- allow_methods=["OPTIONS", "GET", "POST", "DELETE"],
91
+ allow_methods=["OPTIONS", "GET", "PUT", "POST", "DELETE"],
87
92
  allow_headers=["*"],
88
93
  )
89
94
  # To allow origins, set CORS_ALLOWED_ORIGINS env variable to a comma-separated list, e.g.:
@@ -110,7 +115,11 @@ def custom_openapi():
110
115
  },
111
116
  }
112
117
 
113
- openapi_schema["security"] = [{"BearerAuth": []}, {"CookieAuth": []}]
118
+ if REQUIRE_AUTHENTICATION:
119
+ openapi_schema["security"] = [{"BearerAuth": []}, {"CookieAuth": []}]
120
+
121
+ # Remove global security requirement - let individual endpoints specify their own security
122
+ # openapi_schema["security"] = [{"BearerAuth": []}, {"CookieAuth": []}]
114
123
 
115
124
  app.openapi_schema = openapi_schema
116
125
 
@@ -230,6 +239,8 @@ app.include_router(get_add_router(), prefix="/api/v1/add", tags=["add"])
230
239
 
231
240
  app.include_router(get_cognify_router(), prefix="/api/v1/cognify", tags=["cognify"])
232
241
 
242
+ app.include_router(get_memify_router(), prefix="/api/v1/memify", tags=["memify"])
243
+
233
244
  app.include_router(get_search_router(), prefix="/api/v1/search", tags=["search"])
234
245
 
235
246
  app.include_router(
@@ -248,6 +259,8 @@ app.include_router(get_delete_router(), prefix="/api/v1/delete", tags=["delete"]
248
259
 
249
260
  app.include_router(get_responses_router(), prefix="/api/v1/responses", tags=["responses"])
250
261
 
262
+ app.include_router(get_sync_router(), prefix="/api/v1/sync", tags=["sync"])
263
+
251
264
  codegraph_routes = get_code_pipeline_router()
252
265
  if codegraph_routes:
253
266
  app.include_router(codegraph_routes, prefix="/api/v1/code-pipeline", tags=["code-pipeline"])
@@ -258,6 +271,18 @@ app.include_router(
258
271
  tags=["users"],
259
272
  )
260
273
 
274
+ app.include_router(
275
+ get_notebooks_router(),
276
+ prefix="/api/v1/notebooks",
277
+ tags=["notebooks"],
278
+ )
279
+
280
+ app.include_router(
281
+ get_checks_router(),
282
+ prefix="/api/v1/checks",
283
+ tags=["checks"],
284
+ )
285
+
261
286
 
262
287
  def start_api_server(host: str = "0.0.0.0", port: int = 8000):
263
288
  """
cognee/api/health.py CHANGED
@@ -1,9 +1,10 @@
1
1
  """Health check system for cognee API."""
2
2
 
3
+ from io import BytesIO
3
4
  import time
4
5
  import asyncio
5
6
  from datetime import datetime, timezone
6
- from typing import Dict, Any, Optional
7
+ from typing import Dict
7
8
  from enum import Enum
8
9
  from pydantic import BaseModel
9
10
 
@@ -53,7 +54,7 @@ class HealthChecker:
53
54
  # Test connection by creating a session
54
55
  session = engine.get_session()
55
56
  if session:
56
- await session.close()
57
+ session.close()
57
58
 
58
59
  response_time = int((time.time() - start_time) * 1000)
59
60
  return ComponentHealth(
@@ -117,12 +118,9 @@ class HealthChecker:
117
118
  engine = await get_graph_engine()
118
119
 
119
120
  # Test basic operation with actual graph query
120
- if hasattr(engine, "execute"):
121
- # For SQL-like graph DBs (Neo4j, Memgraph)
122
- await engine.execute("MATCH () RETURN count(*) LIMIT 1")
123
- elif hasattr(engine, "query"):
121
+ if hasattr(engine, "query"):
124
122
  # For other graph engines
125
- engine.query("MATCH () RETURN count(*) LIMIT 1", {})
123
+ await engine.query("MATCH () RETURN count(*) LIMIT 1", {})
126
124
  # If engine exists but no test method, consider it healthy
127
125
 
128
126
  response_time = int((time.time() - start_time) * 1000)
@@ -167,8 +165,8 @@ class HealthChecker:
167
165
  else:
168
166
  # For S3, test basic operations
169
167
  test_path = "health_check_test"
170
- await storage.store(test_path, b"test")
171
- await storage.delete(test_path)
168
+ await storage.store(test_path, BytesIO(b"test"))
169
+ await storage.remove(test_path)
172
170
 
173
171
  response_time = int((time.time() - start_time) * 1000)
174
172
  return ComponentHealth(
@@ -190,14 +188,13 @@ class HealthChecker:
190
188
  """Check LLM provider health (non-critical)."""
191
189
  start_time = time.time()
192
190
  try:
193
- from cognee.infrastructure.llm.get_llm_client import get_llm_client
194
191
  from cognee.infrastructure.llm.config import get_llm_config
192
+ from cognee.infrastructure.llm import LLMGateway
195
193
 
196
194
  config = get_llm_config()
197
195
 
198
196
  # Test actual API connection with minimal request
199
- client = get_llm_client()
200
- await client.show_prompt("test", "test")
197
+ LLMGateway.show_prompt("test", "test")
201
198
 
202
199
  response_time = int((time.time() - start_time) * 1000)
203
200
  return ComponentHealth(
@@ -226,7 +223,7 @@ class HealthChecker:
226
223
 
227
224
  # Test actual embedding generation with minimal text
228
225
  engine = get_embedding_engine()
229
- await engine.embed_text("test")
226
+ await engine.embed_text(["test"])
230
227
 
231
228
  response_time = int((time.time() - start_time) * 1000)
232
229
  return ComponentHealth(
cognee/api/v1/add/add.py CHANGED
@@ -1,9 +1,15 @@
1
1
  from uuid import UUID
2
2
  from typing import Union, BinaryIO, List, Optional
3
3
 
4
- from cognee.modules.pipelines import Task
5
4
  from cognee.modules.users.models import User
6
- from cognee.modules.pipelines import cognee_pipeline
5
+ from cognee.modules.pipelines import Task, run_pipeline
6
+ from cognee.modules.pipelines.layers.resolve_authorized_user_dataset import (
7
+ resolve_authorized_user_dataset,
8
+ )
9
+ from cognee.modules.pipelines.layers.reset_dataset_pipeline_run_status import (
10
+ reset_dataset_pipeline_run_status,
11
+ )
12
+ from cognee.modules.engine.operations.setup import setup
7
13
  from cognee.tasks.ingestion import ingest_data, resolve_data_directories
8
14
 
9
15
 
@@ -128,11 +134,11 @@ async def add(
128
134
 
129
135
  Optional:
130
136
  - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama"
131
- - LLM_MODEL: Model name (default: "gpt-4o-mini")
137
+ - LLM_MODEL: Model name (default: "gpt-5-mini")
132
138
  - DEFAULT_USER_EMAIL: Custom default user email
133
139
  - DEFAULT_USER_PASSWORD: Custom default user password
134
140
  - VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "pgvector"
135
- - GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j", "networkx"
141
+ - GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j"
136
142
 
137
143
  """
138
144
  tasks = [
@@ -140,11 +146,19 @@ async def add(
140
146
  Task(ingest_data, dataset_name, user, node_set, dataset_id, preferred_loaders),
141
147
  ]
142
148
 
149
+ await setup()
150
+
151
+ user, authorized_dataset = await resolve_authorized_user_dataset(dataset_id, dataset_name, user)
152
+
153
+ await reset_dataset_pipeline_run_status(
154
+ authorized_dataset.id, user, pipeline_names=["add_pipeline", "cognify_pipeline"]
155
+ )
156
+
143
157
  pipeline_run_info = None
144
158
 
145
- async for run_info in cognee_pipeline(
159
+ async for run_info in run_pipeline(
146
160
  tasks=tasks,
147
- datasets=dataset_id if dataset_id else dataset_name,
161
+ datasets=[authorized_dataset.id],
148
162
  data=data,
149
163
  user=user,
150
164
  pipeline_name="add_pipeline",
@@ -1,6 +1,3 @@
1
- import os
2
- import requests
3
- import subprocess
4
1
  from uuid import UUID
5
2
 
6
3
  from fastapi import APIRouter
@@ -24,7 +21,9 @@ def get_add_router() -> APIRouter:
24
21
  async def add(
25
22
  data: List[UploadFile] = File(default=None),
26
23
  datasetName: Optional[str] = Form(default=None),
24
+ # Note: Literal is needed for Swagger use
27
25
  datasetId: Union[UUID, Literal[""], None] = Form(default=None, examples=[""]),
26
+ node_set: Optional[List[str]] = Form(default=[""], example=[""]),
28
27
  user: User = Depends(get_authenticated_user),
29
28
  ):
30
29
  """
@@ -41,6 +40,8 @@ def get_add_router() -> APIRouter:
41
40
  - Regular file uploads
42
41
  - **datasetName** (Optional[str]): Name of the dataset to add data to
43
42
  - **datasetId** (Optional[UUID]): UUID of an already existing dataset
43
+ - **node_set** Optional[list[str]]: List of node identifiers for graph organization and access control.
44
+ Used for grouping related data points in the knowledge graph.
44
45
 
45
46
  Either datasetName or datasetId must be provided.
46
47
 
@@ -57,17 +58,12 @@ def get_add_router() -> APIRouter:
57
58
 
58
59
  ## Notes
59
60
  - To add data to datasets not owned by the user, use dataset_id (when ENABLE_BACKEND_ACCESS_CONTROL is set to True)
60
- - GitHub repositories are cloned and all files are processed
61
- - HTTP URLs are fetched and their content is processed
62
- - The ALLOW_HTTP_REQUESTS environment variable controls URL processing
63
61
  - datasetId value can only be the UUID of an already existing dataset
64
62
  """
65
63
  send_telemetry(
66
64
  "Add API Endpoint Invoked",
67
65
  user.id,
68
- additional_properties={
69
- "endpoint": "POST /v1/add",
70
- },
66
+ additional_properties={"endpoint": "POST /v1/add", "node_set": node_set},
71
67
  )
72
68
 
73
69
  from cognee.api.v1.add import add as cognee_add
@@ -76,34 +72,13 @@ def get_add_router() -> APIRouter:
76
72
  raise ValueError("Either datasetId or datasetName must be provided.")
77
73
 
78
74
  try:
79
- if (
80
- isinstance(data, str)
81
- and data.startswith("http")
82
- and (os.getenv("ALLOW_HTTP_REQUESTS", "true").lower() == "true")
83
- ):
84
- if "github" in data:
85
- # Perform git clone if the URL is from GitHub
86
- repo_name = data.split("/")[-1].replace(".git", "")
87
- subprocess.run(["git", "clone", data, f".data/{repo_name}"], check=True)
88
- # TODO: Update add call with dataset info
89
- await cognee_add(
90
- "data://.data/",
91
- f"{repo_name}",
92
- )
93
- else:
94
- # Fetch and store the data from other types of URL using curl
95
- response = requests.get(data)
96
- response.raise_for_status()
97
-
98
- file_data = await response.content()
99
- # TODO: Update add call with dataset info
100
- return await cognee_add(file_data)
101
- else:
102
- add_run = await cognee_add(data, datasetName, user=user, dataset_id=datasetId)
103
-
104
- if isinstance(add_run, PipelineRunErrored):
105
- return JSONResponse(status_code=420, content=add_run.model_dump(mode="json"))
106
- return add_run.model_dump()
75
+ add_run = await cognee_add(
76
+ data, datasetName, user=user, dataset_id=datasetId, node_set=node_set
77
+ )
78
+
79
+ if isinstance(add_run, PipelineRunErrored):
80
+ return JSONResponse(status_code=420, content=add_run.model_dump(mode="json"))
81
+ return add_run.model_dump()
107
82
  except Exception as error:
108
83
  return JSONResponse(status_code=409, content={"error": str(error)})
109
84
 
@@ -0,0 +1 @@
1
+ from .get_checks_router import get_checks_router
@@ -0,0 +1,23 @@
1
+ from fastapi import APIRouter, Depends, Request
2
+
3
+ from cognee.modules.users.models import User
4
+ from cognee.modules.users.methods import get_authenticated_user
5
+ from cognee.modules.cloud.operations import check_api_key
6
+ from cognee.modules.cloud.exceptions import CloudApiKeyMissingError
7
+
8
+
9
+ def get_checks_router():
10
+ router = APIRouter()
11
+
12
+ @router.post("/connection")
13
+ async def get_connection_check_endpoint(
14
+ request: Request, user: User = Depends(get_authenticated_user)
15
+ ):
16
+ api_token = request.headers.get("X-Api-Key")
17
+
18
+ if api_token is None:
19
+ return CloudApiKeyMissingError()
20
+
21
+ return await check_api_key(api_token)
22
+
23
+ return router
@@ -1,6 +1,7 @@
1
1
  import os
2
2
  import pathlib
3
3
  import asyncio
4
+ from typing import Optional
4
5
  from cognee.shared.logging_utils import get_logger, setup_logging
5
6
  from cognee.modules.observability.get_observe import get_observe
6
7
 
@@ -28,7 +29,12 @@ logger = get_logger("code_graph_pipeline")
28
29
 
29
30
 
30
31
  @observe
31
- async def run_code_graph_pipeline(repo_path, include_docs=False):
32
+ async def run_code_graph_pipeline(
33
+ repo_path,
34
+ include_docs=False,
35
+ excluded_paths: Optional[list[str]] = None,
36
+ supported_languages: Optional[list[str]] = None,
37
+ ):
32
38
  import cognee
33
39
  from cognee.low_level import setup
34
40
 
@@ -41,7 +47,12 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
41
47
  detailed_extraction = True
42
48
 
43
49
  tasks = [
44
- Task(get_repo_file_dependencies, detailed_extraction=detailed_extraction),
50
+ Task(
51
+ get_repo_file_dependencies,
52
+ detailed_extraction=detailed_extraction,
53
+ supported_languages=supported_languages,
54
+ excluded_paths=excluded_paths,
55
+ ),
45
56
  # Task(summarize_code, task_config={"batch_size": 500}), # This task takes a long time to complete
46
57
  Task(add_data_points, task_config={"batch_size": 30}),
47
58
  ]
@@ -89,7 +100,7 @@ if __name__ == "__main__":
89
100
 
90
101
  async def main():
91
102
  async for run_status in run_code_graph_pipeline("REPO_PATH"):
92
- print(f"{run_status.pipeline_name}: {run_status.status}")
103
+ print(f"{run_status.pipeline_run_id}: {run_status.status}")
93
104
 
94
105
  file_path = os.path.join(
95
106
  pathlib.Path(__file__).parent, ".artifacts", "graph_visualization.html"
@@ -7,12 +7,10 @@ from cognee.shared.logging_utils import get_logger
7
7
  from cognee.shared.data_models import KnowledgeGraph
8
8
  from cognee.infrastructure.llm import get_max_chunk_tokens
9
9
 
10
- from cognee.modules.pipelines import cognee_pipeline
10
+ from cognee.modules.pipelines import run_pipeline
11
11
  from cognee.modules.pipelines.tasks.task import Task
12
12
  from cognee.modules.chunking.TextChunker import TextChunker
13
13
  from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
14
- from cognee.modules.pipelines.models.PipelineRunInfo import PipelineRunCompleted, PipelineRunErrored
15
- from cognee.modules.pipelines.queues.pipeline_run_info_queues import push_to_queue
16
14
  from cognee.modules.users.models import User
17
15
 
18
16
  from cognee.tasks.documents import (
@@ -23,6 +21,12 @@ from cognee.tasks.documents import (
23
21
  from cognee.tasks.graph import extract_graph_from_data
24
22
  from cognee.tasks.storage import add_data_points
25
23
  from cognee.tasks.summarization import summarize_text
24
+ from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor
25
+ from cognee.tasks.temporal_graph.extract_events_and_entities import extract_events_and_timestamps
26
+ from cognee.tasks.temporal_graph.extract_knowledge_graph_from_events import (
27
+ extract_knowledge_graph_from_events,
28
+ )
29
+
26
30
 
27
31
  logger = get_logger("cognify")
28
32
 
@@ -40,6 +44,8 @@ async def cognify(
40
44
  graph_db_config: dict = None,
41
45
  run_in_background: bool = False,
42
46
  incremental_loading: bool = True,
47
+ custom_prompt: Optional[str] = None,
48
+ temporal_cognify: bool = False,
43
49
  ):
44
50
  """
45
51
  Transform ingested data into a structured knowledge graph.
@@ -91,7 +97,7 @@ async def cognify(
91
97
  - LangchainChunker: Recursive character splitting with overlap
92
98
  Determines how documents are segmented for processing.
93
99
  chunk_size: Maximum tokens per chunk. Auto-calculated based on LLM if None.
94
- Formula: min(embedding_max_tokens, llm_max_tokens // 2)
100
+ Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2)
95
101
  Default limits: ~512-8192 tokens depending on models.
96
102
  Smaller chunks = more granular but potentially fragmented knowledge.
97
103
  ontology_file_path: Path to RDF/OWL ontology file for domain-specific entity types.
@@ -102,6 +108,10 @@ async def cognify(
102
108
  If False, waits for completion before returning.
103
109
  Background mode recommended for large datasets (>100MB).
104
110
  Use pipeline_run_id from return value to monitor progress.
111
+ custom_prompt: Optional custom prompt string to use for entity extraction and graph generation.
112
+ If provided, this prompt will be used instead of the default prompts for
113
+ knowledge graph extraction. The prompt should guide the LLM on how to
114
+ extract entities and relationships from the text content.
105
115
 
106
116
  Returns:
107
117
  Union[dict, list[PipelineRunInfo]]:
@@ -178,115 +188,27 @@ async def cognify(
178
188
  - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
179
189
  - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
180
190
  """
181
- tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path)
182
-
183
- if run_in_background:
184
- return await run_cognify_as_background_process(
185
- tasks=tasks,
186
- user=user,
187
- datasets=datasets,
188
- vector_db_config=vector_db_config,
189
- graph_db_config=graph_db_config,
190
- incremental_loading=incremental_loading,
191
- )
191
+ if temporal_cognify:
192
+ tasks = await get_temporal_tasks(user, chunker, chunk_size)
192
193
  else:
193
- return await run_cognify_blocking(
194
- tasks=tasks,
195
- user=user,
196
- datasets=datasets,
197
- vector_db_config=vector_db_config,
198
- graph_db_config=graph_db_config,
199
- incremental_loading=incremental_loading,
194
+ tasks = await get_default_tasks(
195
+ user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt
200
196
  )
201
197
 
198
+ # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
199
+ pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background)
202
200
 
203
- async def run_cognify_blocking(
204
- tasks,
205
- user,
206
- datasets,
207
- graph_db_config: dict = None,
208
- vector_db_config: dict = False,
209
- incremental_loading: bool = True,
210
- ):
211
- total_run_info = {}
212
-
213
- async for run_info in cognee_pipeline(
201
+ # Run the run_pipeline in the background or blocking based on executor
202
+ return await pipeline_executor_func(
203
+ pipeline=run_pipeline,
214
204
  tasks=tasks,
215
- datasets=datasets,
216
205
  user=user,
217
- pipeline_name="cognify_pipeline",
218
- graph_db_config=graph_db_config,
206
+ datasets=datasets,
219
207
  vector_db_config=vector_db_config,
208
+ graph_db_config=graph_db_config,
220
209
  incremental_loading=incremental_loading,
221
- ):
222
- if run_info.dataset_id:
223
- total_run_info[run_info.dataset_id] = run_info
224
- else:
225
- total_run_info = run_info
226
-
227
- return total_run_info
228
-
229
-
230
- async def run_cognify_as_background_process(
231
- tasks,
232
- user,
233
- datasets,
234
- graph_db_config: dict = None,
235
- vector_db_config: dict = False,
236
- incremental_loading: bool = True,
237
- ):
238
- # Convert dataset to list if it's a string
239
- if isinstance(datasets, str):
240
- datasets = [datasets]
241
-
242
- # Store pipeline status for all pipelines
243
- pipeline_run_started_info = {}
244
-
245
- async def handle_rest_of_the_run(pipeline_list):
246
- # Execute all provided pipelines one by one to avoid database write conflicts
247
- # TODO: Convert to async gather task instead of for loop when Queue mechanism for database is created
248
- for pipeline in pipeline_list:
249
- while True:
250
- try:
251
- pipeline_run_info = await anext(pipeline)
252
-
253
- push_to_queue(pipeline_run_info.pipeline_run_id, pipeline_run_info)
254
-
255
- if isinstance(pipeline_run_info, PipelineRunCompleted) or isinstance(
256
- pipeline_run_info, PipelineRunErrored
257
- ):
258
- break
259
- except StopAsyncIteration:
260
- break
261
-
262
- # Start all pipelines to get started status
263
- pipeline_list = []
264
- for dataset in datasets:
265
- pipeline_run = cognee_pipeline(
266
- tasks=tasks,
267
- user=user,
268
- datasets=dataset,
269
- pipeline_name="cognify_pipeline",
270
- graph_db_config=graph_db_config,
271
- vector_db_config=vector_db_config,
272
- incremental_loading=incremental_loading,
273
- )
274
-
275
- # Save dataset Pipeline run started info
276
- run_info = await anext(pipeline_run)
277
- pipeline_run_started_info[run_info.dataset_id] = run_info
278
-
279
- if pipeline_run_started_info[run_info.dataset_id].payload:
280
- # Remove payload info to avoid serialization
281
- # TODO: Handle payload serialization
282
- pipeline_run_started_info[run_info.dataset_id].payload = []
283
-
284
- pipeline_list.append(pipeline_run)
285
-
286
- # Send all started pipelines to execute one by one in background
287
- asyncio.create_task(handle_rest_of_the_run(pipeline_list=pipeline_list))
288
-
289
- return pipeline_run_started_info
210
+ pipeline_name="cognify_pipeline",
211
+ )
290
212
 
291
213
 
292
214
  async def get_default_tasks( # TODO: Find out a better way to do this (Boris's comment)
@@ -295,6 +217,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
295
217
  chunker=TextChunker,
296
218
  chunk_size: int = None,
297
219
  ontology_file_path: Optional[str] = None,
220
+ custom_prompt: Optional[str] = None,
298
221
  ) -> list[Task]:
299
222
  default_tasks = [
300
223
  Task(classify_documents),
@@ -308,6 +231,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
308
231
  extract_graph_from_data,
309
232
  graph_model=graph_model,
310
233
  ontology_adapter=OntologyResolver(ontology_file=ontology_file_path),
234
+ custom_prompt=custom_prompt,
311
235
  task_config={"batch_size": 10},
312
236
  ), # Generate knowledge graphs from the document chunks.
313
237
  Task(
@@ -318,3 +242,41 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
318
242
  ]
319
243
 
320
244
  return default_tasks
245
+
246
+
247
+ async def get_temporal_tasks(
248
+ user: User = None, chunker=TextChunker, chunk_size: int = None
249
+ ) -> list[Task]:
250
+ """
251
+ Builds and returns a list of temporal processing tasks to be executed in sequence.
252
+
253
+ The pipeline includes:
254
+ 1. Document classification.
255
+ 2. Dataset permission checks (requires "write" access).
256
+ 3. Document chunking with a specified or default chunk size.
257
+ 4. Event and timestamp extraction from chunks.
258
+ 5. Knowledge graph extraction from events.
259
+ 6. Batched insertion of data points.
260
+
261
+ Args:
262
+ user (User, optional): The user requesting task execution, used for permission checks.
263
+ chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
264
+ chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
265
+
266
+ Returns:
267
+ list[Task]: A list of Task objects representing the temporal processing pipeline.
268
+ """
269
+ temporal_tasks = [
270
+ Task(classify_documents),
271
+ Task(check_permissions_on_dataset, user=user, permissions=["write"]),
272
+ Task(
273
+ extract_chunks_from_documents,
274
+ max_chunk_size=chunk_size or get_max_chunk_tokens(),
275
+ chunker=chunker,
276
+ ),
277
+ Task(extract_events_and_timestamps, task_config={"chunk_size": 10}),
278
+ Task(extract_knowledge_graph_from_events),
279
+ Task(add_data_points, task_config={"batch_size": 10}),
280
+ ]
281
+
282
+ return temporal_tasks