cognee 0.2.4__py3-none-any.whl → 0.3.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163)
  1. cognee/__init__.py +1 -0
  2. cognee/api/client.py +28 -3
  3. cognee/api/health.py +10 -13
  4. cognee/api/v1/add/add.py +3 -1
  5. cognee/api/v1/add/routers/get_add_router.py +12 -37
  6. cognee/api/v1/cloud/routers/__init__.py +1 -0
  7. cognee/api/v1/cloud/routers/get_checks_router.py +23 -0
  8. cognee/api/v1/cognify/code_graph_pipeline.py +9 -4
  9. cognee/api/v1/cognify/cognify.py +50 -3
  10. cognee/api/v1/cognify/routers/get_cognify_router.py +1 -1
  11. cognee/api/v1/datasets/routers/get_datasets_router.py +15 -4
  12. cognee/api/v1/memify/__init__.py +0 -0
  13. cognee/api/v1/memify/routers/__init__.py +1 -0
  14. cognee/api/v1/memify/routers/get_memify_router.py +100 -0
  15. cognee/api/v1/notebooks/routers/__init__.py +1 -0
  16. cognee/api/v1/notebooks/routers/get_notebooks_router.py +96 -0
  17. cognee/api/v1/search/routers/get_search_router.py +20 -1
  18. cognee/api/v1/search/search.py +11 -4
  19. cognee/api/v1/sync/__init__.py +17 -0
  20. cognee/api/v1/sync/routers/__init__.py +3 -0
  21. cognee/api/v1/sync/routers/get_sync_router.py +241 -0
  22. cognee/api/v1/sync/sync.py +877 -0
  23. cognee/api/v1/users/routers/get_auth_router.py +13 -1
  24. cognee/base_config.py +10 -1
  25. cognee/infrastructure/databases/graph/config.py +10 -4
  26. cognee/infrastructure/databases/graph/kuzu/adapter.py +135 -0
  27. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +89 -0
  28. cognee/infrastructure/databases/relational/__init__.py +2 -0
  29. cognee/infrastructure/databases/relational/get_async_session.py +15 -0
  30. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +6 -1
  31. cognee/infrastructure/databases/relational/with_async_session.py +25 -0
  32. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +1 -1
  33. cognee/infrastructure/databases/vector/config.py +13 -6
  34. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +1 -1
  35. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +2 -6
  36. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +4 -1
  37. cognee/infrastructure/files/storage/LocalFileStorage.py +9 -0
  38. cognee/infrastructure/files/storage/S3FileStorage.py +5 -0
  39. cognee/infrastructure/files/storage/StorageManager.py +7 -1
  40. cognee/infrastructure/files/storage/storage.py +16 -0
  41. cognee/infrastructure/llm/LLMGateway.py +18 -0
  42. cognee/infrastructure/llm/config.py +4 -2
  43. cognee/infrastructure/llm/prompts/extract_query_time.txt +15 -0
  44. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +25 -0
  45. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +30 -0
  46. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +2 -0
  47. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py +44 -0
  48. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py +1 -0
  49. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py +46 -0
  50. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +25 -1
  51. cognee/infrastructure/utils/run_sync.py +8 -1
  52. cognee/modules/chunking/models/DocumentChunk.py +4 -3
  53. cognee/modules/cloud/exceptions/CloudApiKeyMissingError.py +15 -0
  54. cognee/modules/cloud/exceptions/CloudConnectionError.py +15 -0
  55. cognee/modules/cloud/exceptions/__init__.py +2 -0
  56. cognee/modules/cloud/operations/__init__.py +1 -0
  57. cognee/modules/cloud/operations/check_api_key.py +25 -0
  58. cognee/modules/data/deletion/prune_system.py +1 -1
  59. cognee/modules/data/methods/check_dataset_name.py +1 -1
  60. cognee/modules/data/methods/get_dataset_data.py +1 -1
  61. cognee/modules/data/methods/load_or_create_datasets.py +1 -1
  62. cognee/modules/engine/models/Event.py +16 -0
  63. cognee/modules/engine/models/Interval.py +8 -0
  64. cognee/modules/engine/models/Timestamp.py +13 -0
  65. cognee/modules/engine/models/__init__.py +3 -0
  66. cognee/modules/engine/utils/__init__.py +2 -0
  67. cognee/modules/engine/utils/generate_event_datapoint.py +46 -0
  68. cognee/modules/engine/utils/generate_timestamp_datapoint.py +51 -0
  69. cognee/modules/graph/cognee_graph/CogneeGraph.py +2 -2
  70. cognee/modules/graph/utils/__init__.py +1 -0
  71. cognee/modules/graph/utils/resolve_edges_to_text.py +71 -0
  72. cognee/modules/memify/__init__.py +1 -0
  73. cognee/modules/memify/memify.py +118 -0
  74. cognee/modules/notebooks/methods/__init__.py +5 -0
  75. cognee/modules/notebooks/methods/create_notebook.py +26 -0
  76. cognee/modules/notebooks/methods/delete_notebook.py +13 -0
  77. cognee/modules/notebooks/methods/get_notebook.py +21 -0
  78. cognee/modules/notebooks/methods/get_notebooks.py +18 -0
  79. cognee/modules/notebooks/methods/update_notebook.py +17 -0
  80. cognee/modules/notebooks/models/Notebook.py +53 -0
  81. cognee/modules/notebooks/models/__init__.py +1 -0
  82. cognee/modules/notebooks/operations/__init__.py +1 -0
  83. cognee/modules/notebooks/operations/run_in_local_sandbox.py +55 -0
  84. cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +19 -3
  85. cognee/modules/pipelines/operations/pipeline.py +1 -0
  86. cognee/modules/pipelines/operations/run_tasks.py +17 -41
  87. cognee/modules/retrieval/base_graph_retriever.py +18 -0
  88. cognee/modules/retrieval/base_retriever.py +1 -1
  89. cognee/modules/retrieval/code_retriever.py +8 -0
  90. cognee/modules/retrieval/coding_rules_retriever.py +31 -0
  91. cognee/modules/retrieval/completion_retriever.py +9 -3
  92. cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -0
  93. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +23 -14
  94. cognee/modules/retrieval/graph_completion_cot_retriever.py +21 -11
  95. cognee/modules/retrieval/graph_completion_retriever.py +32 -65
  96. cognee/modules/retrieval/graph_summary_completion_retriever.py +3 -1
  97. cognee/modules/retrieval/insights_retriever.py +14 -3
  98. cognee/modules/retrieval/summaries_retriever.py +1 -1
  99. cognee/modules/retrieval/temporal_retriever.py +152 -0
  100. cognee/modules/retrieval/utils/brute_force_triplet_search.py +7 -32
  101. cognee/modules/retrieval/utils/completion.py +10 -3
  102. cognee/modules/search/methods/get_search_type_tools.py +168 -0
  103. cognee/modules/search/methods/no_access_control_search.py +47 -0
  104. cognee/modules/search/methods/search.py +219 -139
  105. cognee/modules/search/types/SearchResult.py +21 -0
  106. cognee/modules/search/types/SearchType.py +2 -0
  107. cognee/modules/search/types/__init__.py +1 -0
  108. cognee/modules/search/utils/__init__.py +2 -0
  109. cognee/modules/search/utils/prepare_search_result.py +41 -0
  110. cognee/modules/search/utils/transform_context_to_graph.py +38 -0
  111. cognee/modules/sync/__init__.py +1 -0
  112. cognee/modules/sync/methods/__init__.py +23 -0
  113. cognee/modules/sync/methods/create_sync_operation.py +53 -0
  114. cognee/modules/sync/methods/get_sync_operation.py +107 -0
  115. cognee/modules/sync/methods/update_sync_operation.py +248 -0
  116. cognee/modules/sync/models/SyncOperation.py +142 -0
  117. cognee/modules/sync/models/__init__.py +3 -0
  118. cognee/modules/users/__init__.py +0 -1
  119. cognee/modules/users/methods/__init__.py +4 -1
  120. cognee/modules/users/methods/create_user.py +26 -1
  121. cognee/modules/users/methods/get_authenticated_user.py +36 -42
  122. cognee/modules/users/methods/get_default_user.py +3 -1
  123. cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +2 -1
  124. cognee/root_dir.py +19 -0
  125. cognee/shared/logging_utils.py +1 -1
  126. cognee/tasks/codingagents/__init__.py +0 -0
  127. cognee/tasks/codingagents/coding_rule_associations.py +127 -0
  128. cognee/tasks/ingestion/save_data_item_to_storage.py +23 -0
  129. cognee/tasks/memify/__init__.py +2 -0
  130. cognee/tasks/memify/extract_subgraph.py +7 -0
  131. cognee/tasks/memify/extract_subgraph_chunks.py +11 -0
  132. cognee/tasks/repo_processor/get_repo_file_dependencies.py +52 -27
  133. cognee/tasks/temporal_graph/__init__.py +1 -0
  134. cognee/tasks/temporal_graph/add_entities_to_event.py +85 -0
  135. cognee/tasks/temporal_graph/enrich_events.py +34 -0
  136. cognee/tasks/temporal_graph/extract_events_and_entities.py +32 -0
  137. cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py +41 -0
  138. cognee/tasks/temporal_graph/models.py +49 -0
  139. cognee/tests/test_kuzu.py +4 -4
  140. cognee/tests/test_neo4j.py +4 -4
  141. cognee/tests/test_permissions.py +3 -3
  142. cognee/tests/test_relational_db_migration.py +7 -5
  143. cognee/tests/test_search_db.py +18 -24
  144. cognee/tests/test_temporal_graph.py +167 -0
  145. cognee/tests/unit/api/__init__.py +1 -0
  146. cognee/tests/unit/api/test_conditional_authentication_endpoints.py +246 -0
  147. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +18 -2
  148. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +13 -16
  149. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +11 -16
  150. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +5 -4
  151. cognee/tests/unit/modules/retrieval/insights_retriever_test.py +4 -2
  152. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +18 -2
  153. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +225 -0
  154. cognee/tests/unit/modules/users/__init__.py +1 -0
  155. cognee/tests/unit/modules/users/test_conditional_authentication.py +277 -0
  156. cognee/tests/unit/processing/utils/utils_test.py +20 -1
  157. {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/METADATA +8 -6
  158. {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/RECORD +162 -89
  159. cognee/tests/unit/modules/search/search_methods_test.py +0 -225
  160. {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/WHEEL +0 -0
  161. {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/entry_points.txt +0 -0
  162. {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/licenses/LICENSE +0 -0
  163. {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
cognee/__init__.py CHANGED
@@ -18,6 +18,7 @@ logger = setup_logging()
 from .api.v1.add import add
 from .api.v1.delete import delete
 from .api.v1.cognify import cognify
+from .modules.memify import memify
 from .api.v1.config.config import config
 from .api.v1.datasets.datasets import datasets
 from .api.v1.prune import prune
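
With this change, memify is re-exported at the package root alongside add and cognify. A minimal usage sketch (the dataset name is hypothetical, and it is assumed the remaining memify parameters are optional so module defaults apply):

    import asyncio
    import cognee

    async def main():
        # memify is now importable straight from the package namespace
        await cognee.memify(dataset="my_dataset")

    asyncio.run(main())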
cognee/api/client.py CHANGED
@@ -9,7 +9,7 @@ from contextlib import asynccontextmanager
 from fastapi import Request
 from fastapi import FastAPI, status
 from fastapi.encoders import jsonable_encoder
-from fastapi.responses import JSONResponse, Response
+from fastapi.responses import JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.exceptions import RequestValidationError
 from fastapi.openapi.utils import get_openapi
@@ -17,14 +17,18 @@ from fastapi.openapi.utils import get_openapi
 from cognee.exceptions import CogneeApiError
 from cognee.shared.logging_utils import get_logger, setup_logging
 from cognee.api.health import health_checker, HealthStatus
+from cognee.api.v1.cloud.routers import get_checks_router
+from cognee.api.v1.notebooks.routers import get_notebooks_router
 from cognee.api.v1.permissions.routers import get_permissions_router
 from cognee.api.v1.settings.routers import get_settings_router
 from cognee.api.v1.datasets.routers import get_datasets_router
 from cognee.api.v1.cognify.routers import get_code_pipeline_router, get_cognify_router
 from cognee.api.v1.search.routers import get_search_router
+from cognee.api.v1.memify.routers import get_memify_router
 from cognee.api.v1.add.routers import get_add_router
 from cognee.api.v1.delete.routers import get_delete_router
 from cognee.api.v1.responses.routers import get_responses_router
+from cognee.api.v1.sync.routers import get_sync_router
 from cognee.api.v1.users.routers import (
     get_auth_router,
     get_register_router,
@@ -33,6 +37,7 @@ from cognee.api.v1.users.routers import (
     get_users_router,
     get_visualize_router,
 )
+from cognee.modules.users.methods.get_authenticated_user import REQUIRE_AUTHENTICATION

 logger = get_logger()

@@ -83,7 +88,7 @@
     CORSMiddleware,
     allow_origins=allowed_origins,  # Now controlled by env var
     allow_credentials=True,
-    allow_methods=["OPTIONS", "GET", "POST", "DELETE"],
+    allow_methods=["OPTIONS", "GET", "PUT", "POST", "DELETE"],
     allow_headers=["*"],
 )
 # To allow origins, set CORS_ALLOWED_ORIGINS env variable to a comma-separated list, e.g.:
@@ -110,7 +115,11 @@ def custom_openapi():
         },
     }

-    openapi_schema["security"] = [{"BearerAuth": []}, {"CookieAuth": []}]
+    if REQUIRE_AUTHENTICATION:
+        openapi_schema["security"] = [{"BearerAuth": []}, {"CookieAuth": []}]
+
+    # Remove global security requirement - let individual endpoints specify their own security
+    # openapi_schema["security"] = [{"BearerAuth": []}, {"CookieAuth": []}]

     app.openapi_schema = openapi_schema

@@ -230,6 +239,8 @@ app.include_router(get_add_router(), prefix="/api/v1/add", tags=["add"])

 app.include_router(get_cognify_router(), prefix="/api/v1/cognify", tags=["cognify"])

+app.include_router(get_memify_router(), prefix="/api/v1/memify", tags=["memify"])
+
 app.include_router(get_search_router(), prefix="/api/v1/search", tags=["search"])

 app.include_router(
@@ -248,6 +259,8 @@ app.include_router(get_delete_router(), prefix="/api/v1/delete", tags=["delete"]

 app.include_router(get_responses_router(), prefix="/api/v1/responses", tags=["responses"])

+app.include_router(get_sync_router(), prefix="/api/v1/sync", tags=["sync"])
+
 codegraph_routes = get_code_pipeline_router()
 if codegraph_routes:
     app.include_router(codegraph_routes, prefix="/api/v1/code-pipeline", tags=["code-pipeline"])
@@ -258,6 +271,18 @@ app.include_router(
     tags=["users"],
 )

+app.include_router(
+    get_notebooks_router(),
+    prefix="/api/v1/notebooks",
+    tags=["notebooks"],
+)
+
+app.include_router(
+    get_checks_router(),
+    prefix="/api/v1/checks",
+    tags=["checks"],
+)
+

 def start_api_server(host: str = "0.0.0.0", port: int = 8000):
     """
cognee/api/health.py CHANGED
@@ -1,9 +1,10 @@
 """Health check system for cognee API."""

+from io import BytesIO
 import time
 import asyncio
 from datetime import datetime, timezone
-from typing import Dict, Any, Optional
+from typing import Dict
 from enum import Enum
 from pydantic import BaseModel
@@ -53,7 +54,7 @@
             # Test connection by creating a session
             session = engine.get_session()
             if session:
-                await session.close()
+                session.close()

             response_time = int((time.time() - start_time) * 1000)
             return ComponentHealth(
@@ -117,12 +118,9 @@
             engine = await get_graph_engine()

             # Test basic operation with actual graph query
-            if hasattr(engine, "execute"):
-                # For SQL-like graph DBs (Neo4j, Memgraph)
-                await engine.execute("MATCH () RETURN count(*) LIMIT 1")
-            elif hasattr(engine, "query"):
+            if hasattr(engine, "query"):
                 # For other graph engines
-                engine.query("MATCH () RETURN count(*) LIMIT 1", {})
+                await engine.query("MATCH () RETURN count(*) LIMIT 1", {})
             # If engine exists but no test method, consider it healthy

             response_time = int((time.time() - start_time) * 1000)
@@ -167,8 +165,8 @@
             else:
                 # For S3, test basic operations
                 test_path = "health_check_test"
-                await storage.store(test_path, b"test")
-                await storage.delete(test_path)
+                await storage.store(test_path, BytesIO(b"test"))
+                await storage.remove(test_path)

             response_time = int((time.time() - start_time) * 1000)
             return ComponentHealth(
@@ -190,14 +188,13 @@
         """Check LLM provider health (non-critical)."""
         start_time = time.time()
         try:
-            from cognee.infrastructure.llm.get_llm_client import get_llm_client
             from cognee.infrastructure.llm.config import get_llm_config
+            from cognee.infrastructure.llm import LLMGateway

             config = get_llm_config()

             # Test actual API connection with minimal request
-            client = get_llm_client()
-            await client.show_prompt("test", "test")
+            LLMGateway.show_prompt("test", "test")

             response_time = int((time.time() - start_time) * 1000)
             return ComponentHealth(
@@ -226,7 +223,7 @@

             # Test actual embedding generation with minimal text
             engine = get_embedding_engine()
-            await engine.embed_text("test")
+            await engine.embed_text(["test"])

             response_time = int((time.time() - start_time) * 1000)
             return ComponentHealth(
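
The embedding check now matches the batch-oriented embed_text signature: a list of strings in, one vector per input out. A hedged sketch of the corrected call shape (import path inferred from the module list above, not shown in this hunk):

    from cognee.infrastructure.databases.vector.embeddings.get_embedding_engine import (
        get_embedding_engine,
    )

    async def embedding_is_healthy() -> bool:
        engine = get_embedding_engine()
        # One input string in, one embedding vector out.
        vectors = await engine.embed_text(["test"])
        return len(vectors) == 1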
cognee/api/v1/add/add.py CHANGED
@@ -150,7 +150,9 @@ async def add(

     user, authorized_dataset = await resolve_authorized_user_dataset(dataset_id, dataset_name, user)

-    await reset_dataset_pipeline_run_status(authorized_dataset.id, user)
+    await reset_dataset_pipeline_run_status(
+        authorized_dataset.id, user, pipeline_names=["add_pipeline", "cognify_pipeline"]
+    )

     pipeline_run_info = None

cognee/api/v1/add/routers/get_add_router.py CHANGED
@@ -1,6 +1,3 @@
-import os
-import requests
-import subprocess
 from uuid import UUID

 from fastapi import APIRouter
@@ -24,7 +21,9 @@ def get_add_router() -> APIRouter:
     async def add(
         data: List[UploadFile] = File(default=None),
         datasetName: Optional[str] = Form(default=None),
+        # Note: Literal is needed for Swagger use
         datasetId: Union[UUID, Literal[""], None] = Form(default=None, examples=[""]),
+        node_set: Optional[List[str]] = Form(default=[""], example=[""]),
         user: User = Depends(get_authenticated_user),
     ):
         """
@@ -41,6 +40,8 @@ def get_add_router() -> APIRouter:
         - Regular file uploads
         - **datasetName** (Optional[str]): Name of the dataset to add data to
         - **datasetId** (Optional[UUID]): UUID of an already existing dataset
+        - **node_set** Optional[list[str]]: List of node identifiers for graph organization and access control.
+          Used for grouping related data points in the knowledge graph.

         Either datasetName or datasetId must be provided.

@@ -57,17 +58,12 @@

         ## Notes
         - To add data to datasets not owned by the user, use dataset_id (when ENABLE_BACKEND_ACCESS_CONTROL is set to True)
-        - GitHub repositories are cloned and all files are processed
-        - HTTP URLs are fetched and their content is processed
-        - The ALLOW_HTTP_REQUESTS environment variable controls URL processing
         - datasetId value can only be the UUID of an already existing dataset
         """
         send_telemetry(
             "Add API Endpoint Invoked",
             user.id,
-            additional_properties={
-                "endpoint": "POST /v1/add",
-            },
+            additional_properties={"endpoint": "POST /v1/add", "node_set": node_set},
         )

         from cognee.api.v1.add import add as cognee_add
@@ -76,34 +72,13 @@ def get_add_router() -> APIRouter:
             raise ValueError("Either datasetId or datasetName must be provided.")

         try:
-            if (
-                isinstance(data, str)
-                and data.startswith("http")
-                and (os.getenv("ALLOW_HTTP_REQUESTS", "true").lower() == "true")
-            ):
-                if "github" in data:
-                    # Perform git clone if the URL is from GitHub
-                    repo_name = data.split("/")[-1].replace(".git", "")
-                    subprocess.run(["git", "clone", data, f".data/{repo_name}"], check=True)
-                    # TODO: Update add call with dataset info
-                    await cognee_add(
-                        "data://.data/",
-                        f"{repo_name}",
-                    )
-                else:
-                    # Fetch and store the data from other types of URL using curl
-                    response = requests.get(data)
-                    response.raise_for_status()
-
-                    file_data = await response.content()
-                    # TODO: Update add call with dataset info
-                    return await cognee_add(file_data)
-            else:
-                add_run = await cognee_add(data, datasetName, user=user, dataset_id=datasetId)
-
-                if isinstance(add_run, PipelineRunErrored):
-                    return JSONResponse(status_code=420, content=add_run.model_dump(mode="json"))
-                return add_run.model_dump()
+            add_run = await cognee_add(
+                data, datasetName, user=user, dataset_id=datasetId, node_set=node_set
+            )
+
+            if isinstance(add_run, PipelineRunErrored):
+                return JSONResponse(status_code=420, content=add_run.model_dump(mode="json"))
+            return add_run.model_dump()
         except Exception as error:
             return JSONResponse(status_code=409, content={"error": str(error)})

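With the URL/GitHub branch removed, the endpoint now only accepts multipart form data. A hedged client sketch for the new node_set field (hypothetical local server with authentication disabled; httpx is used purely for illustration):

    import httpx

    # node_set is a repeated form field sent alongside the uploaded file.
    files = {"data": ("notes.txt", b"Alan Turing was born in 1912.", "text/plain")}
    form = {"datasetName": "my_dataset", "node_set": ["project-x"]}

    response = httpx.post("http://localhost:8000/api/v1/add", data=form, files=files)
    print(response.status_code, response.json())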
cognee/api/v1/cloud/routers/__init__.py ADDED
@@ -0,0 +1 @@
+from .get_checks_router import get_checks_router
cognee/api/v1/cloud/routers/get_checks_router.py ADDED
@@ -0,0 +1,23 @@
+from fastapi import APIRouter, Depends, Request
+
+from cognee.modules.users.models import User
+from cognee.modules.users.methods import get_authenticated_user
+from cognee.modules.cloud.operations import check_api_key
+from cognee.modules.cloud.exceptions import CloudApiKeyMissingError
+
+
+def get_checks_router():
+    router = APIRouter()
+
+    @router.post("/connection")
+    async def get_connection_check_endpoint(
+        request: Request, user: User = Depends(get_authenticated_user)
+    ):
+        api_token = request.headers.get("X-Api-Key")
+
+        if api_token is None:
+            return CloudApiKeyMissingError()
+
+        return await check_api_key(api_token)
+
+    return router
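
A hedged sketch of exercising the new check (hypothetical local server and key; the handler reads the cloud API key from the X-Api-Key header, as shown above):

    import httpx

    response = httpx.post(
        "http://localhost:8000/api/v1/checks/connection",
        headers={"X-Api-Key": "your-cloud-api-key"},
    )
    print(response.status_code)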
cognee/api/v1/cognify/code_graph_pipeline.py CHANGED
@@ -1,6 +1,7 @@
 import os
 import pathlib
 import asyncio
+from typing import Optional
 from cognee.shared.logging_utils import get_logger, setup_logging
 from cognee.modules.observability.get_observe import get_observe

@@ -28,7 +29,12 @@ logger = get_logger("code_graph_pipeline")


 @observe
-async def run_code_graph_pipeline(repo_path, include_docs=False):
+async def run_code_graph_pipeline(
+    repo_path,
+    include_docs=False,
+    excluded_paths: Optional[list[str]] = None,
+    supported_languages: Optional[list[str]] = None,
+):
     import cognee
     from cognee.low_level import setup

@@ -40,13 +46,12 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
     user = await get_default_user()
     detailed_extraction = True

-    # Multi-language support: allow passing supported_languages
-    supported_languages = None  # defer to task defaults
     tasks = [
         Task(
             get_repo_file_dependencies,
             detailed_extraction=detailed_extraction,
             supported_languages=supported_languages,
+            excluded_paths=excluded_paths,
         ),
         # Task(summarize_code, task_config={"batch_size": 500}),  # This task takes a long time to complete
         Task(add_data_points, task_config={"batch_size": 30}),
@@ -95,7 +100,7 @@ if __name__ == "__main__":

     async def main():
         async for run_status in run_code_graph_pipeline("REPO_PATH"):
-            print(f"{run_status.pipeline_name}: {run_status.status}")
+            print(f"{run_status.pipeline_run_id}: {run_status.status}")

     file_path = os.path.join(
         pathlib.Path(__file__).parent, ".artifacts", "graph_visualization.html"
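
Both new keyword arguments flow straight into get_repo_file_dependencies, so path exclusions and language filters are now caller-controlled instead of hard-coded. A usage sketch (the repo path and filter values are placeholders, and their exact format is an assumption):

    import asyncio
    from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline

    async def main():
        # Passing None for either argument defers to the task defaults.
        async for run_status in run_code_graph_pipeline(
            "/path/to/repo",
            excluded_paths=["tests", "docs"],
            supported_languages=["python"],
        ):
            print(f"{run_status.pipeline_run_id}: {run_status.status}")

    asyncio.run(main())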
cognee/api/v1/cognify/cognify.py CHANGED
@@ -22,6 +22,11 @@ from cognee.tasks.graph import extract_graph_from_data
 from cognee.tasks.storage import add_data_points
 from cognee.tasks.summarization import summarize_text
 from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor
+from cognee.tasks.temporal_graph.extract_events_and_entities import extract_events_and_timestamps
+from cognee.tasks.temporal_graph.extract_knowledge_graph_from_events import (
+    extract_knowledge_graph_from_events,
+)
+

 logger = get_logger("cognify")

@@ -40,6 +45,7 @@ async def cognify(
     run_in_background: bool = False,
     incremental_loading: bool = True,
     custom_prompt: Optional[str] = None,
+    temporal_cognify: bool = False,
 ):
     """
     Transform ingested data into a structured knowledge graph.
@@ -182,9 +188,12 @@
     - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
     - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
     """
-    tasks = await get_default_tasks(
-        user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt
-    )
+    if temporal_cognify:
+        tasks = await get_temporal_tasks(user, chunker, chunk_size)
+    else:
+        tasks = await get_default_tasks(
+            user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt
+        )

     # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
     pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background)
@@ -233,3 +242,41 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     ]

     return default_tasks
+
+
+async def get_temporal_tasks(
+    user: User = None, chunker=TextChunker, chunk_size: int = None
+) -> list[Task]:
+    """
+    Builds and returns a list of temporal processing tasks to be executed in sequence.
+
+    The pipeline includes:
+    1. Document classification.
+    2. Dataset permission checks (requires "write" access).
+    3. Document chunking with a specified or default chunk size.
+    4. Event and timestamp extraction from chunks.
+    5. Knowledge graph extraction from events.
+    6. Batched insertion of data points.
+
+    Args:
+        user (User, optional): The user requesting task execution, used for permission checks.
+        chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
+        chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
+
+    Returns:
+        list[Task]: A list of Task objects representing the temporal processing pipeline.
+    """
+    temporal_tasks = [
+        Task(classify_documents),
+        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
+        Task(
+            extract_chunks_from_documents,
+            max_chunk_size=chunk_size or get_max_chunk_tokens(),
+            chunker=chunker,
+        ),
+        Task(extract_events_and_timestamps, task_config={"chunk_size": 10}),
+        Task(extract_knowledge_graph_from_events),
+        Task(add_data_points, task_config={"batch_size": 10}),
+    ]
+
+    return temporal_tasks
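
A hedged sketch of opting into the temporal pipeline from the public API (hypothetical dataset name and text):

    import asyncio
    import cognee

    async def main():
        await cognee.add("Company X acquired Company Y in March 2020.", "my_dataset")
        # temporal_cognify=True swaps get_default_tasks for get_temporal_tasks,
        # extracting events and timestamps before building the knowledge graph.
        await cognee.cognify(datasets=["my_dataset"], temporal_cognify=True)

    asyncio.run(main())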
cognee/api/v1/cognify/routers/get_cognify_router.py CHANGED
@@ -38,7 +38,7 @@ class CognifyPayloadDTO(InDTO):
     dataset_ids: Optional[List[UUID]] = Field(default=None, examples=[[]])
     run_in_background: Optional[bool] = Field(default=False)
     custom_prompt: Optional[str] = Field(
-        default=None, description="Custom prompt for entity extraction and graph generation"
+        default="", description="Custom prompt for entity extraction and graph generation"
     )

cognee/api/v1/datasets/routers/get_datasets_router.py CHANGED
@@ -5,6 +5,7 @@ from typing import List, Optional
 from typing_extensions import Annotated
 from fastapi import status
 from fastapi import APIRouter
+from fastapi.encoders import jsonable_encoder
 from fastapi import HTTPException, Query, Depends
 from fastapi.responses import JSONResponse, FileResponse

@@ -47,6 +48,7 @@ class DataDTO(OutDTO):
     extension: str
     mime_type: str
     raw_data_location: str
+    dataset_id: UUID


 class GraphNodeDTO(OutDTO):
@@ -114,7 +116,8 @@ def get_datasets_router() -> APIRouter:

     @router.post("", response_model=DatasetDTO)
     async def create_new_dataset(
-        dataset_data: DatasetCreationPayload, user: User = Depends(get_authenticated_user)
+        dataset_data: DatasetCreationPayload,
+        user: User = Depends(get_authenticated_user),
     ):
         """
         Create a new dataset or return existing dataset with the same name.
@@ -327,7 +330,7 @@
             },
         )

-        from cognee.modules.data.methods import get_dataset_data, get_dataset
+        from cognee.modules.data.methods import get_dataset_data

         # Verify user has permission to read dataset
         dataset = await get_authorized_existing_datasets([dataset_id], "read", user)
@@ -338,12 +341,20 @@
                 content=ErrorResponseDTO(f"Dataset ({str(dataset_id)}) not found."),
             )

-        dataset_data = await get_dataset_data(dataset_id=dataset[0].id)
+        dataset_id = dataset[0].id
+
+        dataset_data = await get_dataset_data(dataset_id=dataset_id)

         if dataset_data is None:
             return []

-        return dataset_data
+        return [
+            dict(
+                **jsonable_encoder(data),
+                dataset_id=dataset_id,
+            )
+            for data in dataset_data
+        ]

     @router.get("/status", response_model=dict[str, PipelineRunStatus])
     async def get_dataset_status(
cognee/api/v1/memify/__init__.py ADDED
File without changes (new empty file)
cognee/api/v1/memify/routers/__init__.py ADDED
@@ -0,0 +1 @@
+from .get_memify_router import get_memify_router
cognee/api/v1/memify/routers/get_memify_router.py ADDED
@@ -0,0 +1,100 @@
+from uuid import UUID
+
+from fastapi import APIRouter
+from fastapi.responses import JSONResponse
+from fastapi import Depends
+from pydantic import Field
+from typing import List, Optional, Union, Literal
+
+from cognee.api.DTO import InDTO
+from cognee.modules.users.models import User
+from cognee.modules.users.methods import get_authenticated_user
+from cognee.shared.utils import send_telemetry
+from cognee.modules.pipelines.models import PipelineRunErrored
+from cognee.shared.logging_utils import get_logger
+
+logger = get_logger()
+
+
+class MemifyPayloadDTO(InDTO):
+    extraction_tasks: Optional[List[str]] = Field(
+        default=None,
+        examples=[[]],
+    )
+    enrichment_tasks: Optional[List[str]] = Field(default=None, examples=[[]])
+    data: Optional[str] = Field(default="")
+    dataset_name: Optional[str] = Field(default=None)
+    # Note: Literal is needed for Swagger use
+    dataset_id: Union[UUID, Literal[""], None] = Field(default=None, examples=[""])
+    node_name: Optional[List[str]] = Field(default=None, examples=[[]])
+    run_in_background: Optional[bool] = Field(default=False)
+
+
+def get_memify_router() -> APIRouter:
+    router = APIRouter()
+
+    @router.post("", response_model=dict)
+    async def memify(payload: MemifyPayloadDTO, user: User = Depends(get_authenticated_user)):
+        """
+        Enrichment pipeline in Cognee, can work with already built graphs. If no data is provided existing knowledge graph will be used as data,
+        custom data can also be provided instead which can be processed with provided extraction and enrichment tasks.
+
+        Provided tasks and data will be arranged to run the Cognee pipeline and execute graph enrichment/creation.
+
+        ## Request Parameters
+        - **extractionTasks** Optional[List[str]]: List of available Cognee Tasks to execute for graph/data extraction.
+        - **enrichmentTasks** Optional[List[str]]: List of available Cognee Tasks to handle enrichment of provided graph/data from extraction tasks.
+        - **data** Optional[List[str]]: The data to ingest. Can be any text data when custom extraction and enrichment tasks are used.
+          Data provided here will be forwarded to the first extraction task in the pipeline as input.
+          If no data is provided the whole graph (or subgraph if node_name/node_type is specified) will be forwarded
+        - **dataset_name** (Optional[str]): Name of the datasets to memify
+        - **dataset_id** (Optional[UUID]): List of UUIDs of an already existing dataset
+        - **node_name** (Optional[List[str]]): Filter graph to specific named entities (for targeted search). Used when no data is provided.
+        - **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking).
+
+        Either datasetName or datasetId must be provided.
+
+        ## Response
+        Returns information about the add operation containing:
+        - Status of the operation
+        - Details about the processed data
+        - Any relevant metadata from the ingestion process
+
+        ## Error Codes
+        - **400 Bad Request**: Neither datasetId nor datasetName provided
+        - **409 Conflict**: Error during memify operation
+        - **403 Forbidden**: User doesn't have permission to use dataset
+
+        ## Notes
+        - To memify datasets not owned by the user, use dataset_id (when ENABLE_BACKEND_ACCESS_CONTROL is set to True)
+        - datasetId value can only be the UUID of an already existing dataset
+        """
+
+        send_telemetry(
+            "Memify API Endpoint Invoked",
+            user.id,
+            additional_properties={"endpoint": "POST /v1/memify"},
+        )
+
+        if not payload.dataset_id and not payload.dataset_name:
+            raise ValueError("Either datasetId or datasetName must be provided.")
+
+        try:
+            from cognee.modules.memify import memify as cognee_memify
+
+            memify_run = await cognee_memify(
+                extraction_tasks=payload.extraction_tasks,
+                enrichment_tasks=payload.enrichment_tasks,
+                data=payload.data,
+                dataset=payload.dataset_id if payload.dataset_id else payload.dataset_name,
+                node_name=payload.node_name,
+                user=user,
+            )
+
+            if isinstance(memify_run, PipelineRunErrored):
+                return JSONResponse(status_code=420, content=memify_run)
+            return memify_run
+        except Exception as error:
+            return JSONResponse(status_code=409, content={"error": str(error)})
+
+    return router
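
A hedged request sketch for the new endpoint (hypothetical local server; the task names are placeholders the pipeline would resolve against available Cognee tasks, and the camelCase field names follow the docstring above):

    import httpx

    payload = {
        "datasetName": "my_dataset",
        "extractionTasks": ["extract_subgraph_chunks"],
        "enrichmentTasks": [],
        "runInBackground": False,
    }

    response = httpx.post("http://localhost:8000/api/v1/memify", json=payload)
    print(response.status_code, response.json())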
cognee/api/v1/notebooks/routers/__init__.py ADDED
@@ -0,0 +1 @@
+from .get_notebooks_router import get_notebooks_router