cognee 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +2 -0
- cognee/api/client.py +28 -3
- cognee/api/health.py +10 -13
- cognee/api/v1/add/add.py +3 -1
- cognee/api/v1/add/routers/get_add_router.py +12 -37
- cognee/api/v1/cloud/routers/__init__.py +1 -0
- cognee/api/v1/cloud/routers/get_checks_router.py +23 -0
- cognee/api/v1/cognify/code_graph_pipeline.py +9 -4
- cognee/api/v1/cognify/cognify.py +50 -3
- cognee/api/v1/cognify/routers/get_cognify_router.py +1 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +15 -4
- cognee/api/v1/memify/__init__.py +0 -0
- cognee/api/v1/memify/routers/__init__.py +1 -0
- cognee/api/v1/memify/routers/get_memify_router.py +100 -0
- cognee/api/v1/notebooks/routers/__init__.py +1 -0
- cognee/api/v1/notebooks/routers/get_notebooks_router.py +96 -0
- cognee/api/v1/search/routers/get_search_router.py +20 -1
- cognee/api/v1/search/search.py +11 -4
- cognee/api/v1/sync/__init__.py +17 -0
- cognee/api/v1/sync/routers/__init__.py +3 -0
- cognee/api/v1/sync/routers/get_sync_router.py +241 -0
- cognee/api/v1/sync/sync.py +877 -0
- cognee/api/v1/ui/__init__.py +1 -0
- cognee/api/v1/ui/ui.py +529 -0
- cognee/api/v1/users/routers/get_auth_router.py +13 -1
- cognee/base_config.py +10 -1
- cognee/cli/_cognee.py +93 -0
- cognee/infrastructure/databases/graph/config.py +10 -4
- cognee/infrastructure/databases/graph/kuzu/adapter.py +135 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +89 -0
- cognee/infrastructure/databases/relational/__init__.py +2 -0
- cognee/infrastructure/databases/relational/get_async_session.py +15 -0
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +6 -1
- cognee/infrastructure/databases/relational/with_async_session.py +25 -0
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +1 -1
- cognee/infrastructure/databases/vector/config.py +13 -6
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +1 -1
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +2 -6
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +4 -1
- cognee/infrastructure/files/storage/LocalFileStorage.py +9 -0
- cognee/infrastructure/files/storage/S3FileStorage.py +5 -0
- cognee/infrastructure/files/storage/StorageManager.py +7 -1
- cognee/infrastructure/files/storage/storage.py +16 -0
- cognee/infrastructure/llm/LLMGateway.py +18 -0
- cognee/infrastructure/llm/config.py +4 -2
- cognee/infrastructure/llm/prompts/extract_query_time.txt +15 -0
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +25 -0
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +30 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +2 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py +44 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py +1 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py +46 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +25 -1
- cognee/infrastructure/utils/run_sync.py +8 -1
- cognee/modules/chunking/models/DocumentChunk.py +4 -3
- cognee/modules/cloud/exceptions/CloudApiKeyMissingError.py +15 -0
- cognee/modules/cloud/exceptions/CloudConnectionError.py +15 -0
- cognee/modules/cloud/exceptions/__init__.py +2 -0
- cognee/modules/cloud/operations/__init__.py +1 -0
- cognee/modules/cloud/operations/check_api_key.py +25 -0
- cognee/modules/data/deletion/prune_system.py +1 -1
- cognee/modules/data/methods/check_dataset_name.py +1 -1
- cognee/modules/data/methods/get_dataset_data.py +1 -1
- cognee/modules/data/methods/load_or_create_datasets.py +1 -1
- cognee/modules/engine/models/Event.py +16 -0
- cognee/modules/engine/models/Interval.py +8 -0
- cognee/modules/engine/models/Timestamp.py +13 -0
- cognee/modules/engine/models/__init__.py +3 -0
- cognee/modules/engine/utils/__init__.py +2 -0
- cognee/modules/engine/utils/generate_event_datapoint.py +46 -0
- cognee/modules/engine/utils/generate_timestamp_datapoint.py +51 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +2 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/resolve_edges_to_text.py +71 -0
- cognee/modules/memify/__init__.py +1 -0
- cognee/modules/memify/memify.py +118 -0
- cognee/modules/notebooks/methods/__init__.py +5 -0
- cognee/modules/notebooks/methods/create_notebook.py +26 -0
- cognee/modules/notebooks/methods/delete_notebook.py +13 -0
- cognee/modules/notebooks/methods/get_notebook.py +21 -0
- cognee/modules/notebooks/methods/get_notebooks.py +18 -0
- cognee/modules/notebooks/methods/update_notebook.py +17 -0
- cognee/modules/notebooks/models/Notebook.py +53 -0
- cognee/modules/notebooks/models/__init__.py +1 -0
- cognee/modules/notebooks/operations/__init__.py +1 -0
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +55 -0
- cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +19 -3
- cognee/modules/pipelines/operations/pipeline.py +1 -0
- cognee/modules/pipelines/operations/run_tasks.py +17 -41
- cognee/modules/retrieval/base_graph_retriever.py +18 -0
- cognee/modules/retrieval/base_retriever.py +1 -1
- cognee/modules/retrieval/code_retriever.py +8 -0
- cognee/modules/retrieval/coding_rules_retriever.py +31 -0
- cognee/modules/retrieval/completion_retriever.py +9 -3
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -0
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +23 -14
- cognee/modules/retrieval/graph_completion_cot_retriever.py +21 -11
- cognee/modules/retrieval/graph_completion_retriever.py +32 -65
- cognee/modules/retrieval/graph_summary_completion_retriever.py +3 -1
- cognee/modules/retrieval/insights_retriever.py +14 -3
- cognee/modules/retrieval/summaries_retriever.py +1 -1
- cognee/modules/retrieval/temporal_retriever.py +152 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +7 -32
- cognee/modules/retrieval/utils/completion.py +10 -3
- cognee/modules/search/methods/get_search_type_tools.py +168 -0
- cognee/modules/search/methods/no_access_control_search.py +47 -0
- cognee/modules/search/methods/search.py +219 -139
- cognee/modules/search/types/SearchResult.py +21 -0
- cognee/modules/search/types/SearchType.py +2 -0
- cognee/modules/search/types/__init__.py +1 -0
- cognee/modules/search/utils/__init__.py +2 -0
- cognee/modules/search/utils/prepare_search_result.py +41 -0
- cognee/modules/search/utils/transform_context_to_graph.py +38 -0
- cognee/modules/sync/__init__.py +1 -0
- cognee/modules/sync/methods/__init__.py +23 -0
- cognee/modules/sync/methods/create_sync_operation.py +53 -0
- cognee/modules/sync/methods/get_sync_operation.py +107 -0
- cognee/modules/sync/methods/update_sync_operation.py +248 -0
- cognee/modules/sync/models/SyncOperation.py +142 -0
- cognee/modules/sync/models/__init__.py +3 -0
- cognee/modules/users/__init__.py +0 -1
- cognee/modules/users/methods/__init__.py +4 -1
- cognee/modules/users/methods/create_user.py +26 -1
- cognee/modules/users/methods/get_authenticated_user.py +36 -42
- cognee/modules/users/methods/get_default_user.py +3 -1
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +2 -1
- cognee/root_dir.py +19 -0
- cognee/shared/logging_utils.py +1 -1
- cognee/tasks/codingagents/__init__.py +0 -0
- cognee/tasks/codingagents/coding_rule_associations.py +127 -0
- cognee/tasks/ingestion/save_data_item_to_storage.py +23 -0
- cognee/tasks/memify/__init__.py +2 -0
- cognee/tasks/memify/extract_subgraph.py +7 -0
- cognee/tasks/memify/extract_subgraph_chunks.py +11 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +52 -27
- cognee/tasks/temporal_graph/__init__.py +1 -0
- cognee/tasks/temporal_graph/add_entities_to_event.py +85 -0
- cognee/tasks/temporal_graph/enrich_events.py +34 -0
- cognee/tasks/temporal_graph/extract_events_and_entities.py +32 -0
- cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py +41 -0
- cognee/tasks/temporal_graph/models.py +49 -0
- cognee/tests/test_kuzu.py +4 -4
- cognee/tests/test_neo4j.py +4 -4
- cognee/tests/test_permissions.py +3 -3
- cognee/tests/test_relational_db_migration.py +7 -5
- cognee/tests/test_search_db.py +18 -24
- cognee/tests/test_temporal_graph.py +167 -0
- cognee/tests/unit/api/__init__.py +1 -0
- cognee/tests/unit/api/test_conditional_authentication_endpoints.py +246 -0
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +18 -2
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +13 -16
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +11 -16
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +5 -4
- cognee/tests/unit/modules/retrieval/insights_retriever_test.py +4 -2
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +18 -2
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +225 -0
- cognee/tests/unit/modules/users/__init__.py +1 -0
- cognee/tests/unit/modules/users/test_conditional_authentication.py +277 -0
- cognee/tests/unit/processing/utils/utils_test.py +20 -1
- {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/METADATA +8 -6
- {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/RECORD +165 -90
- cognee/tests/unit/modules/search/search_methods_test.py +0 -225
- {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/WHEEL +0 -0
- {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/entry_points.txt +0 -0
- {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/licenses/NOTICE.md +0 -0
cognee/__init__.py
CHANGED
@@ -18,6 +18,7 @@ logger = setup_logging()
 from .api.v1.add import add
 from .api.v1.delete import delete
 from .api.v1.cognify import cognify
+from .modules.memify import memify
 from .api.v1.config.config import config
 from .api.v1.datasets.datasets import datasets
 from .api.v1.prune import prune
@@ -26,6 +27,7 @@ from .api.v1.visualize import visualize_graph, start_visualization_server
 from cognee.modules.visualization.cognee_network_visualization import (
     cognee_network_visualization,
 )
+from .api.v1.ui import start_ui

 # Pipelines
 from .modules import pipelines
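The two new imports surface memify and start_ui at the package root. A minimal usage sketch; the dataset name is a placeholder, and since this diff does not show the signatures of either function, the argument-free start_ui call and the dataset keyword are assumptions:

    import asyncio
    import cognee

    async def main():
        # memify is re-exported from cognee.modules.memify (hypothetical defaults)
        await cognee.memify(dataset="my_dataset")

    asyncio.run(main())

    # start_ui comes from cognee.api.v1.ui and serves the bundled frontend
    cognee.start_ui()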
cognee/api/client.py
CHANGED
@@ -9,7 +9,7 @@ from contextlib import asynccontextmanager
 from fastapi import Request
 from fastapi import FastAPI, status
 from fastapi.encoders import jsonable_encoder
-from fastapi.responses import JSONResponse
+from fastapi.responses import JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.exceptions import RequestValidationError
 from fastapi.openapi.utils import get_openapi
@@ -17,14 +17,18 @@ from fastapi.openapi.utils import get_openapi
 from cognee.exceptions import CogneeApiError
 from cognee.shared.logging_utils import get_logger, setup_logging
 from cognee.api.health import health_checker, HealthStatus
+from cognee.api.v1.cloud.routers import get_checks_router
+from cognee.api.v1.notebooks.routers import get_notebooks_router
 from cognee.api.v1.permissions.routers import get_permissions_router
 from cognee.api.v1.settings.routers import get_settings_router
 from cognee.api.v1.datasets.routers import get_datasets_router
 from cognee.api.v1.cognify.routers import get_code_pipeline_router, get_cognify_router
 from cognee.api.v1.search.routers import get_search_router
+from cognee.api.v1.memify.routers import get_memify_router
 from cognee.api.v1.add.routers import get_add_router
 from cognee.api.v1.delete.routers import get_delete_router
 from cognee.api.v1.responses.routers import get_responses_router
+from cognee.api.v1.sync.routers import get_sync_router
 from cognee.api.v1.users.routers import (
     get_auth_router,
     get_register_router,
@@ -33,6 +37,7 @@ from cognee.api.v1.users.routers import (
     get_users_router,
     get_visualize_router,
 )
+from cognee.modules.users.methods.get_authenticated_user import REQUIRE_AUTHENTICATION

 logger = get_logger()

@@ -83,7 +88,7 @@ app.add_middleware(
     CORSMiddleware,
     allow_origins=allowed_origins,  # Now controlled by env var
     allow_credentials=True,
-    allow_methods=["OPTIONS", "GET", "POST", "DELETE"],
+    allow_methods=["OPTIONS", "GET", "PUT", "POST", "DELETE"],
     allow_headers=["*"],
 )
 # To allow origins, set CORS_ALLOWED_ORIGINS env variable to a comma-separated list, e.g.:
@@ -110,7 +115,11 @@ def custom_openapi():
         },
     }

-
+    if REQUIRE_AUTHENTICATION:
+        openapi_schema["security"] = [{"BearerAuth": []}, {"CookieAuth": []}]
+
+    # Remove global security requirement - let individual endpoints specify their own security
+    # openapi_schema["security"] = [{"BearerAuth": []}, {"CookieAuth": []}]

     app.openapi_schema = openapi_schema

@@ -230,6 +239,8 @@ app.include_router(get_add_router(), prefix="/api/v1/add", tags=["add"])

 app.include_router(get_cognify_router(), prefix="/api/v1/cognify", tags=["cognify"])

+app.include_router(get_memify_router(), prefix="/api/v1/memify", tags=["memify"])
+
 app.include_router(get_search_router(), prefix="/api/v1/search", tags=["search"])

 app.include_router(
@@ -248,6 +259,8 @@ app.include_router(get_delete_router(), prefix="/api/v1/delete", tags=["delete"]

 app.include_router(get_responses_router(), prefix="/api/v1/responses", tags=["responses"])

+app.include_router(get_sync_router(), prefix="/api/v1/sync", tags=["sync"])
+
 codegraph_routes = get_code_pipeline_router()
 if codegraph_routes:
     app.include_router(codegraph_routes, prefix="/api/v1/code-pipeline", tags=["code-pipeline"])
@@ -258,6 +271,18 @@ app.include_router(
     tags=["users"],
 )

+app.include_router(
+    get_notebooks_router(),
+    prefix="/api/v1/notebooks",
+    tags=["notebooks"],
+)
+
+app.include_router(
+    get_checks_router(),
+    prefix="/api/v1/checks",
+    tags=["checks"],
+)
+

 def start_api_server(host: str = "0.0.0.0", port: int = 8000):
     """
cognee/api/health.py
CHANGED
@@ -1,9 +1,10 @@
 """Health check system for cognee API."""

+from io import BytesIO
 import time
 import asyncio
 from datetime import datetime, timezone
-from typing import Dict
+from typing import Dict
 from enum import Enum
 from pydantic import BaseModel

@@ -53,7 +54,7 @@ class HealthChecker:
             # Test connection by creating a session
             session = engine.get_session()
             if session:
-
+                session.close()

             response_time = int((time.time() - start_time) * 1000)
             return ComponentHealth(
@@ -117,12 +118,9 @@ class HealthChecker:
             engine = await get_graph_engine()

             # Test basic operation with actual graph query
-            if hasattr(engine, "execute"):
-                # For SQL-like graph DBs (Neo4j, Memgraph)
-                await engine.execute("MATCH () RETURN count(*) LIMIT 1")
-            elif hasattr(engine, "query"):
+            if hasattr(engine, "query"):
                 # For other graph engines
-                engine.query("MATCH () RETURN count(*) LIMIT 1", {})
+                await engine.query("MATCH () RETURN count(*) LIMIT 1", {})
             # If engine exists but no test method, consider it healthy

             response_time = int((time.time() - start_time) * 1000)
@@ -167,8 +165,8 @@ class HealthChecker:
             else:
                 # For S3, test basic operations
                 test_path = "health_check_test"
-                await storage.store(test_path, b"test")
-                await storage.
+                await storage.store(test_path, BytesIO(b"test"))
+                await storage.remove(test_path)

             response_time = int((time.time() - start_time) * 1000)
             return ComponentHealth(
@@ -190,14 +188,13 @@ class HealthChecker:
         """Check LLM provider health (non-critical)."""
         start_time = time.time()
         try:
-            from cognee.infrastructure.llm.get_llm_client import get_llm_client
             from cognee.infrastructure.llm.config import get_llm_config
+            from cognee.infrastructure.llm import LLMGateway

             config = get_llm_config()

             # Test actual API connection with minimal request
-            client = get_llm_client()
-            await client.show_prompt("test", "test")
+            LLMGateway.show_prompt("test", "test")

             response_time = int((time.time() - start_time) * 1000)
             return ComponentHealth(
@@ -226,7 +223,7 @@ class HealthChecker:

             # Test actual embedding generation with minimal text
             engine = get_embedding_engine()
-            await engine.embed_text("test")
+            await engine.embed_text(["test"])

             response_time = int((time.time() - start_time) * 1000)
             return ComponentHealth(
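The last hunk corrects the embedding probe to pass a list, matching the batch-oriented embed_text signature. A sketch of the corrected call, assuming the import path from the module layout in the file list above and assuming embed_text returns one vector per input string:

    from cognee.infrastructure.databases.vector.embeddings.get_embedding_engine import (
        get_embedding_engine,
    )

    async def probe_embeddings():
        engine = get_embedding_engine()
        # embed_text takes a list of strings, so even a single probe is wrapped
        vectors = await engine.embed_text(["test"])
        assert len(vectors) == 1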
cognee/api/v1/add/add.py
CHANGED
@@ -150,7 +150,9 @@ async def add(

     user, authorized_dataset = await resolve_authorized_user_dataset(dataset_id, dataset_name, user)

-    await reset_dataset_pipeline_run_status(authorized_dataset.id, user)
+    await reset_dataset_pipeline_run_status(
+        authorized_dataset.id, user, pipeline_names=["add_pipeline", "cognify_pipeline"]
+    )

     pipeline_run_info = None

cognee/api/v1/add/routers/get_add_router.py
CHANGED
@@ -1,6 +1,3 @@
-import os
-import requests
-import subprocess
 from uuid import UUID

 from fastapi import APIRouter
@@ -24,7 +21,9 @@ def get_add_router() -> APIRouter:
     async def add(
         data: List[UploadFile] = File(default=None),
         datasetName: Optional[str] = Form(default=None),
+        # Note: Literal is needed for Swagger use
         datasetId: Union[UUID, Literal[""], None] = Form(default=None, examples=[""]),
+        node_set: Optional[List[str]] = Form(default=[""], example=[""]),
         user: User = Depends(get_authenticated_user),
     ):
         """
@@ -41,6 +40,8 @@ def get_add_router() -> APIRouter:
         - Regular file uploads
         - **datasetName** (Optional[str]): Name of the dataset to add data to
         - **datasetId** (Optional[UUID]): UUID of an already existing dataset
+        - **node_set** Optional[list[str]]: List of node identifiers for graph organization and access control.
+          Used for grouping related data points in the knowledge graph.

         Either datasetName or datasetId must be provided.

@@ -57,17 +58,12 @@ def get_add_router() -> APIRouter:

         ## Notes
         - To add data to datasets not owned by the user, use dataset_id (when ENABLE_BACKEND_ACCESS_CONTROL is set to True)
-        - GitHub repositories are cloned and all files are processed
-        - HTTP URLs are fetched and their content is processed
-        - The ALLOW_HTTP_REQUESTS environment variable controls URL processing
         - datasetId value can only be the UUID of an already existing dataset
         """
         send_telemetry(
             "Add API Endpoint Invoked",
             user.id,
-            additional_properties={
-                "endpoint": "POST /v1/add",
-            },
+            additional_properties={"endpoint": "POST /v1/add", "node_set": node_set},
         )

         from cognee.api.v1.add import add as cognee_add
@@ -76,34 +72,13 @@ def get_add_router() -> APIRouter:
             raise ValueError("Either datasetId or datasetName must be provided.")

         try:
-            if (
-                os.getenv("ALLOW_HTTP_REQUESTS", "true").lower() == "true"
-                and isinstance(data, str)
-                and (data.startswith("http://") or data.startswith("https://"))
-            ):
-                if "github" in data:
-                    # Perform git clone if the URL is from GitHub
-                    repo_name = data.split("/")[-1].replace(".git", "")
-                    subprocess.run(["git", "clone", data, f".data/{repo_name}"], check=True)
-                    # TODO: Update add call with dataset info
-                    await cognee_add(
-                        "data://.data/",
-                        f"{repo_name}",
-                    )
-                else:
-                    # Fetch and store the data from other types of URL using curl
-                    response = requests.get(data)
-                    response.raise_for_status()
-
-                    file_data = await response.content()
-                    # TODO: Update add call with dataset info
-                    return await cognee_add(file_data)
-            else:
-                add_run = await cognee_add(data, datasetName, user=user, dataset_id=datasetId)
-
-                if isinstance(add_run, PipelineRunErrored):
-                    return JSONResponse(status_code=420, content=add_run.model_dump(mode="json"))
-                return add_run.model_dump()
+            add_run = await cognee_add(
+                data, datasetName, user=user, dataset_id=datasetId, node_set=node_set
+            )
+
+            if isinstance(add_run, PipelineRunErrored):
+                return JSONResponse(status_code=420, content=add_run.model_dump(mode="json"))
+            return add_run.model_dump()
         except Exception as error:
             return JSONResponse(status_code=409, content={"error": str(error)})
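A sketch of calling the updated endpoint with the new form field; the local URL, file content, and dataset name are placeholders. requests encodes a list value as repeated form fields, which matches the List[str] Form parameter:

    import requests

    response = requests.post(
        "http://localhost:8000/api/v1/add",
        files={"data": ("notes.txt", b"Alan Turing was born in 1912.")},
        data={"datasetName": "my_dataset", "node_set": ["project-x", "biographies"]},
    )
    print(response.status_code, response.json())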
cognee/api/v1/cloud/routers/__init__.py
ADDED
@@ -0,0 +1 @@
+from .get_checks_router import get_checks_router
cognee/api/v1/cloud/routers/get_checks_router.py
ADDED
@@ -0,0 +1,23 @@
+from fastapi import APIRouter, Depends, Request
+
+from cognee.modules.users.models import User
+from cognee.modules.users.methods import get_authenticated_user
+from cognee.modules.cloud.operations import check_api_key
+from cognee.modules.cloud.exceptions import CloudApiKeyMissingError
+
+
+def get_checks_router():
+    router = APIRouter()
+
+    @router.post("/connection")
+    async def get_connection_check_endpoint(
+        request: Request, user: User = Depends(get_authenticated_user)
+    ):
+        api_token = request.headers.get("X-Api-Key")
+
+        if api_token is None:
+            return CloudApiKeyMissingError()
+
+        return await check_api_key(api_token)
+
+    return router
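A sketch of probing the new connection check; the key value is a placeholder. The handler reads the key from the X-Api-Key header and short-circuits with CloudApiKeyMissingError when the header is absent:

    import requests

    response = requests.post(
        "http://localhost:8000/api/v1/checks/connection",
        headers={"X-Api-Key": "your-cognee-cloud-key"},
    )
    print(response.status_code)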
cognee/api/v1/cognify/code_graph_pipeline.py
CHANGED
@@ -1,6 +1,7 @@
 import os
 import pathlib
 import asyncio
+from typing import Optional
 from cognee.shared.logging_utils import get_logger, setup_logging
 from cognee.modules.observability.get_observe import get_observe

@@ -28,7 +29,12 @@ logger = get_logger("code_graph_pipeline")


 @observe
-async def run_code_graph_pipeline(repo_path, include_docs=False):
+async def run_code_graph_pipeline(
+    repo_path,
+    include_docs=False,
+    excluded_paths: Optional[list[str]] = None,
+    supported_languages: Optional[list[str]] = None,
+):
     import cognee
     from cognee.low_level import setup
@@ -40,13 +46,12 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
     user = await get_default_user()
     detailed_extraction = True

-    # Multi-language support: allow passing supported_languages
-    supported_languages = None  # defer to task defaults
     tasks = [
         Task(
             get_repo_file_dependencies,
             detailed_extraction=detailed_extraction,
             supported_languages=supported_languages,
+            excluded_paths=excluded_paths,
         ),
         # Task(summarize_code, task_config={"batch_size": 500}),  # This task takes a long time to complete
         Task(add_data_points, task_config={"batch_size": 30}),
@@ -95,7 +100,7 @@ if __name__ == "__main__":

     async def main():
         async for run_status in run_code_graph_pipeline("REPO_PATH"):
-            print(f"{run_status.
+            print(f"{run_status.pipeline_run_id}: {run_status.status}")

     file_path = os.path.join(
         pathlib.Path(__file__).parent, ".artifacts", "graph_visualization.html"
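A sketch of driving the extended pipeline; the repo path is a placeholder, and the exact value formats for excluded_paths and supported_languages are assumptions, since the diff only shows both as optional lists of strings:

    import asyncio
    from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline

    async def build_code_graph():
        async for run_status in run_code_graph_pipeline(
            "path/to/repo",
            include_docs=False,
            excluded_paths=["tests", "docs"],
            supported_languages=["python", "typescript"],
        ):
            print(run_status.pipeline_run_id, run_status.status)

    asyncio.run(build_code_graph())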
cognee/api/v1/cognify/cognify.py
CHANGED
@@ -22,6 +22,11 @@ from cognee.tasks.graph import extract_graph_from_data
 from cognee.tasks.storage import add_data_points
 from cognee.tasks.summarization import summarize_text
 from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor
+from cognee.tasks.temporal_graph.extract_events_and_entities import extract_events_and_timestamps
+from cognee.tasks.temporal_graph.extract_knowledge_graph_from_events import (
+    extract_knowledge_graph_from_events,
+)
+

 logger = get_logger("cognify")

@@ -40,6 +45,7 @@ async def cognify(
     run_in_background: bool = False,
     incremental_loading: bool = True,
     custom_prompt: Optional[str] = None,
+    temporal_cognify: bool = False,
 ):
     """
     Transform ingested data into a structured knowledge graph.
@@ -182,9 +188,12 @@ async def cognify(
     - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
     - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
     """
-    tasks = await get_default_tasks(
-        user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt
-    )
+    if temporal_cognify:
+        tasks = await get_temporal_tasks(user, chunker, chunk_size)
+    else:
+        tasks = await get_default_tasks(
+            user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt
+        )

     # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
     pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background)
@@ -233,3 +242,41 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     ]

     return default_tasks
+
+
+async def get_temporal_tasks(
+    user: User = None, chunker=TextChunker, chunk_size: int = None
+) -> list[Task]:
+    """
+    Builds and returns a list of temporal processing tasks to be executed in sequence.
+
+    The pipeline includes:
+    1. Document classification.
+    2. Dataset permission checks (requires "write" access).
+    3. Document chunking with a specified or default chunk size.
+    4. Event and timestamp extraction from chunks.
+    5. Knowledge graph extraction from events.
+    6. Batched insertion of data points.
+
+    Args:
+        user (User, optional): The user requesting task execution, used for permission checks.
+        chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
+        chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
+
+    Returns:
+        list[Task]: A list of Task objects representing the temporal processing pipeline.
+    """
+    temporal_tasks = [
+        Task(classify_documents),
+        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
+        Task(
+            extract_chunks_from_documents,
+            max_chunk_size=chunk_size or get_max_chunk_tokens(),
+            chunker=chunker,
+        ),
+        Task(extract_events_and_timestamps, task_config={"chunk_size": 10}),
+        Task(extract_knowledge_graph_from_events),
+        Task(add_data_points, task_config={"batch_size": 10}),
+    ]
+
+    return temporal_tasks
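A sketch of opting into the temporal pipeline; the dataset name and text are placeholders, and the positional dataset argument is assumed to follow the usual add/cognify calling convention:

    import cognee

    async def run_temporal():
        await cognee.add("On 23 June 1912, Alan Turing was born in London.", "history")
        # temporal_cognify=True swaps get_default_tasks for get_temporal_tasks,
        # so events and timestamps are extracted before graph construction
        await cognee.cognify(["history"], temporal_cognify=True)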
cognee/api/v1/cognify/routers/get_cognify_router.py
CHANGED
@@ -38,7 +38,7 @@ class CognifyPayloadDTO(InDTO):
     dataset_ids: Optional[List[UUID]] = Field(default=None, examples=[[]])
     run_in_background: Optional[bool] = Field(default=False)
     custom_prompt: Optional[str] = Field(
-        default=
+        default="", description="Custom prompt for entity extraction and graph generation"
     )

cognee/api/v1/datasets/routers/get_datasets_router.py
CHANGED
@@ -5,6 +5,7 @@ from typing import List, Optional
 from typing_extensions import Annotated
 from fastapi import status
 from fastapi import APIRouter
+from fastapi.encoders import jsonable_encoder
 from fastapi import HTTPException, Query, Depends
 from fastapi.responses import JSONResponse, FileResponse

@@ -47,6 +48,7 @@ class DataDTO(OutDTO):
     extension: str
     mime_type: str
     raw_data_location: str
+    dataset_id: UUID


 class GraphNodeDTO(OutDTO):
@@ -114,7 +116,8 @@ def get_datasets_router() -> APIRouter:

     @router.post("", response_model=DatasetDTO)
     async def create_new_dataset(
-        dataset_data: DatasetCreationPayload,
+        dataset_data: DatasetCreationPayload,
+        user: User = Depends(get_authenticated_user),
     ):
         """
         Create a new dataset or return existing dataset with the same name.
@@ -327,7 +330,7 @@ def get_datasets_router() -> APIRouter:
             },
         )

-        from cognee.modules.data.methods import get_dataset_data
+        from cognee.modules.data.methods import get_dataset_data

         # Verify user has permission to read dataset
         dataset = await get_authorized_existing_datasets([dataset_id], "read", user)
@@ -338,12 +341,20 @@ def get_datasets_router() -> APIRouter:
                 content=ErrorResponseDTO(f"Dataset ({str(dataset_id)}) not found."),
             )

-        dataset_data = await get_dataset_data(dataset_id=dataset[0].id)
+        dataset_id = dataset[0].id
+
+        dataset_data = await get_dataset_data(dataset_id=dataset_id)

         if dataset_data is None:
             return []

-        return dataset_data
+        return [
+            dict(
+                **jsonable_encoder(data),
+                dataset_id=dataset_id,
+            )
+            for data in dataset_data
+        ]

     @router.get("/status", response_model=dict[str, PipelineRunStatus])
     async def get_dataset_status(
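A sketch of reading the enriched response. The exact route path is not shown in this diff, so /api/v1/datasets/{dataset_id}/data is assumed from the router prefix and handler behavior; auth setup is omitted, and whether keys come out camelCased depends on OutDTO's serialization, so both spellings are handled defensively:

    import requests

    dataset_id = "00000000-0000-0000-0000-000000000000"  # placeholder
    response = requests.get(f"http://localhost:8000/api/v1/datasets/{dataset_id}/data")
    for item in response.json():
        # every data item now reports the dataset it was fetched through
        print(item["name"], item.get("dataset_id") or item.get("datasetId"))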
cognee/api/v1/memify/__init__.py
File without changes
cognee/api/v1/memify/routers/__init__.py
ADDED
@@ -0,0 +1 @@
+from .get_memify_router import get_memify_router
cognee/api/v1/memify/routers/get_memify_router.py
ADDED
@@ -0,0 +1,100 @@
+from uuid import UUID
+
+from fastapi import APIRouter
+from fastapi.responses import JSONResponse
+from fastapi import Depends
+from pydantic import Field
+from typing import List, Optional, Union, Literal
+
+from cognee.api.DTO import InDTO
+from cognee.modules.users.models import User
+from cognee.modules.users.methods import get_authenticated_user
+from cognee.shared.utils import send_telemetry
+from cognee.modules.pipelines.models import PipelineRunErrored
+from cognee.shared.logging_utils import get_logger
+
+logger = get_logger()
+
+
+class MemifyPayloadDTO(InDTO):
+    extraction_tasks: Optional[List[str]] = Field(
+        default=None,
+        examples=[[]],
+    )
+    enrichment_tasks: Optional[List[str]] = Field(default=None, examples=[[]])
+    data: Optional[str] = Field(default="")
+    dataset_name: Optional[str] = Field(default=None)
+    # Note: Literal is needed for Swagger use
+    dataset_id: Union[UUID, Literal[""], None] = Field(default=None, examples=[""])
+    node_name: Optional[List[str]] = Field(default=None, examples=[[]])
+    run_in_background: Optional[bool] = Field(default=False)
+
+
+def get_memify_router() -> APIRouter:
+    router = APIRouter()
+
+    @router.post("", response_model=dict)
+    async def memify(payload: MemifyPayloadDTO, user: User = Depends(get_authenticated_user)):
+        """
+        Enrichment pipeline in Cognee, can work with already built graphs. If no data is provided existing knowledge graph will be used as data,
+        custom data can also be provided instead which can be processed with provided extraction and enrichment tasks.
+
+        Provided tasks and data will be arranged to run the Cognee pipeline and execute graph enrichment/creation.
+
+        ## Request Parameters
+        - **extractionTasks** Optional[List[str]]: List of available Cognee Tasks to execute for graph/data extraction.
+        - **enrichmentTasks** Optional[List[str]]: List of available Cognee Tasks to handle enrichment of provided graph/data from extraction tasks.
+        - **data** Optional[List[str]]: The data to ingest. Can be any text data when custom extraction and enrichment tasks are used.
+          Data provided here will be forwarded to the first extraction task in the pipeline as input.
+          If no data is provided the whole graph (or subgraph if node_name/node_type is specified) will be forwarded
+        - **dataset_name** (Optional[str]): Name of the datasets to memify
+        - **dataset_id** (Optional[UUID]): List of UUIDs of an already existing dataset
+        - **node_name** (Optional[List[str]]): Filter graph to specific named entities (for targeted search). Used when no data is provided.
+        - **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking).
+
+        Either datasetName or datasetId must be provided.
+
+        ## Response
+        Returns information about the add operation containing:
+        - Status of the operation
+        - Details about the processed data
+        - Any relevant metadata from the ingestion process
+
+        ## Error Codes
+        - **400 Bad Request**: Neither datasetId nor datasetName provided
+        - **409 Conflict**: Error during memify operation
+        - **403 Forbidden**: User doesn't have permission to use dataset
+
+        ## Notes
+        - To memify datasets not owned by the user, use dataset_id (when ENABLE_BACKEND_ACCESS_CONTROL is set to True)
+        - datasetId value can only be the UUID of an already existing dataset
+        """
+
+        send_telemetry(
+            "Memify API Endpoint Invoked",
+            user.id,
+            additional_properties={"endpoint": "POST /v1/memify"},
+        )
+
+        if not payload.dataset_id and not payload.dataset_name:
+            raise ValueError("Either datasetId or datasetName must be provided.")
+
+        try:
+            from cognee.modules.memify import memify as cognee_memify
+
+            memify_run = await cognee_memify(
+                extraction_tasks=payload.extraction_tasks,
+                enrichment_tasks=payload.enrichment_tasks,
+                data=payload.data,
+                dataset=payload.dataset_id if payload.dataset_id else payload.dataset_name,
+                node_name=payload.node_name,
+                user=user,
+            )
+
+            if isinstance(memify_run, PipelineRunErrored):
+                return JSONResponse(status_code=420, content=memify_run)
+            return memify_run
+        except Exception as error:
+            return JSONResponse(status_code=409, content={"error": str(error)})
+
+    return router
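A sketch of invoking the endpoint; the camelCase keys follow the docstring's extractionTasks/enrichmentTasks spelling, and the dataset name is a placeholder:

    import requests

    payload = {
        "datasetName": "my_dataset",
        "extractionTasks": [],   # optional (default None)
        "enrichmentTasks": [],   # optional (default None)
    }
    response = requests.post("http://localhost:8000/api/v1/memify", json=payload)
    if response.status_code == 420:
        print("pipeline errored:", response.json())
    else:
        print(response.json())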
cognee/api/v1/notebooks/routers/__init__.py
ADDED
@@ -0,0 +1 @@
+from .get_notebooks_router import get_notebooks_router