cognee 0.2.4__py3-none-any.whl → 0.3.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +1 -0
- cognee/api/client.py +28 -3
- cognee/api/health.py +10 -13
- cognee/api/v1/add/add.py +3 -1
- cognee/api/v1/add/routers/get_add_router.py +12 -37
- cognee/api/v1/cloud/routers/__init__.py +1 -0
- cognee/api/v1/cloud/routers/get_checks_router.py +23 -0
- cognee/api/v1/cognify/code_graph_pipeline.py +9 -4
- cognee/api/v1/cognify/cognify.py +50 -3
- cognee/api/v1/cognify/routers/get_cognify_router.py +1 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +15 -4
- cognee/api/v1/memify/__init__.py +0 -0
- cognee/api/v1/memify/routers/__init__.py +1 -0
- cognee/api/v1/memify/routers/get_memify_router.py +100 -0
- cognee/api/v1/notebooks/routers/__init__.py +1 -0
- cognee/api/v1/notebooks/routers/get_notebooks_router.py +96 -0
- cognee/api/v1/search/routers/get_search_router.py +20 -1
- cognee/api/v1/search/search.py +11 -4
- cognee/api/v1/sync/__init__.py +17 -0
- cognee/api/v1/sync/routers/__init__.py +3 -0
- cognee/api/v1/sync/routers/get_sync_router.py +241 -0
- cognee/api/v1/sync/sync.py +877 -0
- cognee/api/v1/users/routers/get_auth_router.py +13 -1
- cognee/base_config.py +10 -1
- cognee/infrastructure/databases/graph/config.py +10 -4
- cognee/infrastructure/databases/graph/kuzu/adapter.py +135 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +89 -0
- cognee/infrastructure/databases/relational/__init__.py +2 -0
- cognee/infrastructure/databases/relational/get_async_session.py +15 -0
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +6 -1
- cognee/infrastructure/databases/relational/with_async_session.py +25 -0
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +1 -1
- cognee/infrastructure/databases/vector/config.py +13 -6
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +1 -1
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +2 -6
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +4 -1
- cognee/infrastructure/files/storage/LocalFileStorage.py +9 -0
- cognee/infrastructure/files/storage/S3FileStorage.py +5 -0
- cognee/infrastructure/files/storage/StorageManager.py +7 -1
- cognee/infrastructure/files/storage/storage.py +16 -0
- cognee/infrastructure/llm/LLMGateway.py +18 -0
- cognee/infrastructure/llm/config.py +4 -2
- cognee/infrastructure/llm/prompts/extract_query_time.txt +15 -0
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +25 -0
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +30 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +2 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py +44 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py +1 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py +46 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +25 -1
- cognee/infrastructure/utils/run_sync.py +8 -1
- cognee/modules/chunking/models/DocumentChunk.py +4 -3
- cognee/modules/cloud/exceptions/CloudApiKeyMissingError.py +15 -0
- cognee/modules/cloud/exceptions/CloudConnectionError.py +15 -0
- cognee/modules/cloud/exceptions/__init__.py +2 -0
- cognee/modules/cloud/operations/__init__.py +1 -0
- cognee/modules/cloud/operations/check_api_key.py +25 -0
- cognee/modules/data/deletion/prune_system.py +1 -1
- cognee/modules/data/methods/check_dataset_name.py +1 -1
- cognee/modules/data/methods/get_dataset_data.py +1 -1
- cognee/modules/data/methods/load_or_create_datasets.py +1 -1
- cognee/modules/engine/models/Event.py +16 -0
- cognee/modules/engine/models/Interval.py +8 -0
- cognee/modules/engine/models/Timestamp.py +13 -0
- cognee/modules/engine/models/__init__.py +3 -0
- cognee/modules/engine/utils/__init__.py +2 -0
- cognee/modules/engine/utils/generate_event_datapoint.py +46 -0
- cognee/modules/engine/utils/generate_timestamp_datapoint.py +51 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +2 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/resolve_edges_to_text.py +71 -0
- cognee/modules/memify/__init__.py +1 -0
- cognee/modules/memify/memify.py +118 -0
- cognee/modules/notebooks/methods/__init__.py +5 -0
- cognee/modules/notebooks/methods/create_notebook.py +26 -0
- cognee/modules/notebooks/methods/delete_notebook.py +13 -0
- cognee/modules/notebooks/methods/get_notebook.py +21 -0
- cognee/modules/notebooks/methods/get_notebooks.py +18 -0
- cognee/modules/notebooks/methods/update_notebook.py +17 -0
- cognee/modules/notebooks/models/Notebook.py +53 -0
- cognee/modules/notebooks/models/__init__.py +1 -0
- cognee/modules/notebooks/operations/__init__.py +1 -0
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +55 -0
- cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +19 -3
- cognee/modules/pipelines/operations/pipeline.py +1 -0
- cognee/modules/pipelines/operations/run_tasks.py +17 -41
- cognee/modules/retrieval/base_graph_retriever.py +18 -0
- cognee/modules/retrieval/base_retriever.py +1 -1
- cognee/modules/retrieval/code_retriever.py +8 -0
- cognee/modules/retrieval/coding_rules_retriever.py +31 -0
- cognee/modules/retrieval/completion_retriever.py +9 -3
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -0
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +23 -14
- cognee/modules/retrieval/graph_completion_cot_retriever.py +21 -11
- cognee/modules/retrieval/graph_completion_retriever.py +32 -65
- cognee/modules/retrieval/graph_summary_completion_retriever.py +3 -1
- cognee/modules/retrieval/insights_retriever.py +14 -3
- cognee/modules/retrieval/summaries_retriever.py +1 -1
- cognee/modules/retrieval/temporal_retriever.py +152 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +7 -32
- cognee/modules/retrieval/utils/completion.py +10 -3
- cognee/modules/search/methods/get_search_type_tools.py +168 -0
- cognee/modules/search/methods/no_access_control_search.py +47 -0
- cognee/modules/search/methods/search.py +219 -139
- cognee/modules/search/types/SearchResult.py +21 -0
- cognee/modules/search/types/SearchType.py +2 -0
- cognee/modules/search/types/__init__.py +1 -0
- cognee/modules/search/utils/__init__.py +2 -0
- cognee/modules/search/utils/prepare_search_result.py +41 -0
- cognee/modules/search/utils/transform_context_to_graph.py +38 -0
- cognee/modules/sync/__init__.py +1 -0
- cognee/modules/sync/methods/__init__.py +23 -0
- cognee/modules/sync/methods/create_sync_operation.py +53 -0
- cognee/modules/sync/methods/get_sync_operation.py +107 -0
- cognee/modules/sync/methods/update_sync_operation.py +248 -0
- cognee/modules/sync/models/SyncOperation.py +142 -0
- cognee/modules/sync/models/__init__.py +3 -0
- cognee/modules/users/__init__.py +0 -1
- cognee/modules/users/methods/__init__.py +4 -1
- cognee/modules/users/methods/create_user.py +26 -1
- cognee/modules/users/methods/get_authenticated_user.py +36 -42
- cognee/modules/users/methods/get_default_user.py +3 -1
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +2 -1
- cognee/root_dir.py +19 -0
- cognee/shared/logging_utils.py +1 -1
- cognee/tasks/codingagents/__init__.py +0 -0
- cognee/tasks/codingagents/coding_rule_associations.py +127 -0
- cognee/tasks/ingestion/save_data_item_to_storage.py +23 -0
- cognee/tasks/memify/__init__.py +2 -0
- cognee/tasks/memify/extract_subgraph.py +7 -0
- cognee/tasks/memify/extract_subgraph_chunks.py +11 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +52 -27
- cognee/tasks/temporal_graph/__init__.py +1 -0
- cognee/tasks/temporal_graph/add_entities_to_event.py +85 -0
- cognee/tasks/temporal_graph/enrich_events.py +34 -0
- cognee/tasks/temporal_graph/extract_events_and_entities.py +32 -0
- cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py +41 -0
- cognee/tasks/temporal_graph/models.py +49 -0
- cognee/tests/test_kuzu.py +4 -4
- cognee/tests/test_neo4j.py +4 -4
- cognee/tests/test_permissions.py +3 -3
- cognee/tests/test_relational_db_migration.py +7 -5
- cognee/tests/test_search_db.py +18 -24
- cognee/tests/test_temporal_graph.py +167 -0
- cognee/tests/unit/api/__init__.py +1 -0
- cognee/tests/unit/api/test_conditional_authentication_endpoints.py +246 -0
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +18 -2
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +13 -16
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +11 -16
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +5 -4
- cognee/tests/unit/modules/retrieval/insights_retriever_test.py +4 -2
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +18 -2
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +225 -0
- cognee/tests/unit/modules/users/__init__.py +1 -0
- cognee/tests/unit/modules/users/test_conditional_authentication.py +277 -0
- cognee/tests/unit/processing/utils/utils_test.py +20 -1
- {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/METADATA +8 -6
- {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/RECORD +162 -89
- cognee/tests/unit/modules/search/search_methods_test.py +0 -225
- {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/WHEEL +0 -0
- {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/entry_points.txt +0 -0
- {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
from uuid import UUID
|
|
2
|
+
from fastapi.encoders import jsonable_encoder
|
|
3
|
+
from fastapi.responses import JSONResponse
|
|
4
|
+
from pydantic import Field
|
|
5
|
+
from typing import List, Optional
|
|
6
|
+
from fastapi import APIRouter, Depends
|
|
7
|
+
|
|
8
|
+
from cognee.api.DTO import InDTO
|
|
9
|
+
from cognee.infrastructure.databases.relational import get_async_session
|
|
10
|
+
from cognee.infrastructure.utils.run_async import run_async
|
|
11
|
+
from cognee.modules.notebooks.models import Notebook, NotebookCell
|
|
12
|
+
from cognee.modules.notebooks.operations import run_in_local_sandbox
|
|
13
|
+
from cognee.modules.users.models import User
|
|
14
|
+
from cognee.modules.users.methods import get_authenticated_user
|
|
15
|
+
from cognee.modules.notebooks.methods import (
|
|
16
|
+
create_notebook,
|
|
17
|
+
delete_notebook,
|
|
18
|
+
get_notebook,
|
|
19
|
+
get_notebooks,
|
|
20
|
+
update_notebook,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class NotebookData(InDTO):
    # Request body accepted by the notebook create and update endpoints.

    # Typed Optional (an explicit null is accepted), but Field(...) makes the
    # key itself required in the payload.
    name: Optional[str] = Field(...)

    # Cells of the notebook; an omitted key yields an empty list.
    cells: Optional[List[NotebookCell]] = Field(default=[])
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def get_notebooks_router():
    """Build the APIRouter exposing notebook CRUD and the cell-run endpoint.

    Every route resolves the caller through ``get_authenticated_user`` and
    looks notebooks up with the caller's id, so a notebook id that does not
    resolve for that user is answered with a 404 payload.
    """
    notebooks_router = APIRouter()

    def _notebook_not_found() -> JSONResponse:
        # Shared 404 payload for every route that looks a notebook up by id.
        return JSONResponse(status_code=404, content={"error": "Notebook not found"})

    @notebooks_router.get("")
    async def get_notebooks_endpoint(user: User = Depends(get_authenticated_user)):
        # List the notebooks looked up by the caller's user id.
        owned_notebooks = await get_notebooks(user.id)
        return owned_notebooks

    @notebooks_router.post("")
    async def create_notebook_endpoint(
        notebook_data: NotebookData, user: User = Depends(get_authenticated_user)
    ):
        # Notebooks created through the API are always marked deletable.
        created = await create_notebook(
            user.id, notebook_data.name, notebook_data.cells, deletable=True
        )
        return created

    @notebooks_router.put("/{notebook_id}")
    async def update_notebook_endpoint(
        notebook_id: UUID, notebook_data: NotebookData, user: User = Depends(get_authenticated_user)
    ):
        async with get_async_session(auto_commit=True) as session:
            stored = await get_notebook(notebook_id, user.id, session)
            if stored is None:
                return _notebook_not_found()

            # Only overwrite the name when a non-empty, different one was sent.
            incoming_name = notebook_data.name
            if incoming_name and incoming_name != stored.name:
                stored.name = incoming_name

            # A missing or empty cell list leaves the stored cells untouched.
            incoming_cells = notebook_data.cells
            if incoming_cells:
                stored.cells = incoming_cells

            return await update_notebook(stored, session)

    class RunCodeData(InDTO):
        # Source code of the cell to execute.
        content: str = Field(...)

    @notebooks_router.post("/{notebook_id}/{cell_id}/run")
    async def run_notebook_cell_endpoint(
        notebook_id: UUID,
        cell_id: UUID,
        run_code: RunCodeData,
        user: User = Depends(get_authenticated_user),
    ):
        async with get_async_session() as session:
            stored = await get_notebook(notebook_id, user.id, session)
            if stored is None:
                return _notebook_not_found()

            # The code that runs comes from the request body, not from the
            # stored notebook; cell_id is accepted in the path but not read here.
            result, error = await run_async(run_in_local_sandbox, run_code.content)

            payload = {"result": jsonable_encoder(result), "error": error}
            return JSONResponse(status_code=200, content=payload)

    @notebooks_router.delete("/{notebook_id}")
    async def delete_notebook_endpoint(
        notebook_id: UUID, user: User = Depends(get_authenticated_user)
    ):
        async with get_async_session(auto_commit=True) as session:
            stored = await get_notebook(notebook_id, user.id, session)
            if stored is None:
                return _notebook_not_found()

            return await delete_notebook(stored, session)

    return notebooks_router
|
|
@@ -4,6 +4,8 @@ from datetime import datetime
|
|
|
4
4
|
from pydantic import Field
|
|
5
5
|
from fastapi import Depends, APIRouter
|
|
6
6
|
from fastapi.responses import JSONResponse
|
|
7
|
+
from fastapi.encoders import jsonable_encoder
|
|
8
|
+
|
|
7
9
|
from cognee.modules.search.types import SearchType
|
|
8
10
|
from cognee.api.DTO import InDTO, OutDTO
|
|
9
11
|
from cognee.modules.users.exceptions.exceptions import PermissionDeniedError
|
|
@@ -20,7 +22,13 @@ class SearchPayloadDTO(InDTO):
|
|
|
20
22
|
datasets: Optional[list[str]] = Field(default=None)
|
|
21
23
|
dataset_ids: Optional[list[UUID]] = Field(default=None, examples=[[]])
|
|
22
24
|
query: str = Field(default="What is in the document?")
|
|
25
|
+
system_prompt: Optional[str] = Field(
|
|
26
|
+
default="Answer the question using the provided context. Be as brief as possible."
|
|
27
|
+
)
|
|
28
|
+
node_name: Optional[list[str]] = Field(default=None, example=[])
|
|
23
29
|
top_k: Optional[int] = Field(default=10)
|
|
30
|
+
only_context: bool = Field(default=False)
|
|
31
|
+
use_combined_context: bool = Field(default=False)
|
|
24
32
|
|
|
25
33
|
|
|
26
34
|
def get_search_router() -> APIRouter:
|
|
@@ -79,7 +87,10 @@ def get_search_router() -> APIRouter:
|
|
|
79
87
|
- **datasets** (Optional[List[str]]): List of dataset names to search within
|
|
80
88
|
- **dataset_ids** (Optional[List[UUID]]): List of dataset UUIDs to search within
|
|
81
89
|
- **query** (str): The search query string
|
|
90
|
+
- **system_prompt** Optional[str]: System prompt to be used for Completion type searches in Cognee
|
|
91
|
+
- **node_name** Optional[list[str]]: Filter results to specific node_sets defined in the add pipeline (for targeted search).
|
|
82
92
|
- **top_k** (Optional[int]): Maximum number of results to return (default: 10)
|
|
93
|
+
- **only_context** bool: Set to true to only return context Cognee will be sending to LLM in Completion type searches. This will be returned instead of LLM calls for completion type searches.
|
|
83
94
|
|
|
84
95
|
## Response
|
|
85
96
|
Returns a list of search results containing relevant nodes from the graph.
|
|
@@ -102,7 +113,11 @@ def get_search_router() -> APIRouter:
|
|
|
102
113
|
"datasets": payload.datasets,
|
|
103
114
|
"dataset_ids": [str(dataset_id) for dataset_id in payload.dataset_ids or []],
|
|
104
115
|
"query": payload.query,
|
|
116
|
+
"system_prompt": payload.system_prompt,
|
|
117
|
+
"node_name": payload.node_name,
|
|
105
118
|
"top_k": payload.top_k,
|
|
119
|
+
"only_context": payload.only_context,
|
|
120
|
+
"use_combined_context": payload.use_combined_context,
|
|
106
121
|
},
|
|
107
122
|
)
|
|
108
123
|
|
|
@@ -115,10 +130,14 @@ def get_search_router() -> APIRouter:
|
|
|
115
130
|
user=user,
|
|
116
131
|
datasets=payload.datasets,
|
|
117
132
|
dataset_ids=payload.dataset_ids,
|
|
133
|
+
system_prompt=payload.system_prompt,
|
|
134
|
+
node_name=payload.node_name,
|
|
118
135
|
top_k=payload.top_k,
|
|
136
|
+
only_context=payload.only_context,
|
|
137
|
+
use_combined_context=payload.use_combined_context,
|
|
119
138
|
)
|
|
120
139
|
|
|
121
|
-
return results
|
|
140
|
+
return jsonable_encoder(results)
|
|
122
141
|
except PermissionDeniedError:
|
|
123
142
|
return []
|
|
124
143
|
except Exception as error:
|
cognee/api/v1/search/search.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
from uuid import UUID
|
|
2
2
|
from typing import Union, Optional, List, Type
|
|
3
3
|
|
|
4
|
+
from cognee.modules.engine.models.node_set import NodeSet
|
|
4
5
|
from cognee.modules.users.models import User
|
|
5
|
-
from cognee.modules.search.types import SearchType
|
|
6
|
+
from cognee.modules.search.types import SearchResult, SearchType, CombinedSearchResult
|
|
6
7
|
from cognee.modules.users.methods import get_default_user
|
|
7
8
|
from cognee.modules.search.methods import search as search_function
|
|
8
9
|
from cognee.modules.data.methods import get_authorized_existing_datasets
|
|
@@ -12,16 +13,19 @@ from cognee.modules.data.exceptions import DatasetNotFoundError
|
|
|
12
13
|
async def search(
|
|
13
14
|
query_text: str,
|
|
14
15
|
query_type: SearchType = SearchType.GRAPH_COMPLETION,
|
|
15
|
-
user: User = None,
|
|
16
|
+
user: Optional[User] = None,
|
|
16
17
|
datasets: Optional[Union[list[str], str]] = None,
|
|
17
18
|
dataset_ids: Optional[Union[list[UUID], UUID]] = None,
|
|
18
19
|
system_prompt_path: str = "answer_simple_question.txt",
|
|
20
|
+
system_prompt: Optional[str] = None,
|
|
19
21
|
top_k: int = 10,
|
|
20
|
-
node_type: Optional[Type] =
|
|
22
|
+
node_type: Optional[Type] = NodeSet,
|
|
21
23
|
node_name: Optional[List[str]] = None,
|
|
22
24
|
save_interaction: bool = False,
|
|
23
25
|
last_k: Optional[int] = None,
|
|
24
|
-
|
|
26
|
+
only_context: bool = False,
|
|
27
|
+
use_combined_context: bool = False,
|
|
28
|
+
) -> Union[List[SearchResult], CombinedSearchResult]:
|
|
25
29
|
"""
|
|
26
30
|
Search and query the knowledge graph for insights, information, and connections.
|
|
27
31
|
|
|
@@ -183,11 +187,14 @@ async def search(
|
|
|
183
187
|
dataset_ids=dataset_ids if dataset_ids else datasets,
|
|
184
188
|
user=user,
|
|
185
189
|
system_prompt_path=system_prompt_path,
|
|
190
|
+
system_prompt=system_prompt,
|
|
186
191
|
top_k=top_k,
|
|
187
192
|
node_type=node_type,
|
|
188
193
|
node_name=node_name,
|
|
189
194
|
save_interaction=save_interaction,
|
|
190
195
|
last_k=last_k,
|
|
196
|
+
only_context=only_context,
|
|
197
|
+
use_combined_context=use_combined_context,
|
|
191
198
|
)
|
|
192
199
|
|
|
193
200
|
return filtered_search_results
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from .sync import (
|
|
2
|
+
sync,
|
|
3
|
+
SyncResponse,
|
|
4
|
+
LocalFileInfo,
|
|
5
|
+
CheckMissingHashesRequest,
|
|
6
|
+
CheckHashesDiffResponse,
|
|
7
|
+
PruneDatasetRequest,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"sync",
|
|
12
|
+
"SyncResponse",
|
|
13
|
+
"LocalFileInfo",
|
|
14
|
+
"CheckMissingHashesRequest",
|
|
15
|
+
"CheckHashesDiffResponse",
|
|
16
|
+
"PruneDatasetRequest",
|
|
17
|
+
]
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
from uuid import UUID
|
|
2
|
+
from typing import Optional, List
|
|
3
|
+
from fastapi import APIRouter, Depends
|
|
4
|
+
from fastapi.responses import JSONResponse
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
from cognee.api.DTO import InDTO
|
|
8
|
+
from cognee.modules.users.models import User
|
|
9
|
+
from cognee.modules.users.methods import get_authenticated_user
|
|
10
|
+
from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
|
|
11
|
+
from cognee.modules.sync.methods import get_running_sync_operations_for_user, get_sync_operation
|
|
12
|
+
from cognee.shared.utils import send_telemetry
|
|
13
|
+
from cognee.shared.logging_utils import get_logger
|
|
14
|
+
from cognee.api.v1.sync import SyncResponse
|
|
15
|
+
from cognee.context_global_variables import set_database_global_context_variables
|
|
16
|
+
|
|
17
|
+
logger = get_logger()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class SyncRequest(InDTO):
    """Request model for sync operations."""

    # Datasets to sync; when omitted or null the sync endpoint passes None to
    # the permission lookup and reports "*" in telemetry.
    dataset_ids: Optional[List[UUID]] = None
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def get_sync_router() -> APIRouter:
    """Build the APIRouter exposing the cloud sync endpoints.

    Registers ``POST ""`` (start a sync) and ``GET "/status"`` (overview of the
    caller's running syncs). Both routes resolve the caller through
    ``get_authenticated_user``.
    """
    router = APIRouter()

    def _isoformat_or_none(moment):
        # Both endpoints report created_at; guard it in one place so a missing
        # timestamp yields null instead of an AttributeError.
        return moment.isoformat() if moment else None

    @router.post("", response_model=dict[str, SyncResponse])
    async def sync_to_cloud(
        request: SyncRequest,
        user: User = Depends(get_authenticated_user),
    ):
        """
        Sync local data to Cognee Cloud.

        This endpoint triggers synchronization of local Cognee data to your cloud instance.
        It uploads your local datasets, knowledge graphs, and processed data to the cloud
        for backup, sharing, or cloud-based processing.

        ## Request Body (JSON)
        ```json
        {
            "dataset_ids": ["123e4567-e89b-12d3-a456-426614174000", "456e7890-e12b-34c5-d678-901234567000"]
        }
        ```

        ## Response
        Returns immediate response for the sync operation:
        - **run_id**: Unique identifier for tracking the background sync operation
        - **status**: Always "started" (operation runs in background)
        - **dataset_ids**: List of dataset IDs being synced
        - **dataset_names**: List of dataset names being synced
        - **message**: Description of the background operation
        - **timestamp**: When the sync was initiated
        - **user_id**: User who initiated the sync

        ## Cloud Sync Features
        - **Automatic Authentication**: Uses your Cognee Cloud credentials
        - **Data Compression**: Optimizes transfer size for faster uploads
        - **Smart Sync**: Automatically handles data updates efficiently
        - **Progress Tracking**: Monitor sync status with the returned run_id
        - **Error Recovery**: Automatic retry for failed transfers
        - **Data Validation**: Ensures data integrity during transfer

        ## Example Usage
        ```bash
        # Sync multiple datasets to cloud by IDs (JSON request)
        curl -X POST "http://localhost:8000/api/v1/sync" \\
          -H "Content-Type: application/json" \\
          -H "Cookie: auth_token=your-token" \\
          -d '{"dataset_ids": ["123e4567-e89b-12d3-a456-426614174000", "456e7890-e12b-34c5-d678-901234567000"]}'

        # Sync all user datasets (empty request body or null dataset_ids)
        curl -X POST "http://localhost:8000/api/v1/sync" \\
          -H "Content-Type: application/json" \\
          -H "Cookie: auth_token=your-token" \\
          -d '{}'
        ```

        ## Error Codes
        - **400 Bad Request**: Invalid dataset_ids format
        - **401 Unauthorized**: Invalid or missing authentication
        - **403 Forbidden**: User doesn't have permission to access dataset
        - **404 Not Found**: Dataset not found
        - **409 Conflict**: Sync operation conflict or cloud service unavailable
        - **413 Payload Too Large**: Dataset too large for current cloud plan
        - **429 Too Many Requests**: Rate limit exceeded

        ## Notes
        - Sync operations run in the background - you get an immediate response
        - Use the returned run_id to track progress; `GET /api/v1/sync/status` reports whether a sync is still running
        - Large datasets are automatically chunked for efficient transfer
        - Cloud storage usage counts against your plan limits
        - The sync will continue even if you close your connection
        """
        send_telemetry(
            "Cloud Sync API Endpoint Invoked",
            user.id,
            additional_properties={
                "endpoint": "POST /v1/sync",
                # "*" stands for "no explicit dataset ids were requested".
                "dataset_ids": [str(dataset_id) for dataset_id in request.dataset_ids]
                if request.dataset_ids
                else "*",
            },
        )

        # Imported at call time rather than at module level, as in the original;
        # presumably to avoid a circular import with cognee.api.v1.sync — confirm.
        from cognee.api.v1.sync import sync as cognee_sync

        try:
            # Check if user has any running sync operations
            running_syncs = await get_running_sync_operations_for_user(user.id)
            if running_syncs:
                # Refuse to start a second sync and describe the one in flight.
                existing_sync = running_syncs[0]  # Get the most recent running sync
                return JSONResponse(
                    status_code=409,
                    content={
                        "error": "Sync operation already in progress",
                        "details": {
                            "run_id": existing_sync.run_id,
                            "status": "already_running",
                            "dataset_ids": existing_sync.dataset_ids,
                            "dataset_names": existing_sync.dataset_names,
                            "message": f"You have a sync operation already in progress with run_id '{existing_sync.run_id}'. Use the status endpoint to monitor progress, or wait for it to complete before starting a new sync.",
                            "timestamp": _isoformat_or_none(existing_sync.created_at),
                            "progress_percentage": existing_sync.progress_percentage,
                        },
                    },
                )

            # Retrieve existing dataset and check permissions
            datasets = await get_specific_user_permission_datasets(
                user.id, "write", request.dataset_ids if request.dataset_ids else None
            )

            # Execute new cloud sync operation for all datasets
            sync_result = await cognee_sync(
                datasets=datasets,
                user=user,
            )

            return sync_result

        except ValueError as e:
            return JSONResponse(status_code=400, content={"error": str(e)})
        except PermissionError as e:
            return JSONResponse(status_code=403, content={"error": str(e)})
        except ConnectionError as e:
            return JSONResponse(
                status_code=409, content={"error": f"Cloud service unavailable: {str(e)}"}
            )
        except Exception as e:
            # Boundary handler: log the cause, return a generic message to the client.
            logger.error(f"Cloud sync operation failed: {str(e)}")
            return JSONResponse(status_code=409, content={"error": "Cloud sync operation failed."})

    @router.get("/status")
    async def get_sync_status_overview(
        user: User = Depends(get_authenticated_user),
    ):
        """
        Check if there are any running sync operations for the current user.

        This endpoint provides a simple check to see if the user has any active sync operations
        without needing to know specific run IDs.

        ## Response
        Returns a simple status overview:
        - **has_running_sync**: Boolean indicating if there are any running syncs
        - **running_sync_count**: Number of currently running sync operations
        - **latest_running_sync** (optional): Information about the most recent running sync if any exists

        ## Example Usage
        ```bash
        curl -X GET "http://localhost:8000/api/v1/sync/status" \\
          -H "Cookie: auth_token=your-token"
        ```

        ## Example Responses

        **No running syncs:**
        ```json
        {
            "has_running_sync": false,
            "running_sync_count": 0
        }
        ```

        **With running sync:**
        ```json
        {
            "has_running_sync": true,
            "running_sync_count": 1,
            "latest_running_sync": {
                "run_id": "12345678-1234-5678-9012-123456789012",
                "dataset_ids": ["123e4567-e89b-12d3-a456-426614174000"],
                "dataset_names": ["My Dataset"],
                "progress_percentage": 45,
                "created_at": "2025-01-01T00:00:00Z"
            }
        }
        ```
        """
        send_telemetry(
            "Sync Status Overview API Endpoint Invoked",
            user.id,
            additional_properties={
                "endpoint": "GET /v1/sync/status",
            },
        )

        try:
            # Get any running sync operations for the user
            running_syncs = await get_running_sync_operations_for_user(user.id)

            response = {
                "has_running_sync": len(running_syncs) > 0,
                "running_sync_count": len(running_syncs),
            }

            # If there are running syncs, include info about the latest one
            if running_syncs:
                latest_sync = running_syncs[0]  # Already ordered by created_at desc
                response["latest_running_sync"] = {
                    "run_id": latest_sync.run_id,
                    "dataset_ids": latest_sync.dataset_ids,
                    "dataset_names": latest_sync.dataset_names,
                    "progress_percentage": latest_sync.progress_percentage,
                    "created_at": _isoformat_or_none(latest_sync.created_at),
                }

            return response

        except Exception as e:
            # Boundary handler: log the cause, return a generic message to the client.
            logger.error(f"Failed to get sync status overview: {str(e)}")
            return JSONResponse(
                status_code=500, content={"error": "Failed to get sync status overview"}
            )

    return router
|