cognee 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +2 -0
- cognee/api/client.py +28 -3
- cognee/api/health.py +10 -13
- cognee/api/v1/add/add.py +3 -1
- cognee/api/v1/add/routers/get_add_router.py +12 -37
- cognee/api/v1/cloud/routers/__init__.py +1 -0
- cognee/api/v1/cloud/routers/get_checks_router.py +23 -0
- cognee/api/v1/cognify/code_graph_pipeline.py +9 -4
- cognee/api/v1/cognify/cognify.py +50 -3
- cognee/api/v1/cognify/routers/get_cognify_router.py +1 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +15 -4
- cognee/api/v1/memify/__init__.py +0 -0
- cognee/api/v1/memify/routers/__init__.py +1 -0
- cognee/api/v1/memify/routers/get_memify_router.py +100 -0
- cognee/api/v1/notebooks/routers/__init__.py +1 -0
- cognee/api/v1/notebooks/routers/get_notebooks_router.py +96 -0
- cognee/api/v1/search/routers/get_search_router.py +20 -1
- cognee/api/v1/search/search.py +11 -4
- cognee/api/v1/sync/__init__.py +17 -0
- cognee/api/v1/sync/routers/__init__.py +3 -0
- cognee/api/v1/sync/routers/get_sync_router.py +241 -0
- cognee/api/v1/sync/sync.py +877 -0
- cognee/api/v1/ui/__init__.py +1 -0
- cognee/api/v1/ui/ui.py +529 -0
- cognee/api/v1/users/routers/get_auth_router.py +13 -1
- cognee/base_config.py +10 -1
- cognee/cli/_cognee.py +93 -0
- cognee/infrastructure/databases/graph/config.py +10 -4
- cognee/infrastructure/databases/graph/kuzu/adapter.py +135 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +89 -0
- cognee/infrastructure/databases/relational/__init__.py +2 -0
- cognee/infrastructure/databases/relational/get_async_session.py +15 -0
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +6 -1
- cognee/infrastructure/databases/relational/with_async_session.py +25 -0
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +1 -1
- cognee/infrastructure/databases/vector/config.py +13 -6
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +1 -1
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +2 -6
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +4 -1
- cognee/infrastructure/files/storage/LocalFileStorage.py +9 -0
- cognee/infrastructure/files/storage/S3FileStorage.py +5 -0
- cognee/infrastructure/files/storage/StorageManager.py +7 -1
- cognee/infrastructure/files/storage/storage.py +16 -0
- cognee/infrastructure/llm/LLMGateway.py +18 -0
- cognee/infrastructure/llm/config.py +4 -2
- cognee/infrastructure/llm/prompts/extract_query_time.txt +15 -0
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +25 -0
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +30 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +2 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py +44 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py +1 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py +46 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +25 -1
- cognee/infrastructure/utils/run_sync.py +8 -1
- cognee/modules/chunking/models/DocumentChunk.py +4 -3
- cognee/modules/cloud/exceptions/CloudApiKeyMissingError.py +15 -0
- cognee/modules/cloud/exceptions/CloudConnectionError.py +15 -0
- cognee/modules/cloud/exceptions/__init__.py +2 -0
- cognee/modules/cloud/operations/__init__.py +1 -0
- cognee/modules/cloud/operations/check_api_key.py +25 -0
- cognee/modules/data/deletion/prune_system.py +1 -1
- cognee/modules/data/methods/check_dataset_name.py +1 -1
- cognee/modules/data/methods/get_dataset_data.py +1 -1
- cognee/modules/data/methods/load_or_create_datasets.py +1 -1
- cognee/modules/engine/models/Event.py +16 -0
- cognee/modules/engine/models/Interval.py +8 -0
- cognee/modules/engine/models/Timestamp.py +13 -0
- cognee/modules/engine/models/__init__.py +3 -0
- cognee/modules/engine/utils/__init__.py +2 -0
- cognee/modules/engine/utils/generate_event_datapoint.py +46 -0
- cognee/modules/engine/utils/generate_timestamp_datapoint.py +51 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +2 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/resolve_edges_to_text.py +71 -0
- cognee/modules/memify/__init__.py +1 -0
- cognee/modules/memify/memify.py +118 -0
- cognee/modules/notebooks/methods/__init__.py +5 -0
- cognee/modules/notebooks/methods/create_notebook.py +26 -0
- cognee/modules/notebooks/methods/delete_notebook.py +13 -0
- cognee/modules/notebooks/methods/get_notebook.py +21 -0
- cognee/modules/notebooks/methods/get_notebooks.py +18 -0
- cognee/modules/notebooks/methods/update_notebook.py +17 -0
- cognee/modules/notebooks/models/Notebook.py +53 -0
- cognee/modules/notebooks/models/__init__.py +1 -0
- cognee/modules/notebooks/operations/__init__.py +1 -0
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +55 -0
- cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +19 -3
- cognee/modules/pipelines/operations/pipeline.py +1 -0
- cognee/modules/pipelines/operations/run_tasks.py +17 -41
- cognee/modules/retrieval/base_graph_retriever.py +18 -0
- cognee/modules/retrieval/base_retriever.py +1 -1
- cognee/modules/retrieval/code_retriever.py +8 -0
- cognee/modules/retrieval/coding_rules_retriever.py +31 -0
- cognee/modules/retrieval/completion_retriever.py +9 -3
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -0
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +23 -14
- cognee/modules/retrieval/graph_completion_cot_retriever.py +21 -11
- cognee/modules/retrieval/graph_completion_retriever.py +32 -65
- cognee/modules/retrieval/graph_summary_completion_retriever.py +3 -1
- cognee/modules/retrieval/insights_retriever.py +14 -3
- cognee/modules/retrieval/summaries_retriever.py +1 -1
- cognee/modules/retrieval/temporal_retriever.py +152 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +7 -32
- cognee/modules/retrieval/utils/completion.py +10 -3
- cognee/modules/search/methods/get_search_type_tools.py +168 -0
- cognee/modules/search/methods/no_access_control_search.py +47 -0
- cognee/modules/search/methods/search.py +219 -139
- cognee/modules/search/types/SearchResult.py +21 -0
- cognee/modules/search/types/SearchType.py +2 -0
- cognee/modules/search/types/__init__.py +1 -0
- cognee/modules/search/utils/__init__.py +2 -0
- cognee/modules/search/utils/prepare_search_result.py +41 -0
- cognee/modules/search/utils/transform_context_to_graph.py +38 -0
- cognee/modules/sync/__init__.py +1 -0
- cognee/modules/sync/methods/__init__.py +23 -0
- cognee/modules/sync/methods/create_sync_operation.py +53 -0
- cognee/modules/sync/methods/get_sync_operation.py +107 -0
- cognee/modules/sync/methods/update_sync_operation.py +248 -0
- cognee/modules/sync/models/SyncOperation.py +142 -0
- cognee/modules/sync/models/__init__.py +3 -0
- cognee/modules/users/__init__.py +0 -1
- cognee/modules/users/methods/__init__.py +4 -1
- cognee/modules/users/methods/create_user.py +26 -1
- cognee/modules/users/methods/get_authenticated_user.py +36 -42
- cognee/modules/users/methods/get_default_user.py +3 -1
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +2 -1
- cognee/root_dir.py +19 -0
- cognee/shared/logging_utils.py +1 -1
- cognee/tasks/codingagents/__init__.py +0 -0
- cognee/tasks/codingagents/coding_rule_associations.py +127 -0
- cognee/tasks/ingestion/save_data_item_to_storage.py +23 -0
- cognee/tasks/memify/__init__.py +2 -0
- cognee/tasks/memify/extract_subgraph.py +7 -0
- cognee/tasks/memify/extract_subgraph_chunks.py +11 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +52 -27
- cognee/tasks/temporal_graph/__init__.py +1 -0
- cognee/tasks/temporal_graph/add_entities_to_event.py +85 -0
- cognee/tasks/temporal_graph/enrich_events.py +34 -0
- cognee/tasks/temporal_graph/extract_events_and_entities.py +32 -0
- cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py +41 -0
- cognee/tasks/temporal_graph/models.py +49 -0
- cognee/tests/test_kuzu.py +4 -4
- cognee/tests/test_neo4j.py +4 -4
- cognee/tests/test_permissions.py +3 -3
- cognee/tests/test_relational_db_migration.py +7 -5
- cognee/tests/test_search_db.py +18 -24
- cognee/tests/test_temporal_graph.py +167 -0
- cognee/tests/unit/api/__init__.py +1 -0
- cognee/tests/unit/api/test_conditional_authentication_endpoints.py +246 -0
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +18 -2
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +13 -16
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +11 -16
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +5 -4
- cognee/tests/unit/modules/retrieval/insights_retriever_test.py +4 -2
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +18 -2
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +225 -0
- cognee/tests/unit/modules/users/__init__.py +1 -0
- cognee/tests/unit/modules/users/test_conditional_authentication.py +277 -0
- cognee/tests/unit/processing/utils/utils_test.py +20 -1
- {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/METADATA +8 -6
- {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/RECORD +165 -90
- cognee/tests/unit/modules/search/search_methods_test.py +0 -225
- {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/WHEEL +0 -0
- {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/entry_points.txt +0 -0
- {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/licenses/NOTICE.md +0 -0
cognee/modules/sync/models/SyncOperation.py
ADDED
@@ -0,0 +1,142 @@
+from uuid import uuid4
+from enum import Enum
+from typing import Optional, List
+from datetime import datetime, timezone
+from sqlalchemy import (
+    Column,
+    Text,
+    DateTime,
+    UUID as SQLAlchemy_UUID,
+    Integer,
+    Enum as SQLEnum,
+    JSON,
+)
+
+from cognee.infrastructure.databases.relational import Base
+
+
+class SyncStatus(str, Enum):
+    """Enumeration of possible sync operation statuses."""
+
+    STARTED = "started"
+    IN_PROGRESS = "in_progress"
+    COMPLETED = "completed"
+    FAILED = "failed"
+    CANCELLED = "cancelled"
+
+
+class SyncOperation(Base):
+    """
+    Database model for tracking sync operations.
+
+    This model stores information about background sync operations,
+    allowing users to monitor progress and query the status of their sync requests.
+    """
+
+    __tablename__ = "sync_operations"
+
+    # Primary identifiers
+    id = Column(SQLAlchemy_UUID, primary_key=True, default=uuid4, doc="Database primary key")
+    run_id = Column(Text, unique=True, index=True, doc="Public run ID returned to users")
+
+    # Status and progress tracking
+    status = Column(
+        SQLEnum(SyncStatus), default=SyncStatus.STARTED, doc="Current status of the sync operation"
+    )
+    progress_percentage = Column(Integer, default=0, doc="Progress percentage (0-100)")
+
+    # Operation metadata
+    dataset_ids = Column(JSON, doc="Array of dataset IDs being synced")
+    dataset_names = Column(JSON, doc="Array of dataset names being synced")
+    user_id = Column(SQLAlchemy_UUID, index=True, doc="ID of the user who initiated the sync")
+
+    # Timing information
+    created_at = Column(
+        DateTime(timezone=True),
+        default=lambda: datetime.now(timezone.utc),
+        doc="When the sync was initiated",
+    )
+    started_at = Column(DateTime(timezone=True), doc="When the actual sync processing began")
+    completed_at = Column(
+        DateTime(timezone=True), doc="When the sync finished (success or failure)"
+    )
+
+    # Operation details
+    total_records_to_sync = Column(Integer, doc="Total number of records to sync")
+    total_records_to_download = Column(Integer, doc="Total number of records to download")
+    total_records_to_upload = Column(Integer, doc="Total number of records to upload")
+
+    records_downloaded = Column(Integer, default=0, doc="Number of records successfully downloaded")
+    records_uploaded = Column(Integer, default=0, doc="Number of records successfully uploaded")
+    bytes_downloaded = Column(Integer, default=0, doc="Total bytes downloaded from cloud")
+    bytes_uploaded = Column(Integer, default=0, doc="Total bytes uploaded to cloud")
+
+    # Data lineage tracking per dataset
+    dataset_sync_hashes = Column(
+        JSON, doc="Mapping of dataset_id -> {uploaded: [hashes], downloaded: [hashes]}"
+    )
+
+    # Error handling
+    error_message = Column(Text, doc="Error message if sync failed")
+    retry_count = Column(Integer, default=0, doc="Number of retry attempts")
+
+    def get_duration_seconds(self) -> Optional[float]:
+        """Get the duration of the sync operation in seconds."""
+        if not self.created_at:
+            return None
+
+        end_time = self.completed_at or datetime.now(timezone.utc)
+        return (end_time - self.created_at).total_seconds()
+
+    def get_progress_info(self) -> dict:
+        """Get comprehensive progress information."""
+        total_records_processed = (self.records_downloaded or 0) + (self.records_uploaded or 0)
+        total_bytes_transferred = (self.bytes_downloaded or 0) + (self.bytes_uploaded or 0)
+
+        return {
+            "status": self.status.value,
+            "progress_percentage": self.progress_percentage,
+            "records_processed": f"{total_records_processed}/{self.total_records_to_sync or 'unknown'}",
+            "records_downloaded": self.records_downloaded or 0,
+            "records_uploaded": self.records_uploaded or 0,
+            "bytes_transferred": total_bytes_transferred,
+            "bytes_downloaded": self.bytes_downloaded or 0,
+            "bytes_uploaded": self.bytes_uploaded or 0,
+            "duration_seconds": self.get_duration_seconds(),
+            "error_message": self.error_message,
+            "dataset_sync_hashes": self.dataset_sync_hashes or {},
+        }
+
+    def _get_all_sync_hashes(self) -> List[str]:
+        """Get all content hashes for data created/modified during this sync operation."""
+        all_hashes = set()
+        dataset_hashes = self.dataset_sync_hashes or {}
+
+        for dataset_id, operations in dataset_hashes.items():
+            if isinstance(operations, dict):
+                all_hashes.update(operations.get("uploaded", []))
+                all_hashes.update(operations.get("downloaded", []))
+
+        return list(all_hashes)
+
+    def _get_dataset_sync_hashes(self, dataset_id: str) -> dict:
+        """Get uploaded/downloaded hashes for a specific dataset."""
+        dataset_hashes = self.dataset_sync_hashes or {}
+        return dataset_hashes.get(dataset_id, {"uploaded": [], "downloaded": []})
+
+    def was_data_synced(self, content_hash: str, dataset_id: str = None) -> bool:
+        """
+        Check if a specific piece of data was part of this sync operation.
+
+        Args:
+            content_hash: The content hash to check for
+            dataset_id: Optional - check only within this dataset
+        """
+        if dataset_id:
+            dataset_hashes = self._get_dataset_sync_hashes(dataset_id)
+            return content_hash in dataset_hashes.get(
+                "uploaded", []
+            ) or content_hash in dataset_hashes.get("downloaded", [])
+
+        all_hashes = self._get_all_sync_hashes()
+        return content_hash in all_hashes
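The helper methods above give callers a single snapshot of a sync run. A minimal sketch of consuming them, assuming the model as reconstructed here (the run_id, counts, and hash values are made up; no database session is needed because the helpers only read plain column attributes — in practice rows would come from the get_sync_operation helpers in cognee/modules/sync/methods):

from cognee.modules.sync.models.SyncOperation import SyncOperation, SyncStatus

# Illustrative in-memory instance for demonstration purposes only.
op = SyncOperation(
    run_id="run-123",
    status=SyncStatus.IN_PROGRESS,
    progress_percentage=40,
    records_downloaded=20,
    records_uploaded=20,
    total_records_to_sync=100,
    dataset_sync_hashes={"ds-1": {"uploaded": ["abc123"], "downloaded": []}},
)

print(op.get_progress_info()["records_processed"])      # "40/100"
print(op.was_data_synced("abc123", dataset_id="ds-1"))  # True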
cognee/modules/users/__init__.py
CHANGED
@@ -4,4 +4,7 @@ from .delete_user import delete_user
 from .get_default_user import get_default_user
 from .get_user_by_email import get_user_by_email
 from .create_default_user import create_default_user
-from .get_authenticated_user import get_authenticated_user
+from .get_authenticated_user import (
+    get_authenticated_user,
+    REQUIRE_AUTHENTICATION,
+)
cognee/modules/users/methods/create_user.py
CHANGED
@@ -1,6 +1,10 @@
+from uuid import uuid4
 from fastapi_users.exceptions import UserAlreadyExists
-
+
 from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.modules.notebooks.methods import create_notebook
+from cognee.modules.notebooks.models.Notebook import NotebookCell
+from cognee.modules.users.exceptions import TenantNotFoundError
 from cognee.modules.users.get_user_manager import get_user_manager_context
 from cognee.modules.users.get_user_db import get_user_db_context
 from cognee.modules.users.models.User import UserCreate
@@ -56,6 +60,27 @@ async def create_user(
             if auto_login:
                 await session.refresh(user)
 
+                await create_notebook(
+                    user_id=user.id,
+                    notebook_name="Welcome to cognee 🧠",
+                    cells=[
+                        NotebookCell(
+                            id=uuid4(),
+                            name="Welcome",
+                            content="Cognee is your toolkit for turning text into a structured knowledge graph, optionally enhanced by ontologies, and then querying it with advanced retrieval techniques. This notebook will guide you through a simple example.",
+                            type="markdown",
+                        ),
+                        NotebookCell(
+                            id=uuid4(),
+                            name="Example",
+                            content="",
+                            type="code",
+                        ),
+                    ],
+                    deletable=False,
+                    session=session,
+                )
+
             return user
     except UserAlreadyExists as error:
         print(f"User {email} already exists")
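The welcome notebook is attached only on the auto_login path. A minimal sketch of exercising it, assuming a create_user(email, password, ..., auto_login=...) signature (the credentials are made up; check the function definition for the exact parameter list):

import asyncio

from cognee.modules.users.methods import create_user

async def main():
    # auto_login=True triggers the "Welcome to cognee 🧠" notebook creation above.
    user = await create_user("ada@example.com", "s3cret", auto_login=True)
    print(user.id)

asyncio.run(main())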
cognee/modules/users/methods/get_authenticated_user.py
CHANGED
@@ -1,48 +1,42 @@
+import os
+from typing import Optional
+from fastapi import Depends, HTTPException
+from ..models import User
 from ..get_fastapi_users import get_fastapi_users
+from .get_default_user import get_default_user
+from cognee.shared.logging_utils import get_logger
 
 
-
-
-get_authenticated_user = fastapi_users.current_user(active=True)
-
-# from types import SimpleNamespace
-
-# from ..get_fastapi_users import get_fastapi_users
-# from fastapi import HTTPException, Security
-# from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
-# import os
-# import jwt
-
-# from uuid import UUID
-
-# fastapi_users = get_fastapi_users()
+logger = get_logger("get_authenticated_user")
 
-#
-
+# Check environment variable to determine authentication requirement
+REQUIRE_AUTHENTICATION = (
+    os.getenv("REQUIRE_AUTHENTICATION", "false").lower() == "true"
+    or os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true"
+)
 
+fastapi_users = get_fastapi_users()
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-#
-
-
-
-#
-
-
-
-
-# except jwt.InvalidTokenError:
-#     raise HTTPException(status_code=401, detail="Invalid token")
+_auth_dependency = fastapi_users.current_user(active=True, optional=not REQUIRE_AUTHENTICATION)
+
+
+async def get_authenticated_user(
+    user: Optional[User] = Depends(_auth_dependency),
+) -> User:
+    """
+    Get authenticated user with environment-controlled behavior:
+    - If REQUIRE_AUTHENTICATION=true: Enforces authentication (raises 401 if not authenticated)
+    - If REQUIRE_AUTHENTICATION=false: Falls back to default user if not authenticated
+
+    Always returns a User object for consistent typing.
+    """
+    if user is None:
+        # When authentication is optional and user is None, use default user
+        try:
+            user = await get_default_user()
+        except Exception as e:
+            # Convert any get_default_user failure into a proper HTTP 500 error
+            logger.error(f"Failed to create default user: {str(e)}")
+            raise HTTPException(status_code=500, detail=f"Failed to create default user: {str(e)}")
+
+    return user
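The reworked dependency drops into FastAPI routes unchanged. A minimal sketch of a guarded endpoint (the /me route is illustrative, not part of cognee's API):

from fastapi import Depends, FastAPI

from cognee.modules.users.methods import get_authenticated_user
from cognee.modules.users.models import User

app = FastAPI()

@app.get("/me")
async def read_me(user: User = Depends(get_authenticated_user)):
    # With REQUIRE_AUTHENTICATION=false this yields the default user for
    # anonymous calls; with it set to true, anonymous calls receive a 401.
    return {"id": str(user.id), "email": user.email}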
cognee/modules/users/methods/get_default_user.py
CHANGED
@@ -29,7 +29,9 @@ async def get_default_user() -> SimpleNamespace:
 
         # We return a SimpleNamespace to have the same user type as our SaaS
         # SimpleNamespace is just a dictionary which can be accessed through attributes
-        auth_data = SimpleNamespace(
+        auth_data = SimpleNamespace(
+            id=user.id, email=user.email, tenant_id=user.tenant_id, roles=[]
+        )
         return auth_data
     except Exception as error:
         if "principals" in str(error.args):
cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py
CHANGED
@@ -1,4 +1,5 @@
 from uuid import UUID
+from typing import Optional
 from cognee.modules.data.models.Dataset import Dataset
 from cognee.modules.users.permissions.methods.get_all_user_permission_datasets import (
     get_all_user_permission_datasets,
@@ -8,7 +9,7 @@ from cognee.modules.users.methods import get_user
 
 
 async def get_specific_user_permission_datasets(
-    user_id: UUID, permission_type: str, dataset_ids: list[UUID] = None
+    user_id: UUID, permission_type: str, dataset_ids: Optional[list[UUID]] = None
 ) -> list[Dataset]:
     """
     Return a list of datasets user has given permission for. If a list of datasets is provided,
cognee/root_dir.py
CHANGED
@@ -1,4 +1,5 @@
 from pathlib import Path
+from typing import Optional
 
 ROOT_DIR = Path(__file__).resolve().parent
 
@@ -6,3 +7,21 @@ ROOT_DIR = Path(__file__).resolve().parent
 def get_absolute_path(path_from_root: str) -> str:
     absolute_path = ROOT_DIR / path_from_root
     return str(absolute_path.resolve())
+
+
+def ensure_absolute_path(path: str) -> str:
+    """Ensures a path is absolute.
+
+    Args:
+        path: The path to validate.
+
+    Returns:
+        Absolute path as string
+    """
+    if path is None:
+        raise ValueError("Path cannot be None")
+    path_obj = Path(path).expanduser()
+    if path_obj.is_absolute():
+        return str(path_obj.resolve())
+
+    raise ValueError(f"Path must be absolute. Got relative path: {path}")
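ensure_absolute_path deliberately refuses to resolve relative input against the working directory. A quick illustration of the contract (the paths are made up):

from cognee.root_dir import ensure_absolute_path

print(ensure_absolute_path("~/cognee_data"))  # "~" is expanded, so this passes as absolute

try:
    ensure_absolute_path("cognee_data/cache")
except ValueError as error:
    print(error)  # Path must be absolute. Got relative path: cognee_data/cache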
cognee/shared/logging_utils.py
CHANGED
@@ -268,7 +268,7 @@ def setup_logging(log_level=None, name=None):
     global _is_structlog_configured
 
     # Regular detailed logging for non-CLI usage
-    log_level = log_level if log_level else log_levels[os.getenv("LOG_LEVEL", "INFO")]
+    log_level = log_level if log_level else log_levels[os.getenv("LOG_LEVEL", "INFO").upper()]
 
     # Configure external library logging early to suppress verbose output
     configure_external_library_logging()
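The added .upper() makes the environment lookup case-insensitive: LOG_LEVEL=debug previously raised a KeyError because the log_levels mapping is keyed by uppercase names. A stand-in illustration (this log_levels dict only mimics the module's mapping):

import os

log_levels = {"DEBUG": 10, "INFO": 20, "WARNING": 30}  # stand-in for the real mapping

os.environ["LOG_LEVEL"] = "debug"
print(log_levels[os.getenv("LOG_LEVEL", "INFO").upper()])  # 10 instead of a KeyError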
cognee/tasks/codingagents/coding_rule_associations.py
ADDED
@@ -0,0 +1,127 @@
+from uuid import NAMESPACE_OID, uuid5
+
+from cognee.infrastructure.databases.graph import get_graph_engine
+from cognee.infrastructure.databases.vector import get_vector_engine
+
+from cognee.low_level import DataPoint
+from cognee.infrastructure.llm import LLMGateway
+from cognee.shared.logging_utils import get_logger
+from cognee.modules.engine.models import NodeSet
+from cognee.tasks.storage import add_data_points, index_graph_edges
+from typing import Optional, List, Any
+from pydantic import Field
+
+logger = get_logger("coding_rule_association")
+
+
+class Rule(DataPoint):
+    """A single developer rule extracted from text."""
+
+    text: str = Field(..., description="The coding rule associated with the conversation")
+    belongs_to_set: Optional[NodeSet] = None
+    metadata: dict = {"index_fields": ["rule"]}
+
+
+class RuleSet(DataPoint):
+    """Collection of parsed rules."""
+
+    rules: List[Rule] = Field(
+        ...,
+        description="List of developer rules extracted from the input text. Each rule represents a coding best practice or guideline.",
+    )
+
+
+async def get_existing_rules(rules_nodeset_name: str) -> List[str]:
+    graph_engine = await get_graph_engine()
+    nodes_data, _ = await graph_engine.get_nodeset_subgraph(
+        node_type=NodeSet, node_name=[rules_nodeset_name]
+    )
+
+    existing_rules = [
+        item[1]["text"]
+        for item in nodes_data
+        if isinstance(item, tuple)
+        and len(item) == 2
+        and isinstance(item[1], dict)
+        and "text" in item[1]
+    ]
+
+    return existing_rules
+
+
+async def get_origin_edges(data: str, rules: List[Rule]) -> list[Any]:
+    vector_engine = get_vector_engine()
+
+    origin_chunk = await vector_engine.search("DocumentChunk_text", data, limit=1)
+
+    try:
+        origin_id = origin_chunk[0].id
+    except (AttributeError, KeyError, TypeError, IndexError):
+        origin_id = None
+
+    relationships = []
+
+    if origin_id and isinstance(rules, (list, tuple)) and len(rules) > 0:
+        for rule in rules:
+            try:
+                rule_id = getattr(rule, "id", None)
+                if rule_id is not None:
+                    rel_name = "rule_associated_from"
+                    relationships.append(
+                        (
+                            rule_id,
+                            origin_id,
+                            rel_name,
+                            {
+                                "relationship_name": rel_name,
+                                "source_node_id": rule_id,
+                                "target_node_id": origin_id,
+                                "ontology_valid": False,
+                            },
+                        )
+                    )
+            except Exception as e:
+                logger.info(f"Warning: Skipping invalid rule due to error: {e}")
+    else:
+        logger.info("No valid origin_id or rules provided.")
+
+    return relationships
+
+
+async def add_rule_associations(
+    data: str,
+    rules_nodeset_name: str,
+    user_prompt_location: str = "coding_rule_association_agent_user.txt",
+    system_prompt_location: str = "coding_rule_association_agent_system.txt",
+):
+    if isinstance(data, list):
+        # If data is a list of strings join all strings in list
+        data = " ".join(data)
+
+    graph_engine = await get_graph_engine()
+    existing_rules = await get_existing_rules(rules_nodeset_name=rules_nodeset_name)
+    existing_rules = "\n".join(f"- {rule}" for rule in existing_rules)
+
+    user_context = {"chat": data, "rules": existing_rules}
+
+    user_prompt = LLMGateway.render_prompt(user_prompt_location, context=user_context)
+    system_prompt = LLMGateway.render_prompt(system_prompt_location, context={})
+
+    rule_list = await LLMGateway.acreate_structured_output(
+        text_input=user_prompt, system_prompt=system_prompt, response_model=RuleSet
+    )
+
+    rules_nodeset = NodeSet(
+        id=uuid5(NAMESPACE_OID, name=rules_nodeset_name), name=rules_nodeset_name
+    )
+    for rule in rule_list.rules:
+        rule.belongs_to_set = rules_nodeset
+
+    edges_to_save = await get_origin_edges(data=data, rules=rule_list.rules)
+
+    await add_data_points(data_points=rule_list.rules)
+
+    if len(edges_to_save) > 0:
+        await graph_engine.add_edges(edges_to_save)
+
+    await index_graph_edges()
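A minimal sketch of invoking the task directly, using the default prompt files from the signature above (the transcript text and node-set name are made up):

import asyncio

from cognee.tasks.codingagents.coding_rule_associations import add_rule_associations

async def main():
    await add_rule_associations(
        data="User: always pin dependency versions in CI. Assistant: noted.",
        rules_nodeset_name="developer_rules",
    )

asyncio.run(main())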
cognee/tasks/ingestion/save_data_item_to_storage.py
CHANGED
@@ -1,11 +1,15 @@
 import os
+from pathlib import Path
 from urllib.parse import urlparse
 from typing import Union, BinaryIO, Any
 
 from cognee.modules.ingestion.exceptions import IngestionError
 from cognee.modules.ingestion import save_data_to_file
+from cognee.shared.logging_utils import get_logger
 from pydantic_settings import BaseSettings, SettingsConfigDict
 
+logger = get_logger()
+
 
 class SaveDataSettings(BaseSettings):
     accept_local_file_path: bool = True
@@ -30,6 +34,16 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any]) -> str
     if isinstance(data_item, str):
         parsed_url = urlparse(data_item)
 
+        try:
+            # In case data item is a string with a relative path transform data item to absolute path and check
+            # if the file exists
+            abs_path = (Path.cwd() / Path(data_item)).resolve()
+            abs_path.is_file()
+        except (OSError, ValueError):
+            # In case file path is too long it's most likely not a relative path
+            logger.debug(f"Data item was too long to be a possible file path: {data_item}")
+            abs_path = Path("")
+
         # data is s3 file path
         if parsed_url.scheme == "s3":
             return data_item
@@ -56,6 +70,15 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any]) -> str
             return file_path
         else:
             raise IngestionError(message="Local files are not accepted.")
+    # Data is a relative file path
+    elif abs_path.is_file():
+        if settings.accept_local_file_path:
+            # Normalize path separators before creating file URL
+            normalized_path = os.path.normpath(abs_path)
+            # Use forward slashes in file URLs for consistency
+            url_path = normalized_path.replace(os.sep, "/")
+            file_path = "file://" + url_path
+            return file_path
 
     # data is text, save it to data storage and return the file path
     return await save_data_to_file(data_item)
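With this change a plain relative path is first resolved against the current working directory; if the file exists and local paths are accepted, it is returned as a file:// URL instead of falling through to text ingestion. A sketch (the file name is made up):

import asyncio

from cognee.tasks.ingestion.save_data_item_to_storage import save_data_item_to_storage

async def main():
    # If ./docs/notes.txt exists, this returns an absolute file:// URL;
    # a non-existent relative string is still saved as plain text data.
    print(await save_data_item_to_storage("docs/notes.txt"))

asyncio.run(main())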
cognee/tasks/memify/extract_subgraph_chunks.py
ADDED
@@ -0,0 +1,11 @@
+from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
+
+
+async def extract_subgraph_chunks(subgraphs: list[CogneeGraph]):
+    """
+    Get all Document Chunks from subgraphs and forward to next task in pipeline
+    """
+    for subgraph in subgraphs:
+        for node in subgraph.nodes.values():
+            if node.attributes["type"] == "DocumentChunk":
+                yield node.attributes["text"]
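extract_subgraph_chunks is an async generator, so downstream tasks consume it with async for. A minimal sketch (a real subgraphs list would come from the preceding extract_subgraph task in a memify pipeline):

import asyncio

from cognee.tasks.memify.extract_subgraph_chunks import extract_subgraph_chunks

async def main(subgraphs):
    async for chunk_text in extract_subgraph_chunks(subgraphs):
        print(chunk_text[:80])  # each yielded item is one DocumentChunk's text

asyncio.run(main([]))  # runs, but yields nothing without real subgraphs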