cognee 0.2.4__py3-none-any.whl → 0.3.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. cognee/__init__.py +1 -0
  2. cognee/api/client.py +28 -3
  3. cognee/api/health.py +10 -13
  4. cognee/api/v1/add/add.py +3 -1
  5. cognee/api/v1/add/routers/get_add_router.py +12 -37
  6. cognee/api/v1/cloud/routers/__init__.py +1 -0
  7. cognee/api/v1/cloud/routers/get_checks_router.py +23 -0
  8. cognee/api/v1/cognify/code_graph_pipeline.py +9 -4
  9. cognee/api/v1/cognify/cognify.py +50 -3
  10. cognee/api/v1/cognify/routers/get_cognify_router.py +1 -1
  11. cognee/api/v1/datasets/routers/get_datasets_router.py +15 -4
  12. cognee/api/v1/memify/__init__.py +0 -0
  13. cognee/api/v1/memify/routers/__init__.py +1 -0
  14. cognee/api/v1/memify/routers/get_memify_router.py +100 -0
  15. cognee/api/v1/notebooks/routers/__init__.py +1 -0
  16. cognee/api/v1/notebooks/routers/get_notebooks_router.py +96 -0
  17. cognee/api/v1/search/routers/get_search_router.py +20 -1
  18. cognee/api/v1/search/search.py +11 -4
  19. cognee/api/v1/sync/__init__.py +17 -0
  20. cognee/api/v1/sync/routers/__init__.py +3 -0
  21. cognee/api/v1/sync/routers/get_sync_router.py +241 -0
  22. cognee/api/v1/sync/sync.py +877 -0
  23. cognee/api/v1/users/routers/get_auth_router.py +13 -1
  24. cognee/base_config.py +10 -1
  25. cognee/infrastructure/databases/graph/config.py +10 -4
  26. cognee/infrastructure/databases/graph/kuzu/adapter.py +135 -0
  27. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +89 -0
  28. cognee/infrastructure/databases/relational/__init__.py +2 -0
  29. cognee/infrastructure/databases/relational/get_async_session.py +15 -0
  30. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +6 -1
  31. cognee/infrastructure/databases/relational/with_async_session.py +25 -0
  32. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +1 -1
  33. cognee/infrastructure/databases/vector/config.py +13 -6
  34. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +1 -1
  35. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +2 -6
  36. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +4 -1
  37. cognee/infrastructure/files/storage/LocalFileStorage.py +9 -0
  38. cognee/infrastructure/files/storage/S3FileStorage.py +5 -0
  39. cognee/infrastructure/files/storage/StorageManager.py +7 -1
  40. cognee/infrastructure/files/storage/storage.py +16 -0
  41. cognee/infrastructure/llm/LLMGateway.py +18 -0
  42. cognee/infrastructure/llm/config.py +4 -2
  43. cognee/infrastructure/llm/prompts/extract_query_time.txt +15 -0
  44. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +25 -0
  45. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +30 -0
  46. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +2 -0
  47. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py +44 -0
  48. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py +1 -0
  49. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py +46 -0
  50. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +25 -1
  51. cognee/infrastructure/utils/run_sync.py +8 -1
  52. cognee/modules/chunking/models/DocumentChunk.py +4 -3
  53. cognee/modules/cloud/exceptions/CloudApiKeyMissingError.py +15 -0
  54. cognee/modules/cloud/exceptions/CloudConnectionError.py +15 -0
  55. cognee/modules/cloud/exceptions/__init__.py +2 -0
  56. cognee/modules/cloud/operations/__init__.py +1 -0
  57. cognee/modules/cloud/operations/check_api_key.py +25 -0
  58. cognee/modules/data/deletion/prune_system.py +1 -1
  59. cognee/modules/data/methods/check_dataset_name.py +1 -1
  60. cognee/modules/data/methods/get_dataset_data.py +1 -1
  61. cognee/modules/data/methods/load_or_create_datasets.py +1 -1
  62. cognee/modules/engine/models/Event.py +16 -0
  63. cognee/modules/engine/models/Interval.py +8 -0
  64. cognee/modules/engine/models/Timestamp.py +13 -0
  65. cognee/modules/engine/models/__init__.py +3 -0
  66. cognee/modules/engine/utils/__init__.py +2 -0
  67. cognee/modules/engine/utils/generate_event_datapoint.py +46 -0
  68. cognee/modules/engine/utils/generate_timestamp_datapoint.py +51 -0
  69. cognee/modules/graph/cognee_graph/CogneeGraph.py +2 -2
  70. cognee/modules/graph/utils/__init__.py +1 -0
  71. cognee/modules/graph/utils/resolve_edges_to_text.py +71 -0
  72. cognee/modules/memify/__init__.py +1 -0
  73. cognee/modules/memify/memify.py +118 -0
  74. cognee/modules/notebooks/methods/__init__.py +5 -0
  75. cognee/modules/notebooks/methods/create_notebook.py +26 -0
  76. cognee/modules/notebooks/methods/delete_notebook.py +13 -0
  77. cognee/modules/notebooks/methods/get_notebook.py +21 -0
  78. cognee/modules/notebooks/methods/get_notebooks.py +18 -0
  79. cognee/modules/notebooks/methods/update_notebook.py +17 -0
  80. cognee/modules/notebooks/models/Notebook.py +53 -0
  81. cognee/modules/notebooks/models/__init__.py +1 -0
  82. cognee/modules/notebooks/operations/__init__.py +1 -0
  83. cognee/modules/notebooks/operations/run_in_local_sandbox.py +55 -0
  84. cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +19 -3
  85. cognee/modules/pipelines/operations/pipeline.py +1 -0
  86. cognee/modules/pipelines/operations/run_tasks.py +17 -41
  87. cognee/modules/retrieval/base_graph_retriever.py +18 -0
  88. cognee/modules/retrieval/base_retriever.py +1 -1
  89. cognee/modules/retrieval/code_retriever.py +8 -0
  90. cognee/modules/retrieval/coding_rules_retriever.py +31 -0
  91. cognee/modules/retrieval/completion_retriever.py +9 -3
  92. cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -0
  93. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +23 -14
  94. cognee/modules/retrieval/graph_completion_cot_retriever.py +21 -11
  95. cognee/modules/retrieval/graph_completion_retriever.py +32 -65
  96. cognee/modules/retrieval/graph_summary_completion_retriever.py +3 -1
  97. cognee/modules/retrieval/insights_retriever.py +14 -3
  98. cognee/modules/retrieval/summaries_retriever.py +1 -1
  99. cognee/modules/retrieval/temporal_retriever.py +152 -0
  100. cognee/modules/retrieval/utils/brute_force_triplet_search.py +7 -32
  101. cognee/modules/retrieval/utils/completion.py +10 -3
  102. cognee/modules/search/methods/get_search_type_tools.py +168 -0
  103. cognee/modules/search/methods/no_access_control_search.py +47 -0
  104. cognee/modules/search/methods/search.py +219 -139
  105. cognee/modules/search/types/SearchResult.py +21 -0
  106. cognee/modules/search/types/SearchType.py +2 -0
  107. cognee/modules/search/types/__init__.py +1 -0
  108. cognee/modules/search/utils/__init__.py +2 -0
  109. cognee/modules/search/utils/prepare_search_result.py +41 -0
  110. cognee/modules/search/utils/transform_context_to_graph.py +38 -0
  111. cognee/modules/sync/__init__.py +1 -0
  112. cognee/modules/sync/methods/__init__.py +23 -0
  113. cognee/modules/sync/methods/create_sync_operation.py +53 -0
  114. cognee/modules/sync/methods/get_sync_operation.py +107 -0
  115. cognee/modules/sync/methods/update_sync_operation.py +248 -0
  116. cognee/modules/sync/models/SyncOperation.py +142 -0
  117. cognee/modules/sync/models/__init__.py +3 -0
  118. cognee/modules/users/__init__.py +0 -1
  119. cognee/modules/users/methods/__init__.py +4 -1
  120. cognee/modules/users/methods/create_user.py +26 -1
  121. cognee/modules/users/methods/get_authenticated_user.py +36 -42
  122. cognee/modules/users/methods/get_default_user.py +3 -1
  123. cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +2 -1
  124. cognee/root_dir.py +19 -0
  125. cognee/shared/logging_utils.py +1 -1
  126. cognee/tasks/codingagents/__init__.py +0 -0
  127. cognee/tasks/codingagents/coding_rule_associations.py +127 -0
  128. cognee/tasks/ingestion/save_data_item_to_storage.py +23 -0
  129. cognee/tasks/memify/__init__.py +2 -0
  130. cognee/tasks/memify/extract_subgraph.py +7 -0
  131. cognee/tasks/memify/extract_subgraph_chunks.py +11 -0
  132. cognee/tasks/repo_processor/get_repo_file_dependencies.py +52 -27
  133. cognee/tasks/temporal_graph/__init__.py +1 -0
  134. cognee/tasks/temporal_graph/add_entities_to_event.py +85 -0
  135. cognee/tasks/temporal_graph/enrich_events.py +34 -0
  136. cognee/tasks/temporal_graph/extract_events_and_entities.py +32 -0
  137. cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py +41 -0
  138. cognee/tasks/temporal_graph/models.py +49 -0
  139. cognee/tests/test_kuzu.py +4 -4
  140. cognee/tests/test_neo4j.py +4 -4
  141. cognee/tests/test_permissions.py +3 -3
  142. cognee/tests/test_relational_db_migration.py +7 -5
  143. cognee/tests/test_search_db.py +18 -24
  144. cognee/tests/test_temporal_graph.py +167 -0
  145. cognee/tests/unit/api/__init__.py +1 -0
  146. cognee/tests/unit/api/test_conditional_authentication_endpoints.py +246 -0
  147. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +18 -2
  148. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +13 -16
  149. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +11 -16
  150. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +5 -4
  151. cognee/tests/unit/modules/retrieval/insights_retriever_test.py +4 -2
  152. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +18 -2
  153. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +225 -0
  154. cognee/tests/unit/modules/users/__init__.py +1 -0
  155. cognee/tests/unit/modules/users/test_conditional_authentication.py +277 -0
  156. cognee/tests/unit/processing/utils/utils_test.py +20 -1
  157. {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/METADATA +8 -6
  158. {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/RECORD +162 -89
  159. cognee/tests/unit/modules/search/search_methods_test.py +0 -225
  160. {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/WHEEL +0 -0
  161. {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/entry_points.txt +0 -0
  162. {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/licenses/LICENSE +0 -0
  163. {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,142 @@
1
from uuid import uuid4
from enum import Enum
from typing import Optional, List
from datetime import datetime, timezone
from sqlalchemy import (
    Column,
    Text,
    DateTime,
    UUID as SQLAlchemy_UUID,
    Integer,
    Enum as SQLEnum,
    JSON,
)

from cognee.infrastructure.databases.relational import Base


class SyncStatus(str, Enum):
    """Enumeration of possible sync operation statuses."""

    STARTED = "started"
    IN_PROGRESS = "in_progress"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"


class SyncOperation(Base):
    """
    Database model for tracking sync operations.

    This model stores information about background sync operations,
    allowing users to monitor progress and query the status of their sync requests.
    """

    __tablename__ = "sync_operations"

    # Primary identifiers
    id = Column(SQLAlchemy_UUID, primary_key=True, default=uuid4, doc="Database primary key")
    run_id = Column(Text, unique=True, index=True, doc="Public run ID returned to users")

    # Status and progress tracking
    status = Column(
        SQLEnum(SyncStatus), default=SyncStatus.STARTED, doc="Current status of the sync operation"
    )
    progress_percentage = Column(Integer, default=0, doc="Progress percentage (0-100)")

    # Operation metadata
    dataset_ids = Column(JSON, doc="Array of dataset IDs being synced")
    dataset_names = Column(JSON, doc="Array of dataset names being synced")
    user_id = Column(SQLAlchemy_UUID, index=True, doc="ID of the user who initiated the sync")

    # Timing information
    created_at = Column(
        DateTime(timezone=True),
        default=lambda: datetime.now(timezone.utc),
        doc="When the sync was initiated",
    )
    started_at = Column(DateTime(timezone=True), doc="When the actual sync processing began")
    completed_at = Column(
        DateTime(timezone=True), doc="When the sync finished (success or failure)"
    )

    # Operation details
    total_records_to_sync = Column(Integer, doc="Total number of records to sync")
    total_records_to_download = Column(Integer, doc="Total number of records to download")
    total_records_to_upload = Column(Integer, doc="Total number of records to upload")

    records_downloaded = Column(Integer, default=0, doc="Number of records successfully downloaded")
    records_uploaded = Column(Integer, default=0, doc="Number of records successfully uploaded")
    bytes_downloaded = Column(Integer, default=0, doc="Total bytes downloaded from cloud")
    bytes_uploaded = Column(Integer, default=0, doc="Total bytes uploaded to cloud")

    # Data lineage tracking per dataset
    dataset_sync_hashes = Column(
        JSON, doc="Mapping of dataset_id -> {uploaded: [hashes], downloaded: [hashes]}"
    )

    # Error handling
    error_message = Column(Text, doc="Error message if sync failed")
    retry_count = Column(Integer, default=0, doc="Number of retry attempts")

    def get_duration_seconds(self) -> Optional[float]:
        """Get the duration of the sync operation in seconds.

        Returns None when the operation has no creation timestamp; for still-running
        operations the duration is measured up to the current time.
        """
        if not self.created_at:
            return None

        # Fall back to "now" for operations that have not completed yet.
        end_time = self.completed_at or datetime.now(timezone.utc)
        return (end_time - self.created_at).total_seconds()

    def get_progress_info(self) -> dict:
        """Get comprehensive progress information as a plain dict for API responses."""
        total_records_processed = (self.records_downloaded or 0) + (self.records_uploaded or 0)
        total_bytes_transferred = (self.bytes_downloaded or 0) + (self.bytes_uploaded or 0)

        return {
            "status": self.status.value,
            "progress_percentage": self.progress_percentage,
            "records_processed": f"{total_records_processed}/{self.total_records_to_sync or 'unknown'}",
            "records_downloaded": self.records_downloaded or 0,
            "records_uploaded": self.records_uploaded or 0,
            "bytes_transferred": total_bytes_transferred,
            "bytes_downloaded": self.bytes_downloaded or 0,
            "bytes_uploaded": self.bytes_uploaded or 0,
            "duration_seconds": self.get_duration_seconds(),
            "error_message": self.error_message,
            "dataset_sync_hashes": self.dataset_sync_hashes or {},
        }

    def _get_all_sync_hashes(self) -> List[str]:
        """Get all content hashes for data created/modified during this sync operation."""
        all_hashes = set()
        dataset_hashes = self.dataset_sync_hashes or {}

        # Only keys are unused here, so iterate values directly.
        for operations in dataset_hashes.values():
            if isinstance(operations, dict):
                all_hashes.update(operations.get("uploaded", []))
                all_hashes.update(operations.get("downloaded", []))

        return list(all_hashes)

    def _get_dataset_sync_hashes(self, dataset_id: str) -> dict:
        """Get uploaded/downloaded hashes for a specific dataset."""
        dataset_hashes = self.dataset_sync_hashes or {}
        return dataset_hashes.get(dataset_id, {"uploaded": [], "downloaded": []})

    def was_data_synced(self, content_hash: str, dataset_id: str = None) -> bool:
        """
        Check if a specific piece of data was part of this sync operation.

        Args:
            content_hash: The content hash to check for
            dataset_id: Optional - check only within this dataset
        """
        if dataset_id:
            # BUGFIX: the helpers are defined with a leading underscore;
            # the previous calls to get_dataset_sync_hashes / get_all_sync_hashes
            # raised AttributeError at runtime.
            dataset_hashes = self._get_dataset_sync_hashes(dataset_id)
            return content_hash in dataset_hashes.get(
                "uploaded", []
            ) or content_hash in dataset_hashes.get("downloaded", [])

        all_hashes = self._get_all_sync_hashes()
        return content_hash in all_hashes
@@ -0,0 +1,3 @@
1
+ from .SyncOperation import SyncOperation, SyncStatus
2
+
3
+ __all__ = ["SyncOperation", "SyncStatus"]
@@ -1,2 +1 @@
1
1
  from .get_user_db import get_user_db
2
- from .get_user_db import get_async_session
@@ -4,4 +4,7 @@ from .delete_user import delete_user
4
4
  from .get_default_user import get_default_user
5
5
  from .get_user_by_email import get_user_by_email
6
6
  from .create_default_user import create_default_user
7
- from .get_authenticated_user import get_authenticated_user
7
+ from .get_authenticated_user import (
8
+ get_authenticated_user,
9
+ REQUIRE_AUTHENTICATION,
10
+ )
@@ -1,6 +1,10 @@
1
+ from uuid import uuid4
1
2
  from fastapi_users.exceptions import UserAlreadyExists
2
- from cognee.modules.users.exceptions import TenantNotFoundError
3
+
3
4
  from cognee.infrastructure.databases.relational import get_relational_engine
5
+ from cognee.modules.notebooks.methods import create_notebook
6
+ from cognee.modules.notebooks.models.Notebook import NotebookCell
7
+ from cognee.modules.users.exceptions import TenantNotFoundError
4
8
  from cognee.modules.users.get_user_manager import get_user_manager_context
5
9
  from cognee.modules.users.get_user_db import get_user_db_context
6
10
  from cognee.modules.users.models.User import UserCreate
@@ -56,6 +60,27 @@ async def create_user(
56
60
  if auto_login:
57
61
  await session.refresh(user)
58
62
 
63
+ await create_notebook(
64
+ user_id=user.id,
65
+ notebook_name="Welcome to cognee 🧠",
66
+ cells=[
67
+ NotebookCell(
68
+ id=uuid4(),
69
+ name="Welcome",
70
+ content="Cognee is your toolkit for turning text into a structured knowledge graph, optionally enhanced by ontologies, and then querying it with advanced retrieval techniques. This notebook will guide you through a simple example.",
71
+ type="markdown",
72
+ ),
73
+ NotebookCell(
74
+ id=uuid4(),
75
+ name="Example",
76
+ content="",
77
+ type="code",
78
+ ),
79
+ ],
80
+ deletable=False,
81
+ session=session,
82
+ )
83
+
59
84
  return user
60
85
  except UserAlreadyExists as error:
61
86
  print(f"User {email} already exists")
@@ -1,48 +1,42 @@
1
import os
from typing import Optional
from fastapi import Depends, HTTPException
from ..models import User
from ..get_fastapi_users import get_fastapi_users
from .get_default_user import get_default_user
from cognee.shared.logging_utils import get_logger


logger = get_logger("get_authenticated_user")

# Check environment variables to determine authentication requirement.
# Either flag set to "true" (case-insensitive) enforces authentication.
REQUIRE_AUTHENTICATION = (
    os.getenv("REQUIRE_AUTHENTICATION", "false").lower() == "true"
    or os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true"
)

fastapi_users = get_fastapi_users()

# When authentication is optional, the dependency resolves to None for
# unauthenticated requests instead of raising 401.
_auth_dependency = fastapi_users.current_user(active=True, optional=not REQUIRE_AUTHENTICATION)


async def get_authenticated_user(
    user: Optional[User] = Depends(_auth_dependency),
) -> User:
    """
    Get authenticated user with environment-controlled behavior:
    - If REQUIRE_AUTHENTICATION=true: Enforces authentication (raises 401 if not authenticated)
    - If REQUIRE_AUTHENTICATION=false: Falls back to default user if not authenticated

    Always returns a User object for consistent typing.

    Raises:
        HTTPException: 500 if the default-user fallback itself fails.
    """
    if user is None:
        # When authentication is optional and user is None, use default user
        try:
            user = await get_default_user()
        except Exception as e:
            # Convert any get_default_user failure into a proper HTTP 500 error.
            # Use lazy %-formatting and preserve the exception chain for debugging.
            logger.error("Failed to create default user: %s", e)
            raise HTTPException(
                status_code=500, detail=f"Failed to create default user: {str(e)}"
            ) from e

    return user
@@ -29,7 +29,9 @@ async def get_default_user() -> SimpleNamespace:
29
29
 
30
30
  # We return a SimpleNamespace to have the same user type as our SaaS
31
31
  # SimpleNamespace is just a dictionary which can be accessed through attributes
32
- auth_data = SimpleNamespace(id=user.id, tenant_id=user.tenant_id, roles=[])
32
+ auth_data = SimpleNamespace(
33
+ id=user.id, email=user.email, tenant_id=user.tenant_id, roles=[]
34
+ )
33
35
  return auth_data
34
36
  except Exception as error:
35
37
  if "principals" in str(error.args):
@@ -1,4 +1,5 @@
1
1
  from uuid import UUID
2
+ from typing import Optional
2
3
  from cognee.modules.data.models.Dataset import Dataset
3
4
  from cognee.modules.users.permissions.methods.get_all_user_permission_datasets import (
4
5
  get_all_user_permission_datasets,
@@ -8,7 +9,7 @@ from cognee.modules.users.methods import get_user
8
9
 
9
10
 
10
11
  async def get_specific_user_permission_datasets(
11
- user_id: UUID, permission_type: str, dataset_ids: list[UUID] = None
12
+ user_id: UUID, permission_type: str, dataset_ids: Optional[list[UUID]] = None
12
13
  ) -> list[Dataset]:
13
14
  """
14
15
  Return a list of datasets user has given permission for. If a list of datasets is provided,
cognee/root_dir.py CHANGED
@@ -1,4 +1,5 @@
1
1
  from pathlib import Path
2
+ from typing import Optional
2
3
 
3
4
  ROOT_DIR = Path(__file__).resolve().parent
4
5
 
@@ -6,3 +7,21 @@ ROOT_DIR = Path(__file__).resolve().parent
6
7
  def get_absolute_path(path_from_root: str) -> str:
7
8
  absolute_path = ROOT_DIR / path_from_root
8
9
  return str(absolute_path.resolve())
10
+
11
+
12
def ensure_absolute_path(path: str) -> str:
    """Validate that *path* is absolute and return its resolved form.

    Args:
        path: The path to validate.

    Returns:
        Absolute path as string

    Raises:
        ValueError: If *path* is None or is a relative path.
    """
    if path is None:
        raise ValueError("Path cannot be None")

    expanded = Path(path).expanduser()
    if not expanded.is_absolute():
        raise ValueError(f"Path must be absolute. Got relative path: {path}")

    return str(expanded.resolve())
@@ -268,7 +268,7 @@ def setup_logging(log_level=None, name=None):
268
268
  global _is_structlog_configured
269
269
 
270
270
  # Regular detailed logging for non-CLI usage
271
- log_level = log_level if log_level else log_levels[os.getenv("LOG_LEVEL", "INFO")]
271
+ log_level = log_level if log_level else log_levels[os.getenv("LOG_LEVEL", "INFO").upper()]
272
272
 
273
273
  # Configure external library logging early to suppress verbose output
274
274
  configure_external_library_logging()
File without changes
@@ -0,0 +1,127 @@
1
from uuid import NAMESPACE_OID, uuid5

from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.infrastructure.databases.vector import get_vector_engine

from cognee.low_level import DataPoint
from cognee.infrastructure.llm import LLMGateway
from cognee.shared.logging_utils import get_logger
from cognee.modules.engine.models import NodeSet
from cognee.tasks.storage import add_data_points, index_graph_edges
from typing import Optional, List, Any
from pydantic import Field

logger = get_logger("coding_rule_association")


class Rule(DataPoint):
    """A single developer rule extracted from text."""

    text: str = Field(..., description="The coding rule associated with the conversation")
    belongs_to_set: Optional[NodeSet] = None
    # BUGFIX: index_fields must name an existing field on this model.
    # The field holding the rule content is `text`, not `rule`.
    metadata: dict = {"index_fields": ["text"]}


class RuleSet(DataPoint):
    """Collection of parsed rules."""

    rules: List[Rule] = Field(
        ...,
        description="List of developer rules extracted from the input text. Each rule represents a coding best practice or guideline.",
    )


async def get_existing_rules(rules_nodeset_name: str) -> List[str]:
    """Return the text of every rule already stored in the named node set.

    Args:
        rules_nodeset_name: Name of the NodeSet that groups coding rules.

    Returns:
        List of rule texts found in the node-set subgraph.
    """
    graph_engine = await get_graph_engine()
    nodes_data, _ = await graph_engine.get_nodeset_subgraph(
        node_type=NodeSet, node_name=[rules_nodeset_name]
    )

    # Each node is expected as a (node_id, attributes) tuple; filter defensively
    # so malformed entries are skipped rather than raising.
    existing_rules = [
        item[1]["text"]
        for item in nodes_data
        if isinstance(item, tuple)
        and len(item) == 2
        and isinstance(item[1], dict)
        and "text" in item[1]
    ]

    return existing_rules


async def get_origin_edges(data: str, rules: List[Rule]) -> list[Any]:
    """Build edges linking each rule to the chunk the rules were derived from.

    Args:
        data: The source text; used to locate the closest DocumentChunk.
        rules: Rules to link back to the origin chunk.

    Returns:
        List of (source_id, target_id, name, properties) edge tuples; empty
        when no origin chunk or no valid rules are available.
    """
    vector_engine = get_vector_engine()

    origin_chunk = await vector_engine.search("DocumentChunk_text", data, limit=1)

    try:
        origin_id = origin_chunk[0].id
    except (AttributeError, KeyError, TypeError, IndexError):
        # No matching chunk found (or unexpected result shape) — no edges to build.
        origin_id = None

    relationships = []

    if origin_id and isinstance(rules, (list, tuple)) and len(rules) > 0:
        for rule in rules:
            try:
                rule_id = getattr(rule, "id", None)
                if rule_id is not None:
                    rel_name = "rule_associated_from"
                    relationships.append(
                        (
                            rule_id,
                            origin_id,
                            rel_name,
                            {
                                "relationship_name": rel_name,
                                "source_node_id": rule_id,
                                "target_node_id": origin_id,
                                "ontology_valid": False,
                            },
                        )
                    )
            except Exception as e:
                # Best-effort: a bad rule should not abort edge creation.
                # Logged at warning level since data is being dropped.
                logger.warning("Skipping invalid rule due to error: %s", e)
    else:
        logger.info("No valid origin_id or rules provided.")

    return relationships


async def add_rule_associations(
    data: str,
    rules_nodeset_name: str,
    user_prompt_location: str = "coding_rule_association_agent_user.txt",
    system_prompt_location: str = "coding_rule_association_agent_system.txt",
):
    """Extract coding rules from text via the LLM and persist them in the graph.

    Existing rules in the node set are passed to the LLM as context so duplicates
    can be avoided. New rules are stored as data points, linked to their origin
    chunk, and graph edges are re-indexed.

    Args:
        data: Conversation/text to extract rules from (a list of strings is joined).
        rules_nodeset_name: Name of the NodeSet grouping the rules.
        user_prompt_location: Prompt template file for the user message.
        system_prompt_location: Prompt template file for the system message.
    """
    if isinstance(data, list):
        # If data is a list of strings join all strings in list
        data = " ".join(data)

    graph_engine = await get_graph_engine()
    existing_rules = await get_existing_rules(rules_nodeset_name=rules_nodeset_name)
    existing_rules = "\n".join(f"- {rule}" for rule in existing_rules)

    user_context = {"chat": data, "rules": existing_rules}

    user_prompt = LLMGateway.render_prompt(user_prompt_location, context=user_context)
    system_prompt = LLMGateway.render_prompt(system_prompt_location, context={})

    rule_list = await LLMGateway.acreate_structured_output(
        text_input=user_prompt, system_prompt=system_prompt, response_model=RuleSet
    )

    # Deterministic node-set id so repeated runs reuse the same NodeSet node.
    rules_nodeset = NodeSet(
        id=uuid5(NAMESPACE_OID, name=rules_nodeset_name), name=rules_nodeset_name
    )
    for rule in rule_list.rules:
        rule.belongs_to_set = rules_nodeset

    edges_to_save = await get_origin_edges(data=data, rules=rule_list.rules)

    await add_data_points(data_points=rule_list.rules)

    if len(edges_to_save) > 0:
        await graph_engine.add_edges(edges_to_save)

    await index_graph_edges()
@@ -1,11 +1,15 @@
1
1
  import os
2
+ from pathlib import Path
2
3
  from urllib.parse import urlparse
3
4
  from typing import Union, BinaryIO, Any
4
5
 
5
6
  from cognee.modules.ingestion.exceptions import IngestionError
6
7
  from cognee.modules.ingestion import save_data_to_file
8
+ from cognee.shared.logging_utils import get_logger
7
9
  from pydantic_settings import BaseSettings, SettingsConfigDict
8
10
 
11
+ logger = get_logger()
12
+
9
13
 
10
14
  class SaveDataSettings(BaseSettings):
11
15
  accept_local_file_path: bool = True
@@ -30,6 +34,16 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any]) -> str
30
34
  if isinstance(data_item, str):
31
35
  parsed_url = urlparse(data_item)
32
36
 
37
+ try:
38
+ # In case data item is a string with a relative path transform data item to absolute path and check
39
+ # if the file exists
40
+ abs_path = (Path.cwd() / Path(data_item)).resolve()
41
+ abs_path.is_file()
42
+ except (OSError, ValueError):
43
+ # In case file path is too long it's most likely not a relative path
44
+ logger.debug(f"Data item was too long to be a possible file path: {abs_path}")
45
+ abs_path = Path("")
46
+
33
47
  # data is s3 file path
34
48
  if parsed_url.scheme == "s3":
35
49
  return data_item
@@ -56,6 +70,15 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any]) -> str
56
70
  return file_path
57
71
  else:
58
72
  raise IngestionError(message="Local files are not accepted.")
73
+ # Data is a relative file path
74
+ elif abs_path.is_file():
75
+ if settings.accept_local_file_path:
76
+ # Normalize path separators before creating file URL
77
+ normalized_path = os.path.normpath(abs_path)
78
+ # Use forward slashes in file URLs for consistency
79
+ url_path = normalized_path.replace(os.sep, "/")
80
+ file_path = "file://" + url_path
81
+ return file_path
59
82
 
60
83
  # data is text, save it to data storage and return the file path
61
84
  return await save_data_to_file(data_item)
@@ -0,0 +1,2 @@
1
+ from .extract_subgraph import extract_subgraph
2
+ from .extract_subgraph_chunks import extract_subgraph_chunks
@@ -0,0 +1,7 @@
1
+ from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
2
+
3
+
4
async def extract_subgraph(subgraphs: list[CogneeGraph]):
    """Stream every edge contained in the given subgraphs, in order."""
    for graph in subgraphs:
        for graph_edge in graph.edges:
            yield graph_edge
@@ -0,0 +1,11 @@
1
+ from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
2
+
3
+
4
async def extract_subgraph_chunks(subgraphs: list[CogneeGraph]):
    """
    Yield the text of every DocumentChunk node found in the given subgraphs,
    forwarding each chunk to the next task in the pipeline.
    """
    for graph in subgraphs:
        for graph_node in graph.nodes.values():
            if graph_node.attributes["type"] == "DocumentChunk":
                yield graph_node.attributes["text"]