cognee 0.4.1__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. cognee/__init__.py +1 -0
  2. cognee/api/client.py +8 -0
  3. cognee/api/v1/add/routers/get_add_router.py +3 -1
  4. cognee/api/v1/cognify/routers/get_cognify_router.py +28 -1
  5. cognee/api/v1/ontologies/__init__.py +4 -0
  6. cognee/api/v1/ontologies/ontologies.py +183 -0
  7. cognee/api/v1/ontologies/routers/__init__.py +0 -0
  8. cognee/api/v1/ontologies/routers/get_ontology_router.py +107 -0
  9. cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
  10. cognee/cli/commands/cognify_command.py +8 -1
  11. cognee/cli/config.py +1 -1
  12. cognee/context_global_variables.py +41 -9
  13. cognee/infrastructure/databases/cache/config.py +3 -1
  14. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
  15. cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
  16. cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
  17. cognee/infrastructure/databases/graph/config.py +4 -0
  18. cognee/infrastructure/databases/graph/get_graph_engine.py +2 -0
  19. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
  20. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +37 -3
  21. cognee/infrastructure/databases/vector/config.py +3 -0
  22. cognee/infrastructure/databases/vector/create_vector_engine.py +5 -1
  23. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +1 -4
  24. cognee/infrastructure/engine/models/Edge.py +13 -1
  25. cognee/infrastructure/files/utils/guess_file_type.py +4 -0
  26. cognee/infrastructure/llm/config.py +2 -0
  27. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +5 -2
  28. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +7 -1
  29. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +7 -1
  30. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +8 -16
  31. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +12 -2
  32. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +13 -2
  33. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +5 -2
  34. cognee/infrastructure/loaders/LoaderEngine.py +1 -0
  35. cognee/infrastructure/loaders/core/__init__.py +2 -1
  36. cognee/infrastructure/loaders/core/csv_loader.py +93 -0
  37. cognee/infrastructure/loaders/core/text_loader.py +1 -2
  38. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
  39. cognee/infrastructure/loaders/supported_loaders.py +2 -1
  40. cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
  41. cognee/modules/chunking/CsvChunker.py +35 -0
  42. cognee/modules/chunking/models/DocumentChunk.py +2 -1
  43. cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
  44. cognee/modules/data/methods/__init__.py +1 -0
  45. cognee/modules/data/methods/create_dataset.py +4 -2
  46. cognee/modules/data/methods/get_dataset_ids.py +5 -1
  47. cognee/modules/data/methods/get_unique_data_id.py +68 -0
  48. cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
  49. cognee/modules/data/models/Dataset.py +2 -0
  50. cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
  51. cognee/modules/data/processing/document_types/__init__.py +1 -0
  52. cognee/modules/graph/cognee_graph/CogneeGraph.py +4 -2
  53. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
  54. cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
  55. cognee/modules/ingestion/identify.py +4 -4
  56. cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
  57. cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
  58. cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
  59. cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
  60. cognee/modules/retrieval/base_graph_retriever.py +7 -3
  61. cognee/modules/retrieval/base_retriever.py +7 -3
  62. cognee/modules/retrieval/completion_retriever.py +11 -4
  63. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +6 -2
  64. cognee/modules/retrieval/graph_completion_cot_retriever.py +14 -51
  65. cognee/modules/retrieval/graph_completion_retriever.py +4 -1
  66. cognee/modules/retrieval/temporal_retriever.py +9 -2
  67. cognee/modules/retrieval/utils/brute_force_triplet_search.py +1 -1
  68. cognee/modules/retrieval/utils/completion.py +2 -22
  69. cognee/modules/run_custom_pipeline/__init__.py +1 -0
  70. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +69 -0
  71. cognee/modules/search/methods/search.py +5 -3
  72. cognee/modules/users/methods/create_user.py +12 -27
  73. cognee/modules/users/methods/get_authenticated_user.py +2 -1
  74. cognee/modules/users/methods/get_default_user.py +4 -2
  75. cognee/modules/users/methods/get_user.py +1 -1
  76. cognee/modules/users/methods/get_user_by_email.py +1 -1
  77. cognee/modules/users/models/DatasetDatabase.py +9 -0
  78. cognee/modules/users/models/Tenant.py +6 -7
  79. cognee/modules/users/models/User.py +6 -5
  80. cognee/modules/users/models/UserTenant.py +12 -0
  81. cognee/modules/users/models/__init__.py +1 -0
  82. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
  83. cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
  84. cognee/modules/users/tenants/methods/__init__.py +1 -0
  85. cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
  86. cognee/modules/users/tenants/methods/create_tenant.py +22 -8
  87. cognee/modules/users/tenants/methods/select_tenant.py +62 -0
  88. cognee/shared/logging_utils.py +2 -0
  89. cognee/tasks/chunks/__init__.py +1 -0
  90. cognee/tasks/chunks/chunk_by_row.py +94 -0
  91. cognee/tasks/documents/classify_documents.py +2 -0
  92. cognee/tasks/feedback/generate_improved_answers.py +3 -3
  93. cognee/tasks/ingestion/ingest_data.py +1 -1
  94. cognee/tasks/memify/__init__.py +2 -0
  95. cognee/tasks/memify/cognify_session.py +41 -0
  96. cognee/tasks/memify/extract_user_sessions.py +73 -0
  97. cognee/tasks/storage/index_data_points.py +33 -22
  98. cognee/tasks/storage/index_graph_edges.py +37 -57
  99. cognee/tests/integration/documents/CsvDocument_test.py +70 -0
  100. cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
  101. cognee/tests/test_add_docling_document.py +2 -2
  102. cognee/tests/test_cognee_server_start.py +84 -1
  103. cognee/tests/test_conversation_history.py +45 -4
  104. cognee/tests/test_data/example_with_header.csv +3 -0
  105. cognee/tests/test_delete_bmw_example.py +60 -0
  106. cognee/tests/test_edge_ingestion.py +27 -0
  107. cognee/tests/test_feedback_enrichment.py +1 -1
  108. cognee/tests/test_library.py +6 -4
  109. cognee/tests/test_load.py +62 -0
  110. cognee/tests/test_multi_tenancy.py +165 -0
  111. cognee/tests/test_parallel_databases.py +2 -0
  112. cognee/tests/test_relational_db_migration.py +54 -2
  113. cognee/tests/test_search_db.py +7 -1
  114. cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
  115. cognee/tests/unit/api/test_ontology_endpoint.py +264 -0
  116. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
  117. cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
  118. cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
  119. cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
  120. cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
  121. cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
  122. cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
  123. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
  124. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
  125. cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
  126. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
  127. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
  128. cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
  129. cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
  130. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/METADATA +88 -71
  131. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/RECORD +135 -104
  132. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/WHEEL +1 -1
  133. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/entry_points.txt +0 -1
  134. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/LICENSE +0 -0
  135. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
cognee/__init__.py CHANGED
@@ -19,6 +19,7 @@ from .api.v1.add import add
  from .api.v1.delete import delete
  from .api.v1.cognify import cognify
  from .modules.memify import memify
+ from .modules.run_custom_pipeline import run_custom_pipeline
  from .api.v1.update import update
  from .api.v1.config.config import config
  from .api.v1.datasets.datasets import datasets
cognee/api/client.py CHANGED
@@ -23,6 +23,7 @@ from cognee.api.v1.settings.routers import get_settings_router
  from cognee.api.v1.datasets.routers import get_datasets_router
  from cognee.api.v1.cognify.routers import get_code_pipeline_router, get_cognify_router
  from cognee.api.v1.search.routers import get_search_router
+ from cognee.api.v1.ontologies.routers.get_ontology_router import get_ontology_router
  from cognee.api.v1.memify.routers import get_memify_router
  from cognee.api.v1.add.routers import get_add_router
  from cognee.api.v1.delete.routers import get_delete_router
@@ -39,6 +40,8 @@ from cognee.api.v1.users.routers import (
  )
  from cognee.modules.users.methods.get_authenticated_user import REQUIRE_AUTHENTICATION

+ # Ensure application logging is configured for container stdout/stderr
+ setup_logging()
  logger = get_logger()

  if os.getenv("ENV", "prod") == "prod":
@@ -74,6 +77,9 @@ async def lifespan(app: FastAPI):

      await get_default_user()

+     # Emit a clear startup message for docker logs
+     logger.info("Backend server has started")
+
      yield


@@ -258,6 +264,8 @@ app.include_router(

  app.include_router(get_datasets_router(), prefix="/api/v1/datasets", tags=["datasets"])

+ app.include_router(get_ontology_router(), prefix="/api/v1/ontologies", tags=["ontologies"])
+
  app.include_router(get_settings_router(), prefix="/api/v1/settings", tags=["settings"])

  app.include_router(get_visualize_router(), prefix="/api/v1/visualize", tags=["visualize"])
cognee/api/v1/add/routers/get_add_router.py CHANGED
@@ -82,7 +82,9 @@ def get_add_router() -> APIRouter:
              datasetName,
              user=user,
              dataset_id=datasetId,
-             node_set=node_set if node_set else None,
+             node_set=node_set
+             if node_set != [""]
+             else None,  # Transform default node_set endpoint value to None
          )

      if isinstance(add_run, PipelineRunErrored):
cognee/api/v1/cognify/routers/get_cognify_router.py CHANGED
@@ -41,6 +41,9 @@ class CognifyPayloadDTO(InDTO):
      custom_prompt: Optional[str] = Field(
          default="", description="Custom prompt for entity extraction and graph generation"
      )
+     ontology_key: Optional[List[str]] = Field(
+         default=None, description="Reference to one or more previously uploaded ontologies"
+     )


  def get_cognify_router() -> APIRouter:
@@ -68,6 +71,7 @@ def get_cognify_router() -> APIRouter:
      - **dataset_ids** (Optional[List[UUID]]): List of existing dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
      - **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking).
      - **custom_prompt** (Optional[str]): Custom prompt for entity extraction and graph generation. If provided, this prompt will be used instead of the default prompts for knowledge graph extraction.
+     - **ontology_key** (Optional[List[str]]): Reference to one or more previously uploaded ontology files to use for knowledge graph construction.

      ## Response
      - **Blocking execution**: Complete pipeline run information with entity counts, processing duration, and success/failure status
@@ -82,7 +86,8 @@ def get_cognify_router() -> APIRouter:
      {
          "datasets": ["research_papers", "documentation"],
          "run_in_background": false,
-         "custom_prompt": "Extract entities focusing on technical concepts and their relationships. Identify key technologies, methodologies, and their interconnections."
+         "custom_prompt": "Extract entities focusing on technical concepts and their relationships. Identify key technologies, methodologies, and their interconnections.",
+         "ontology_key": ["medical_ontology_v1"]
      }
      ```

@@ -108,13 +113,35 @@ def get_cognify_router() -> APIRouter:
          )

          from cognee.api.v1.cognify import cognify as cognee_cognify
+         from cognee.api.v1.ontologies.ontologies import OntologyService

          try:
              datasets = payload.dataset_ids if payload.dataset_ids else payload.datasets
+             config_to_use = None
+
+             if payload.ontology_key:
+                 ontology_service = OntologyService()
+                 ontology_contents = ontology_service.get_ontology_contents(
+                     payload.ontology_key, user
+                 )
+
+                 from cognee.modules.ontology.ontology_config import Config
+                 from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import (
+                     RDFLibOntologyResolver,
+                 )
+                 from io import StringIO
+
+                 ontology_streams = [StringIO(content) for content in ontology_contents]
+                 config_to_use: Config = {
+                     "ontology_config": {
+                         "ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_streams)
+                     }
+                 }

              cognify_run = await cognee_cognify(
                  datasets,
                  user,
+                 config=config_to_use,
                  run_in_background=payload.run_in_background,
                  custom_prompt=payload.custom_prompt,
              )
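For reference, a minimal sketch of how a client might exercise the new `ontology_key` field against this endpoint. The payload fields come from the docstring example above; the base URL, the `/api/v1/cognify` prefix, and any auth headers are deployment-specific assumptions.

```python
import requests

BASE_URL = "http://localhost:8000"  # assumption: local cognee server

payload = {
    "datasets": ["research_papers"],
    "run_in_background": False,
    # Keys must reference ontologies previously uploaded via POST /api/v1/ontologies
    "ontology_key": ["medical_ontology_v1"],
}

# Assumption: the cognify router is mounted at /api/v1/cognify; add auth as required.
response = requests.post(f"{BASE_URL}/api/v1/cognify", json=payload)
response.raise_for_status()
print(response.json())
```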
cognee/api/v1/ontologies/__init__.py ADDED
@@ -0,0 +1,4 @@
+ from .ontologies import OntologyService
+ from .routers.get_ontology_router import get_ontology_router
+
+ __all__ = ["OntologyService", "get_ontology_router"]
cognee/api/v1/ontologies/ontologies.py ADDED
@@ -0,0 +1,183 @@
+ import os
+ import json
+ import tempfile
+ from pathlib import Path
+ from datetime import datetime, timezone
+ from typing import Optional, List
+ from dataclasses import dataclass
+
+
+ @dataclass
+ class OntologyMetadata:
+     ontology_key: str
+     filename: str
+     size_bytes: int
+     uploaded_at: str
+     description: Optional[str] = None
+
+
+ class OntologyService:
+     def __init__(self):
+         pass
+
+     @property
+     def base_dir(self) -> Path:
+         return Path(tempfile.gettempdir()) / "ontologies"
+
+     def _get_user_dir(self, user_id: str) -> Path:
+         user_dir = self.base_dir / str(user_id)
+         user_dir.mkdir(parents=True, exist_ok=True)
+         return user_dir
+
+     def _get_metadata_path(self, user_dir: Path) -> Path:
+         return user_dir / "metadata.json"
+
+     def _load_metadata(self, user_dir: Path) -> dict:
+         metadata_path = self._get_metadata_path(user_dir)
+         if metadata_path.exists():
+             with open(metadata_path, "r") as f:
+                 return json.load(f)
+         return {}
+
+     def _save_metadata(self, user_dir: Path, metadata: dict):
+         metadata_path = self._get_metadata_path(user_dir)
+         with open(metadata_path, "w") as f:
+             json.dump(metadata, f, indent=2)
+
+     async def upload_ontology(
+         self, ontology_key: str, file, user, description: Optional[str] = None
+     ) -> OntologyMetadata:
+         if not file.filename.lower().endswith(".owl"):
+             raise ValueError("File must be in .owl format")
+
+         user_dir = self._get_user_dir(str(user.id))
+         metadata = self._load_metadata(user_dir)
+
+         if ontology_key in metadata:
+             raise ValueError(f"Ontology key '{ontology_key}' already exists")
+
+         content = await file.read()
+         if len(content) > 10 * 1024 * 1024:
+             raise ValueError("File size exceeds 10MB limit")
+
+         file_path = user_dir / f"{ontology_key}.owl"
+         with open(file_path, "wb") as f:
+             f.write(content)
+
+         ontology_metadata = {
+             "filename": file.filename,
+             "size_bytes": len(content),
+             "uploaded_at": datetime.now(timezone.utc).isoformat(),
+             "description": description,
+         }
+         metadata[ontology_key] = ontology_metadata
+         self._save_metadata(user_dir, metadata)
+
+         return OntologyMetadata(
+             ontology_key=ontology_key,
+             filename=file.filename,
+             size_bytes=len(content),
+             uploaded_at=ontology_metadata["uploaded_at"],
+             description=description,
+         )
+
+     async def upload_ontologies(
+         self, ontology_key: List[str], files: List, user, descriptions: Optional[List[str]] = None
+     ) -> List[OntologyMetadata]:
+         """
+         Upload ontology files with their respective keys.
+
+         Args:
+             ontology_key: List of unique keys for each ontology
+             files: List of UploadFile objects (same length as keys)
+             user: Authenticated user
+             descriptions: Optional list of descriptions for each file
+
+         Returns:
+             List of OntologyMetadata objects for uploaded files
+
+         Raises:
+             ValueError: If keys duplicate, file format invalid, or array lengths don't match
+         """
+         if len(ontology_key) != len(files):
+             raise ValueError("Number of keys must match number of files")
+
+         if len(set(ontology_key)) != len(ontology_key):
+             raise ValueError("Duplicate ontology keys not allowed")
+
+         if descriptions and len(descriptions) != len(files):
+             raise ValueError("Number of descriptions must match number of files")
+
+         results = []
+         user_dir = self._get_user_dir(str(user.id))
+         metadata = self._load_metadata(user_dir)
+
+         for i, (key, file) in enumerate(zip(ontology_key, files)):
+             if key in metadata:
+                 raise ValueError(f"Ontology key '{key}' already exists")
+
+             if not file.filename.lower().endswith(".owl"):
+                 raise ValueError(f"File '{file.filename}' must be in .owl format")
+
+             content = await file.read()
+             if len(content) > 10 * 1024 * 1024:
+                 raise ValueError(f"File '{file.filename}' exceeds 10MB limit")
+
+             file_path = user_dir / f"{key}.owl"
+             with open(file_path, "wb") as f:
+                 f.write(content)
+
+             ontology_metadata = {
+                 "filename": file.filename,
+                 "size_bytes": len(content),
+                 "uploaded_at": datetime.now(timezone.utc).isoformat(),
+                 "description": descriptions[i] if descriptions else None,
+             }
+             metadata[key] = ontology_metadata
+
+             results.append(
+                 OntologyMetadata(
+                     ontology_key=key,
+                     filename=file.filename,
+                     size_bytes=len(content),
+                     uploaded_at=ontology_metadata["uploaded_at"],
+                     description=descriptions[i] if descriptions else None,
+                 )
+             )
+
+         self._save_metadata(user_dir, metadata)
+         return results
+
+     def get_ontology_contents(self, ontology_key: List[str], user) -> List[str]:
+         """
+         Retrieve ontology content for one or more keys.
+
+         Args:
+             ontology_key: List of ontology keys to retrieve (can contain single item)
+             user: Authenticated user
+
+         Returns:
+             List of ontology content strings
+
+         Raises:
+             ValueError: If any ontology key not found
+         """
+         user_dir = self._get_user_dir(str(user.id))
+         metadata = self._load_metadata(user_dir)
+
+         contents = []
+         for key in ontology_key:
+             if key not in metadata:
+                 raise ValueError(f"Ontology key '{key}' not found")
+
+             file_path = user_dir / f"{key}.owl"
+             if not file_path.exists():
+                 raise ValueError(f"Ontology file for key '{key}' not found")
+
+             with open(file_path, "r", encoding="utf-8") as f:
+                 contents.append(f.read())
+         return contents
+
+     def list_ontologies(self, user) -> dict:
+         user_dir = self._get_user_dir(str(user.id))
+         return self._load_metadata(user_dir)
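A minimal sketch of using OntologyService directly, outside FastAPI. `FakeUpload` and the `SimpleNamespace` user are stand-ins invented for illustration; the service only requires an object with an async `read()` plus a `filename`, and a user with an `id`. Files land under the system temp directory, so re-running with the same key raises the "already exists" ValueError.

```python
import asyncio
from types import SimpleNamespace

from cognee.api.v1.ontologies.ontologies import OntologyService


class FakeUpload:
    """Stand-in for FastAPI's UploadFile, for illustration only."""

    def __init__(self, filename: str, data: bytes):
        self.filename = filename
        self._data = data

    async def read(self) -> bytes:
        return self._data


async def main():
    service = OntologyService()
    user = SimpleNamespace(id="demo-user")  # any object with an `id` attribute works

    upload = FakeUpload("medical.owl", b"<rdf:RDF></rdf:RDF>")
    await service.upload_ontologies(["medical_ontology_v1"], [upload], user)

    print(service.list_ontologies(user))  # metadata dict keyed by ontology_key
    contents = service.get_ontology_contents(["medical_ontology_v1"], user)
    print(contents[0])


asyncio.run(main())
```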
cognee/api/v1/ontologies/routers/__init__.py ADDED
File without changes
cognee/api/v1/ontologies/routers/get_ontology_router.py ADDED
@@ -0,0 +1,107 @@
+ from fastapi import APIRouter, File, Form, UploadFile, Depends, HTTPException
+ from fastapi.responses import JSONResponse
+ from typing import Optional, List
+
+ from cognee.modules.users.models import User
+ from cognee.modules.users.methods import get_authenticated_user
+ from cognee.shared.utils import send_telemetry
+ from cognee import __version__ as cognee_version
+ from ..ontologies import OntologyService
+
+
+ def get_ontology_router() -> APIRouter:
+     router = APIRouter()
+     ontology_service = OntologyService()
+
+     @router.post("", response_model=dict)
+     async def upload_ontology(
+         ontology_key: str = Form(...),
+         ontology_file: List[UploadFile] = File(...),
+         descriptions: Optional[str] = Form(None),
+         user: User = Depends(get_authenticated_user),
+     ):
+         """
+         Upload ontology files with their respective keys for later use in cognify operations.
+
+         Supports both single and multiple file uploads:
+         - Single file: ontology_key=["key"], ontology_file=[file]
+         - Multiple files: ontology_key=["key1", "key2"], ontology_file=[file1, file2]
+
+         ## Request Parameters
+         - **ontology_key** (str): JSON array string of user-defined identifiers for the ontologies
+         - **ontology_file** (List[UploadFile]): OWL format ontology files
+         - **descriptions** (Optional[str]): JSON array string of optional descriptions
+
+         ## Response
+         Returns metadata about uploaded ontologies including keys, filenames, sizes, and upload timestamps.
+
+         ## Error Codes
+         - **400 Bad Request**: Invalid file format, duplicate keys, array length mismatches, file size exceeded
+         - **500 Internal Server Error**: File system or processing errors
+         """
+         send_telemetry(
+             "Ontology Upload API Endpoint Invoked",
+             user.id,
+             additional_properties={
+                 "endpoint": "POST /api/v1/ontologies",
+                 "cognee_version": cognee_version,
+             },
+         )
+
+         try:
+             import json
+
+             ontology_keys = json.loads(ontology_key)
+             description_list = json.loads(descriptions) if descriptions else None
+
+             if not isinstance(ontology_keys, list):
+                 raise ValueError("ontology_key must be a JSON array")
+
+             results = await ontology_service.upload_ontologies(
+                 ontology_keys, ontology_file, user, description_list
+             )
+
+             return {
+                 "uploaded_ontologies": [
+                     {
+                         "ontology_key": result.ontology_key,
+                         "filename": result.filename,
+                         "size_bytes": result.size_bytes,
+                         "uploaded_at": result.uploaded_at,
+                         "description": result.description,
+                     }
+                     for result in results
+                 ]
+             }
+         except (json.JSONDecodeError, ValueError) as e:
+             return JSONResponse(status_code=400, content={"error": str(e)})
+         except Exception as e:
+             return JSONResponse(status_code=500, content={"error": str(e)})
+
+     @router.get("", response_model=dict)
+     async def list_ontologies(user: User = Depends(get_authenticated_user)):
+         """
+         List all uploaded ontologies for the authenticated user.
+
+         ## Response
+         Returns a dictionary mapping ontology keys to their metadata including filename, size, and upload timestamp.
+
+         ## Error Codes
+         - **500 Internal Server Error**: File system or processing errors
+         """
+         send_telemetry(
+             "Ontology List API Endpoint Invoked",
+             user.id,
+             additional_properties={
+                 "endpoint": "GET /api/v1/ontologies",
+                 "cognee_version": cognee_version,
+             },
+         )
+
+         try:
+             metadata = ontology_service.list_ontologies(user)
+             return metadata
+         except Exception as e:
+             return JSONResponse(status_code=500, content={"error": str(e)})
+
+     return router
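A hedged upload sketch for this endpoint. The multipart field names (`ontology_key` as a JSON array string, repeated `ontology_file` parts, optional `descriptions`) and the `/api/v1/ontologies` prefix come from this diff and the app.include_router change in cognee/api/client.py; the host and auth are assumptions, and get_authenticated_user means you must supply whatever credentials your deployment requires.

```python
import json
import requests

files = [
    ("ontology_file", ("medical.owl", open("medical.owl", "rb"), "application/rdf+xml")),
    ("ontology_file", ("legal.owl", open("legal.owl", "rb"), "application/rdf+xml")),
]
data = {
    # Both form fields are JSON array strings, per the endpoint docstring
    "ontology_key": json.dumps(["medical_ontology_v1", "legal_ontology_v1"]),
    "descriptions": json.dumps(["Medical terms", "Legal terms"]),
}

resp = requests.post("http://localhost:8000/api/v1/ontologies", data=data, files=files)
print(resp.status_code, resp.json())  # metadata for each uploaded ontology
```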
cognee/api/v1/permissions/routers/get_permissions_router.py CHANGED
@@ -1,15 +1,20 @@
  from uuid import UUID
- from typing import List
+ from typing import List, Union

  from fastapi import APIRouter, Depends
  from fastapi.responses import JSONResponse

  from cognee.modules.users.models import User
+ from cognee.api.DTO import InDTO
  from cognee.modules.users.methods import get_authenticated_user
  from cognee.shared.utils import send_telemetry
  from cognee import __version__ as cognee_version


+ class SelectTenantDTO(InDTO):
+     tenant_id: UUID | None = None
+
+
  def get_permissions_router() -> APIRouter:
      permissions_router = APIRouter()

@@ -226,4 +231,39 @@ def get_permissions_router() -> APIRouter:
          status_code=200, content={"message": "Tenant created.", "tenant_id": str(tenant_id)}
      )

+     @permissions_router.post("/tenants/select")
+     async def select_tenant(payload: SelectTenantDTO, user: User = Depends(get_authenticated_user)):
+         """
+         Select current tenant.
+
+         This endpoint selects a tenant with the specified UUID. Tenants are used
+         to organize users and resources in multi-tenant environments, providing
+         isolation and access control between different groups or organizations.
+
+         Sending a null/None value as tenant_id selects his default single user tenant
+
+         ## Request Parameters
+         - **tenant_id** (Union[UUID, None]): UUID of the tenant to select, If null/None is provided use the default single user tenant
+
+         ## Response
+         Returns a success message along with selected tenant id.
+         """
+         send_telemetry(
+             "Permissions API Endpoint Invoked",
+             user.id,
+             additional_properties={
+                 "endpoint": f"POST /v1/permissions/tenants/{str(payload.tenant_id)}",
+                 "tenant_id": str(payload.tenant_id),
+             },
+         )
+
+         from cognee.modules.users.tenants.methods import select_tenant as select_tenant_method
+
+         await select_tenant_method(user_id=user.id, tenant_id=payload.tenant_id)
+
+         return JSONResponse(
+             status_code=200,
+             content={"message": "Tenant selected.", "tenant_id": str(payload.tenant_id)},
+         )
+
      return permissions_router
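A minimal sketch of calling the new tenant-selection endpoint. The `/tenants/select` path and the `tenant_id` payload field come from this diff; the host and the assumption that the permissions router keeps an `/api/v1/permissions` prefix are deployment-specific, and the UUID shown is a placeholder.

```python
import requests

BASE_URL = "http://localhost:8000"  # assumption: local cognee server with auth configured

# Select a specific tenant by UUID (placeholder value).
resp = requests.post(
    f"{BASE_URL}/api/v1/permissions/tenants/select",
    json={"tenant_id": "7f9c0c54-2f1e-4d6b-9a7e-2b8c2b2f0a11"},
)
print(resp.json())  # {"message": "Tenant selected.", "tenant_id": "..."}

# Passing null reverts to the user's default single-user tenant.
resp = requests.post(
    f"{BASE_URL}/api/v1/permissions/tenants/select",
    json={"tenant_id": None},
)
print(resp.json())
```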
cognee/cli/commands/cognify_command.py CHANGED
@@ -22,7 +22,7 @@ relationships, and creates semantic connections for enhanced search and reasonin
 
  Processing Pipeline:
  1. **Document Classification**: Identifies document types and structures
- 2. **Permission Validation**: Ensures user has processing rights 
+ 2. **Permission Validation**: Ensures user has processing rights
  3. **Text Chunking**: Breaks content into semantically meaningful segments
  4. **Entity Extraction**: Identifies key concepts, people, places, organizations
  5. **Relationship Detection**: Discovers connections between entities
@@ -97,6 +97,13 @@ After successful cognify processing, use `cognee search` to query the knowledge
              chunker_class = LangchainChunker
          except ImportError:
              fmt.warning("LangchainChunker not available, using TextChunker")
+     elif args.chunker == "CsvChunker":
+         try:
+             from cognee.modules.chunking.CsvChunker import CsvChunker
+
+             chunker_class = CsvChunker
+         except ImportError:
+             fmt.warning("CsvChunker not available, using TextChunker")

      result = await cognee.cognify(
          datasets=datasets,
cognee/cli/config.py CHANGED
@@ -26,7 +26,7 @@ SEARCH_TYPE_CHOICES = [
  ]

  # Chunker choices
- CHUNKER_CHOICES = ["TextChunker", "LangchainChunker"]
+ CHUNKER_CHOICES = ["TextChunker", "LangchainChunker", "CsvChunker"]

  # Output format choices
  OUTPUT_FORMAT_CHOICES = ["json", "pretty", "simple"]
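A sketch of using the new CsvChunker from the Python API. The CLI code above resolves `chunker_class` and then calls `cognee.cognify(...)`, so this assumes the Python API accepts the chunker via a `chunker` argument; the CSV path reuses the test fixture added in this release.

```python
import asyncio

import cognee
from cognee.modules.chunking.CsvChunker import CsvChunker  # new in 0.5.0.dev0


async def main():
    # example_with_header.csv ships as a test fixture in this release
    await cognee.add("cognee/tests/test_data/example_with_header.csv")
    # Assumption: cognify mirrors the CLI and takes the chunker class directly.
    await cognee.cognify(chunker=CsvChunker)


asyncio.run(main())
```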
cognee/context_global_variables.py CHANGED
@@ -4,6 +4,8 @@ from typing import Union
  from uuid import UUID

  from cognee.base_config import get_base_config
+ from cognee.infrastructure.databases.vector.config import get_vectordb_context_config
+ from cognee.infrastructure.databases.graph.config import get_graph_context_config
  from cognee.infrastructure.databases.utils import get_or_create_dataset_database
  from cognee.infrastructure.files.storage.config import file_storage_config
  from cognee.modules.users.methods import get_user
@@ -14,11 +16,40 @@ vector_db_config = ContextVar("vector_db_config", default=None)
  graph_db_config = ContextVar("graph_db_config", default=None)
  session_user = ContextVar("session_user", default=None)

+ VECTOR_DBS_WITH_MULTI_USER_SUPPORT = ["lancedb", "falkor"]
+ GRAPH_DBS_WITH_MULTI_USER_SUPPORT = ["kuzu", "falkor"]
+

  async def set_session_user_context_variable(user):
      session_user.set(user)


+ def multi_user_support_possible():
+     graph_db_config = get_graph_context_config()
+     vector_db_config = get_vectordb_context_config()
+     return (
+         graph_db_config["graph_database_provider"] in GRAPH_DBS_WITH_MULTI_USER_SUPPORT
+         and vector_db_config["vector_db_provider"] in VECTOR_DBS_WITH_MULTI_USER_SUPPORT
+     )
+
+
+ def backend_access_control_enabled():
+     backend_access_control = os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL", None)
+     if backend_access_control is None:
+         # If backend access control is not defined in environment variables,
+         # enable it by default if graph and vector DBs can support it, otherwise disable it
+         return multi_user_support_possible()
+     elif backend_access_control.lower() == "true":
+         # If enabled, ensure that the current graph and vector DBs can support it
+         multi_user_support = multi_user_support_possible()
+         if not multi_user_support:
+             raise EnvironmentError(
+                 "ENABLE_BACKEND_ACCESS_CONTROL is set to true but the current graph and/or vector databases do not support multi-user access control. Please use supported databases or disable backend access control."
+             )
+         return True
+     return False
+
+
  async def set_database_global_context_variables(dataset: Union[str, UUID], user_id: UUID):
      """
      If backend access control is enabled this function will ensure all datasets have their own databases,
@@ -38,9 +69,7 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_

      """

-     base_config = get_base_config()
-
-     if not os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
+     if not backend_access_control_enabled():
          return

      user = await get_user(user_id)
@@ -48,6 +77,7 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_
      # To ensure permissions are enforced properly all datasets will have their own databases
      dataset_database = await get_or_create_dataset_database(dataset, user)

+     base_config = get_base_config()
      data_root_directory = os.path.join(
          base_config.data_root_directory, str(user.tenant_id or user.id)
      )
@@ -57,15 +87,17 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_

      # Set vector and graph database configuration based on dataset database information
      vector_config = {
-         "vector_db_url": os.path.join(
-             databases_directory_path, dataset_database.vector_database_name
-         ),
-         "vector_db_key": "",
-         "vector_db_provider": "lancedb",
+         "vector_db_provider": dataset_database.vector_database_provider,
+         "vector_db_url": dataset_database.vector_database_url,
+         "vector_db_key": dataset_database.vector_database_key,
+         "vector_db_name": dataset_database.vector_database_name,
      }

      graph_config = {
-         "graph_database_provider": "kuzu",
+         "graph_database_provider": dataset_database.graph_database_provider,
+         "graph_database_url": dataset_database.graph_database_url,
+         "graph_database_name": dataset_database.graph_database_name,
+         "graph_database_key": dataset_database.graph_database_key,
          "graph_file_path": os.path.join(
              databases_directory_path, dataset_database.graph_database_name
          ),
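The net effect of the new `backend_access_control_enabled` default is easiest to see in a small sketch. It reads the live graph/vector configuration, so the result depends on your environment; the provider lists are taken from the constants in this diff.

```python
import os

from cognee.context_global_variables import backend_access_control_enabled

# With the variable unset, the new default follows database capability:
# True only when the graph provider is in GRAPH_DBS_WITH_MULTI_USER_SUPPORT
# ("kuzu", "falkor") AND the vector provider is in
# VECTOR_DBS_WITH_MULTI_USER_SUPPORT ("lancedb", "falkor").
os.environ.pop("ENABLE_BACKEND_ACCESS_CONTROL", None)
print(backend_access_control_enabled())

# Forcing it on with an unsupported database pair now raises EnvironmentError
# instead of silently misbehaving.
os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "true"
try:
    backend_access_control_enabled()
except EnvironmentError as err:
    print(err)
```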
cognee/infrastructure/databases/cache/config.py CHANGED
@@ -1,6 +1,6 @@
  from pydantic_settings import BaseSettings, SettingsConfigDict
  from functools import lru_cache
- from typing import Optional
+ from typing import Optional, Literal


  class CacheConfig(BaseSettings):
@@ -15,6 +15,7 @@ class CacheConfig(BaseSettings):
      - agentic_lock_timeout: Maximum time (in seconds) to wait for the lock release.
      """

+     cache_backend: Literal["redis", "fs"] = "fs"
      caching: bool = False
      shared_kuzu_lock: bool = False
      cache_host: str = "localhost"
@@ -28,6 +29,7 @@ class CacheConfig(BaseSettings):

      def to_dict(self) -> dict:
          return {
+             "cache_backend": self.cache_backend,
              "caching": self.caching,
              "shared_kuzu_lock": self.shared_kuzu_lock,
              "cache_host": self.cache_host,