cognee 0.4.1__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +1 -0
- cognee/api/client.py +8 -0
- cognee/api/v1/add/routers/get_add_router.py +3 -1
- cognee/api/v1/cognify/routers/get_cognify_router.py +28 -1
- cognee/api/v1/ontologies/__init__.py +4 -0
- cognee/api/v1/ontologies/ontologies.py +183 -0
- cognee/api/v1/ontologies/routers/__init__.py +0 -0
- cognee/api/v1/ontologies/routers/get_ontology_router.py +107 -0
- cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
- cognee/cli/commands/cognify_command.py +8 -1
- cognee/cli/config.py +1 -1
- cognee/context_global_variables.py +41 -9
- cognee/infrastructure/databases/cache/config.py +3 -1
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
- cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
- cognee/infrastructure/databases/graph/config.py +4 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +2 -0
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
- cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +37 -3
- cognee/infrastructure/databases/vector/config.py +3 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +5 -1
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +1 -4
- cognee/infrastructure/engine/models/Edge.py +13 -1
- cognee/infrastructure/files/utils/guess_file_type.py +4 -0
- cognee/infrastructure/llm/config.py +2 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +5 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +7 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +7 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +8 -16
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +12 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +13 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +5 -2
- cognee/infrastructure/loaders/LoaderEngine.py +1 -0
- cognee/infrastructure/loaders/core/__init__.py +2 -1
- cognee/infrastructure/loaders/core/csv_loader.py +93 -0
- cognee/infrastructure/loaders/core/text_loader.py +1 -2
- cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
- cognee/infrastructure/loaders/supported_loaders.py +2 -1
- cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
- cognee/modules/chunking/CsvChunker.py +35 -0
- cognee/modules/chunking/models/DocumentChunk.py +2 -1
- cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
- cognee/modules/data/methods/__init__.py +1 -0
- cognee/modules/data/methods/create_dataset.py +4 -2
- cognee/modules/data/methods/get_dataset_ids.py +5 -1
- cognee/modules/data/methods/get_unique_data_id.py +68 -0
- cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
- cognee/modules/data/models/Dataset.py +2 -0
- cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
- cognee/modules/data/processing/document_types/__init__.py +1 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +4 -2
- cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
- cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
- cognee/modules/ingestion/identify.py +4 -4
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
- cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
- cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
- cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
- cognee/modules/retrieval/base_graph_retriever.py +7 -3
- cognee/modules/retrieval/base_retriever.py +7 -3
- cognee/modules/retrieval/completion_retriever.py +11 -4
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +6 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +14 -51
- cognee/modules/retrieval/graph_completion_retriever.py +4 -1
- cognee/modules/retrieval/temporal_retriever.py +9 -2
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +1 -1
- cognee/modules/retrieval/utils/completion.py +2 -22
- cognee/modules/run_custom_pipeline/__init__.py +1 -0
- cognee/modules/run_custom_pipeline/run_custom_pipeline.py +69 -0
- cognee/modules/search/methods/search.py +5 -3
- cognee/modules/users/methods/create_user.py +12 -27
- cognee/modules/users/methods/get_authenticated_user.py +2 -1
- cognee/modules/users/methods/get_default_user.py +4 -2
- cognee/modules/users/methods/get_user.py +1 -1
- cognee/modules/users/methods/get_user_by_email.py +1 -1
- cognee/modules/users/models/DatasetDatabase.py +9 -0
- cognee/modules/users/models/Tenant.py +6 -7
- cognee/modules/users/models/User.py +6 -5
- cognee/modules/users/models/UserTenant.py +12 -0
- cognee/modules/users/models/__init__.py +1 -0
- cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
- cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
- cognee/modules/users/tenants/methods/__init__.py +1 -0
- cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
- cognee/modules/users/tenants/methods/create_tenant.py +22 -8
- cognee/modules/users/tenants/methods/select_tenant.py +62 -0
- cognee/shared/logging_utils.py +2 -0
- cognee/tasks/chunks/__init__.py +1 -0
- cognee/tasks/chunks/chunk_by_row.py +94 -0
- cognee/tasks/documents/classify_documents.py +2 -0
- cognee/tasks/feedback/generate_improved_answers.py +3 -3
- cognee/tasks/ingestion/ingest_data.py +1 -1
- cognee/tasks/memify/__init__.py +2 -0
- cognee/tasks/memify/cognify_session.py +41 -0
- cognee/tasks/memify/extract_user_sessions.py +73 -0
- cognee/tasks/storage/index_data_points.py +33 -22
- cognee/tasks/storage/index_graph_edges.py +37 -57
- cognee/tests/integration/documents/CsvDocument_test.py +70 -0
- cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
- cognee/tests/test_add_docling_document.py +2 -2
- cognee/tests/test_cognee_server_start.py +84 -1
- cognee/tests/test_conversation_history.py +45 -4
- cognee/tests/test_data/example_with_header.csv +3 -0
- cognee/tests/test_delete_bmw_example.py +60 -0
- cognee/tests/test_edge_ingestion.py +27 -0
- cognee/tests/test_feedback_enrichment.py +1 -1
- cognee/tests/test_library.py +6 -4
- cognee/tests/test_load.py +62 -0
- cognee/tests/test_multi_tenancy.py +165 -0
- cognee/tests/test_parallel_databases.py +2 -0
- cognee/tests/test_relational_db_migration.py +54 -2
- cognee/tests/test_search_db.py +7 -1
- cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
- cognee/tests/unit/api/test_ontology_endpoint.py +264 -0
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
- cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
- cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
- cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
- cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
- cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
- cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
- cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
- cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
- cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
- {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/METADATA +88 -71
- {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/RECORD +135 -104
- {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/WHEEL +1 -1
- {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/entry_points.txt +0 -1
- {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
cognee/__init__.py
CHANGED
@@ -19,6 +19,7 @@ from .api.v1.add import add
 from .api.v1.delete import delete
 from .api.v1.cognify import cognify
 from .modules.memify import memify
+from .modules.run_custom_pipeline import run_custom_pipeline
 from .api.v1.update import update
 from .api.v1.config.config import config
 from .api.v1.datasets.datasets import datasets
cognee/api/client.py
CHANGED
@@ -23,6 +23,7 @@ from cognee.api.v1.settings.routers import get_settings_router
 from cognee.api.v1.datasets.routers import get_datasets_router
 from cognee.api.v1.cognify.routers import get_code_pipeline_router, get_cognify_router
 from cognee.api.v1.search.routers import get_search_router
+from cognee.api.v1.ontologies.routers.get_ontology_router import get_ontology_router
 from cognee.api.v1.memify.routers import get_memify_router
 from cognee.api.v1.add.routers import get_add_router
 from cognee.api.v1.delete.routers import get_delete_router
@@ -39,6 +40,8 @@ from cognee.api.v1.users.routers import (
 )
 from cognee.modules.users.methods.get_authenticated_user import REQUIRE_AUTHENTICATION

+# Ensure application logging is configured for container stdout/stderr
+setup_logging()
 logger = get_logger()

 if os.getenv("ENV", "prod") == "prod":
@@ -74,6 +77,9 @@ async def lifespan(app: FastAPI):

     await get_default_user()

+    # Emit a clear startup message for docker logs
+    logger.info("Backend server has started")
+
     yield


@@ -258,6 +264,8 @@ app.include_router(

 app.include_router(get_datasets_router(), prefix="/api/v1/datasets", tags=["datasets"])

+app.include_router(get_ontology_router(), prefix="/api/v1/ontologies", tags=["ontologies"])
+
 app.include_router(get_settings_router(), prefix="/api/v1/settings", tags=["settings"])

 app.include_router(get_visualize_router(), prefix="/api/v1/visualize", tags=["visualize"])
cognee/api/v1/add/routers/get_add_router.py
CHANGED
@@ -82,7 +82,9 @@ def get_add_router() -> APIRouter:
             datasetName,
             user=user,
             dataset_id=datasetId,
-            node_set=node_set
+            node_set=node_set
+            if node_set != [""]
+            else None,  # Transform default node_set endpoint value to None
         )

         if isinstance(add_run, PipelineRunErrored):
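Read on its own, the new guard simply normalizes the endpoint's default form value; a minimal sketch (the helper name is mine, not part of the diff):

```python
# Hypothetical helper mirroring the new guard in the add route above:
# the endpoint's default node_set value [""] is normalized to None.
def normalize_node_set(node_set):
    return node_set if node_set != [""] else None


assert normalize_node_set([""]) is None                  # default form value -> None
assert normalize_node_set(["papers"]) == ["papers"]      # real node sets pass through
```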
cognee/api/v1/cognify/routers/get_cognify_router.py
CHANGED
@@ -41,6 +41,9 @@ class CognifyPayloadDTO(InDTO):
     custom_prompt: Optional[str] = Field(
         default="", description="Custom prompt for entity extraction and graph generation"
     )
+    ontology_key: Optional[List[str]] = Field(
+        default=None, description="Reference to one or more previously uploaded ontologies"
+    )


 def get_cognify_router() -> APIRouter:
@@ -68,6 +71,7 @@ def get_cognify_router() -> APIRouter:
     - **dataset_ids** (Optional[List[UUID]]): List of existing dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
     - **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking).
     - **custom_prompt** (Optional[str]): Custom prompt for entity extraction and graph generation. If provided, this prompt will be used instead of the default prompts for knowledge graph extraction.
+    - **ontology_key** (Optional[List[str]]): Reference to one or more previously uploaded ontology files to use for knowledge graph construction.

     ## Response
     - **Blocking execution**: Complete pipeline run information with entity counts, processing duration, and success/failure status
@@ -82,7 +86,8 @@ def get_cognify_router() -> APIRouter:
     {
         "datasets": ["research_papers", "documentation"],
         "run_in_background": false,
-        "custom_prompt": "Extract entities focusing on technical concepts and their relationships. Identify key technologies, methodologies, and their interconnections."
+        "custom_prompt": "Extract entities focusing on technical concepts and their relationships. Identify key technologies, methodologies, and their interconnections.",
+        "ontology_key": ["medical_ontology_v1"]
     }
     ```

@@ -108,13 +113,35 @@ def get_cognify_router() -> APIRouter:
         )

         from cognee.api.v1.cognify import cognify as cognee_cognify
+        from cognee.api.v1.ontologies.ontologies import OntologyService

         try:
             datasets = payload.dataset_ids if payload.dataset_ids else payload.datasets
+            config_to_use = None
+
+            if payload.ontology_key:
+                ontology_service = OntologyService()
+                ontology_contents = ontology_service.get_ontology_contents(
+                    payload.ontology_key, user
+                )
+
+                from cognee.modules.ontology.ontology_config import Config
+                from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import (
+                    RDFLibOntologyResolver,
+                )
+                from io import StringIO
+
+                ontology_streams = [StringIO(content) for content in ontology_contents]
+                config_to_use: Config = {
+                    "ontology_config": {
+                        "ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_streams)
+                    }
+                }

             cognify_run = await cognee_cognify(
                 datasets,
                 user,
+                config=config_to_use,
                 run_in_background=payload.run_in_background,
                 custom_prompt=payload.custom_prompt,
             )
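For orientation, a client-side sketch of how the new `ontology_key` field might be sent to this endpoint. The base URL, mount prefix, and auth header are assumptions, not taken from this diff; the payload fields mirror the documented example above.

```python
import requests

BASE_URL = "http://localhost:8000"  # assumed deployment URL
headers = {"Authorization": "Bearer <token>"}  # assumed auth scheme

# Run cognify on a dataset, resolving entities against a previously
# uploaded ontology (see the /api/v1/ontologies routes below).
response = requests.post(
    f"{BASE_URL}/api/v1/cognify",
    json={
        "datasets": ["research_papers"],
        "run_in_background": False,
        "ontology_key": ["medical_ontology_v1"],
    },
    headers=headers,
)
response.raise_for_status()
print(response.json())
```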
cognee/api/v1/ontologies/ontologies.py
ADDED
@@ -0,0 +1,183 @@
+import os
+import json
+import tempfile
+from pathlib import Path
+from datetime import datetime, timezone
+from typing import Optional, List
+from dataclasses import dataclass
+
+
+@dataclass
+class OntologyMetadata:
+    ontology_key: str
+    filename: str
+    size_bytes: int
+    uploaded_at: str
+    description: Optional[str] = None
+
+
+class OntologyService:
+    def __init__(self):
+        pass
+
+    @property
+    def base_dir(self) -> Path:
+        return Path(tempfile.gettempdir()) / "ontologies"
+
+    def _get_user_dir(self, user_id: str) -> Path:
+        user_dir = self.base_dir / str(user_id)
+        user_dir.mkdir(parents=True, exist_ok=True)
+        return user_dir
+
+    def _get_metadata_path(self, user_dir: Path) -> Path:
+        return user_dir / "metadata.json"
+
+    def _load_metadata(self, user_dir: Path) -> dict:
+        metadata_path = self._get_metadata_path(user_dir)
+        if metadata_path.exists():
+            with open(metadata_path, "r") as f:
+                return json.load(f)
+        return {}
+
+    def _save_metadata(self, user_dir: Path, metadata: dict):
+        metadata_path = self._get_metadata_path(user_dir)
+        with open(metadata_path, "w") as f:
+            json.dump(metadata, f, indent=2)
+
+    async def upload_ontology(
+        self, ontology_key: str, file, user, description: Optional[str] = None
+    ) -> OntologyMetadata:
+        if not file.filename.lower().endswith(".owl"):
+            raise ValueError("File must be in .owl format")
+
+        user_dir = self._get_user_dir(str(user.id))
+        metadata = self._load_metadata(user_dir)
+
+        if ontology_key in metadata:
+            raise ValueError(f"Ontology key '{ontology_key}' already exists")
+
+        content = await file.read()
+        if len(content) > 10 * 1024 * 1024:
+            raise ValueError("File size exceeds 10MB limit")
+
+        file_path = user_dir / f"{ontology_key}.owl"
+        with open(file_path, "wb") as f:
+            f.write(content)
+
+        ontology_metadata = {
+            "filename": file.filename,
+            "size_bytes": len(content),
+            "uploaded_at": datetime.now(timezone.utc).isoformat(),
+            "description": description,
+        }
+        metadata[ontology_key] = ontology_metadata
+        self._save_metadata(user_dir, metadata)
+
+        return OntologyMetadata(
+            ontology_key=ontology_key,
+            filename=file.filename,
+            size_bytes=len(content),
+            uploaded_at=ontology_metadata["uploaded_at"],
+            description=description,
+        )
+
+    async def upload_ontologies(
+        self, ontology_key: List[str], files: List, user, descriptions: Optional[List[str]] = None
+    ) -> List[OntologyMetadata]:
+        """
+        Upload ontology files with their respective keys.
+
+        Args:
+            ontology_key: List of unique keys for each ontology
+            files: List of UploadFile objects (same length as keys)
+            user: Authenticated user
+            descriptions: Optional list of descriptions for each file
+
+        Returns:
+            List of OntologyMetadata objects for uploaded files
+
+        Raises:
+            ValueError: If keys duplicate, file format invalid, or array lengths don't match
+        """
+        if len(ontology_key) != len(files):
+            raise ValueError("Number of keys must match number of files")
+
+        if len(set(ontology_key)) != len(ontology_key):
+            raise ValueError("Duplicate ontology keys not allowed")
+
+        if descriptions and len(descriptions) != len(files):
+            raise ValueError("Number of descriptions must match number of files")
+
+        results = []
+        user_dir = self._get_user_dir(str(user.id))
+        metadata = self._load_metadata(user_dir)
+
+        for i, (key, file) in enumerate(zip(ontology_key, files)):
+            if key in metadata:
+                raise ValueError(f"Ontology key '{key}' already exists")
+
+            if not file.filename.lower().endswith(".owl"):
+                raise ValueError(f"File '{file.filename}' must be in .owl format")
+
+            content = await file.read()
+            if len(content) > 10 * 1024 * 1024:
+                raise ValueError(f"File '{file.filename}' exceeds 10MB limit")
+
+            file_path = user_dir / f"{key}.owl"
+            with open(file_path, "wb") as f:
+                f.write(content)
+
+            ontology_metadata = {
+                "filename": file.filename,
+                "size_bytes": len(content),
+                "uploaded_at": datetime.now(timezone.utc).isoformat(),
+                "description": descriptions[i] if descriptions else None,
+            }
+            metadata[key] = ontology_metadata
+
+            results.append(
+                OntologyMetadata(
+                    ontology_key=key,
+                    filename=file.filename,
+                    size_bytes=len(content),
+                    uploaded_at=ontology_metadata["uploaded_at"],
+                    description=descriptions[i] if descriptions else None,
+                )
+            )
+
+        self._save_metadata(user_dir, metadata)
+        return results
+
+    def get_ontology_contents(self, ontology_key: List[str], user) -> List[str]:
+        """
+        Retrieve ontology content for one or more keys.
+
+        Args:
+            ontology_key: List of ontology keys to retrieve (can contain single item)
+            user: Authenticated user
+
+        Returns:
+            List of ontology content strings
+
+        Raises:
+            ValueError: If any ontology key not found
+        """
+        user_dir = self._get_user_dir(str(user.id))
+        metadata = self._load_metadata(user_dir)
+
+        contents = []
+        for key in ontology_key:
+            if key not in metadata:
+                raise ValueError(f"Ontology key '{key}' not found")
+
+            file_path = user_dir / f"{key}.owl"
+            if not file_path.exists():
+                raise ValueError(f"Ontology file for key '{key}' not found")
+
+            with open(file_path, "r", encoding="utf-8") as f:
+                contents.append(f.read())
+        return contents
+
+    def list_ontologies(self, user) -> dict:
+        user_dir = self._get_user_dir(str(user.id))
+        return self._load_metadata(user_dir)
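A quick sketch of exercising `OntologyService` directly. The upload path only needs an object with a `filename` attribute and an async `read()`, so a small stand-in works outside FastAPI; the fake user and OWL payload below are placeholders.

```python
import asyncio
from types import SimpleNamespace

from cognee.api.v1.ontologies.ontologies import OntologyService


class FakeUpload:
    """Minimal stand-in for FastAPI's UploadFile: filename plus async read()."""

    def __init__(self, filename: str, data: bytes):
        self.filename = filename
        self._data = data

    async def read(self) -> bytes:
        return self._data


async def main():
    service = OntologyService()
    user = SimpleNamespace(id="00000000-0000-0000-0000-000000000001")  # placeholder user

    owl = b'<?xml version="1.0"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"/>'
    await service.upload_ontology("demo_ontology", FakeUpload("demo.owl", owl), user)

    print(service.list_ontologies(user))  # {'demo_ontology': {'filename': 'demo.owl', ...}}
    print(service.get_ontology_contents(["demo_ontology"], user)[0][:40])


asyncio.run(main())
```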
cognee/api/v1/ontologies/routers/__init__.py
File without changes
cognee/api/v1/ontologies/routers/get_ontology_router.py
ADDED
@@ -0,0 +1,107 @@
+from fastapi import APIRouter, File, Form, UploadFile, Depends, HTTPException
+from fastapi.responses import JSONResponse
+from typing import Optional, List
+
+from cognee.modules.users.models import User
+from cognee.modules.users.methods import get_authenticated_user
+from cognee.shared.utils import send_telemetry
+from cognee import __version__ as cognee_version
+from ..ontologies import OntologyService
+
+
+def get_ontology_router() -> APIRouter:
+    router = APIRouter()
+    ontology_service = OntologyService()
+
+    @router.post("", response_model=dict)
+    async def upload_ontology(
+        ontology_key: str = Form(...),
+        ontology_file: List[UploadFile] = File(...),
+        descriptions: Optional[str] = Form(None),
+        user: User = Depends(get_authenticated_user),
+    ):
+        """
+        Upload ontology files with their respective keys for later use in cognify operations.
+
+        Supports both single and multiple file uploads:
+        - Single file: ontology_key=["key"], ontology_file=[file]
+        - Multiple files: ontology_key=["key1", "key2"], ontology_file=[file1, file2]
+
+        ## Request Parameters
+        - **ontology_key** (str): JSON array string of user-defined identifiers for the ontologies
+        - **ontology_file** (List[UploadFile]): OWL format ontology files
+        - **descriptions** (Optional[str]): JSON array string of optional descriptions
+
+        ## Response
+        Returns metadata about uploaded ontologies including keys, filenames, sizes, and upload timestamps.
+
+        ## Error Codes
+        - **400 Bad Request**: Invalid file format, duplicate keys, array length mismatches, file size exceeded
+        - **500 Internal Server Error**: File system or processing errors
+        """
+        send_telemetry(
+            "Ontology Upload API Endpoint Invoked",
+            user.id,
+            additional_properties={
+                "endpoint": "POST /api/v1/ontologies",
+                "cognee_version": cognee_version,
+            },
+        )
+
+        try:
+            import json
+
+            ontology_keys = json.loads(ontology_key)
+            description_list = json.loads(descriptions) if descriptions else None
+
+            if not isinstance(ontology_keys, list):
+                raise ValueError("ontology_key must be a JSON array")
+
+            results = await ontology_service.upload_ontologies(
+                ontology_keys, ontology_file, user, description_list
+            )
+
+            return {
+                "uploaded_ontologies": [
+                    {
+                        "ontology_key": result.ontology_key,
+                        "filename": result.filename,
+                        "size_bytes": result.size_bytes,
+                        "uploaded_at": result.uploaded_at,
+                        "description": result.description,
+                    }
+                    for result in results
+                ]
+            }
+        except (json.JSONDecodeError, ValueError) as e:
+            return JSONResponse(status_code=400, content={"error": str(e)})
+        except Exception as e:
+            return JSONResponse(status_code=500, content={"error": str(e)})
+
+    @router.get("", response_model=dict)
+    async def list_ontologies(user: User = Depends(get_authenticated_user)):
+        """
+        List all uploaded ontologies for the authenticated user.
+
+        ## Response
+        Returns a dictionary mapping ontology keys to their metadata including filename, size, and upload timestamp.
+
+        ## Error Codes
+        - **500 Internal Server Error**: File system or processing errors
+        """
+        send_telemetry(
+            "Ontology List API Endpoint Invoked",
+            user.id,
+            additional_properties={
+                "endpoint": "GET /api/v1/ontologies",
+                "cognee_version": cognee_version,
+            },
+        )
+
+        try:
+            metadata = ontology_service.list_ontologies(user)
+            return metadata
+        except Exception as e:
+            return JSONResponse(status_code=500, content={"error": str(e)})
+
+    return router
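A hedged sketch of calling the upload route with `requests` (base URL and auth are assumptions). Note that `ontology_key` and `descriptions` travel as JSON-encoded array strings, matching the `json.loads` parsing above.

```python
import json
import requests

BASE_URL = "http://localhost:8000"  # assumed deployment URL
headers = {"Authorization": "Bearer <token>"}  # assumed auth scheme

with open("medical_ontology_v1.owl", "rb") as owl_file:
    response = requests.post(
        f"{BASE_URL}/api/v1/ontologies",
        data={
            "ontology_key": json.dumps(["medical_ontology_v1"]),
            "descriptions": json.dumps(["Medical domain ontology"]),
        },
        files=[("ontology_file", ("medical_ontology_v1.owl", owl_file))],
        headers=headers,
    )

response.raise_for_status()
print(response.json()["uploaded_ontologies"])

# Listing uploaded ontologies afterwards:
print(requests.get(f"{BASE_URL}/api/v1/ontologies", headers=headers).json())
```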
cognee/api/v1/permissions/routers/get_permissions_router.py
CHANGED
@@ -1,15 +1,20 @@
 from uuid import UUID
-from typing import List
+from typing import List, Union

 from fastapi import APIRouter, Depends
 from fastapi.responses import JSONResponse

 from cognee.modules.users.models import User
+from cognee.api.DTO import InDTO
 from cognee.modules.users.methods import get_authenticated_user
 from cognee.shared.utils import send_telemetry
 from cognee import __version__ as cognee_version


+class SelectTenantDTO(InDTO):
+    tenant_id: UUID | None = None
+
+
 def get_permissions_router() -> APIRouter:
     permissions_router = APIRouter()

@@ -226,4 +231,39 @@ def get_permissions_router() -> APIRouter:
             status_code=200, content={"message": "Tenant created.", "tenant_id": str(tenant_id)}
         )

+    @permissions_router.post("/tenants/select")
+    async def select_tenant(payload: SelectTenantDTO, user: User = Depends(get_authenticated_user)):
+        """
+        Select current tenant.
+
+        This endpoint selects a tenant with the specified UUID. Tenants are used
+        to organize users and resources in multi-tenant environments, providing
+        isolation and access control between different groups or organizations.
+
+        Sending a null/None value as tenant_id selects the user's default single-user tenant.
+
+        ## Request Parameters
+        - **tenant_id** (Union[UUID, None]): UUID of the tenant to select. If null/None is provided, the default single-user tenant is used.
+
+        ## Response
+        Returns a success message along with the selected tenant id.
+        """
+        send_telemetry(
+            "Permissions API Endpoint Invoked",
+            user.id,
+            additional_properties={
+                "endpoint": f"POST /v1/permissions/tenants/{str(payload.tenant_id)}",
+                "tenant_id": str(payload.tenant_id),
+            },
+        )
+
+        from cognee.modules.users.tenants.methods import select_tenant as select_tenant_method
+
+        await select_tenant_method(user_id=user.id, tenant_id=payload.tenant_id)
+
+        return JSONResponse(
+            status_code=200,
+            content={"message": "Tenant selected.", "tenant_id": str(payload.tenant_id)},
+        )
+
     return permissions_router
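Client-side, tenant selection might look like the following sketch (URL and auth assumed; depending on `InDTO`'s field-alias rules the key may need to be camelCased as `tenantId`):

```python
import requests

BASE_URL = "http://localhost:8000"  # assumed deployment URL
headers = {"Authorization": "Bearer <token>"}  # assumed auth scheme

# Switch the current session to a specific tenant.
requests.post(
    f"{BASE_URL}/api/v1/permissions/tenants/select",
    json={"tenant_id": "123e4567-e89b-12d3-a456-426614174000"},
    headers=headers,
).raise_for_status()

# Passing null falls back to the user's default single-user tenant.
requests.post(
    f"{BASE_URL}/api/v1/permissions/tenants/select",
    json={"tenant_id": None},
    headers=headers,
).raise_for_status()
```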
cognee/cli/commands/cognify_command.py
CHANGED
@@ -22,7 +22,7 @@ relationships, and creates semantic connections for enhanced search and reasoning

 Processing Pipeline:
 1. **Document Classification**: Identifies document types and structures
-2. **Permission Validation**: Ensures user has processing rights
+2. **Permission Validation**: Ensures user has processing rights
 3. **Text Chunking**: Breaks content into semantically meaningful segments
 4. **Entity Extraction**: Identifies key concepts, people, places, organizations
 5. **Relationship Detection**: Discovers connections between entities
@@ -97,6 +97,13 @@ After successful cognify processing, use `cognee search` to query the knowledge
             chunker_class = LangchainChunker
         except ImportError:
             fmt.warning("LangchainChunker not available, using TextChunker")
+    elif args.chunker == "CsvChunker":
+        try:
+            from cognee.modules.chunking.CsvChunker import CsvChunker
+
+            chunker_class = CsvChunker
+        except ImportError:
+            fmt.warning("CsvChunker not available, using TextChunker")

     result = await cognee.cognify(
         datasets=datasets,
cognee/cli/config.py
CHANGED
@@ -26,7 +26,7 @@ SEARCH_TYPE_CHOICES = [
 ]

 # Chunker choices
-CHUNKER_CHOICES = ["TextChunker", "LangchainChunker"]
+CHUNKER_CHOICES = ["TextChunker", "LangchainChunker", "CsvChunker"]

 # Output format choices
 OUTPUT_FORMAT_CHOICES = ["json", "pretty", "simple"]
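With `CsvChunker` now a valid `--chunker` choice, the equivalent programmatic path might look like this sketch (assuming `cognify` forwards a `chunker` argument the same way the CLI command passes `chunker_class`; the CSV path points at the test fixture added in this release):

```python
import asyncio

import cognee
from cognee.modules.chunking.CsvChunker import CsvChunker


async def main():
    # Ingest a CSV, then build the graph chunking row-by-row.
    await cognee.add("cognee/tests/test_data/example_with_header.csv")
    await cognee.cognify(chunker=CsvChunker)


asyncio.run(main())
```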
cognee/context_global_variables.py
CHANGED
@@ -4,6 +4,8 @@ from typing import Union
 from uuid import UUID

 from cognee.base_config import get_base_config
+from cognee.infrastructure.databases.vector.config import get_vectordb_context_config
+from cognee.infrastructure.databases.graph.config import get_graph_context_config
 from cognee.infrastructure.databases.utils import get_or_create_dataset_database
 from cognee.infrastructure.files.storage.config import file_storage_config
 from cognee.modules.users.methods import get_user
@@ -14,11 +16,40 @@ vector_db_config = ContextVar("vector_db_config", default=None)
 graph_db_config = ContextVar("graph_db_config", default=None)
 session_user = ContextVar("session_user", default=None)

+VECTOR_DBS_WITH_MULTI_USER_SUPPORT = ["lancedb", "falkor"]
+GRAPH_DBS_WITH_MULTI_USER_SUPPORT = ["kuzu", "falkor"]
+

 async def set_session_user_context_variable(user):
     session_user.set(user)


+def multi_user_support_possible():
+    graph_db_config = get_graph_context_config()
+    vector_db_config = get_vectordb_context_config()
+    return (
+        graph_db_config["graph_database_provider"] in GRAPH_DBS_WITH_MULTI_USER_SUPPORT
+        and vector_db_config["vector_db_provider"] in VECTOR_DBS_WITH_MULTI_USER_SUPPORT
+    )
+
+
+def backend_access_control_enabled():
+    backend_access_control = os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL", None)
+    if backend_access_control is None:
+        # If backend access control is not defined in environment variables,
+        # enable it by default if graph and vector DBs can support it, otherwise disable it
+        return multi_user_support_possible()
+    elif backend_access_control.lower() == "true":
+        # If enabled, ensure that the current graph and vector DBs can support it
+        multi_user_support = multi_user_support_possible()
+        if not multi_user_support:
+            raise EnvironmentError(
+                "ENABLE_BACKEND_ACCESS_CONTROL is set to true but the current graph and/or vector databases do not support multi-user access control. Please use supported databases or disable backend access control."
+            )
+        return True
+    return False
+
+
 async def set_database_global_context_variables(dataset: Union[str, UUID], user_id: UUID):
     """
     If backend access control is enabled this function will ensure all datasets have their own databases,
@@ -38,9 +69,7 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_id: UUID):

     """

-
-
-    if not os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
+    if not backend_access_control_enabled():
         return

     user = await get_user(user_id)
@@ -48,6 +77,7 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_id: UUID):
     # To ensure permissions are enforced properly all datasets will have their own databases
     dataset_database = await get_or_create_dataset_database(dataset, user)

+    base_config = get_base_config()
     data_root_directory = os.path.join(
         base_config.data_root_directory, str(user.tenant_id or user.id)
     )
@@ -57,15 +87,17 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_id: UUID):

     # Set vector and graph database configuration based on dataset database information
     vector_config = {
-        "
-
-
-        "
-        "vector_db_provider": "lancedb",
+        "vector_db_provider": dataset_database.vector_database_provider,
+        "vector_db_url": dataset_database.vector_database_url,
+        "vector_db_key": dataset_database.vector_database_key,
+        "vector_db_name": dataset_database.vector_database_name,
     }

     graph_config = {
-        "graph_database_provider":
+        "graph_database_provider": dataset_database.graph_database_provider,
+        "graph_database_url": dataset_database.graph_database_url,
+        "graph_database_name": dataset_database.graph_database_name,
+        "graph_database_key": dataset_database.graph_database_key,
         "graph_file_path": os.path.join(
             databases_directory_path, dataset_database.graph_database_name
         ),
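The net effect of the two helpers is a small decision table; here is a self-contained, hedged sketch of that logic using the provider lists from this diff (the function is a standalone rewrite for illustration, not the module's actual signature):

```python
import os

VECTOR_DBS_WITH_MULTI_USER_SUPPORT = ["lancedb", "falkor"]
GRAPH_DBS_WITH_MULTI_USER_SUPPORT = ["kuzu", "falkor"]


def access_control_enabled(graph_provider: str, vector_provider: str) -> bool:
    # Mirrors backend_access_control_enabled(), with providers passed in
    # explicitly instead of read from the context configs.
    supported = (
        graph_provider in GRAPH_DBS_WITH_MULTI_USER_SUPPORT
        and vector_provider in VECTOR_DBS_WITH_MULTI_USER_SUPPORT
    )
    flag = os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL")
    if flag is None:
        return supported  # unset: default to whatever the databases support
    if flag.lower() == "true":
        if not supported:
            raise EnvironmentError("Databases do not support multi-user access control")
        return True
    return False


assert access_control_enabled("kuzu", "lancedb") is True    # auto-enabled
assert access_control_enabled("neo4j", "lancedb") is False  # auto-disabled
os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "false"
assert access_control_enabled("kuzu", "lancedb") is False   # explicit opt-out
```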
cognee/infrastructure/databases/cache/config.py
CHANGED
@@ -1,6 +1,6 @@
 from pydantic_settings import BaseSettings, SettingsConfigDict
 from functools import lru_cache
-from typing import Optional
+from typing import Optional, Literal


 class CacheConfig(BaseSettings):
@@ -15,6 +15,7 @@ class CacheConfig(BaseSettings):
     - agentic_lock_timeout: Maximum time (in seconds) to wait for the lock release.
     """

+    cache_backend: Literal["redis", "fs"] = "fs"
     caching: bool = False
     shared_kuzu_lock: bool = False
     cache_host: str = "localhost"
@@ -28,6 +29,7 @@ class CacheConfig(BaseSettings):

     def to_dict(self) -> dict:
         return {
+            "cache_backend": self.cache_backend,
             "caching": self.caching,
             "shared_kuzu_lock": self.shared_kuzu_lock,
             "cache_host": self.cache_host,
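Since `CacheConfig` is a pydantic-settings `BaseSettings` class, the new switch can presumably be set from the environment; a sketch, assuming the default field-name-to-env-var mapping with no prefix:

```python
import os

# "fs" selects the new filesystem cache adapter; "redis" keeps the Redis backend.
os.environ["CACHE_BACKEND"] = "fs"  # env var name assumed from the field name

from cognee.infrastructure.databases.cache.config import CacheConfig

config = CacheConfig()
print(config.cache_backend)               # "fs"
print(config.to_dict()["cache_backend"])  # included in the serialized config
```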