cognee 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/v1/cloud/routers/get_checks_router.py +1 -1
- cognee/api/v1/cognify/cognify.py +44 -7
- cognee/api/v1/cognify/routers/get_cognify_router.py +2 -1
- cognee/api/v1/prune/prune.py +2 -2
- cognee/api/v1/search/search.py +1 -1
- cognee/api/v1/sync/sync.py +16 -5
- cognee/base_config.py +19 -1
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
- cognee/infrastructure/databases/graph/kuzu/remote_kuzu_adapter.py +4 -1
- cognee/infrastructure/databases/relational/ModelBase.py +2 -1
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -6
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +6 -5
- cognee/infrastructure/files/storage/LocalFileStorage.py +50 -0
- cognee/infrastructure/files/storage/S3FileStorage.py +56 -9
- cognee/infrastructure/files/storage/StorageManager.py +18 -0
- cognee/infrastructure/files/utils/get_file_metadata.py +6 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +4 -2
- cognee/modules/cloud/operations/check_api_key.py +4 -1
- cognee/modules/data/deletion/prune_system.py +5 -1
- cognee/modules/data/methods/create_authorized_dataset.py +9 -0
- cognee/modules/data/methods/get_authorized_dataset.py +1 -1
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
- cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
- cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
- cognee/modules/notebooks/methods/create_notebook.py +34 -0
- cognee/modules/notebooks/methods/get_notebooks.py +27 -1
- cognee/modules/notebooks/models/Notebook.py +206 -1
- cognee/modules/observability/get_observe.py +14 -0
- cognee/modules/observability/observers.py +1 -0
- cognee/modules/ontology/base_ontology_resolver.py +42 -0
- cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
- cognee/modules/ontology/matching_strategies.py +53 -0
- cognee/modules/ontology/models.py +20 -0
- cognee/modules/ontology/ontology_config.py +24 -0
- cognee/modules/ontology/ontology_env_config.py +45 -0
- cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +13 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +1 -1
- cognee/modules/pipelines/models/PipelineRunInfo.py +7 -2
- cognee/modules/retrieval/temporal_retriever.py +2 -2
- cognee/modules/search/methods/get_search_type_tools.py +7 -0
- cognee/modules/search/methods/search.py +12 -13
- cognee/modules/search/utils/prepare_search_result.py +28 -6
- cognee/modules/search/utils/transform_context_to_graph.py +1 -1
- cognee/modules/search/utils/transform_insights_to_graph.py +28 -0
- cognee/modules/users/methods/create_user.py +4 -24
- cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
- cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
- cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +19 -2
- cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
- cognee/modules/users/permissions/methods/get_principal.py +9 -0
- cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
- cognee/modules/users/permissions/methods/get_role.py +10 -0
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
- cognee/modules/users/permissions/methods/get_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
- cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
- cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
- cognee/modules/users/roles/methods/create_role.py +10 -0
- cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
- cognee/modules/users/tenants/methods/create_tenant.py +10 -0
- cognee/root_dir.py +5 -0
- cognee/shared/cache.py +346 -0
- cognee/shared/utils.py +12 -0
- cognee/tasks/graph/extract_graph_from_data.py +53 -10
- cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
- cognee/tasks/ingestion/save_data_item_to_storage.py +1 -0
- cognee/tasks/temporal_graph/models.py +11 -6
- cognee/tests/cli_tests/cli_unit_tests/test_cli_main.py +5 -5
- cognee/tests/test_cognee_server_start.py +4 -4
- cognee/tests/test_temporal_graph.py +6 -34
- cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
- cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +399 -0
- {cognee-0.3.3.dist-info → cognee-0.3.4.dist-info}/METADATA +11 -8
- {cognee-0.3.3.dist-info → cognee-0.3.4.dist-info}/RECORD +81 -73
- cognee/modules/notebooks/methods/create_tutorial_notebook.py +0 -92
- {cognee-0.3.3.dist-info → cognee-0.3.4.dist-info}/WHEEL +0 -0
- {cognee-0.3.3.dist-info → cognee-0.3.4.dist-info}/entry_points.txt +0 -0
- {cognee-0.3.3.dist-info → cognee-0.3.4.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.3.dist-info → cognee-0.3.4.dist-info}/licenses/NOTICE.md +0 -0
cognee/api/v1/cognify/cognify.py
CHANGED
@@ -3,6 +3,7 @@ from pydantic import BaseModel
 from typing import Union, Optional
 from uuid import UUID
 
+from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
 from cognee.shared.logging_utils import get_logger
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.infrastructure.llm import get_max_chunk_tokens
@@ -10,7 +11,11 @@ from cognee.infrastructure.llm import get_max_chunk_tokens
 from cognee.modules.pipelines import run_pipeline
 from cognee.modules.pipelines.tasks.task import Task
 from cognee.modules.chunking.TextChunker import TextChunker
-from cognee.modules.ontology.
+from cognee.modules.ontology.ontology_config import Config
+from cognee.modules.ontology.get_default_ontology_resolver import (
+    get_default_ontology_resolver,
+    get_ontology_resolver_from_env,
+)
 from cognee.modules.users.models import User
 
 from cognee.tasks.documents import (
@@ -39,7 +44,7 @@ async def cognify(
     graph_model: BaseModel = KnowledgeGraph,
     chunker=TextChunker,
     chunk_size: int = None,
-
+    config: Config = None,
     vector_db_config: dict = None,
     graph_db_config: dict = None,
     run_in_background: bool = False,
@@ -100,8 +105,6 @@ async def cognify(
             Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2)
             Default limits: ~512-8192 tokens depending on models.
             Smaller chunks = more granular but potentially fragmented knowledge.
-        ontology_file_path: Path to RDF/OWL ontology file for domain-specific entity types.
-            Useful for specialized fields like medical or legal documents.
         vector_db_config: Custom vector database configuration for embeddings storage.
         graph_db_config: Custom graph database configuration for relationship storage.
         run_in_background: If True, starts processing asynchronously and returns immediately.
@@ -188,11 +191,28 @@ async def cognify(
         - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
         - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
     """
+    if config is None:
+        ontology_config = get_ontology_env_config()
+        if (
+            ontology_config.ontology_file_path
+            and ontology_config.ontology_resolver
+            and ontology_config.matching_strategy
+        ):
+            config: Config = {
+                "ontology_config": {
+                    "ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict())
+                }
+            }
+        else:
+            config: Config = {
+                "ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
+            }
+
     if temporal_cognify:
         tasks = await get_temporal_tasks(user, chunker, chunk_size)
     else:
         tasks = await get_default_tasks(
-            user, graph_model, chunker, chunk_size,
+            user, graph_model, chunker, chunk_size, config, custom_prompt
         )
 
     # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
@@ -216,9 +236,26 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     graph_model: BaseModel = KnowledgeGraph,
     chunker=TextChunker,
     chunk_size: int = None,
-
+    config: Config = None,
     custom_prompt: Optional[str] = None,
 ) -> list[Task]:
+    if config is None:
+        ontology_config = get_ontology_env_config()
+        if (
+            ontology_config.ontology_file_path
+            and ontology_config.ontology_resolver
+            and ontology_config.matching_strategy
+        ):
+            config: Config = {
+                "ontology_config": {
+                    "ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict())
+                }
+            }
+        else:
+            config: Config = {
+                "ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
+            }
+
     default_tasks = [
         Task(classify_documents),
         Task(check_permissions_on_dataset, user=user, permissions=["write"]),
@@ -230,7 +267,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
         Task(
             extract_graph_from_data,
             graph_model=graph_model,
-
+            config=config,
             custom_prompt=custom_prompt,
             task_config={"batch_size": 10},
         ),  # Generate knowledge graphs from the document chunks.
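The ontology_file_path argument is removed from cognify(); callers now pass a config dict carrying an ontology resolver, and cognify() falls back to the environment-driven or default resolver when config is omitted. A minimal usage sketch, assuming only what the hunks above show (the Config dict shape and the ontology_file keyword); the document text and ontology file name are placeholders:

# Sketch: passing an explicit ontology resolver to cognify() in 0.3.4.
# The dict shape mirrors the Config built inside cognify() above; the file
# name and sample text are placeholders, not part of this release.
import asyncio

import cognee
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver


async def main():
    config = {
        "ontology_config": {
            "ontology_resolver": RDFLibOntologyResolver(ontology_file="domain_ontology.owl")
        }
    }
    await cognee.add("Patients with type 2 diabetes are often prescribed metformin.")
    await cognee.cognify(config=config)


if __name__ == "__main__":
    asyncio.run(main())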
cognee/api/v1/cognify/routers/get_cognify_router.py
CHANGED

@@ -3,6 +3,7 @@ import asyncio
 from uuid import UUID
 from pydantic import Field
 from typing import List, Optional
+from fastapi.encoders import jsonable_encoder
 from fastapi.responses import JSONResponse
 from fastapi import APIRouter, WebSocket, Depends, WebSocketDisconnect
 from starlette.status import WS_1000_NORMAL_CLOSURE, WS_1008_POLICY_VIOLATION
@@ -119,7 +120,7 @@ def get_cognify_router() -> APIRouter:
 
             # If any cognify run errored return JSONResponse with proper error status code
             if any(isinstance(v, PipelineRunErrored) for v in cognify_run.values()):
-                return JSONResponse(status_code=420, content=cognify_run)
+                return JSONResponse(status_code=420, content=jsonable_encoder(cognify_run))
             return cognify_run
         except Exception as error:
             return JSONResponse(status_code=409, content={"error": str(error)})
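The router fix wraps the pipeline run payload in FastAPI's jsonable_encoder before building the error response. JSONResponse renders with the standard json encoder, which raises on UUIDs, datetimes, and Pydantic models that pipeline run info contains, so the old status-420 branch could itself crash. A self-contained illustration, with payload fields invented for the example:

# Why jsonable_encoder is needed: JSONResponse serializes with json.dumps,
# which cannot handle UUID or datetime values on its own.
from datetime import datetime, timezone
from uuid import uuid4

from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse

payload = {"dataset_id": uuid4(), "started_at": datetime.now(timezone.utc), "status": "Errored"}

# JSONResponse(status_code=420, content=payload)  # would raise TypeError on the UUID
response = JSONResponse(status_code=420, content=jsonable_encoder(payload))  # works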
cognee/api/v1/prune/prune.py
CHANGED
@@ -7,8 +7,8 @@ class prune:
         await _prune_data()
 
     @staticmethod
-    async def prune_system(graph=True, vector=True, metadata=False):
-        await _prune_system(graph, vector, metadata)
+    async def prune_system(graph=True, vector=True, metadata=False, cache=True):
+        await _prune_system(graph, vector, metadata, cache)
 
 
 if __name__ == "__main__":
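prune_system now accepts a cache flag, True by default, which is forwarded to the deletion layer so the new shared cache (cognee/shared/cache.py in the file list) is cleared along with the graph, vector, and relational stores. A short usage sketch, assuming the top-level cognee.prune API:

# Sketch: full reset including the new cache store (cache=True is the default).
import asyncio

import cognee


async def reset_everything():
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(graph=True, vector=True, metadata=True, cache=True)


asyncio.run(reset_everything())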
cognee/api/v1/search/search.py
CHANGED
@@ -22,7 +22,7 @@ async def search(
     node_type: Optional[Type] = NodeSet,
     node_name: Optional[List[str]] = None,
     save_interaction: bool = False,
-    last_k: Optional[int] =
+    last_k: Optional[int] = 1,
     only_context: bool = False,
     use_combined_context: bool = False,
 ) -> Union[List[SearchResult], CombinedSearchResult]:
cognee/api/v1/sync/sync.py
CHANGED
@@ -23,6 +23,7 @@ from cognee.modules.sync.methods import (
     mark_sync_completed,
     mark_sync_failed,
 )
+from cognee.shared.utils import create_secure_ssl_context
 
 logger = get_logger("sync")
 
@@ -583,7 +584,9 @@ async def _check_hashes_diff(
     logger.info(f"Checking missing hashes on cloud for dataset {dataset.id}")
 
     try:
-
+        ssl_context = create_secure_ssl_context()
+        connector = aiohttp.TCPConnector(ssl=ssl_context)
+        async with aiohttp.ClientSession(connector=connector) as session:
             async with session.post(url, json=payload.dict(), headers=headers) as response:
                 if response.status == 200:
                     data = await response.json()
@@ -630,7 +633,9 @@ async def _download_missing_files(
 
     headers = {"X-Api-Key": auth_token}
 
-
+    ssl_context = create_secure_ssl_context()
+    connector = aiohttp.TCPConnector(ssl=ssl_context)
+    async with aiohttp.ClientSession(connector=connector) as session:
         for file_hash in hashes_missing_on_local:
             try:
                 # Download file from cloud by hash
@@ -749,7 +754,9 @@ async def _upload_missing_files(
 
     headers = {"X-Api-Key": auth_token}
 
-
+    ssl_context = create_secure_ssl_context()
+    connector = aiohttp.TCPConnector(ssl=ssl_context)
+    async with aiohttp.ClientSession(connector=connector) as session:
         for file_info in files_to_upload:
             try:
                 file_dir = os.path.dirname(file_info.raw_data_location)
@@ -809,7 +816,9 @@ async def _prune_cloud_dataset(
     logger.info("Pruning cloud dataset to match local state")
 
     try:
-
+        ssl_context = create_secure_ssl_context()
+        connector = aiohttp.TCPConnector(ssl=ssl_context)
+        async with aiohttp.ClientSession(connector=connector) as session:
             async with session.put(url, json=payload.dict(), headers=headers) as response:
                 if response.status == 200:
                     data = await response.json()
@@ -852,7 +861,9 @@ async def _trigger_remote_cognify(
     logger.info(f"Triggering cognify processing for dataset {dataset_id}")
 
     try:
-
+        ssl_context = create_secure_ssl_context()
+        connector = aiohttp.TCPConnector(ssl=ssl_context)
+        async with aiohttp.ClientSession(connector=connector) as session:
             async with session.post(url, json=payload, headers=headers) as response:
                 if response.status == 200:
                     data = await response.json()
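Every outbound aiohttp session in the sync flow is now built from a shared create_secure_ssl_context() helper instead of the connector defaults, and the same pattern repeats in the Kuzu, Ollama, and cloud modules below. The helper's body is not part of this diff; a plausible minimal implementation on top of the standard library could look like this sketch (an assumption, not the shipped code):

# Hedged sketch of what cognee.shared.utils.create_secure_ssl_context could do.
# The real helper may differ (CA bundle source, extra options); this is illustrative.
import ssl

import certifi


def create_secure_ssl_context() -> ssl.SSLContext:
    # Verifying context with hostname checks, pinned to certifi's CA bundle.
    context = ssl.create_default_context(cafile=certifi.where())
    context.check_hostname = True
    context.verify_mode = ssl.CERT_REQUIRED
    return context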
cognee/base_config.py
CHANGED
@@ -10,13 +10,30 @@ import pydantic
 class BaseConfig(BaseSettings):
     data_root_directory: str = get_absolute_path(".data_storage")
     system_root_directory: str = get_absolute_path(".cognee_system")
-
+    cache_root_directory: str = get_absolute_path(".cognee_cache")
+    monitoring_tool: object = Observer.NONE
 
     @pydantic.model_validator(mode="after")
     def validate_paths(self):
+        # Adding this here temporarily to ensure that the cache root directory is set correctly for S3 storage automatically
+        # I'll remove this after we update documentation for S3 storage
+        # Auto-configure cache root directory for S3 storage if not explicitly set
+        storage_backend = os.getenv("STORAGE_BACKEND", "").lower()
+        cache_root_env = os.getenv("CACHE_ROOT_DIRECTORY")
+
+        if storage_backend == "s3" and not cache_root_env:
+            # Auto-generate S3 cache path when using S3 storage
+            bucket_name = os.getenv("STORAGE_BUCKET_NAME")
+            if bucket_name:
+                self.cache_root_directory = f"s3://{bucket_name}/cognee/cache"
+
         # Require absolute paths for root directories
         self.data_root_directory = ensure_absolute_path(self.data_root_directory)
         self.system_root_directory = ensure_absolute_path(self.system_root_directory)
+        # Set monitoring tool based on available keys
+        if self.langfuse_public_key and self.langfuse_secret_key:
+            self.monitoring_tool = Observer.LANGFUSE
+
         return self
 
     langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY")
@@ -31,6 +48,7 @@ class BaseConfig(BaseSettings):
             "data_root_directory": self.data_root_directory,
             "system_root_directory": self.system_root_directory,
             "monitoring_tool": self.monitoring_tool,
+            "cache_root_directory": self.cache_root_directory,
         }
 
 
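BaseConfig gains a cache_root_directory setting, and validate_paths now derives an s3:// cache path automatically when STORAGE_BACKEND=s3, STORAGE_BUCKET_NAME is set, and no CACHE_ROOT_DIRECTORY override exists. A small sketch of that env-driven behavior; the bucket name is a placeholder and the environment must be set before the settings object is created:

# Sketch: S3 cache auto-configuration in BaseConfig.validate_paths (bucket name is a placeholder).
import os

os.environ["STORAGE_BACKEND"] = "s3"
os.environ["STORAGE_BUCKET_NAME"] = "my-cognee-bucket"
os.environ.pop("CACHE_ROOT_DIRECTORY", None)

from cognee.base_config import BaseConfig

config = BaseConfig()
print(config.cache_root_directory)  # expected: s3://my-cognee-bucket/cognee/cache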
cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py
CHANGED

@@ -5,7 +5,7 @@ from cognee.modules.chunking.TextChunker import TextChunker
 from cognee.tasks.graph import extract_graph_from_data
 from cognee.tasks.storage import add_data_points
 from cognee.shared.data_models import KnowledgeGraph
-from cognee.modules.ontology.rdf_xml.
+from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
 
 
 async def get_default_tasks_by_indices(
@@ -33,7 +33,7 @@ async def get_no_summary_tasks(
     # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
     base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
 
-    ontology_adapter =
+    ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path)
 
     graph_task = Task(
         extract_graph_from_data,
cognee/infrastructure/databases/graph/kuzu/remote_kuzu_adapter.py
CHANGED

@@ -7,6 +7,7 @@ import aiohttp
 from uuid import UUID
 
 from cognee.infrastructure.databases.graph.kuzu.adapter import KuzuAdapter
+from cognee.shared.utils import create_secure_ssl_context
 
 logger = get_logger()
 
@@ -42,7 +43,9 @@ class RemoteKuzuAdapter(KuzuAdapter):
     async def _get_session(self) -> aiohttp.ClientSession:
         """Get or create an aiohttp session."""
         if self._session is None or self._session.closed:
-
+            ssl_context = create_secure_ssl_context()
+            connector = aiohttp.TCPConnector(ssl=ssl_context)
+            self._session = aiohttp.ClientSession(connector=connector)
         return self._session
 
     async def close(self):
cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py
CHANGED

@@ -83,7 +83,7 @@ def process_data_for_chroma(data):
         elif isinstance(value, list):
             # Store lists as JSON strings with special prefix
             processed_data[f"{key}__list"] = json.dumps(value)
-        elif isinstance(value, (str, int, float, bool))
+        elif isinstance(value, (str, int, float, bool)):
            processed_data[key] = value
         else:
             processed_data[key] = str(value)
@@ -553,8 +553,4 @@ class ChromaDBAdapter(VectorDBInterface):
         Returns a list of collection names.
         """
         client = await self.get_connection()
-
-        return [
-            collection.name if hasattr(collection, "name") else collection["name"]
-            for collection in collections
-        ]
+        return await client.list_collections()
cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py
CHANGED

@@ -14,6 +14,7 @@ from cognee.infrastructure.databases.vector.embeddings.embedding_rate_limiter im
     embedding_rate_limit_async,
     embedding_sleep_and_retry_async,
 )
+from cognee.shared.utils import create_secure_ssl_context
 
 logger = get_logger("OllamaEmbeddingEngine")
 
@@ -94,16 +95,16 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
         """
         Internal method to call the Ollama embeddings endpoint for a single prompt.
         """
-        payload = {
-
-            "prompt": prompt,
-        }
+        payload = {"model": self.model, "prompt": prompt, "input": prompt}
+
         headers = {}
         api_key = os.getenv("LLM_API_KEY")
         if api_key:
             headers["Authorization"] = f"Bearer {api_key}"
 
-
+        ssl_context = create_secure_ssl_context()
+        connector = aiohttp.TCPConnector(ssl=ssl_context)
+        async with aiohttp.ClientSession(connector=connector) as session:
             async with session.post(
                 self.endpoint, json=payload, headers=headers, timeout=60.0
             ) as response:
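The Ollama request body now carries the model name plus both prompt and input keys. Sending both appears to keep one payload compatible with the legacy /api/embeddings route, which reads prompt, and the newer /api/embed route, which reads input; that reading is inferred from the change rather than stated in the diff. The resulting body, with a placeholder model name:

# Illustration only: one payload shape that either Ollama embedding route can consume.
payload = {
    "model": "nomic-embed-text",                 # placeholder model name
    "prompt": "cognee builds knowledge graphs",  # read by the legacy /api/embeddings route
    "input": "cognee builds knowledge graphs",   # read by the newer /api/embed route
}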
cognee/infrastructure/files/storage/LocalFileStorage.py
CHANGED

@@ -253,6 +253,56 @@ class LocalFileStorage(Storage):
         if os.path.exists(full_file_path):
             os.remove(full_file_path)
 
+    def list_files(self, directory_path: str, recursive: bool = False) -> list[str]:
+        """
+        List all files in the specified directory.
+
+        Parameters:
+        -----------
+        - directory_path (str): The directory path to list files from
+        - recursive (bool): If True, list files recursively in subdirectories
+
+        Returns:
+        --------
+        - list[str]: List of file paths relative to the storage root
+        """
+        from pathlib import Path
+
+        parsed_storage_path = get_parsed_path(self.storage_path)
+
+        if directory_path:
+            full_directory_path = os.path.join(parsed_storage_path, directory_path)
+        else:
+            full_directory_path = parsed_storage_path
+
+        directory_pathlib = Path(full_directory_path)
+
+        if not directory_pathlib.exists() or not directory_pathlib.is_dir():
+            return []
+
+        files = []
+
+        if recursive:
+            # Use rglob for recursive search
+            for file_path in directory_pathlib.rglob("*"):
+                if file_path.is_file():
+                    # Get relative path from storage root
+                    relative_path = os.path.relpath(str(file_path), parsed_storage_path)
+                    # Normalize path separators for consistency
+                    relative_path = relative_path.replace(os.sep, "/")
+                    files.append(relative_path)
+        else:
+            # Use iterdir for just immediate directory
+            for file_path in directory_pathlib.iterdir():
+                if file_path.is_file():
+                    # Get relative path from storage root
+                    relative_path = os.path.relpath(str(file_path), parsed_storage_path)
+                    # Normalize path separators for consistency
+                    relative_path = relative_path.replace(os.sep, "/")
+                    files.append(relative_path)
+
+        return files
+
     def remove_all(self, tree_path: str = None):
         """
         Remove an entire directory tree at the specified path, including all files and
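LocalFileStorage gains a synchronous list_files that returns paths relative to the storage root, normalized to forward slashes. A quick usage sketch; the storage root and subdirectory are placeholders, and the constructor is assumed to take the storage root path:

# Usage sketch for the new LocalFileStorage.list_files (paths are placeholders).
from cognee.infrastructure.files.storage.LocalFileStorage import LocalFileStorage

storage = LocalFileStorage("/tmp/cognee_data")  # assumed constructor argument: storage root
top_level = storage.list_files("")                              # files directly under the root
dataset_files = storage.list_files("datasets", recursive=True)  # walk datasets/ recursively
print(top_level, dataset_files)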
cognee/infrastructure/files/storage/S3FileStorage.py
CHANGED

@@ -155,21 +155,19 @@ class S3FileStorage(Storage):
         """
         Ensure that the specified directory exists, creating it if necessary.
 
-
+        For S3 storage, this is a no-op since directories are created implicitly
+        when files are written to paths. S3 doesn't have actual directories,
+        just object keys with prefixes that appear as directories.
 
         Parameters:
         -----------
 
         - directory_path (str): The path of the directory to check or create.
         """
-
-
-
-
-            if not self.s3.exists(directory_path):
-                self.s3.makedirs(directory_path, exist_ok=True)
-
-        await run_async(ensure_directory)
+        # In S3, directories don't exist as separate entities - they're just prefixes
+        # When you write a file to s3://bucket/path/to/file.txt, the "directories"
+        # path/ and path/to/ are implicitly created. No explicit action needed.
+        pass
 
     async def copy_file(self, source_file_path: str, destination_file_path: str):
         """
@@ -213,6 +211,55 @@ class S3FileStorage(Storage):
 
         await run_async(remove_file)
 
+    async def list_files(self, directory_path: str, recursive: bool = False) -> list[str]:
+        """
+        List all files in the specified directory.
+
+        Parameters:
+        -----------
+        - directory_path (str): The directory path to list files from
+        - recursive (bool): If True, list files recursively in subdirectories
+
+        Returns:
+        --------
+        - list[str]: List of file paths relative to the storage root
+        """
+
+        def list_files_sync():
+            if directory_path:
+                # Combine storage path with directory path
+                full_path = os.path.join(self.storage_path.replace("s3://", ""), directory_path)
+            else:
+                full_path = self.storage_path.replace("s3://", "")
+
+            if recursive:
+                # Use ** for recursive search
+                pattern = f"{full_path}/**"
+            else:
+                # Just files in the immediate directory
+                pattern = f"{full_path}/*"
+
+            # Use s3fs glob to find files
+            try:
+                all_paths = self.s3.glob(pattern)
+                # Filter to only files (not directories)
+                files = [path for path in all_paths if self.s3.isfile(path)]
+
+                # Convert back to relative paths from storage root
+                storage_prefix = self.storage_path.replace("s3://", "")
+                relative_files = []
+                for file_path in files:
+                    if file_path.startswith(storage_prefix):
+                        relative_path = file_path[len(storage_prefix) :].lstrip("/")
+                        relative_files.append(relative_path)
+
+                return relative_files
+            except Exception:
+                # If directory doesn't exist or other error, return empty list
+                return []
+
+        return await run_async(list_files_sync)
+
     async def remove_all(self, tree_path: str):
         """
         Remove an entire directory tree at the specified path, including all files and
cognee/infrastructure/files/storage/StorageManager.py
CHANGED

@@ -135,6 +135,24 @@ class StorageManager:
         else:
             return self.storage.remove(file_path)
 
+    async def list_files(self, directory_path: str, recursive: bool = False) -> list[str]:
+        """
+        List all files in the specified directory.
+
+        Parameters:
+        -----------
+        - directory_path (str): The directory path to list files from
+        - recursive (bool): If True, list files recursively in subdirectories
+
+        Returns:
+        --------
+        - list[str]: List of file paths relative to the storage root
+        """
+        if inspect.iscoroutinefunction(self.storage.list_files):
+            return await self.storage.list_files(directory_path, recursive)
+        else:
+            return self.storage.list_files(directory_path, recursive)
+
     async def remove_all(self, tree_path: str = None):
         """
         Remove an entire directory tree at the specified path, including all files and
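StorageManager.list_files reuses the class's existing dispatch pattern: it awaits the backend only when the underlying storage implements list_files as a coroutine (the S3 backend above) and calls it directly otherwise (the local backend). A minimal standalone sketch of that dispatch idea, using illustrative classes rather than the cognee ones:

# Standalone sketch of the sync/async dispatch used by StorageManager.list_files.
import asyncio
import inspect


class SyncBackend:
    def list_files(self, directory_path, recursive=False):
        return ["a.txt"]


class AsyncBackend:
    async def list_files(self, directory_path, recursive=False):
        return ["b.txt"]


class Manager:
    def __init__(self, storage):
        self.storage = storage

    async def list_files(self, directory_path, recursive=False):
        # Await only when the backend method is a coroutine function.
        if inspect.iscoroutinefunction(self.storage.list_files):
            return await self.storage.list_files(directory_path, recursive)
        return self.storage.list_files(directory_path, recursive)


async def main():
    print(await Manager(SyncBackend()).list_files(""))   # ['a.txt']
    print(await Manager(AsyncBackend()).list_files(""))  # ['b.txt']


asyncio.run(main())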
cognee/infrastructure/files/utils/get_file_metadata.py
CHANGED

@@ -56,7 +56,12 @@ async def get_file_metadata(file: BinaryIO) -> FileMetadata:
     file_type = guess_file_type(file)
 
     file_path = getattr(file, "name", None) or getattr(file, "full_name", None)
-
+
+    if isinstance(file_path, str):
+        file_name = Path(file_path).stem if file_path else None
+    else:
+        # In case file_path does not exist or is a integer return None
+        file_name = None
 
     # Get file size
     pos = file.tell()  # remember current pointer
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py
CHANGED
@@ -12,6 +12,7 @@ from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.ll
 )
 
 from cognee.infrastructure.llm.LLMGateway import LLMGateway
+from cognee.infrastructure.llm.config import get_llm_config
 
 
 class AnthropicAdapter(LLMInterface):
@@ -27,7 +28,8 @@ class AnthropicAdapter(LLMInterface):
         import anthropic
 
         self.aclient = instructor.patch(
-            create=anthropic.AsyncAnthropic().messages.create,
+            create=anthropic.AsyncAnthropic(api_key=get_llm_config().llm_api_key).messages.create,
+            mode=instructor.Mode.ANTHROPIC_TOOLS,
         )
 
         self.model = model
@@ -57,7 +59,7 @@ class AnthropicAdapter(LLMInterface):
 
         return await self.aclient(
             model=self.model,
-
+            max_tokens=4096,
             max_retries=5,
             messages=[
                 {
cognee/modules/cloud/operations/check_api_key.py
CHANGED

@@ -1,6 +1,7 @@
 import aiohttp
 
 from cognee.modules.cloud.exceptions import CloudConnectionError
+from cognee.shared.utils import create_secure_ssl_context
 
 
 async def check_api_key(auth_token: str):
@@ -10,7 +11,9 @@ async def check_api_key(auth_token: str):
     headers = {"X-Api-Key": auth_token}
 
     try:
-
+        ssl_context = create_secure_ssl_context()
+        connector = aiohttp.TCPConnector(ssl=ssl_context)
+        async with aiohttp.ClientSession(connector=connector) as session:
             async with session.post(url, headers=headers) as response:
                 if response.status == 200:
                     return
cognee/modules/data/deletion/prune_system.py
CHANGED

@@ -1,9 +1,10 @@
 from cognee.infrastructure.databases.vector import get_vector_engine
 from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine
 from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.shared.cache import delete_cache
 
 
-async def prune_system(graph=True, vector=True, metadata=True):
+async def prune_system(graph=True, vector=True, metadata=True, cache=True):
     if graph:
         graph_engine = await get_graph_engine()
         await graph_engine.delete_graph()
@@ -15,3 +16,6 @@ async def prune_system(graph=True, vector=True, metadata=True):
     if metadata:
         db_engine = get_relational_engine()
         await db_engine.delete_database()
+
+    if cache:
+        await delete_cache()
cognee/modules/data/methods/create_authorized_dataset.py
CHANGED

@@ -6,6 +6,15 @@ from .create_dataset import create_dataset
 
 
 async def create_authorized_dataset(dataset_name: str, user: User) -> Dataset:
+    """
+    Create a new dataset and give all permissions on this dataset to the given user.
+    Args:
+        dataset_name: Name of the dataset.
+        user: The user object.
+
+    Returns:
+        Dataset: The new authorized dataset.
+    """
     db_engine = get_relational_engine()
 
     async with db_engine.get_async_session() as session:
|
@@ -15,7 +15,7 @@ async def get_authorized_dataset(
|
|
|
15
15
|
Get a specific dataset with permissions for a user.
|
|
16
16
|
|
|
17
17
|
Args:
|
|
18
|
-
|
|
18
|
+
user: User object
|
|
19
19
|
dataset_id (UUID): dataset id
|
|
20
20
|
permission_type (str): permission type(read, write, delete, share), default is read
|
|
21
21
|
|
|
cognee/modules/data/methods/get_authorized_dataset_by_name.py
CHANGED

@@ -11,6 +11,17 @@ from ..models import Dataset
 async def get_authorized_dataset_by_name(
     dataset_name: str, user: User, permission_type: str
 ) -> Optional[Dataset]:
+    """
+    Get a specific dataset with the given name, with permissions for a given user.
+
+    Args:
+        dataset_name: Name of the dataset.
+        user: User object.
+        permission_type (str): permission type(read, write, delete, share), default is read
+
+    Returns:
+        Optional[Dataset]: dataset with permissions
+    """
     authorized_datasets = await get_authorized_existing_datasets([], permission_type, user)
 
     return next((dataset for dataset in authorized_datasets if dataset.name == dataset_name), None)