cognee 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. cognee/api/v1/cloud/routers/get_checks_router.py +1 -1
  2. cognee/api/v1/cognify/cognify.py +44 -7
  3. cognee/api/v1/cognify/routers/get_cognify_router.py +2 -1
  4. cognee/api/v1/notebooks/routers/get_notebooks_router.py +2 -1
  5. cognee/api/v1/prune/prune.py +2 -2
  6. cognee/api/v1/search/search.py +1 -1
  7. cognee/api/v1/sync/sync.py +16 -5
  8. cognee/base_config.py +19 -1
  9. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
  10. cognee/infrastructure/databases/graph/kuzu/remote_kuzu_adapter.py +4 -1
  11. cognee/infrastructure/databases/relational/ModelBase.py +2 -1
  12. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -2
  13. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -6
  14. cognee/infrastructure/databases/vector/config.py +1 -1
  15. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +6 -5
  16. cognee/infrastructure/files/storage/LocalFileStorage.py +50 -0
  17. cognee/infrastructure/files/storage/S3FileStorage.py +56 -9
  18. cognee/infrastructure/files/storage/StorageManager.py +18 -0
  19. cognee/infrastructure/files/utils/get_file_metadata.py +6 -1
  20. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +4 -2
  21. cognee/infrastructure/utils/run_async.py +9 -4
  22. cognee/infrastructure/utils/run_sync.py +4 -3
  23. cognee/modules/cloud/operations/check_api_key.py +4 -1
  24. cognee/modules/data/deletion/prune_system.py +5 -1
  25. cognee/modules/data/methods/create_authorized_dataset.py +9 -0
  26. cognee/modules/data/methods/get_authorized_dataset.py +1 -1
  27. cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
  28. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
  29. cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
  30. cognee/modules/notebooks/methods/create_notebook.py +34 -0
  31. cognee/modules/notebooks/methods/get_notebook.py +2 -2
  32. cognee/modules/notebooks/methods/get_notebooks.py +27 -1
  33. cognee/modules/notebooks/methods/update_notebook.py +0 -1
  34. cognee/modules/notebooks/models/Notebook.py +206 -1
  35. cognee/modules/notebooks/operations/run_in_local_sandbox.py +8 -5
  36. cognee/modules/observability/get_observe.py +14 -0
  37. cognee/modules/observability/observers.py +1 -0
  38. cognee/modules/ontology/base_ontology_resolver.py +42 -0
  39. cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
  40. cognee/modules/ontology/matching_strategies.py +53 -0
  41. cognee/modules/ontology/models.py +20 -0
  42. cognee/modules/ontology/ontology_config.py +24 -0
  43. cognee/modules/ontology/ontology_env_config.py +45 -0
  44. cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
  45. cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +13 -0
  46. cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +1 -1
  47. cognee/modules/pipelines/models/PipelineRunInfo.py +7 -2
  48. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +1 -1
  49. cognee/modules/retrieval/graph_completion_cot_retriever.py +1 -1
  50. cognee/modules/retrieval/graph_completion_retriever.py +1 -1
  51. cognee/modules/retrieval/temporal_retriever.py +3 -3
  52. cognee/modules/retrieval/user_qa_feedback.py +1 -1
  53. cognee/modules/search/methods/get_search_type_tools.py +7 -0
  54. cognee/modules/search/methods/search.py +12 -13
  55. cognee/modules/search/utils/prepare_search_result.py +31 -9
  56. cognee/modules/search/utils/transform_context_to_graph.py +1 -1
  57. cognee/modules/search/utils/transform_insights_to_graph.py +28 -0
  58. cognee/modules/users/methods/create_user.py +4 -24
  59. cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
  60. cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
  61. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +19 -2
  62. cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
  63. cognee/modules/users/permissions/methods/get_principal.py +9 -0
  64. cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
  65. cognee/modules/users/permissions/methods/get_role.py +10 -0
  66. cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
  67. cognee/modules/users/permissions/methods/get_tenant.py +9 -0
  68. cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
  69. cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
  70. cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
  71. cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
  72. cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
  73. cognee/modules/users/roles/methods/create_role.py +10 -0
  74. cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
  75. cognee/modules/users/tenants/methods/create_tenant.py +10 -0
  76. cognee/root_dir.py +5 -0
  77. cognee/shared/cache.py +346 -0
  78. cognee/shared/utils.py +12 -0
  79. cognee/tasks/graph/extract_graph_from_data.py +53 -10
  80. cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
  81. cognee/tasks/ingestion/save_data_item_to_storage.py +1 -0
  82. cognee/tasks/temporal_graph/models.py +11 -6
  83. cognee/tests/cli_tests/cli_unit_tests/test_cli_main.py +5 -5
  84. cognee/tests/test_cognee_server_start.py +4 -4
  85. cognee/tests/test_temporal_graph.py +6 -34
  86. cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
  87. cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +399 -0
  88. {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/METADATA +11 -8
  89. {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/RECORD +93 -86
  90. cognee-0.3.4.dist-info/entry_points.txt +2 -0
  91. cognee/api/v1/save/save.py +0 -335
  92. cognee/tests/test_save_export_path.py +0 -116
  93. cognee-0.3.2.dist-info/entry_points.txt +0 -2
  94. {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/WHEEL +0 -0
  95. {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/licenses/LICENSE +0 -0
  96. {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/licenses/NOTICE.md +0 -0
cognee/api/v1/cloud/routers/get_checks_router.py CHANGED
@@ -16,7 +16,7 @@ def get_checks_router():
     api_token = request.headers.get("X-Api-Key")

     if api_token is None:
-        return CloudApiKeyMissingError()
+        raise CloudApiKeyMissingError()

     return await check_api_key(api_token)

cognee/api/v1/cognify/cognify.py CHANGED
@@ -3,6 +3,7 @@ from pydantic import BaseModel
 from typing import Union, Optional
 from uuid import UUID

+from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
 from cognee.shared.logging_utils import get_logger
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.infrastructure.llm import get_max_chunk_tokens
@@ -10,7 +11,11 @@ from cognee.infrastructure.llm import get_max_chunk_tokens
 from cognee.modules.pipelines import run_pipeline
 from cognee.modules.pipelines.tasks.task import Task
 from cognee.modules.chunking.TextChunker import TextChunker
-from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
+from cognee.modules.ontology.ontology_config import Config
+from cognee.modules.ontology.get_default_ontology_resolver import (
+    get_default_ontology_resolver,
+    get_ontology_resolver_from_env,
+)
 from cognee.modules.users.models import User

 from cognee.tasks.documents import (
@@ -39,7 +44,7 @@ async def cognify(
     graph_model: BaseModel = KnowledgeGraph,
     chunker=TextChunker,
     chunk_size: int = None,
-    ontology_file_path: Optional[str] = None,
+    config: Config = None,
     vector_db_config: dict = None,
     graph_db_config: dict = None,
     run_in_background: bool = False,
@@ -100,8 +105,6 @@ async def cognify(
             Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2)
             Default limits: ~512-8192 tokens depending on models.
             Smaller chunks = more granular but potentially fragmented knowledge.
-        ontology_file_path: Path to RDF/OWL ontology file for domain-specific entity types.
-            Useful for specialized fields like medical or legal documents.
         vector_db_config: Custom vector database configuration for embeddings storage.
         graph_db_config: Custom graph database configuration for relationship storage.
         run_in_background: If True, starts processing asynchronously and returns immediately.
@@ -188,11 +191,28 @@ async def cognify(
         - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
         - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
     """
+    if config is None:
+        ontology_config = get_ontology_env_config()
+        if (
+            ontology_config.ontology_file_path
+            and ontology_config.ontology_resolver
+            and ontology_config.matching_strategy
+        ):
+            config: Config = {
+                "ontology_config": {
+                    "ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict())
+                }
+            }
+        else:
+            config: Config = {
+                "ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
+            }
+
     if temporal_cognify:
         tasks = await get_temporal_tasks(user, chunker, chunk_size)
     else:
         tasks = await get_default_tasks(
-            user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt
+            user, graph_model, chunker, chunk_size, config, custom_prompt
        )

     # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
@@ -216,9 +236,26 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
     graph_model: BaseModel = KnowledgeGraph,
     chunker=TextChunker,
     chunk_size: int = None,
-    ontology_file_path: Optional[str] = None,
+    config: Config = None,
     custom_prompt: Optional[str] = None,
 ) -> list[Task]:
+    if config is None:
+        ontology_config = get_ontology_env_config()
+        if (
+            ontology_config.ontology_file_path
+            and ontology_config.ontology_resolver
+            and ontology_config.matching_strategy
+        ):
+            config: Config = {
+                "ontology_config": {
+                    "ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict())
+                }
+            }
+        else:
+            config: Config = {
+                "ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
+            }
+
     default_tasks = [
         Task(classify_documents),
         Task(check_permissions_on_dataset, user=user, permissions=["write"]),
@@ -230,7 +267,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
         Task(
             extract_graph_from_data,
             graph_model=graph_model,
-            ontology_adapter=OntologyResolver(ontology_file=ontology_file_path),
+            config=config,
             custom_prompt=custom_prompt,
             task_config={"batch_size": 10},
         ), # Generate knowledge graphs from the document chunks.
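Taken together, the cognify.py hunks replace the public ontology_file_path argument with a config dictionary that carries an ontology resolver; when config is omitted, the new code builds one from the ontology environment settings if they are populated and falls back to get_default_ontology_resolver() otherwise. A minimal call sketch under the new signature (the top-level cognee.cognify entry point and the datasets keyword are assumptions not shown in this diff; the resolver constructor comes from the RDFLibOntologyResolver rename further down):

# Hedged sketch of the new call shape; cognee.cognify and datasets= are assumed.
import cognee
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver

config = {
    "ontology_config": {
        "ontology_resolver": RDFLibOntologyResolver(ontology_file="domain_ontology.owl")
    }
}

await cognee.cognify(datasets=["my_dataset"], config=config)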
cognee/api/v1/cognify/routers/get_cognify_router.py CHANGED
@@ -3,6 +3,7 @@ import asyncio
 from uuid import UUID
 from pydantic import Field
 from typing import List, Optional
+from fastapi.encoders import jsonable_encoder
 from fastapi.responses import JSONResponse
 from fastapi import APIRouter, WebSocket, Depends, WebSocketDisconnect
 from starlette.status import WS_1000_NORMAL_CLOSURE, WS_1008_POLICY_VIOLATION
@@ -119,7 +120,7 @@ def get_cognify_router() -> APIRouter:

             # If any cognify run errored return JSONResponse with proper error status code
             if any(isinstance(v, PipelineRunErrored) for v in cognify_run.values()):
-                return JSONResponse(status_code=420, content=cognify_run)
+                return JSONResponse(status_code=420, content=jsonable_encoder(cognify_run))
             return cognify_run
         except Exception as error:
             return JSONResponse(status_code=409, content={"error": str(error)})
cognee/api/v1/notebooks/routers/get_notebooks_router.py CHANGED
@@ -31,7 +31,8 @@ def get_notebooks_router():

     @router.get("")
     async def get_notebooks_endpoint(user: User = Depends(get_authenticated_user)):
-        return await get_notebooks(user.id)
+        async with get_async_session() as session:
+            return await get_notebooks(user.id, session)

     @router.post("")
     async def create_notebook_endpoint(
cognee/api/v1/prune/prune.py CHANGED
@@ -7,8 +7,8 @@ class prune:
         await _prune_data()

     @staticmethod
-    async def prune_system(graph=True, vector=True, metadata=False):
-        await _prune_system(graph, vector, metadata)
+    async def prune_system(graph=True, vector=True, metadata=False, cache=True):
+        await _prune_system(graph, vector, metadata, cache)


 if __name__ == "__main__":
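prune_system gains a cache flag that defaults to True, so existing callers now clear the new cache directory as well. A short usage sketch (the cognee.prune entry point is assumed from the class shown above):

import cognee

# Keep the pre-0.3.4 behaviour by opting out of the new cache pruning.
await cognee.prune.prune_system(graph=True, vector=True, metadata=False, cache=False)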
cognee/api/v1/search/search.py CHANGED
@@ -22,7 +22,7 @@ async def search(
     node_type: Optional[Type] = NodeSet,
     node_name: Optional[List[str]] = None,
     save_interaction: bool = False,
-    last_k: Optional[int] = None,
+    last_k: Optional[int] = 1,
     only_context: bool = False,
     use_combined_context: bool = False,
 ) -> Union[List[SearchResult], CombinedSearchResult]:
cognee/api/v1/sync/sync.py CHANGED
@@ -23,6 +23,7 @@ from cognee.modules.sync.methods import (
     mark_sync_completed,
     mark_sync_failed,
 )
+from cognee.shared.utils import create_secure_ssl_context

 logger = get_logger("sync")

@@ -583,7 +584,9 @@ async def _check_hashes_diff(
     logger.info(f"Checking missing hashes on cloud for dataset {dataset.id}")

     try:
-        async with aiohttp.ClientSession() as session:
+        ssl_context = create_secure_ssl_context()
+        connector = aiohttp.TCPConnector(ssl=ssl_context)
+        async with aiohttp.ClientSession(connector=connector) as session:
             async with session.post(url, json=payload.dict(), headers=headers) as response:
                 if response.status == 200:
                     data = await response.json()
@@ -630,7 +633,9 @@ async def _download_missing_files(

     headers = {"X-Api-Key": auth_token}

-    async with aiohttp.ClientSession() as session:
+    ssl_context = create_secure_ssl_context()
+    connector = aiohttp.TCPConnector(ssl=ssl_context)
+    async with aiohttp.ClientSession(connector=connector) as session:
         for file_hash in hashes_missing_on_local:
             try:
                 # Download file from cloud by hash
@@ -749,7 +754,9 @@ async def _upload_missing_files(

     headers = {"X-Api-Key": auth_token}

-    async with aiohttp.ClientSession() as session:
+    ssl_context = create_secure_ssl_context()
+    connector = aiohttp.TCPConnector(ssl=ssl_context)
+    async with aiohttp.ClientSession(connector=connector) as session:
         for file_info in files_to_upload:
             try:
                 file_dir = os.path.dirname(file_info.raw_data_location)
@@ -809,7 +816,9 @@ async def _prune_cloud_dataset(
     logger.info("Pruning cloud dataset to match local state")

     try:
-        async with aiohttp.ClientSession() as session:
+        ssl_context = create_secure_ssl_context()
+        connector = aiohttp.TCPConnector(ssl=ssl_context)
+        async with aiohttp.ClientSession(connector=connector) as session:
             async with session.put(url, json=payload.dict(), headers=headers) as response:
                 if response.status == 200:
                     data = await response.json()
@@ -852,7 +861,9 @@ async def _trigger_remote_cognify(
     logger.info(f"Triggering cognify processing for dataset {dataset_id}")

     try:
-        async with aiohttp.ClientSession() as session:
+        ssl_context = create_secure_ssl_context()
+        connector = aiohttp.TCPConnector(ssl=ssl_context)
+        async with aiohttp.ClientSession(connector=connector) as session:
             async with session.post(url, json=payload, headers=headers) as response:
                 if response.status == 200:
                     data = await response.json()
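Every aiohttp call site in sync.py (and, further down, RemoteKuzuAdapter and OllamaEmbeddingEngine) now follows the same three-line pattern: build an SSL context with the new create_secure_ssl_context helper from cognee.shared.utils and hand it to a TCPConnector. The helper's body is not part of this diff, so the certifi-backed context below is an assumption; only the call-site pattern is confirmed:

# Minimal sketch of the recurring pattern, assuming a certifi-based helper.
import ssl
import aiohttp
import certifi


def create_secure_ssl_context() -> ssl.SSLContext:
    # Assumed implementation: default client context using certifi's CA bundle.
    return ssl.create_default_context(cafile=certifi.where())


async def post_json(url: str, payload: dict, headers: dict) -> dict:
    connector = aiohttp.TCPConnector(ssl=create_secure_ssl_context())
    async with aiohttp.ClientSession(connector=connector) as session:
        async with session.post(url, json=payload, headers=headers) as response:
            return await response.json()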
cognee/base_config.py CHANGED
@@ -10,13 +10,30 @@ import pydantic
 class BaseConfig(BaseSettings):
     data_root_directory: str = get_absolute_path(".data_storage")
     system_root_directory: str = get_absolute_path(".cognee_system")
-    monitoring_tool: object = Observer.LANGFUSE
+    cache_root_directory: str = get_absolute_path(".cognee_cache")
+    monitoring_tool: object = Observer.NONE

     @pydantic.model_validator(mode="after")
     def validate_paths(self):
+        # Adding this here temporarily to ensure that the cache root directory is set correctly for S3 storage automatically
+        # I'll remove this after we update documentation for S3 storage
+        # Auto-configure cache root directory for S3 storage if not explicitly set
+        storage_backend = os.getenv("STORAGE_BACKEND", "").lower()
+        cache_root_env = os.getenv("CACHE_ROOT_DIRECTORY")
+
+        if storage_backend == "s3" and not cache_root_env:
+            # Auto-generate S3 cache path when using S3 storage
+            bucket_name = os.getenv("STORAGE_BUCKET_NAME")
+            if bucket_name:
+                self.cache_root_directory = f"s3://{bucket_name}/cognee/cache"
+
         # Require absolute paths for root directories
         self.data_root_directory = ensure_absolute_path(self.data_root_directory)
         self.system_root_directory = ensure_absolute_path(self.system_root_directory)
+        # Set monitoring tool based on available keys
+        if self.langfuse_public_key and self.langfuse_secret_key:
+            self.monitoring_tool = Observer.LANGFUSE
+
         return self

     langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY")
@@ -31,6 +48,7 @@ class BaseConfig(BaseSettings):
             "data_root_directory": self.data_root_directory,
             "system_root_directory": self.system_root_directory,
             "monitoring_tool": self.monitoring_tool,
+            "cache_root_directory": self.cache_root_directory,
         }
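The validator above derives the cache root from environment variables when S3 storage is active. An illustration using the variable names that appear in the hunk:

import os

os.environ["STORAGE_BACKEND"] = "s3"
os.environ["STORAGE_BUCKET_NAME"] = "my-bucket"
os.environ.pop("CACHE_ROOT_DIRECTORY", None)  # left unset on purpose

# When BaseConfig is instantiated and validated, cache_root_directory becomes:
#   "s3://my-bucket/cognee/cache"
# If CACHE_ROOT_DIRECTORY is set, or the backend is not "s3", the auto-generated
# path is skipped and the .cognee_cache default (or the explicit setting) is used.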
 
cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py CHANGED
@@ -5,7 +5,7 @@ from cognee.modules.chunking.TextChunker import TextChunker
 from cognee.tasks.graph import extract_graph_from_data
 from cognee.tasks.storage import add_data_points
 from cognee.shared.data_models import KnowledgeGraph
-from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
+from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver


 async def get_default_tasks_by_indices(
@@ -33,7 +33,7 @@ async def get_no_summary_tasks(
     # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
     base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)

-    ontology_adapter = OntologyResolver(ontology_file=ontology_file_path)
+    ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path)

     graph_task = Task(
         extract_graph_from_data,
cognee/infrastructure/databases/graph/kuzu/remote_kuzu_adapter.py CHANGED
@@ -7,6 +7,7 @@ import aiohttp
 from uuid import UUID

 from cognee.infrastructure.databases.graph.kuzu.adapter import KuzuAdapter
+from cognee.shared.utils import create_secure_ssl_context

 logger = get_logger()

@@ -42,7 +43,9 @@ class RemoteKuzuAdapter(KuzuAdapter):
     async def _get_session(self) -> aiohttp.ClientSession:
         """Get or create an aiohttp session."""
         if self._session is None or self._session.closed:
-            self._session = aiohttp.ClientSession()
+            ssl_context = create_secure_ssl_context()
+            connector = aiohttp.TCPConnector(ssl=ssl_context)
+            self._session = aiohttp.ClientSession(connector=connector)
         return self._session

     async def close(self):
cognee/infrastructure/databases/relational/ModelBase.py CHANGED
@@ -1,7 +1,8 @@
 from sqlalchemy.orm import DeclarativeBase
+from sqlalchemy.ext.asyncio import AsyncAttrs


-class Base(DeclarativeBase):
+class Base(AsyncAttrs, DeclarativeBase):
     """
     Represents a base class for declarative models using SQLAlchemy.

cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py CHANGED
@@ -58,8 +58,8 @@ class SQLAlchemyAdapter:
         else:
             self.engine = create_async_engine(
                 connection_string,
-                pool_size=5,
-                max_overflow=10,
+                pool_size=20,
+                max_overflow=20,
                 pool_recycle=280,
                 pool_pre_ping=True,
                 pool_timeout=280,
cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py CHANGED
@@ -83,7 +83,7 @@ def process_data_for_chroma(data):
         elif isinstance(value, list):
             # Store lists as JSON strings with special prefix
             processed_data[f"{key}__list"] = json.dumps(value)
-        elif isinstance(value, (str, int, float, bool)) or value is None:
+        elif isinstance(value, (str, int, float, bool)):
             processed_data[key] = value
         else:
             processed_data[key] = str(value)
@@ -553,8 +553,4 @@ class ChromaDBAdapter(VectorDBInterface):
         Returns a list of collection names.
         """
         client = await self.get_connection()
-        collections = await client.list_collections()
-        return [
-            collection.name if hasattr(collection, "name") else collection["name"]
-            for collection in collections
-        ]
+        return await client.list_collections()
cognee/infrastructure/databases/vector/config.py CHANGED
@@ -39,7 +39,7 @@ class VectorConfig(BaseSettings):
             values.vector_db_url = ensure_absolute_path(
                 values.vector_db_url,
             )
-        else:
+        elif not values.vector_db_url:
             # Default path
             databases_directory_path = os.path.join(base_config.system_root_directory, "databases")
             values.vector_db_url = os.path.join(databases_directory_path, "cognee.lancedb")
cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py CHANGED
@@ -14,6 +14,7 @@ from cognee.infrastructure.databases.vector.embeddings.embedding_rate_limiter im
     embedding_rate_limit_async,
     embedding_sleep_and_retry_async,
 )
+from cognee.shared.utils import create_secure_ssl_context

 logger = get_logger("OllamaEmbeddingEngine")

@@ -94,16 +95,16 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
         """
         Internal method to call the Ollama embeddings endpoint for a single prompt.
         """
-        payload = {
-            "model": self.model,
-            "prompt": prompt,
-        }
+        payload = {"model": self.model, "prompt": prompt, "input": prompt}
+
         headers = {}
         api_key = os.getenv("LLM_API_KEY")
         if api_key:
             headers["Authorization"] = f"Bearer {api_key}"

-        async with aiohttp.ClientSession() as session:
+        ssl_context = create_secure_ssl_context()
+        connector = aiohttp.TCPConnector(ssl=ssl_context)
+        async with aiohttp.ClientSession(connector=connector) as session:
             async with session.post(
                 self.endpoint, json=payload, headers=headers, timeout=60.0
             ) as response:
cognee/infrastructure/files/storage/LocalFileStorage.py CHANGED
@@ -253,6 +253,56 @@ class LocalFileStorage(Storage):
         if os.path.exists(full_file_path):
             os.remove(full_file_path)

+    def list_files(self, directory_path: str, recursive: bool = False) -> list[str]:
+        """
+        List all files in the specified directory.
+
+        Parameters:
+        -----------
+        - directory_path (str): The directory path to list files from
+        - recursive (bool): If True, list files recursively in subdirectories
+
+        Returns:
+        --------
+        - list[str]: List of file paths relative to the storage root
+        """
+        from pathlib import Path
+
+        parsed_storage_path = get_parsed_path(self.storage_path)
+
+        if directory_path:
+            full_directory_path = os.path.join(parsed_storage_path, directory_path)
+        else:
+            full_directory_path = parsed_storage_path
+
+        directory_pathlib = Path(full_directory_path)
+
+        if not directory_pathlib.exists() or not directory_pathlib.is_dir():
+            return []
+
+        files = []
+
+        if recursive:
+            # Use rglob for recursive search
+            for file_path in directory_pathlib.rglob("*"):
+                if file_path.is_file():
+                    # Get relative path from storage root
+                    relative_path = os.path.relpath(str(file_path), parsed_storage_path)
+                    # Normalize path separators for consistency
+                    relative_path = relative_path.replace(os.sep, "/")
+                    files.append(relative_path)
+        else:
+            # Use iterdir for just immediate directory
+            for file_path in directory_pathlib.iterdir():
+                if file_path.is_file():
+                    # Get relative path from storage root
+                    relative_path = os.path.relpath(str(file_path), parsed_storage_path)
+                    # Normalize path separators for consistency
+                    relative_path = relative_path.replace(os.sep, "/")
+                    files.append(relative_path)
+
+        return files
+
     def remove_all(self, tree_path: str = None):
         """
         Remove an entire directory tree at the specified path, including all files and
cognee/infrastructure/files/storage/S3FileStorage.py CHANGED
@@ -155,21 +155,19 @@ class S3FileStorage(Storage):
         """
         Ensure that the specified directory exists, creating it if necessary.

-        If the directory already exists, no action is taken.
+        For S3 storage, this is a no-op since directories are created implicitly
+        when files are written to paths. S3 doesn't have actual directories,
+        just object keys with prefixes that appear as directories.

         Parameters:
         -----------

         - directory_path (str): The path of the directory to check or create.
         """
-        if not directory_path.strip():
-            directory_path = self.storage_path.replace("s3://", "")
-
-        def ensure_directory():
-            if not self.s3.exists(directory_path):
-                self.s3.makedirs(directory_path, exist_ok=True)
-
-        await run_async(ensure_directory)
+        # In S3, directories don't exist as separate entities - they're just prefixes
+        # When you write a file to s3://bucket/path/to/file.txt, the "directories"
+        # path/ and path/to/ are implicitly created. No explicit action needed.
+        pass

     async def copy_file(self, source_file_path: str, destination_file_path: str):
         """
@@ -213,6 +211,55 @@ class S3FileStorage(Storage):

         await run_async(remove_file)

+    async def list_files(self, directory_path: str, recursive: bool = False) -> list[str]:
+        """
+        List all files in the specified directory.
+
+        Parameters:
+        -----------
+        - directory_path (str): The directory path to list files from
+        - recursive (bool): If True, list files recursively in subdirectories
+
+        Returns:
+        --------
+        - list[str]: List of file paths relative to the storage root
+        """
+
+        def list_files_sync():
+            if directory_path:
+                # Combine storage path with directory path
+                full_path = os.path.join(self.storage_path.replace("s3://", ""), directory_path)
+            else:
+                full_path = self.storage_path.replace("s3://", "")
+
+            if recursive:
+                # Use ** for recursive search
+                pattern = f"{full_path}/**"
+            else:
+                # Just files in the immediate directory
+                pattern = f"{full_path}/*"
+
+            # Use s3fs glob to find files
+            try:
+                all_paths = self.s3.glob(pattern)
+                # Filter to only files (not directories)
+                files = [path for path in all_paths if self.s3.isfile(path)]
+
+                # Convert back to relative paths from storage root
+                storage_prefix = self.storage_path.replace("s3://", "")
+                relative_files = []
+                for file_path in files:
+                    if file_path.startswith(storage_prefix):
+                        relative_path = file_path[len(storage_prefix) :].lstrip("/")
+                        relative_files.append(relative_path)
+
+                return relative_files
+            except Exception:
+                # If directory doesn't exist or other error, return empty list
+                return []
+
+        return await run_async(list_files_sync)
+
     async def remove_all(self, tree_path: str):
         """
         Remove an entire directory tree at the specified path, including all files and
cognee/infrastructure/files/storage/StorageManager.py CHANGED
@@ -135,6 +135,24 @@ class StorageManager:
         else:
             return self.storage.remove(file_path)

+    async def list_files(self, directory_path: str, recursive: bool = False) -> list[str]:
+        """
+        List all files in the specified directory.
+
+        Parameters:
+        -----------
+        - directory_path (str): The directory path to list files from
+        - recursive (bool): If True, list files recursively in subdirectories
+
+        Returns:
+        --------
+        - list[str]: List of file paths relative to the storage root
+        """
+        if inspect.iscoroutinefunction(self.storage.list_files):
+            return await self.storage.list_files(directory_path, recursive)
+        else:
+            return self.storage.list_files(directory_path, recursive)
+
     async def remove_all(self, tree_path: str = None):
         """
         Remove an entire directory tree at the specified path, including all files and
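list_files is added to both storage backends and surfaced through StorageManager, which awaits the S3 coroutine and calls the local implementation synchronously. A usage sketch; wiring the objects up like this (LocalFileStorage taking a root path, StorageManager wrapping a Storage instance) is an assumption for illustration, not something shown in the diff:

from cognee.infrastructure.files.storage.LocalFileStorage import LocalFileStorage
from cognee.infrastructure.files.storage.StorageManager import StorageManager

# Hypothetical wiring: the constructor arguments are assumed.
storage = StorageManager(LocalFileStorage("/tmp/cognee_data"))

# Paths come back relative to the storage root, with "/" separators.
all_files = await storage.list_files("", recursive=True)
top_level = await storage.list_files("datasets", recursive=False)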
cognee/infrastructure/files/utils/get_file_metadata.py CHANGED
@@ -56,7 +56,12 @@ async def get_file_metadata(file: BinaryIO) -> FileMetadata:
     file_type = guess_file_type(file)

     file_path = getattr(file, "name", None) or getattr(file, "full_name", None)
-    file_name = Path(file_path).stem if file_path else None
+
+    if isinstance(file_path, str):
+        file_name = Path(file_path).stem if file_path else None
+    else:
+        # In case file_path does not exist or is a integer return None
+        file_name = None

     # Get file size
     pos = file.tell() # remember current pointer
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py CHANGED
@@ -12,6 +12,7 @@ from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.ll
 )

 from cognee.infrastructure.llm.LLMGateway import LLMGateway
+from cognee.infrastructure.llm.config import get_llm_config


 class AnthropicAdapter(LLMInterface):
@@ -27,7 +28,8 @@ class AnthropicAdapter(LLMInterface):
         import anthropic

         self.aclient = instructor.patch(
-            create=anthropic.AsyncAnthropic().messages.create, mode=instructor.Mode.ANTHROPIC_TOOLS
+            create=anthropic.AsyncAnthropic(api_key=get_llm_config().llm_api_key).messages.create,
+            mode=instructor.Mode.ANTHROPIC_TOOLS,
         )

         self.model = model
@@ -57,7 +59,7 @@ class AnthropicAdapter(LLMInterface):

         return await self.aclient(
             model=self.model,
-            max_completion_tokens=4096,
+            max_tokens=4096,
             max_retries=5,
             messages=[
                 {
cognee/infrastructure/utils/run_async.py CHANGED
@@ -1,13 +1,18 @@
 import asyncio
 from functools import partial
+import inspect


 async def run_async(func, *args, loop=None, executor=None, **kwargs):
     if loop is None:
         try:
-            running_loop = asyncio.get_running_loop()
+            loop = asyncio.get_running_loop()
         except RuntimeError:
-            running_loop = asyncio.get_event_loop()
+            loop = asyncio.get_event_loop()

-    pfunc = partial(func, *args, **kwargs)
-    return await running_loop.run_in_executor(executor, pfunc)
+    if "loop" in inspect.signature(func).parameters:
+        pfunc = partial(func, *args, loop=loop, **kwargs)
+    else:
+        pfunc = partial(func, *args, **kwargs)
+
+    return await loop.run_in_executor(executor, pfunc)
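The reworked run_async reuses the caller's event loop and forwards it to callables whose signature declares a loop parameter, which is what allows run_sync below to accept an explicit running_loop. A small self-contained sketch of the forwarding behaviour:

import asyncio

from cognee.infrastructure.utils.run_async import run_async


def plain_work(x):
    return x * 2


def loop_aware_work(x, loop=None):
    # Receives the caller's running loop because the signature declares `loop`.
    assert loop is not None
    return x * 2


async def main():
    print(await run_async(plain_work, 21))       # 42, no loop injected
    print(await run_async(loop_aware_work, 21))  # 42, loop injected automatically


asyncio.run(main())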
cognee/infrastructure/utils/run_sync.py CHANGED
@@ -2,16 +2,17 @@ import asyncio
 import threading


-def run_sync(coro, timeout=None):
+def run_sync(coro, running_loop=None, timeout=None):
     result = None
     exception = None

     def runner():
-        nonlocal result, exception
+        nonlocal result, exception, running_loop

         try:
             try:
-                running_loop = asyncio.get_running_loop()
+                if not running_loop:
+                    running_loop = asyncio.get_running_loop()

                 result = asyncio.run_coroutine_threadsafe(coro, running_loop).result(timeout)
             except RuntimeError: