cognee-0.3.2-py3-none-any.whl → cognee-0.3.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. cognee/api/v1/cloud/routers/get_checks_router.py +1 -1
  2. cognee/api/v1/cognify/cognify.py +44 -7
  3. cognee/api/v1/cognify/routers/get_cognify_router.py +2 -1
  4. cognee/api/v1/notebooks/routers/get_notebooks_router.py +2 -1
  5. cognee/api/v1/prune/prune.py +2 -2
  6. cognee/api/v1/search/search.py +1 -1
  7. cognee/api/v1/sync/sync.py +16 -5
  8. cognee/base_config.py +19 -1
  9. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
  10. cognee/infrastructure/databases/graph/kuzu/remote_kuzu_adapter.py +4 -1
  11. cognee/infrastructure/databases/relational/ModelBase.py +2 -1
  12. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -2
  13. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -6
  14. cognee/infrastructure/databases/vector/config.py +1 -1
  15. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +6 -5
  16. cognee/infrastructure/files/storage/LocalFileStorage.py +50 -0
  17. cognee/infrastructure/files/storage/S3FileStorage.py +56 -9
  18. cognee/infrastructure/files/storage/StorageManager.py +18 -0
  19. cognee/infrastructure/files/utils/get_file_metadata.py +6 -1
  20. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +4 -2
  21. cognee/infrastructure/utils/run_async.py +9 -4
  22. cognee/infrastructure/utils/run_sync.py +4 -3
  23. cognee/modules/cloud/operations/check_api_key.py +4 -1
  24. cognee/modules/data/deletion/prune_system.py +5 -1
  25. cognee/modules/data/methods/create_authorized_dataset.py +9 -0
  26. cognee/modules/data/methods/get_authorized_dataset.py +1 -1
  27. cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
  28. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
  29. cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
  30. cognee/modules/notebooks/methods/create_notebook.py +34 -0
  31. cognee/modules/notebooks/methods/get_notebook.py +2 -2
  32. cognee/modules/notebooks/methods/get_notebooks.py +27 -1
  33. cognee/modules/notebooks/methods/update_notebook.py +0 -1
  34. cognee/modules/notebooks/models/Notebook.py +206 -1
  35. cognee/modules/notebooks/operations/run_in_local_sandbox.py +8 -5
  36. cognee/modules/observability/get_observe.py +14 -0
  37. cognee/modules/observability/observers.py +1 -0
  38. cognee/modules/ontology/base_ontology_resolver.py +42 -0
  39. cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
  40. cognee/modules/ontology/matching_strategies.py +53 -0
  41. cognee/modules/ontology/models.py +20 -0
  42. cognee/modules/ontology/ontology_config.py +24 -0
  43. cognee/modules/ontology/ontology_env_config.py +45 -0
  44. cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
  45. cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +13 -0
  46. cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +1 -1
  47. cognee/modules/pipelines/models/PipelineRunInfo.py +7 -2
  48. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +1 -1
  49. cognee/modules/retrieval/graph_completion_cot_retriever.py +1 -1
  50. cognee/modules/retrieval/graph_completion_retriever.py +1 -1
  51. cognee/modules/retrieval/temporal_retriever.py +3 -3
  52. cognee/modules/retrieval/user_qa_feedback.py +1 -1
  53. cognee/modules/search/methods/get_search_type_tools.py +7 -0
  54. cognee/modules/search/methods/search.py +12 -13
  55. cognee/modules/search/utils/prepare_search_result.py +31 -9
  56. cognee/modules/search/utils/transform_context_to_graph.py +1 -1
  57. cognee/modules/search/utils/transform_insights_to_graph.py +28 -0
  58. cognee/modules/users/methods/create_user.py +4 -24
  59. cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
  60. cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
  61. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +19 -2
  62. cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
  63. cognee/modules/users/permissions/methods/get_principal.py +9 -0
  64. cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
  65. cognee/modules/users/permissions/methods/get_role.py +10 -0
  66. cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
  67. cognee/modules/users/permissions/methods/get_tenant.py +9 -0
  68. cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
  69. cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
  70. cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
  71. cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
  72. cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
  73. cognee/modules/users/roles/methods/create_role.py +10 -0
  74. cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
  75. cognee/modules/users/tenants/methods/create_tenant.py +10 -0
  76. cognee/root_dir.py +5 -0
  77. cognee/shared/cache.py +346 -0
  78. cognee/shared/utils.py +12 -0
  79. cognee/tasks/graph/extract_graph_from_data.py +53 -10
  80. cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
  81. cognee/tasks/ingestion/save_data_item_to_storage.py +1 -0
  82. cognee/tasks/temporal_graph/models.py +11 -6
  83. cognee/tests/cli_tests/cli_unit_tests/test_cli_main.py +5 -5
  84. cognee/tests/test_cognee_server_start.py +4 -4
  85. cognee/tests/test_temporal_graph.py +6 -34
  86. cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
  87. cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +399 -0
  88. {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/METADATA +11 -8
  89. {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/RECORD +93 -86
  90. cognee-0.3.4.dist-info/entry_points.txt +2 -0
  91. cognee/api/v1/save/save.py +0 -335
  92. cognee/tests/test_save_export_path.py +0 -116
  93. cognee-0.3.2.dist-info/entry_points.txt +0 -2
  94. {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/WHEEL +0 -0
  95. {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/licenses/LICENSE +0 -0
  96. {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/licenses/NOTICE.md +0 -0
@@ -1,6 +1,7 @@
1
1
  import aiohttp
2
2
 
3
3
  from cognee.modules.cloud.exceptions import CloudConnectionError
4
+ from cognee.shared.utils import create_secure_ssl_context
4
5
 
5
6
 
6
7
  async def check_api_key(auth_token: str):
@@ -10,7 +11,9 @@ async def check_api_key(auth_token: str):
10
11
  headers = {"X-Api-Key": auth_token}
11
12
 
12
13
  try:
13
- async with aiohttp.ClientSession() as session:
14
+ ssl_context = create_secure_ssl_context()
15
+ connector = aiohttp.TCPConnector(ssl=ssl_context)
16
+ async with aiohttp.ClientSession(connector=connector) as session:
14
17
  async with session.post(url, headers=headers) as response:
15
18
  if response.status == 200:
16
19
  return
@@ -1,9 +1,10 @@
1
1
  from cognee.infrastructure.databases.vector import get_vector_engine
2
2
  from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine
3
3
  from cognee.infrastructure.databases.relational import get_relational_engine
4
+ from cognee.shared.cache import delete_cache
4
5
 
5
6
 
6
- async def prune_system(graph=True, vector=True, metadata=True):
7
+ async def prune_system(graph=True, vector=True, metadata=True, cache=True):
7
8
  if graph:
8
9
  graph_engine = await get_graph_engine()
9
10
  await graph_engine.delete_graph()
@@ -15,3 +16,6 @@ async def prune_system(graph=True, vector=True, metadata=True):
15
16
  if metadata:
16
17
  db_engine = get_relational_engine()
17
18
  await db_engine.delete_database()
19
+
20
+ if cache:
21
+ await delete_cache()
@@ -6,6 +6,15 @@ from .create_dataset import create_dataset
6
6
 
7
7
 
8
8
  async def create_authorized_dataset(dataset_name: str, user: User) -> Dataset:
9
+ """
10
+ Create a new dataset and give all permissions on this dataset to the given user.
11
+ Args:
12
+ dataset_name: Name of the dataset.
13
+ user: The user object.
14
+
15
+ Returns:
16
+ Dataset: The new authorized dataset.
17
+ """
9
18
  db_engine = get_relational_engine()
10
19
 
11
20
  async with db_engine.get_async_session() as session:
@@ -15,7 +15,7 @@ async def get_authorized_dataset(
15
15
  Get a specific dataset with permissions for a user.
16
16
 
17
17
  Args:
18
- user_id (UUID): user id
18
+ user: User object
19
19
  dataset_id (UUID): dataset id
20
20
  permission_type (str): permission type(read, write, delete, share), default is read
21
21
 
@@ -11,6 +11,17 @@ from ..models import Dataset
11
11
  async def get_authorized_dataset_by_name(
12
12
  dataset_name: str, user: User, permission_type: str
13
13
  ) -> Optional[Dataset]:
14
+ """
15
+ Get a specific dataset with the given name, with permissions for a given user.
16
+
17
+ Args:
18
+ dataset_name: Name of the dataset.
19
+ user: User object.
20
+ permission_type (str): permission type(read, write, delete, share), default is read
21
+
22
+ Returns:
23
+ Optional[Dataset]: dataset with permissions
24
+ """
14
25
  authorized_datasets = await get_authorized_existing_datasets([], permission_type, user)
15
26
 
16
27
  return next((dataset for dataset in authorized_datasets if dataset.name == dataset_name), None)
@@ -7,8 +7,14 @@ from cognee.modules.engine.utils import (
7
7
  generate_node_id,
8
8
  generate_node_name,
9
9
  )
10
+ from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
11
+ from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
10
12
  from cognee.shared.data_models import KnowledgeGraph
11
- from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
13
+ from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
14
+ from cognee.modules.ontology.get_default_ontology_resolver import (
15
+ get_default_ontology_resolver,
16
+ get_ontology_resolver_from_env,
17
+ )
12
18
 
13
19
 
14
20
  def _create_node_key(node_id: str, category: str) -> str:
@@ -83,7 +89,7 @@ def _process_ontology_edges(
83
89
 
84
90
  def _create_type_node(
85
91
  node_type: str,
86
- ontology_resolver: OntologyResolver,
92
+ ontology_resolver: RDFLibOntologyResolver,
87
93
  added_nodes_map: dict,
88
94
  added_ontology_nodes_map: dict,
89
95
  name_mapping: dict,
@@ -141,7 +147,7 @@ def _create_entity_node(
141
147
  node_name: str,
142
148
  node_description: str,
143
149
  type_node: EntityType,
144
- ontology_resolver: OntologyResolver,
150
+ ontology_resolver: RDFLibOntologyResolver,
145
151
  added_nodes_map: dict,
146
152
  added_ontology_nodes_map: dict,
147
153
  name_mapping: dict,
@@ -198,7 +204,7 @@ def _create_entity_node(
198
204
  def _process_graph_nodes(
199
205
  data_chunk: DocumentChunk,
200
206
  graph: KnowledgeGraph,
201
- ontology_resolver: OntologyResolver,
207
+ ontology_resolver: RDFLibOntologyResolver,
202
208
  added_nodes_map: dict,
203
209
  added_ontology_nodes_map: dict,
204
210
  name_mapping: dict,
@@ -277,7 +283,7 @@ def _process_graph_edges(
277
283
  def expand_with_nodes_and_edges(
278
284
  data_chunks: list[DocumentChunk],
279
285
  chunk_graphs: list[KnowledgeGraph],
280
- ontology_resolver: OntologyResolver = None,
286
+ ontology_resolver: BaseOntologyResolver = None,
281
287
  existing_edges_map: Optional[dict[str, bool]] = None,
282
288
  ):
283
289
  """
@@ -296,8 +302,8 @@ def expand_with_nodes_and_edges(
296
302
  chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each
297
303
  data chunk. Each graph contains nodes (entities) and edges (relationships) extracted
298
304
  from the chunk content.
299
- ontology_resolver (OntologyResolver, optional): Resolver for validating entities and
300
- types against an ontology. If None, a default OntologyResolver is created.
305
+ ontology_resolver (BaseOntologyResolver, optional): Resolver for validating entities and
306
+ types against an ontology. If None, a default RDFLibOntologyResolver is created.
301
307
  Defaults to None.
302
308
  existing_edges_map (dict[str, bool], optional): Mapping of existing edge keys to prevent
303
309
  duplicate edge creation. Keys are formatted as "{source_id}_{target_id}_{relation}".
@@ -320,7 +326,15 @@ def expand_with_nodes_and_edges(
320
326
  existing_edges_map = {}
321
327
 
322
328
  if ontology_resolver is None:
323
- ontology_resolver = OntologyResolver()
329
+ ontology_config = get_ontology_env_config()
330
+ if (
331
+ ontology_config.ontology_file_path
332
+ and ontology_config.ontology_resolver
333
+ and ontology_config.matching_strategy
334
+ ):
335
+ ontology_resolver = get_ontology_resolver_from_env(**ontology_config.to_dict())
336
+ else:
337
+ ontology_resolver = get_default_ontology_resolver()
324
338
 
325
339
  added_nodes_map = {}
326
340
  added_ontology_nodes_map = {}
@@ -23,8 +23,6 @@ async def retrieve_existing_edges(
23
23
  chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each
24
24
  data chunk. Each graph contains nodes (entities) and edges (relationships) that
25
25
  were extracted from the chunk content.
26
- graph_engine (GraphDBInterface): Interface to the graph database that will be queried
27
- to check for existing edges. Must implement the has_edges() method.
28
26
 
29
27
  Returns:
30
28
  dict[str, bool]: A mapping of edge keys to boolean values indicating existence.
@@ -6,6 +6,40 @@ from cognee.infrastructure.databases.relational import with_async_session
6
6
 
7
7
  from ..models.Notebook import Notebook, NotebookCell
8
8
 
9
+ TUTORIAL_NOTEBOOK_NAME = "Python Development with Cognee Tutorial 🧠"
10
+
11
+
12
+ async def _create_tutorial_notebook(
13
+ user_id: UUID, session: AsyncSession, force_refresh: bool = False
14
+ ) -> None:
15
+ """
16
+ Create the default tutorial notebook for new users.
17
+ Dynamically fetches from: https://github.com/topoteretes/cognee/blob/notebook_tutorial/notebooks/starter_tutorial.zip
18
+ """
19
+ TUTORIAL_ZIP_URL = (
20
+ "https://github.com/topoteretes/cognee/raw/notebook_tutorial/notebooks/starter_tutorial.zip"
21
+ )
22
+
23
+ try:
24
+ # Create notebook from remote zip file (includes notebook + data files)
25
+ notebook = await Notebook.from_ipynb_zip_url(
26
+ zip_url=TUTORIAL_ZIP_URL,
27
+ owner_id=user_id,
28
+ notebook_filename="tutorial.ipynb",
29
+ name=TUTORIAL_NOTEBOOK_NAME,
30
+ deletable=False,
31
+ force=force_refresh,
32
+ )
33
+
34
+ # Add to session and commit
35
+ session.add(notebook)
36
+ await session.commit()
37
+
38
+ except Exception as e:
39
+ print(f"Failed to fetch tutorial notebook from {TUTORIAL_ZIP_URL}: {e}")
40
+
41
+ raise e
42
+
9
43
 
10
44
  @with_async_session
11
45
  async def create_notebook(
@@ -1,6 +1,6 @@
1
1
  from uuid import UUID
2
2
  from typing import Optional
3
- from sqlalchemy import select
3
+ from sqlalchemy import and_, select
4
4
  from sqlalchemy.ext.asyncio import AsyncSession
5
5
 
6
6
  from cognee.infrastructure.databases.relational import with_async_session
@@ -15,7 +15,7 @@ async def get_notebook(
15
15
  session: AsyncSession,
16
16
  ) -> Optional[Notebook]:
17
17
  result = await session.execute(
18
- select(Notebook).where(Notebook.owner_id == user_id and Notebook.id == notebook_id)
18
+ select(Notebook).where(and_(Notebook.owner_id == user_id, Notebook.id == notebook_id))
19
19
  )
20
20
 
21
21
  return result.scalar()
@@ -1,11 +1,16 @@
1
1
  from uuid import UUID
2
2
  from typing import List
3
- from sqlalchemy import select
3
+ from sqlalchemy import select, and_
4
4
  from sqlalchemy.ext.asyncio import AsyncSession
5
5
 
6
6
  from cognee.infrastructure.databases.relational import with_async_session
7
7
 
8
8
  from ..models.Notebook import Notebook
9
+ from .create_notebook import _create_tutorial_notebook, TUTORIAL_NOTEBOOK_NAME
10
+
11
+ from cognee.shared.logging_utils import get_logger
12
+
13
+ logger = get_logger()
9
14
 
10
15
 
11
16
  @with_async_session
@@ -13,6 +18,27 @@ async def get_notebooks(
13
18
  user_id: UUID,
14
19
  session: AsyncSession,
15
20
  ) -> List[Notebook]:
21
+ # Check if tutorial notebook already exists for this user
22
+ tutorial_query = select(Notebook).where(
23
+ and_(
24
+ Notebook.owner_id == user_id,
25
+ Notebook.name == TUTORIAL_NOTEBOOK_NAME,
26
+ ~Notebook.deletable,
27
+ )
28
+ )
29
+ tutorial_result = await session.execute(tutorial_query)
30
+ tutorial_notebook = tutorial_result.scalar_one_or_none()
31
+
32
+ # If tutorial notebook doesn't exist, create it
33
+ if tutorial_notebook is None:
34
+ logger.info(f"Tutorial notebook not found for user {user_id}, creating it")
35
+ try:
36
+ await _create_tutorial_notebook(user_id, session, force_refresh=False)
37
+ except Exception as e:
38
+ # Log the error but continue to return existing notebooks
39
+ logger.error(f"Failed to create tutorial notebook for user {user_id}: {e}")
40
+
41
+ # Get all notebooks for the user
16
42
  result = await session.execute(select(Notebook).where(Notebook.owner_id == user_id))
17
43
 
18
44
  return list(result.scalars().all())
@@ -1,4 +1,3 @@
1
- from typing import Callable, AsyncContextManager
2
1
  from sqlalchemy.ext.asyncio import AsyncSession
3
2
 
4
3
  from cognee.infrastructure.databases.relational import with_async_session
@@ -1,13 +1,24 @@
1
1
  import json
2
- from typing import List, Literal
2
+ import nbformat
3
+ import asyncio
4
+ from nbformat.notebooknode import NotebookNode
5
+ from typing import List, Literal, Optional, cast, Tuple
3
6
  from uuid import uuid4, UUID as UUID_t
4
7
  from pydantic import BaseModel, ConfigDict
5
8
  from datetime import datetime, timezone
6
9
  from fastapi.encoders import jsonable_encoder
7
10
  from sqlalchemy import Boolean, Column, DateTime, JSON, UUID, String, TypeDecorator
8
11
  from sqlalchemy.orm import mapped_column, Mapped
12
+ from pathlib import Path
9
13
 
10
14
  from cognee.infrastructure.databases.relational import Base
15
+ from cognee.shared.cache import (
16
+ download_and_extract_zip,
17
+ get_tutorial_data_dir,
18
+ generate_content_hash,
19
+ )
20
+ from cognee.infrastructure.files.storage.get_file_storage import get_file_storage
21
+ from cognee.base_config import get_base_config
11
22
 
12
23
 
13
24
  class NotebookCell(BaseModel):
@@ -51,3 +62,197 @@ class Notebook(Base):
51
62
  deletable: Mapped[bool] = mapped_column(Boolean, default=True)
52
63
 
53
64
  created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
65
+
66
+ @classmethod
67
+ async def from_ipynb_zip_url(
68
+ cls,
69
+ zip_url: str,
70
+ owner_id: UUID_t,
71
+ notebook_filename: str = "tutorial.ipynb",
72
+ name: Optional[str] = None,
73
+ deletable: bool = True,
74
+ force: bool = False,
75
+ ) -> "Notebook":
76
+ """
77
+ Create a Notebook instance from a remote zip file containing notebook + data files.
78
+
79
+ Args:
80
+ zip_url: Remote URL to fetch the .zip file from
81
+ owner_id: UUID of the notebook owner
82
+ notebook_filename: Name of the .ipynb file within the zip
83
+ name: Optional custom name for the notebook
84
+ deletable: Whether the notebook can be deleted
85
+ force: If True, re-download even if already cached
86
+
87
+ Returns:
88
+ Notebook instance
89
+ """
90
+ # Generate a cache key based on the zip URL
91
+ content_hash = generate_content_hash(zip_url, notebook_filename)
92
+
93
+ # Download and extract the zip file to tutorial_data/{content_hash}
94
+ try:
95
+ extracted_cache_dir = await download_and_extract_zip(
96
+ url=zip_url,
97
+ cache_dir_name=f"tutorial_data/{content_hash}",
98
+ version_or_hash=content_hash,
99
+ force=force,
100
+ )
101
+ except Exception as e:
102
+ raise RuntimeError(f"Failed to download tutorial zip from {zip_url}") from e
103
+
104
+ # Use cache system to access the notebook file
105
+ from cognee.shared.cache import cache_file_exists, read_cache_file
106
+
107
+ notebook_file_path = f"{extracted_cache_dir}/{notebook_filename}"
108
+
109
+ # Check if the notebook file exists in cache
110
+ if not await cache_file_exists(notebook_file_path):
111
+ raise FileNotFoundError(f"Notebook file '{notebook_filename}' not found in zip")
112
+
113
+ # Read and parse the notebook using cache system
114
+ async with await read_cache_file(notebook_file_path, encoding="utf-8") as f:
115
+ notebook_content = await asyncio.to_thread(f.read)
116
+ notebook = cls.from_ipynb_string(notebook_content, owner_id, name, deletable)
117
+
118
+ # Update file paths in notebook cells to point to actual cached data files
119
+ await cls._update_file_paths_in_cells(notebook, extracted_cache_dir)
120
+
121
+ return notebook
122
+
123
+ @staticmethod
124
+ async def _update_file_paths_in_cells(notebook: "Notebook", cache_dir: str) -> None:
125
+ """
126
+ Update file paths in code cells to use actual cached data files.
127
+ Works with both local filesystem and S3 storage.
128
+
129
+ Args:
130
+ notebook: Parsed Notebook instance with cells to update
131
+ cache_dir: Path to the cached tutorial directory containing data files
132
+ """
133
+ import re
134
+ from cognee.shared.cache import list_cache_files, cache_file_exists
135
+ from cognee.shared.logging_utils import get_logger
136
+
137
+ logger = get_logger()
138
+
139
+ # Look for data files in the data subdirectory
140
+ data_dir = f"{cache_dir}/data"
141
+
142
+ try:
143
+ # Get all data files in the cache directory using cache system
144
+ data_files = {}
145
+ if await cache_file_exists(data_dir):
146
+ file_list = await list_cache_files(data_dir)
147
+ else:
148
+ file_list = []
149
+
150
+ for file_path in file_list:
151
+ # Extract just the filename
152
+ filename = file_path.split("/")[-1]
153
+ # Use the file path as provided by cache system
154
+ data_files[filename] = file_path
155
+
156
+ except Exception as e:
157
+ # If we can't list files, skip updating paths
158
+ logger.error(f"Error listing data files in {data_dir}: {e}")
159
+ return
160
+
161
+ # Pattern to match file://data/filename patterns in code cells
162
+ file_pattern = r'"file://data/([^"]+)"'
163
+
164
+ def replace_path(match):
165
+ filename = match.group(1)
166
+ if filename in data_files:
167
+ file_path = data_files[filename]
168
+ # For local filesystem, preserve file:// prefix
169
+ if not file_path.startswith("s3://"):
170
+ return f'"file://{file_path}"'
171
+ else:
172
+ # For S3, return the S3 URL as-is
173
+ return f'"{file_path}"'
174
+ return match.group(0) # Keep original if file not found
175
+
176
+ # Update only code cells
177
+ updated_cells = 0
178
+ for cell in notebook.cells:
179
+ if cell.type == "code":
180
+ original_content = cell.content
181
+ # Update file paths in the cell content
182
+ cell.content = re.sub(file_pattern, replace_path, cell.content)
183
+ if original_content != cell.content:
184
+ updated_cells += 1
185
+
186
+ # Log summary of updates (useful for monitoring)
187
+ if updated_cells > 0:
188
+ logger.info(f"Updated file paths in {updated_cells} notebook cells")
189
+
190
+ @classmethod
191
+ def from_ipynb_string(
192
+ cls,
193
+ notebook_content: str,
194
+ owner_id: UUID_t,
195
+ name: Optional[str] = None,
196
+ deletable: bool = True,
197
+ ) -> "Notebook":
198
+ """
199
+ Create a Notebook instance from Jupyter notebook string content.
200
+
201
+ Args:
202
+ notebook_content: Raw Jupyter notebook content as string
203
+ owner_id: UUID of the notebook owner
204
+ name: Optional custom name for the notebook
205
+ deletable: Whether the notebook can be deleted
206
+
207
+ Returns:
208
+ Notebook instance ready to be saved to database
209
+ """
210
+ # Parse and validate the Jupyter notebook using nbformat
211
+ # Note: nbformat.reads() has loose typing, so we cast to NotebookNode
212
+ jupyter_nb = cast(
213
+ NotebookNode, nbformat.reads(notebook_content, as_version=nbformat.NO_CONVERT)
214
+ )
215
+
216
+ # Convert Jupyter cells to NotebookCell objects
217
+ cells = []
218
+ for jupyter_cell in jupyter_nb.cells:
219
+ # Each cell is also a NotebookNode with dynamic attributes
220
+ cell = cast(NotebookNode, jupyter_cell)
221
+ # Skip raw cells as they're not supported in our model
222
+ if cell.cell_type == "raw":
223
+ continue
224
+
225
+ # Get the source content
226
+ content = cell.source
227
+
228
+ # Generate a name based on content or cell index
229
+ cell_name = cls._generate_cell_name(cell)
230
+
231
+ # Map cell types (jupyter uses "code"/"markdown", we use same)
232
+ cell_type = "code" if cell.cell_type == "code" else "markdown"
233
+
234
+ cells.append(NotebookCell(id=uuid4(), type=cell_type, name=cell_name, content=content))
235
+
236
+ # Extract notebook name from metadata if not provided
237
+ if name is None:
238
+ kernelspec = jupyter_nb.metadata.get("kernelspec", {})
239
+ name = kernelspec.get("display_name") or kernelspec.get("name", "Imported Notebook")
240
+
241
+ return cls(id=uuid4(), owner_id=owner_id, name=name, cells=cells, deletable=deletable)
242
+
243
+ @staticmethod
244
+ def _generate_cell_name(jupyter_cell: NotebookNode) -> str:
245
+ """Generate a meaningful name for a notebook cell using nbformat cell."""
246
+ if jupyter_cell.cell_type == "markdown":
247
+ # Try to extract a title from markdown headers
248
+ content = jupyter_cell.source
249
+
250
+ lines = content.strip().split("\n")
251
+ if lines and lines[0].startswith("#"):
252
+ # Extract header text, clean it up
253
+ header = lines[0].lstrip("#").strip()
254
+ return header[:50] if len(header) > 50 else header
255
+ else:
256
+ return "Markdown Cell"
257
+ else:
258
+ return "Code Cell"
@@ -5,16 +5,18 @@ import traceback
5
5
 
6
6
  def wrap_in_async_handler(user_code: str) -> str:
7
7
  return (
8
- "from cognee.infrastructure.utils.run_sync import run_sync\n\n"
9
- "async def __user_main__():\n"
8
+ "import asyncio\n"
9
+ + "asyncio.set_event_loop(running_loop)\n\n"
10
+ + "from cognee.infrastructure.utils.run_sync import run_sync\n\n"
11
+ + "async def __user_main__():\n"
10
12
  + "\n".join(" " + line for line in user_code.strip().split("\n"))
11
13
  + "\n"
12
- " globals().update(locals())\n\n"
13
- "run_sync(__user_main__())\n"
14
+ + " globals().update(locals())\n\n"
15
+ + "run_sync(__user_main__(), running_loop)\n"
14
16
  )
15
17
 
16
18
 
17
- def run_in_local_sandbox(code, environment=None):
19
+ def run_in_local_sandbox(code, environment=None, loop=None):
18
20
  environment = environment or {}
19
21
  code = wrap_in_async_handler(code.replace("\xa0", "\n"))
20
22
 
@@ -31,6 +33,7 @@ def run_in_local_sandbox(code, environment=None):
31
33
  printOutput.append(output)
32
34
 
33
35
  environment["print"] = customPrintFunction
36
+ environment["running_loop"] = loop
34
37
 
35
38
  try:
36
39
  exec(code, environment)
@@ -9,3 +9,17 @@ def get_observe():
9
9
  from langfuse.decorators import observe
10
10
 
11
11
  return observe
12
+ elif monitoring == Observer.NONE:
13
+ # Return a no-op decorator that handles keyword arguments
14
+ def no_op_decorator(*args, **kwargs):
15
+ if len(args) == 1 and callable(args[0]) and not kwargs:
16
+ # Direct decoration: @observe
17
+ return args[0]
18
+ else:
19
+ # Parameterized decoration: @observe(as_type="generation")
20
+ def decorator(func):
21
+ return func
22
+
23
+ return decorator
24
+
25
+ return no_op_decorator
@@ -4,6 +4,7 @@ from enum import Enum
4
4
  class Observer(str, Enum):
5
5
  """Monitoring tools"""
6
6
 
7
+ NONE = "none"
7
8
  LANGFUSE = "langfuse"
8
9
  LLMLITE = "llmlite"
9
10
  LANGSMITH = "langsmith"
@@ -0,0 +1,42 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import List, Tuple, Optional
3
+
4
+ from cognee.modules.ontology.models import AttachedOntologyNode
5
+ from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy
6
+
7
+
8
+ class BaseOntologyResolver(ABC):
9
+ """Abstract base class for ontology resolvers."""
10
+
11
+ def __init__(self, matching_strategy: Optional[MatchingStrategy] = None):
12
+ """Initialize the ontology resolver with a matching strategy.
13
+
14
+ Args:
15
+ matching_strategy: The strategy to use for entity matching.
16
+ Defaults to FuzzyMatchingStrategy if None.
17
+ """
18
+ self.matching_strategy = matching_strategy or FuzzyMatchingStrategy()
19
+
20
+ @abstractmethod
21
+ def build_lookup(self) -> None:
22
+ """Build the lookup dictionary for ontology entities."""
23
+ pass
24
+
25
+ @abstractmethod
26
+ def refresh_lookup(self) -> None:
27
+ """Refresh the lookup dictionary."""
28
+ pass
29
+
30
+ @abstractmethod
31
+ def find_closest_match(self, name: str, category: str) -> Optional[str]:
32
+ """Find the closest match for a given name in the specified category."""
33
+ pass
34
+
35
+ @abstractmethod
36
+ def get_subgraph(
37
+ self, node_name: str, node_type: str = "individuals", directed: bool = True
38
+ ) -> Tuple[
39
+ List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
40
+ ]:
41
+ """Get a subgraph for the given node."""
42
+ pass
@@ -0,0 +1,41 @@
1
+ from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
2
+ from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
3
+ from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
4
+
5
+
6
+ def get_default_ontology_resolver() -> BaseOntologyResolver:
7
+ return RDFLibOntologyResolver(ontology_file=None, matching_strategy=FuzzyMatchingStrategy())
8
+
9
+
10
+ def get_ontology_resolver_from_env(
11
+ ontology_resolver: str = "", matching_strategy: str = "", ontology_file_path: str = ""
12
+ ) -> BaseOntologyResolver:
13
+ """
14
+ Create and return an ontology resolver instance based on environment parameters.
15
+
16
+ Currently, this function supports only the RDFLib-based ontology resolver
17
+ with a fuzzy matching strategy.
18
+
19
+ Args:
20
+ ontology_resolver (str): The ontology resolver type to use.
21
+ Supported value: "rdflib".
22
+ matching_strategy (str): The matching strategy to apply.
23
+ Supported value: "fuzzy".
24
+ ontology_file_path (str): Path to the ontology file required for the resolver.
25
+
26
+ Returns:
27
+ BaseOntologyResolver: An instance of the requested ontology resolver.
28
+
29
+ Raises:
30
+ EnvironmentError: If the provided resolver or strategy is unsupported,
31
+ or if required parameters are missing.
32
+ """
33
+ if ontology_resolver == "rdflib" and matching_strategy == "fuzzy" and ontology_file_path:
34
+ return RDFLibOntologyResolver(
35
+ matching_strategy=FuzzyMatchingStrategy(), ontology_file=ontology_file_path
36
+ )
37
+ else:
38
+ raise EnvironmentError(
39
+ f"Unsupported ontology resolver: {ontology_resolver}. "
40
+ f"Supported resolvers are: RdfLib with FuzzyMatchingStrategy."
41
+ )