cognee 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff compares two publicly released versions of the package as published to their public registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
- cognee/api/v1/cloud/routers/get_checks_router.py +1 -1
- cognee/api/v1/cognify/cognify.py +44 -7
- cognee/api/v1/cognify/routers/get_cognify_router.py +2 -1
- cognee/api/v1/prune/prune.py +2 -2
- cognee/api/v1/search/search.py +1 -1
- cognee/api/v1/sync/sync.py +16 -5
- cognee/base_config.py +19 -1
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
- cognee/infrastructure/databases/graph/kuzu/remote_kuzu_adapter.py +4 -1
- cognee/infrastructure/databases/relational/ModelBase.py +2 -1
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -6
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +6 -5
- cognee/infrastructure/files/storage/LocalFileStorage.py +50 -0
- cognee/infrastructure/files/storage/S3FileStorage.py +56 -9
- cognee/infrastructure/files/storage/StorageManager.py +18 -0
- cognee/infrastructure/files/utils/get_file_metadata.py +6 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +4 -2
- cognee/modules/cloud/operations/check_api_key.py +4 -1
- cognee/modules/data/deletion/prune_system.py +5 -1
- cognee/modules/data/methods/create_authorized_dataset.py +9 -0
- cognee/modules/data/methods/get_authorized_dataset.py +1 -1
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
- cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
- cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
- cognee/modules/notebooks/methods/create_notebook.py +34 -0
- cognee/modules/notebooks/methods/get_notebooks.py +27 -1
- cognee/modules/notebooks/models/Notebook.py +206 -1
- cognee/modules/observability/get_observe.py +14 -0
- cognee/modules/observability/observers.py +1 -0
- cognee/modules/ontology/base_ontology_resolver.py +42 -0
- cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
- cognee/modules/ontology/matching_strategies.py +53 -0
- cognee/modules/ontology/models.py +20 -0
- cognee/modules/ontology/ontology_config.py +24 -0
- cognee/modules/ontology/ontology_env_config.py +45 -0
- cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +13 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +1 -1
- cognee/modules/pipelines/models/PipelineRunInfo.py +7 -2
- cognee/modules/retrieval/temporal_retriever.py +2 -2
- cognee/modules/search/methods/get_search_type_tools.py +7 -0
- cognee/modules/search/methods/search.py +12 -13
- cognee/modules/search/utils/prepare_search_result.py +28 -6
- cognee/modules/search/utils/transform_context_to_graph.py +1 -1
- cognee/modules/search/utils/transform_insights_to_graph.py +28 -0
- cognee/modules/users/methods/create_user.py +4 -24
- cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
- cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
- cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +19 -2
- cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
- cognee/modules/users/permissions/methods/get_principal.py +9 -0
- cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
- cognee/modules/users/permissions/methods/get_role.py +10 -0
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
- cognee/modules/users/permissions/methods/get_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
- cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
- cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
- cognee/modules/users/roles/methods/create_role.py +10 -0
- cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
- cognee/modules/users/tenants/methods/create_tenant.py +10 -0
- cognee/root_dir.py +5 -0
- cognee/shared/cache.py +346 -0
- cognee/shared/utils.py +12 -0
- cognee/tasks/graph/extract_graph_from_data.py +53 -10
- cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
- cognee/tasks/ingestion/save_data_item_to_storage.py +1 -0
- cognee/tasks/temporal_graph/models.py +11 -6
- cognee/tests/cli_tests/cli_unit_tests/test_cli_main.py +5 -5
- cognee/tests/test_cognee_server_start.py +4 -4
- cognee/tests/test_temporal_graph.py +6 -34
- cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
- cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +399 -0
- {cognee-0.3.3.dist-info → cognee-0.3.4.dist-info}/METADATA +11 -8
- {cognee-0.3.3.dist-info → cognee-0.3.4.dist-info}/RECORD +81 -73
- cognee/modules/notebooks/methods/create_tutorial_notebook.py +0 -92
- {cognee-0.3.3.dist-info → cognee-0.3.4.dist-info}/WHEEL +0 -0
- {cognee-0.3.3.dist-info → cognee-0.3.4.dist-info}/entry_points.txt +0 -0
- {cognee-0.3.3.dist-info → cognee-0.3.4.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.3.dist-info → cognee-0.3.4.dist-info}/licenses/NOTICE.md +0 -0
cognee/modules/graph/utils/expand_with_nodes_and_edges.py

@@ -7,8 +7,14 @@ from cognee.modules.engine.utils import (
     generate_node_id,
     generate_node_name,
 )
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
 from cognee.shared.data_models import KnowledgeGraph
-from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
+from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+from cognee.modules.ontology.get_default_ontology_resolver import (
+    get_default_ontology_resolver,
+    get_ontology_resolver_from_env,
+)
 
 
 def _create_node_key(node_id: str, category: str) -> str:
@@ -83,7 +89,7 @@ def _process_ontology_edges(
 
 def _create_type_node(
     node_type: str,
-    ontology_resolver: OntologyResolver,
+    ontology_resolver: RDFLibOntologyResolver,
     added_nodes_map: dict,
     added_ontology_nodes_map: dict,
     name_mapping: dict,
@@ -141,7 +147,7 @@ def _create_entity_node(
     node_name: str,
     node_description: str,
     type_node: EntityType,
-    ontology_resolver: OntologyResolver,
+    ontology_resolver: RDFLibOntologyResolver,
     added_nodes_map: dict,
     added_ontology_nodes_map: dict,
     name_mapping: dict,
@@ -198,7 +204,7 @@ def _create_entity_node(
 def _process_graph_nodes(
     data_chunk: DocumentChunk,
     graph: KnowledgeGraph,
-    ontology_resolver: OntologyResolver,
+    ontology_resolver: RDFLibOntologyResolver,
     added_nodes_map: dict,
     added_ontology_nodes_map: dict,
     name_mapping: dict,
@@ -277,7 +283,7 @@ def _process_graph_edges(
 def expand_with_nodes_and_edges(
     data_chunks: list[DocumentChunk],
     chunk_graphs: list[KnowledgeGraph],
-    ontology_resolver: OntologyResolver = None,
+    ontology_resolver: BaseOntologyResolver = None,
     existing_edges_map: Optional[dict[str, bool]] = None,
 ):
     """
@@ -296,8 +302,8 @@ def expand_with_nodes_and_edges(
         chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each
             data chunk. Each graph contains nodes (entities) and edges (relationships) extracted
            from the chunk content.
-        ontology_resolver (OntologyResolver, optional): Resolver for validating entities and
-            types against an ontology. If None, a default OntologyResolver is created.
+        ontology_resolver (BaseOntologyResolver, optional): Resolver for validating entities and
+            types against an ontology. If None, a default RDFLibOntologyResolver is created.
            Defaults to None.
         existing_edges_map (dict[str, bool], optional): Mapping of existing edge keys to prevent
            duplicate edge creation. Keys are formatted as "{source_id}_{target_id}_{relation}".
@@ -320,7 +326,15 @@ def expand_with_nodes_and_edges(
         existing_edges_map = {}
 
     if ontology_resolver is None:
-        ontology_resolver = OntologyResolver()
+        ontology_config = get_ontology_env_config()
+        if (
+            ontology_config.ontology_file_path
+            and ontology_config.ontology_resolver
+            and ontology_config.matching_strategy
+        ):
+            ontology_resolver = get_ontology_resolver_from_env(**ontology_config.to_dict())
+        else:
+            ontology_resolver = get_default_ontology_resolver()
 
     added_nodes_map = {}
     added_ontology_nodes_map = {}

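Pulled out of diff context, the new fallback reads as the sketch below. Since ontology_resolver and matching_strategy default to "rdflib" and "fuzzy" in the env config (see ontology_env_config.py further down), the branch effectively keys on whether an ontology file path is configured.

# Sketch of the new resolver selection; mirrors the hunk above, not extra API.
from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
from cognee.modules.ontology.get_default_ontology_resolver import (
    get_default_ontology_resolver,
    get_ontology_resolver_from_env,
)

config = get_ontology_env_config()
if config.ontology_file_path and config.ontology_resolver and config.matching_strategy:
    # All three env values present: build the resolver from the environment
    resolver = get_ontology_resolver_from_env(**config.to_dict())
else:
    # Otherwise fall back to an RDFLib resolver with no ontology file
    resolver = get_default_ontology_resolver()
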
cognee/modules/graph/utils/retrieve_existing_edges.py

@@ -23,8 +23,6 @@ async def retrieve_existing_edges(
         chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each
             data chunk. Each graph contains nodes (entities) and edges (relationships) that
             were extracted from the chunk content.
-        graph_engine (GraphDBInterface): Interface to the graph database that will be queried
-            to check for existing edges. Must implement the has_edges() method.
 
     Returns:
         dict[str, bool]: A mapping of edge keys to boolean values indicating existence.

cognee/modules/notebooks/methods/create_notebook.py

@@ -6,6 +6,40 @@ from cognee.infrastructure.databases.relational import with_async_session
 
 from ..models.Notebook import Notebook, NotebookCell
 
+TUTORIAL_NOTEBOOK_NAME = "Python Development with Cognee Tutorial 🧠"
+
+
+async def _create_tutorial_notebook(
+    user_id: UUID, session: AsyncSession, force_refresh: bool = False
+) -> None:
+    """
+    Create the default tutorial notebook for new users.
+    Dynamically fetches from: https://github.com/topoteretes/cognee/blob/notebook_tutorial/notebooks/starter_tutorial.zip
+    """
+    TUTORIAL_ZIP_URL = (
+        "https://github.com/topoteretes/cognee/raw/notebook_tutorial/notebooks/starter_tutorial.zip"
+    )
+
+    try:
+        # Create notebook from remote zip file (includes notebook + data files)
+        notebook = await Notebook.from_ipynb_zip_url(
+            zip_url=TUTORIAL_ZIP_URL,
+            owner_id=user_id,
+            notebook_filename="tutorial.ipynb",
+            name=TUTORIAL_NOTEBOOK_NAME,
+            deletable=False,
+            force=force_refresh,
+        )
+
+        # Add to session and commit
+        session.add(notebook)
+        await session.commit()
+
+    except Exception as e:
+        print(f"Failed to fetch tutorial notebook from {TUTORIAL_ZIP_URL}: {e}")
+
+        raise e
+
 
 @with_async_session
 async def create_notebook(

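For orientation, a hedged sketch of how this helper would be invoked; the session argument and wrapper function are illustrative, not part of the diff (in the package the session comes from @with_async_session).

# Illustrative only: seeding the tutorial notebook for one user.
from uuid import UUID

from cognee.modules.notebooks.methods.create_notebook import _create_tutorial_notebook


async def seed_tutorial(user_id: UUID, session) -> None:  # session: AsyncSession
    # deletable=False is hard-coded inside the helper; force_refresh=True
    # re-downloads the tutorial zip even when a cached copy exists.
    await _create_tutorial_notebook(user_id, session, force_refresh=True)
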
cognee/modules/notebooks/methods/get_notebooks.py

@@ -1,11 +1,16 @@
 from uuid import UUID
 from typing import List
-from sqlalchemy import select
+from sqlalchemy import select, and_
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from cognee.infrastructure.databases.relational import with_async_session
 
 from ..models.Notebook import Notebook
+from .create_notebook import _create_tutorial_notebook, TUTORIAL_NOTEBOOK_NAME
+
+from cognee.shared.logging_utils import get_logger
+
+logger = get_logger()
 
 
 @with_async_session
@@ -13,6 +18,27 @@ async def get_notebooks(
     user_id: UUID,
     session: AsyncSession,
 ) -> List[Notebook]:
+    # Check if tutorial notebook already exists for this user
+    tutorial_query = select(Notebook).where(
+        and_(
+            Notebook.owner_id == user_id,
+            Notebook.name == TUTORIAL_NOTEBOOK_NAME,
+            ~Notebook.deletable,
+        )
+    )
+    tutorial_result = await session.execute(tutorial_query)
+    tutorial_notebook = tutorial_result.scalar_one_or_none()
+
+    # If tutorial notebook doesn't exist, create it
+    if tutorial_notebook is None:
+        logger.info(f"Tutorial notebook not found for user {user_id}, creating it")
+        try:
+            await _create_tutorial_notebook(user_id, session, force_refresh=False)
+        except Exception as e:
+            # Log the error but continue to return existing notebooks
+            logger.error(f"Failed to create tutorial notebook for user {user_id}: {e}")
+
+    # Get all notebooks for the user
     result = await session.execute(select(Notebook).where(Notebook.owner_id == user_id))
 
     return list(result.scalars().all())

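One detail worth flagging in the lookup above: `~Notebook.deletable` is SQLAlchemy's column negation, which renders as SQL NOT, rather than Python's `not`. A standalone sketch (not in the package):

from uuid import uuid4

from sqlalchemy import select, and_

from cognee.modules.notebooks.models.Notebook import Notebook

stmt = select(Notebook).where(
    and_(
        Notebook.owner_id == uuid4(),
        Notebook.name == "Python Development with Cognee Tutorial 🧠",
        ~Notebook.deletable,
    )
)
print(stmt)  # rendered SQL ends with "... AND NOT <table>.deletable"
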
cognee/modules/notebooks/models/Notebook.py

@@ -1,13 +1,24 @@
 import json
-from typing import List, Literal, Optional
+import nbformat
+import asyncio
+from nbformat.notebooknode import NotebookNode
+from typing import List, Literal, Optional, cast, Tuple
 from uuid import uuid4, UUID as UUID_t
 from pydantic import BaseModel, ConfigDict
 from datetime import datetime, timezone
 from fastapi.encoders import jsonable_encoder
 from sqlalchemy import Boolean, Column, DateTime, JSON, UUID, String, TypeDecorator
 from sqlalchemy.orm import mapped_column, Mapped
+from pathlib import Path
 
 from cognee.infrastructure.databases.relational import Base
+from cognee.shared.cache import (
+    download_and_extract_zip,
+    get_tutorial_data_dir,
+    generate_content_hash,
+)
+from cognee.infrastructure.files.storage.get_file_storage import get_file_storage
+from cognee.base_config import get_base_config
 
 
 class NotebookCell(BaseModel):
@@ -51,3 +62,197 @@ class Notebook(Base):
     deletable: Mapped[bool] = mapped_column(Boolean, default=True)
 
     created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
+
+    @classmethod
+    async def from_ipynb_zip_url(
+        cls,
+        zip_url: str,
+        owner_id: UUID_t,
+        notebook_filename: str = "tutorial.ipynb",
+        name: Optional[str] = None,
+        deletable: bool = True,
+        force: bool = False,
+    ) -> "Notebook":
+        """
+        Create a Notebook instance from a remote zip file containing notebook + data files.
+
+        Args:
+            zip_url: Remote URL to fetch the .zip file from
+            owner_id: UUID of the notebook owner
+            notebook_filename: Name of the .ipynb file within the zip
+            name: Optional custom name for the notebook
+            deletable: Whether the notebook can be deleted
+            force: If True, re-download even if already cached
+
+        Returns:
+            Notebook instance
+        """
+        # Generate a cache key based on the zip URL
+        content_hash = generate_content_hash(zip_url, notebook_filename)
+
+        # Download and extract the zip file to tutorial_data/{content_hash}
+        try:
+            extracted_cache_dir = await download_and_extract_zip(
+                url=zip_url,
+                cache_dir_name=f"tutorial_data/{content_hash}",
+                version_or_hash=content_hash,
+                force=force,
+            )
+        except Exception as e:
+            raise RuntimeError(f"Failed to download tutorial zip from {zip_url}") from e
+
+        # Use cache system to access the notebook file
+        from cognee.shared.cache import cache_file_exists, read_cache_file
+
+        notebook_file_path = f"{extracted_cache_dir}/{notebook_filename}"
+
+        # Check if the notebook file exists in cache
+        if not await cache_file_exists(notebook_file_path):
+            raise FileNotFoundError(f"Notebook file '{notebook_filename}' not found in zip")
+
+        # Read and parse the notebook using cache system
+        async with await read_cache_file(notebook_file_path, encoding="utf-8") as f:
+            notebook_content = await asyncio.to_thread(f.read)
+        notebook = cls.from_ipynb_string(notebook_content, owner_id, name, deletable)
+
+        # Update file paths in notebook cells to point to actual cached data files
+        await cls._update_file_paths_in_cells(notebook, extracted_cache_dir)
+
+        return notebook
+
+    @staticmethod
+    async def _update_file_paths_in_cells(notebook: "Notebook", cache_dir: str) -> None:
+        """
+        Update file paths in code cells to use actual cached data files.
+        Works with both local filesystem and S3 storage.
+
+        Args:
+            notebook: Parsed Notebook instance with cells to update
+            cache_dir: Path to the cached tutorial directory containing data files
+        """
+        import re
+        from cognee.shared.cache import list_cache_files, cache_file_exists
+        from cognee.shared.logging_utils import get_logger
+
+        logger = get_logger()
+
+        # Look for data files in the data subdirectory
+        data_dir = f"{cache_dir}/data"
+
+        try:
+            # Get all data files in the cache directory using cache system
+            data_files = {}
+            if await cache_file_exists(data_dir):
+                file_list = await list_cache_files(data_dir)
+            else:
+                file_list = []
+
+            for file_path in file_list:
+                # Extract just the filename
+                filename = file_path.split("/")[-1]
+                # Use the file path as provided by cache system
+                data_files[filename] = file_path
+
+        except Exception as e:
+            # If we can't list files, skip updating paths
+            logger.error(f"Error listing data files in {data_dir}: {e}")
+            return
+
+        # Pattern to match file://data/filename patterns in code cells
+        file_pattern = r'"file://data/([^"]+)"'
+
+        def replace_path(match):
+            filename = match.group(1)
+            if filename in data_files:
+                file_path = data_files[filename]
+                # For local filesystem, preserve file:// prefix
+                if not file_path.startswith("s3://"):
+                    return f'"file://{file_path}"'
+                else:
+                    # For S3, return the S3 URL as-is
+                    return f'"{file_path}"'
+            return match.group(0)  # Keep original if file not found
+
+        # Update only code cells
+        updated_cells = 0
+        for cell in notebook.cells:
+            if cell.type == "code":
+                original_content = cell.content
+                # Update file paths in the cell content
+                cell.content = re.sub(file_pattern, replace_path, cell.content)
+                if original_content != cell.content:
+                    updated_cells += 1
+
+        # Log summary of updates (useful for monitoring)
+        if updated_cells > 0:
+            logger.info(f"Updated file paths in {updated_cells} notebook cells")
+
+    @classmethod
+    def from_ipynb_string(
+        cls,
+        notebook_content: str,
+        owner_id: UUID_t,
+        name: Optional[str] = None,
+        deletable: bool = True,
+    ) -> "Notebook":
+        """
+        Create a Notebook instance from Jupyter notebook string content.
+
+        Args:
+            notebook_content: Raw Jupyter notebook content as string
+            owner_id: UUID of the notebook owner
+            name: Optional custom name for the notebook
+            deletable: Whether the notebook can be deleted
+
+        Returns:
+            Notebook instance ready to be saved to database
+        """
+        # Parse and validate the Jupyter notebook using nbformat
+        # Note: nbformat.reads() has loose typing, so we cast to NotebookNode
+        jupyter_nb = cast(
+            NotebookNode, nbformat.reads(notebook_content, as_version=nbformat.NO_CONVERT)
+        )
+
+        # Convert Jupyter cells to NotebookCell objects
+        cells = []
+        for jupyter_cell in jupyter_nb.cells:
+            # Each cell is also a NotebookNode with dynamic attributes
+            cell = cast(NotebookNode, jupyter_cell)
+            # Skip raw cells as they're not supported in our model
+            if cell.cell_type == "raw":
+                continue
+
+            # Get the source content
+            content = cell.source
+
+            # Generate a name based on content or cell index
+            cell_name = cls._generate_cell_name(cell)
+
+            # Map cell types (jupyter uses "code"/"markdown", we use same)
+            cell_type = "code" if cell.cell_type == "code" else "markdown"
+
+            cells.append(NotebookCell(id=uuid4(), type=cell_type, name=cell_name, content=content))
+
+        # Extract notebook name from metadata if not provided
+        if name is None:
+            kernelspec = jupyter_nb.metadata.get("kernelspec", {})
+            name = kernelspec.get("display_name") or kernelspec.get("name", "Imported Notebook")
+
+        return cls(id=uuid4(), owner_id=owner_id, name=name, cells=cells, deletable=deletable)
+
+    @staticmethod
+    def _generate_cell_name(jupyter_cell: NotebookNode) -> str:
+        """Generate a meaningful name for a notebook cell using nbformat cell."""
+        if jupyter_cell.cell_type == "markdown":
+            # Try to extract a title from markdown headers
+            content = jupyter_cell.source
+
+            lines = content.strip().split("\n")
+            if lines and lines[0].startswith("#"):
+                # Extract header text, clean it up
+                header = lines[0].lstrip("#").strip()
+                return header[:50] if len(header) > 50 else header
+            else:
+                return "Markdown Cell"
+        else:
+            return "Code Cell"

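Taken together, from_ipynb_string plus _generate_cell_name give a direct import path for any nbformat notebook. A usage sketch, assuming only that nbformat is installed (the model now imports it at module load):

import nbformat
from uuid import uuid4

from cognee.modules.notebooks.models.Notebook import Notebook

# Build a minimal two-cell notebook in memory and serialize it
nb = nbformat.v4.new_notebook()
nb.cells = [
    nbformat.v4.new_markdown_cell("# Getting started"),
    nbformat.v4.new_code_cell("print('hello cognee')"),
]

notebook = Notebook.from_ipynb_string(
    nbformat.writes(nb), owner_id=uuid4(), name="Demo", deletable=True
)
# Markdown cells are named after their first header; code cells fall back
# to the generic "Code Cell" label
assert [c.name for c in notebook.cells] == ["Getting started", "Code Cell"]
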
cognee/modules/observability/get_observe.py

@@ -9,3 +9,17 @@ def get_observe():
         from langfuse.decorators import observe
 
         return observe
+    elif monitoring == Observer.NONE:
+        # Return a no-op decorator that handles keyword arguments
+        def no_op_decorator(*args, **kwargs):
+            if len(args) == 1 and callable(args[0]) and not kwargs:
+                # Direct decoration: @observe
+                return args[0]
+            else:
+                # Parameterized decoration: @observe(as_type="generation")
+                def decorator(func):
+                    return func
+
+                return decorator
+
+        return no_op_decorator

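This branch only fires when the configured monitoring tool resolves to Observer.NONE; under that assumption, both decoration forms become pass-throughs:

from cognee.modules.observability.get_observe import get_observe

observe = get_observe()  # assumed: monitoring resolves to Observer.NONE


@observe  # bare decoration: the function is returned unchanged
def step_a():
    return "a"


@observe(as_type="generation")  # parameterized decoration: also a pass-through
def step_b():
    return "b"


assert step_a() == "a" and step_b() == "b"
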
cognee/modules/ontology/base_ontology_resolver.py (new file)

@@ -0,0 +1,42 @@
+from abc import ABC, abstractmethod
+from typing import List, Tuple, Optional
+
+from cognee.modules.ontology.models import AttachedOntologyNode
+from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy
+
+
+class BaseOntologyResolver(ABC):
+    """Abstract base class for ontology resolvers."""
+
+    def __init__(self, matching_strategy: Optional[MatchingStrategy] = None):
+        """Initialize the ontology resolver with a matching strategy.
+
+        Args:
+            matching_strategy: The strategy to use for entity matching.
+                Defaults to FuzzyMatchingStrategy if None.
+        """
+        self.matching_strategy = matching_strategy or FuzzyMatchingStrategy()
+
+    @abstractmethod
+    def build_lookup(self) -> None:
+        """Build the lookup dictionary for ontology entities."""
+        pass
+
+    @abstractmethod
+    def refresh_lookup(self) -> None:
+        """Refresh the lookup dictionary."""
+        pass
+
+    @abstractmethod
+    def find_closest_match(self, name: str, category: str) -> Optional[str]:
+        """Find the closest match for a given name in the specified category."""
+        pass
+
+    @abstractmethod
+    def get_subgraph(
+        self, node_name: str, node_type: str = "individuals", directed: bool = True
+    ) -> Tuple[
+        List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
+    ]:
+        """Get a subgraph for the given node."""
+        pass

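A hypothetical minimal subclass, just to show the contract a concrete resolver has to satisfy (the shipped implementation is RDFLibOntologyResolver; InMemoryResolver below is made up for illustration):

from typing import List, Optional, Tuple

from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.modules.ontology.models import AttachedOntologyNode


class InMemoryResolver(BaseOntologyResolver):
    def __init__(self, individuals: List[str], **kwargs):
        super().__init__(**kwargs)  # picks up FuzzyMatchingStrategy by default
        self._individuals = individuals
        self._lookup: dict = {}
        self.build_lookup()

    def build_lookup(self) -> None:
        self._lookup = {"individuals": [n.lower() for n in self._individuals]}

    def refresh_lookup(self) -> None:
        self.build_lookup()

    def find_closest_match(self, name: str, category: str) -> Optional[str]:
        # Delegate to the configured MatchingStrategy
        return self.matching_strategy.find_match(name.lower(), self._lookup.get(category, []))

    def get_subgraph(
        self, node_name: str, node_type: str = "individuals", directed: bool = True
    ) -> Tuple[
        List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
    ]:
        # Trivial stand-in: no nodes, no edges, no root
        return [], [], None
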
cognee/modules/ontology/get_default_ontology_resolver.py (new file)

@@ -0,0 +1,41 @@
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+
+def get_default_ontology_resolver() -> BaseOntologyResolver:
+    return RDFLibOntologyResolver(ontology_file=None, matching_strategy=FuzzyMatchingStrategy())
+
+
+def get_ontology_resolver_from_env(
+    ontology_resolver: str = "", matching_strategy: str = "", ontology_file_path: str = ""
+) -> BaseOntologyResolver:
+    """
+    Create and return an ontology resolver instance based on environment parameters.
+
+    Currently, this function supports only the RDFLib-based ontology resolver
+    with a fuzzy matching strategy.
+
+    Args:
+        ontology_resolver (str): The ontology resolver type to use.
+            Supported value: "rdflib".
+        matching_strategy (str): The matching strategy to apply.
+            Supported value: "fuzzy".
+        ontology_file_path (str): Path to the ontology file required for the resolver.
+
+    Returns:
+        BaseOntologyResolver: An instance of the requested ontology resolver.
+
+    Raises:
+        EnvironmentError: If the provided resolver or strategy is unsupported,
+            or if required parameters are missing.
+    """
+    if ontology_resolver == "rdflib" and matching_strategy == "fuzzy" and ontology_file_path:
+        return RDFLibOntologyResolver(
+            matching_strategy=FuzzyMatchingStrategy(), ontology_file=ontology_file_path
+        )
+    else:
+        raise EnvironmentError(
+            f"Unsupported ontology resolver: {ontology_resolver}. "
+            f"Supported resolvers are: RdfLib with FuzzyMatchingStrategy."
+        )

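Usage sketch; the .owl path below is a made-up example. Any combination other than ontology_resolver="rdflib", matching_strategy="fuzzy", and a non-empty file path raises EnvironmentError:

from cognee.modules.ontology.get_default_ontology_resolver import (
    get_ontology_resolver_from_env,
)

resolver = get_ontology_resolver_from_env(
    ontology_resolver="rdflib",
    matching_strategy="fuzzy",
    ontology_file_path="/data/ontologies/domain.owl",  # hypothetical path
)
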
cognee/modules/ontology/matching_strategies.py (new file)

@@ -0,0 +1,53 @@
+import difflib
+from abc import ABC, abstractmethod
+from typing import List, Optional
+
+
+class MatchingStrategy(ABC):
+    """Abstract base class for ontology entity matching strategies."""
+
+    @abstractmethod
+    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
+        """Find the best match for a given name from a list of candidates.
+
+        Args:
+            name: The name to match
+            candidates: List of candidate names to match against
+
+        Returns:
+            The best matching candidate name, or None if no match found
+        """
+        pass
+
+
+class FuzzyMatchingStrategy(MatchingStrategy):
+    """Fuzzy matching strategy using difflib for approximate string matching."""
+
+    def __init__(self, cutoff: float = 0.8):
+        """Initialize fuzzy matching strategy.
+
+        Args:
+            cutoff: Minimum similarity score (0.0 to 1.0) for a match to be considered valid
+        """
+        self.cutoff = cutoff
+
+    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
+        """Find the closest fuzzy match for a given name.
+
+        Args:
+            name: The normalized name to match
+            candidates: List of normalized candidate names
+
+        Returns:
+            The best matching candidate name, or None if no match meets the cutoff
+        """
+        if not candidates:
+            return None
+
+        # Check for exact match first
+        if name in candidates:
+            return name
+
+        # Find fuzzy match
+        best_match = difflib.get_close_matches(name, candidates, n=1, cutoff=self.cutoff)
+        return best_match[0] if best_match else None

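The strategy checks for an exact hit before falling back to difflib.get_close_matches with the 0.8 default cutoff, e.g.:

from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy

strategy = FuzzyMatchingStrategy()
assert strategy.find_match("python", ["python", "java"]) == "python"  # exact hit
assert strategy.find_match("pyhton", ["python", "java"]) == "python"  # fuzzy, ratio ~0.83
assert strategy.find_match("ruby", ["python", "java"]) is None        # below cutoff
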
cognee/modules/ontology/models.py (new file)

@@ -0,0 +1,20 @@
+from typing import Any
+
+
+class AttachedOntologyNode:
+    """Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface."""
+
+    def __init__(self, uri: Any, category: str):
+        self.uri = uri
+        self.name = self._extract_name(uri)
+        self.category = category
+
+    @staticmethod
+    def _extract_name(uri: Any) -> str:
+        uri_str = str(uri)
+        if "#" in uri_str:
+            return uri_str.split("#")[-1]
+        return uri_str.rstrip("/").split("/")[-1]
+
+    def __repr__(self):
+        return f"AttachedOntologyNode(name={self.name}, category={self.category})"

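A quick check of _extract_name on the two URI shapes it handles (fragment-style and path-style):

from cognee.modules.ontology.models import AttachedOntologyNode

a = AttachedOntologyNode("http://example.org/onto#Vehicle", "classes")
b = AttachedOntologyNode("http://example.org/onto/Vehicle/", "classes")
# "#Vehicle" fragment and trailing-slash path both reduce to "Vehicle"
assert a.name == b.name == "Vehicle"
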
cognee/modules/ontology/ontology_config.py (new file)

@@ -0,0 +1,24 @@
+from typing import TypedDict, Optional
+
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+from cognee.modules.ontology.matching_strategies import MatchingStrategy
+
+
+class OntologyConfig(TypedDict, total=False):
+    """Configuration containing ontology resolver.
+
+    Attributes:
+        ontology_resolver: The ontology resolver instance to use
+    """
+
+    ontology_resolver: Optional[BaseOntologyResolver]
+
+
+class Config(TypedDict, total=False):
+    """Top-level configuration dictionary.
+
+    Attributes:
+        ontology_config: Configuration containing ontology resolver
+    """
+
+    ontology_config: Optional[OntologyConfig]

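Since TypedDicts are ordinary dicts at runtime, wiring a resolver into this config shape is plain dict construction (resolver creation borrowed from the module added above):

from cognee.modules.ontology.ontology_config import Config, OntologyConfig
from cognee.modules.ontology.get_default_ontology_resolver import (
    get_default_ontology_resolver,
)

config: Config = {
    "ontology_config": OntologyConfig(ontology_resolver=get_default_ontology_resolver())
}
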
cognee/modules/ontology/ontology_env_config.py (new file)

@@ -0,0 +1,45 @@
+"""This module contains the configuration for ontology handling."""
+
+from functools import lru_cache
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class OntologyEnvConfig(BaseSettings):
+    """
+    Represents the configuration for ontology handling, including parameters for
+    ontology file storage and resolution/matching strategies.
+
+    Public methods:
+    - to_dict
+
+    Instance variables:
+    - ontology_resolver
+    - ontology_matching
+    - ontology_file_path
+    - model_config
+    """
+
+    ontology_resolver: str = "rdflib"
+    matching_strategy: str = "fuzzy"
+    ontology_file_path: str = ""
+
+    model_config = SettingsConfigDict(env_file=".env", extra="allow", populate_by_name=True)
+
+    def to_dict(self) -> dict:
+        """
+        Return the configuration as a dictionary.
+        """
+        return {
+            "ontology_resolver": self.ontology_resolver,
+            "matching_strategy": self.matching_strategy,
+            "ontology_file_path": self.ontology_file_path,
+        }
+
+
+@lru_cache
+def get_ontology_env_config():
+    """
+    Retrieve the ontology configuration. This function utilizes caching to return a
+    singleton instance of the OntologyConfig class for efficiency.
+    """
+    return OntologyEnvConfig()

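A sketch of how the settings resolve, assuming standard pydantic-settings behavior (field names map to environment variables case-insensitively). Note the lru_cache: values must be in the environment, or in .env, before the first call.

import os

os.environ["ONTOLOGY_FILE_PATH"] = "/data/ontologies/domain.owl"  # hypothetical path

from cognee.modules.ontology.ontology_env_config import get_ontology_env_config

config = get_ontology_env_config()  # cached singleton from here on
assert config.to_dict()["ontology_file_path"] == "/data/ontologies/domain.owl"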