cognee 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/v1/cloud/routers/get_checks_router.py +1 -1
- cognee/api/v1/cognify/cognify.py +44 -7
- cognee/api/v1/cognify/routers/get_cognify_router.py +2 -1
- cognee/api/v1/notebooks/routers/get_notebooks_router.py +2 -1
- cognee/api/v1/prune/prune.py +2 -2
- cognee/api/v1/search/search.py +1 -1
- cognee/api/v1/sync/sync.py +16 -5
- cognee/base_config.py +19 -1
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
- cognee/infrastructure/databases/graph/kuzu/remote_kuzu_adapter.py +4 -1
- cognee/infrastructure/databases/relational/ModelBase.py +2 -1
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -2
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -6
- cognee/infrastructure/databases/vector/config.py +1 -1
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +6 -5
- cognee/infrastructure/files/storage/LocalFileStorage.py +50 -0
- cognee/infrastructure/files/storage/S3FileStorage.py +56 -9
- cognee/infrastructure/files/storage/StorageManager.py +18 -0
- cognee/infrastructure/files/utils/get_file_metadata.py +6 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +4 -2
- cognee/infrastructure/utils/run_async.py +9 -4
- cognee/infrastructure/utils/run_sync.py +4 -3
- cognee/modules/cloud/operations/check_api_key.py +4 -1
- cognee/modules/data/deletion/prune_system.py +5 -1
- cognee/modules/data/methods/create_authorized_dataset.py +9 -0
- cognee/modules/data/methods/get_authorized_dataset.py +1 -1
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
- cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
- cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
- cognee/modules/notebooks/methods/create_notebook.py +34 -0
- cognee/modules/notebooks/methods/get_notebook.py +2 -2
- cognee/modules/notebooks/methods/get_notebooks.py +27 -1
- cognee/modules/notebooks/methods/update_notebook.py +0 -1
- cognee/modules/notebooks/models/Notebook.py +206 -1
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +8 -5
- cognee/modules/observability/get_observe.py +14 -0
- cognee/modules/observability/observers.py +1 -0
- cognee/modules/ontology/base_ontology_resolver.py +42 -0
- cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
- cognee/modules/ontology/matching_strategies.py +53 -0
- cognee/modules/ontology/models.py +20 -0
- cognee/modules/ontology/ontology_config.py +24 -0
- cognee/modules/ontology/ontology_env_config.py +45 -0
- cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +13 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +1 -1
- cognee/modules/pipelines/models/PipelineRunInfo.py +7 -2
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +1 -1
- cognee/modules/retrieval/graph_completion_cot_retriever.py +1 -1
- cognee/modules/retrieval/graph_completion_retriever.py +1 -1
- cognee/modules/retrieval/temporal_retriever.py +3 -3
- cognee/modules/retrieval/user_qa_feedback.py +1 -1
- cognee/modules/search/methods/get_search_type_tools.py +7 -0
- cognee/modules/search/methods/search.py +12 -13
- cognee/modules/search/utils/prepare_search_result.py +31 -9
- cognee/modules/search/utils/transform_context_to_graph.py +1 -1
- cognee/modules/search/utils/transform_insights_to_graph.py +28 -0
- cognee/modules/users/methods/create_user.py +4 -24
- cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
- cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
- cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +19 -2
- cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
- cognee/modules/users/permissions/methods/get_principal.py +9 -0
- cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
- cognee/modules/users/permissions/methods/get_role.py +10 -0
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
- cognee/modules/users/permissions/methods/get_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
- cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
- cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
- cognee/modules/users/roles/methods/create_role.py +10 -0
- cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
- cognee/modules/users/tenants/methods/create_tenant.py +10 -0
- cognee/root_dir.py +5 -0
- cognee/shared/cache.py +346 -0
- cognee/shared/utils.py +12 -0
- cognee/tasks/graph/extract_graph_from_data.py +53 -10
- cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
- cognee/tasks/ingestion/save_data_item_to_storage.py +1 -0
- cognee/tasks/temporal_graph/models.py +11 -6
- cognee/tests/cli_tests/cli_unit_tests/test_cli_main.py +5 -5
- cognee/tests/test_cognee_server_start.py +4 -4
- cognee/tests/test_temporal_graph.py +6 -34
- cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
- cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +399 -0
- {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/METADATA +11 -8
- {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/RECORD +93 -86
- cognee-0.3.4.dist-info/entry_points.txt +2 -0
- cognee/api/v1/save/save.py +0 -335
- cognee/tests/test_save_export_path.py +0 -116
- cognee-0.3.2.dist-info/entry_points.txt +0 -2
- {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/WHEEL +0 -0
- {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/licenses/NOTICE.md +0 -0
cognee/modules/cloud/operations/check_api_key.py

@@ -1,6 +1,7 @@
 import aiohttp
 
 from cognee.modules.cloud.exceptions import CloudConnectionError
+from cognee.shared.utils import create_secure_ssl_context
 
 
 async def check_api_key(auth_token: str):
@@ -10,7 +11,9 @@ async def check_api_key(auth_token: str):
     headers = {"X-Api-Key": auth_token}
 
     try:
-        async with aiohttp.ClientSession() as session:
+        ssl_context = create_secure_ssl_context()
+        connector = aiohttp.TCPConnector(ssl=ssl_context)
+        async with aiohttp.ClientSession(connector=connector) as session:
             async with session.post(url, headers=headers) as response:
                 if response.status == 200:
                     return
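
Note: `create_secure_ssl_context` itself is not shown in this diff. A minimal sketch of the pattern the new code follows, assuming the helper returns a standard `ssl.SSLContext` (for example via `ssl.create_default_context()`):

import asyncio
import ssl

import aiohttp


async def post_with_verified_tls(url: str, headers: dict) -> int:
    # Build an SSL context with certificate verification enabled
    # (a stand-in for cognee's create_secure_ssl_context helper; assumption).
    ssl_context = ssl.create_default_context()

    # Pass the context through a TCPConnector so every request in the
    # session uses it, mirroring the pattern in check_api_key above.
    connector = aiohttp.TCPConnector(ssl=ssl_context)
    async with aiohttp.ClientSession(connector=connector) as session:
        async with session.post(url, headers=headers) as response:
            return response.status


if __name__ == "__main__":
    print(asyncio.run(post_with_verified_tls("https://example.com", {})))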

cognee/modules/data/deletion/prune_system.py

@@ -1,9 +1,10 @@
 from cognee.infrastructure.databases.vector import get_vector_engine
 from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine
 from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.shared.cache import delete_cache
 
 
-async def prune_system(graph=True, vector=True, metadata=True):
+async def prune_system(graph=True, vector=True, metadata=True, cache=True):
     if graph:
         graph_engine = await get_graph_engine()
         await graph_engine.delete_graph()
@@ -15,3 +16,6 @@ async def prune_system(graph=True, vector=True, metadata=True):
     if metadata:
         db_engine = get_relational_engine()
         await db_engine.delete_database()
+
+    if cache:
+        await delete_cache()
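
A usage sketch for the extended signature. The new `cache` flag defaults to `True`, so existing callers also clear the cache unless they opt out; that the public `cognee.prune` API forwards these keyword flags is an assumption here:

import asyncio

import cognee

# Wipe graph, vector, and relational stores but keep the local cache;
# assumes cognee.prune.prune_system forwards keyword flags unchanged.
asyncio.run(cognee.prune.prune_system(cache=False))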

cognee/modules/data/methods/create_authorized_dataset.py

@@ -6,6 +6,15 @@ from .create_dataset import create_dataset
 
 
 async def create_authorized_dataset(dataset_name: str, user: User) -> Dataset:
+    """
+    Create a new dataset and give all permissions on this dataset to the given user.
+    Args:
+        dataset_name: Name of the dataset.
+        user: The user object.
+
+    Returns:
+        Dataset: The new authorized dataset.
+    """
     db_engine = get_relational_engine()
 
     async with db_engine.get_async_session() as session:

cognee/modules/data/methods/get_authorized_dataset.py

@@ -15,7 +15,7 @@ async def get_authorized_dataset(
     Get a specific dataset with permissions for a user.
 
     Args:
-
+        user: User object
         dataset_id (UUID): dataset id
         permission_type (str): permission type(read, write, delete, share), default is read
 

cognee/modules/data/methods/get_authorized_dataset_by_name.py

@@ -11,6 +11,17 @@ from ..models import Dataset
 async def get_authorized_dataset_by_name(
     dataset_name: str, user: User, permission_type: str
 ) -> Optional[Dataset]:
+    """
+    Get a specific dataset with the given name, with permissions for a given user.
+
+    Args:
+        dataset_name: Name of the dataset.
+        user: User object.
+        permission_type (str): permission type(read, write, delete, share), default is read
+
+    Returns:
+        Optional[Dataset]: dataset with permissions
+    """
     authorized_datasets = await get_authorized_existing_datasets([], permission_type, user)
 
     return next((dataset for dataset in authorized_datasets if dataset.name == dataset_name), None)

cognee/modules/graph/utils/expand_with_nodes_and_edges.py

@@ -7,8 +7,14 @@ from cognee.modules.engine.utils import (
     generate_node_id,
     generate_node_name,
 )
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
 from cognee.shared.data_models import KnowledgeGraph
-from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
+from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+from cognee.modules.ontology.get_default_ontology_resolver import (
+    get_default_ontology_resolver,
+    get_ontology_resolver_from_env,
+)
 
 
 def _create_node_key(node_id: str, category: str) -> str:
@@ -83,7 +89,7 @@ def _process_ontology_edges(
 
 def _create_type_node(
     node_type: str,
-    ontology_resolver: OntologyResolver,
+    ontology_resolver: RDFLibOntologyResolver,
     added_nodes_map: dict,
     added_ontology_nodes_map: dict,
     name_mapping: dict,
@@ -141,7 +147,7 @@ def _create_entity_node(
     node_name: str,
     node_description: str,
     type_node: EntityType,
-    ontology_resolver: OntologyResolver,
+    ontology_resolver: RDFLibOntologyResolver,
     added_nodes_map: dict,
     added_ontology_nodes_map: dict,
     name_mapping: dict,
@@ -198,7 +204,7 @@ def _create_entity_node(
 def _process_graph_nodes(
     data_chunk: DocumentChunk,
     graph: KnowledgeGraph,
-    ontology_resolver: OntologyResolver,
+    ontology_resolver: RDFLibOntologyResolver,
     added_nodes_map: dict,
     added_ontology_nodes_map: dict,
     name_mapping: dict,
@@ -277,7 +283,7 @@ def _process_graph_edges(
 def expand_with_nodes_and_edges(
     data_chunks: list[DocumentChunk],
     chunk_graphs: list[KnowledgeGraph],
-    ontology_resolver: OntologyResolver = None,
+    ontology_resolver: BaseOntologyResolver = None,
     existing_edges_map: Optional[dict[str, bool]] = None,
 ):
     """
@@ -296,8 +302,8 @@ def expand_with_nodes_and_edges(
         chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each
             data chunk. Each graph contains nodes (entities) and edges (relationships) extracted
            from the chunk content.
-        ontology_resolver (OntologyResolver, optional): Resolver for validating entities and
-            types against an ontology. If None, a default OntologyResolver is created.
+        ontology_resolver (BaseOntologyResolver, optional): Resolver for validating entities and
+            types against an ontology. If None, a default RDFLibOntologyResolver is created.
             Defaults to None.
         existing_edges_map (dict[str, bool], optional): Mapping of existing edge keys to prevent
             duplicate edge creation. Keys are formatted as "{source_id}_{target_id}_{relation}".
@@ -320,7 +326,15 @@ def expand_with_nodes_and_edges(
         existing_edges_map = {}
 
     if ontology_resolver is None:
-        ontology_resolver = OntologyResolver()
+        ontology_config = get_ontology_env_config()
+        if (
+            ontology_config.ontology_file_path
+            and ontology_config.ontology_resolver
+            and ontology_config.matching_strategy
+        ):
+            ontology_resolver = get_ontology_resolver_from_env(**ontology_config.to_dict())
+        else:
+            ontology_resolver = get_default_ontology_resolver()
 
     added_nodes_map = {}
     added_ontology_nodes_map = {}
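
The fallback added above, isolated as a sketch for readability (same calls as the hunk, no new behavior): the env-configured resolver is used only when all three settings are present, otherwise the fuzzy RDFLib default.

from cognee.modules.ontology.get_default_ontology_resolver import (
    get_default_ontology_resolver,
    get_ontology_resolver_from_env,
)
from cognee.modules.ontology.ontology_env_config import get_ontology_env_config


def pick_resolver():
    # Mirrors the new default-selection logic in expand_with_nodes_and_edges.
    config = get_ontology_env_config()
    if config.ontology_file_path and config.ontology_resolver and config.matching_strategy:
        return get_ontology_resolver_from_env(**config.to_dict())
    return get_default_ontology_resolver()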

cognee/modules/graph/utils/retrieve_existing_edges.py

@@ -23,8 +23,6 @@ async def retrieve_existing_edges(
         chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each
             data chunk. Each graph contains nodes (entities) and edges (relationships) that
             were extracted from the chunk content.
-        graph_engine (GraphDBInterface): Interface to the graph database that will be queried
-            to check for existing edges. Must implement the has_edges() method.
 
     Returns:
         dict[str, bool]: A mapping of edge keys to boolean values indicating existence.

cognee/modules/notebooks/methods/create_notebook.py

@@ -6,6 +6,40 @@ from cognee.infrastructure.databases.relational import with_async_session
 
 from ..models.Notebook import Notebook, NotebookCell
 
+TUTORIAL_NOTEBOOK_NAME = "Python Development with Cognee Tutorial 🧠"
+
+
+async def _create_tutorial_notebook(
+    user_id: UUID, session: AsyncSession, force_refresh: bool = False
+) -> None:
+    """
+    Create the default tutorial notebook for new users.
+    Dynamically fetches from: https://github.com/topoteretes/cognee/blob/notebook_tutorial/notebooks/starter_tutorial.zip
+    """
+    TUTORIAL_ZIP_URL = (
+        "https://github.com/topoteretes/cognee/raw/notebook_tutorial/notebooks/starter_tutorial.zip"
+    )
+
+    try:
+        # Create notebook from remote zip file (includes notebook + data files)
+        notebook = await Notebook.from_ipynb_zip_url(
+            zip_url=TUTORIAL_ZIP_URL,
+            owner_id=user_id,
+            notebook_filename="tutorial.ipynb",
+            name=TUTORIAL_NOTEBOOK_NAME,
+            deletable=False,
+            force=force_refresh,
+        )
+
+        # Add to session and commit
+        session.add(notebook)
+        await session.commit()
+
+    except Exception as e:
+        print(f"Failed to fetch tutorial notebook from {TUTORIAL_ZIP_URL}: {e}")
+
+        raise e
+
 
 @with_async_session
 async def create_notebook(

cognee/modules/notebooks/methods/get_notebook.py

@@ -1,6 +1,6 @@
 from uuid import UUID
 from typing import Optional
-from sqlalchemy import select
+from sqlalchemy import and_, select
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from cognee.infrastructure.databases.relational import with_async_session
@@ -15,7 +15,7 @@ async def get_notebook(
     session: AsyncSession,
 ) -> Optional[Notebook]:
     result = await session.execute(
-        select(Notebook).where(Notebook.owner_id == user_id)
+        select(Notebook).where(and_(Notebook.owner_id == user_id, Notebook.id == notebook_id))
     )
 
     return result.scalar()
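
The fix tightens the lookup to a single notebook instead of whatever notebook the user happens to own first. A sketch of the equivalent query construction; `and_()` and chained `.where()` calls are interchangeable here:

from uuid import UUID

from sqlalchemy import and_, select

from cognee.modules.notebooks.models.Notebook import Notebook


def notebook_query(user_id: UUID, notebook_id: UUID):
    # The fixed query from the hunk above: both predicates combined with and_().
    query = select(Notebook).where(
        and_(Notebook.owner_id == user_id, Notebook.id == notebook_id)
    )
    # Chained .where() calls are implicitly ANDed and would read the same:
    # select(Notebook).where(Notebook.owner_id == user_id).where(Notebook.id == notebook_id)
    return query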

cognee/modules/notebooks/methods/get_notebooks.py

@@ -1,11 +1,16 @@
 from uuid import UUID
 from typing import List
-from sqlalchemy import select
+from sqlalchemy import select, and_
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from cognee.infrastructure.databases.relational import with_async_session
 
 from ..models.Notebook import Notebook
+from .create_notebook import _create_tutorial_notebook, TUTORIAL_NOTEBOOK_NAME
+
+from cognee.shared.logging_utils import get_logger
+
+logger = get_logger()
 
 
 @with_async_session
@@ -13,6 +18,27 @@
     user_id: UUID,
     session: AsyncSession,
 ) -> List[Notebook]:
+    # Check if tutorial notebook already exists for this user
+    tutorial_query = select(Notebook).where(
+        and_(
+            Notebook.owner_id == user_id,
+            Notebook.name == TUTORIAL_NOTEBOOK_NAME,
+            ~Notebook.deletable,
+        )
+    )
+    tutorial_result = await session.execute(tutorial_query)
+    tutorial_notebook = tutorial_result.scalar_one_or_none()
+
+    # If tutorial notebook doesn't exist, create it
+    if tutorial_notebook is None:
+        logger.info(f"Tutorial notebook not found for user {user_id}, creating it")
+        try:
+            await _create_tutorial_notebook(user_id, session, force_refresh=False)
+        except Exception as e:
+            # Log the error but continue to return existing notebooks
+            logger.error(f"Failed to create tutorial notebook for user {user_id}: {e}")
+
+    # Get all notebooks for the user
     result = await session.execute(select(Notebook).where(Notebook.owner_id == user_id))
 
     return list(result.scalars().all())
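
A hedged usage sketch: the first `get_notebooks` call for a user now seeds the non-deletable tutorial notebook before listing. The `session` argument is assumed to be injected by the `@with_async_session` decorator when omitted:

import asyncio
from uuid import UUID

from cognee.modules.notebooks.methods.get_notebooks import get_notebooks


async def main(user_id: UUID) -> None:
    # Lazily seeds the tutorial notebook on first call, then lists everything
    # the user owns (session injection via @with_async_session is assumed).
    for notebook in await get_notebooks(user_id):
        print(notebook.name)


# asyncio.run(main(UUID("00000000-0000-0000-0000-000000000000")))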

cognee/modules/notebooks/models/Notebook.py

@@ -1,13 +1,24 @@
 import json
-from typing import List, Literal, Optional
+import nbformat
+import asyncio
+from nbformat.notebooknode import NotebookNode
+from typing import List, Literal, Optional, cast, Tuple
 from uuid import uuid4, UUID as UUID_t
 from pydantic import BaseModel, ConfigDict
 from datetime import datetime, timezone
 from fastapi.encoders import jsonable_encoder
 from sqlalchemy import Boolean, Column, DateTime, JSON, UUID, String, TypeDecorator
 from sqlalchemy.orm import mapped_column, Mapped
+from pathlib import Path
 
 from cognee.infrastructure.databases.relational import Base
+from cognee.shared.cache import (
+    download_and_extract_zip,
+    get_tutorial_data_dir,
+    generate_content_hash,
+)
+from cognee.infrastructure.files.storage.get_file_storage import get_file_storage
+from cognee.base_config import get_base_config
 
 
 class NotebookCell(BaseModel):
@@ -51,3 +62,197 @@ class Notebook(Base):
     deletable: Mapped[bool] = mapped_column(Boolean, default=True)
 
     created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
+
+    @classmethod
+    async def from_ipynb_zip_url(
+        cls,
+        zip_url: str,
+        owner_id: UUID_t,
+        notebook_filename: str = "tutorial.ipynb",
+        name: Optional[str] = None,
+        deletable: bool = True,
+        force: bool = False,
+    ) -> "Notebook":
+        """
+        Create a Notebook instance from a remote zip file containing notebook + data files.
+
+        Args:
+            zip_url: Remote URL to fetch the .zip file from
+            owner_id: UUID of the notebook owner
+            notebook_filename: Name of the .ipynb file within the zip
+            name: Optional custom name for the notebook
+            deletable: Whether the notebook can be deleted
+            force: If True, re-download even if already cached
+
+        Returns:
+            Notebook instance
+        """
+        # Generate a cache key based on the zip URL
+        content_hash = generate_content_hash(zip_url, notebook_filename)
+
+        # Download and extract the zip file to tutorial_data/{content_hash}
+        try:
+            extracted_cache_dir = await download_and_extract_zip(
+                url=zip_url,
+                cache_dir_name=f"tutorial_data/{content_hash}",
+                version_or_hash=content_hash,
+                force=force,
+            )
+        except Exception as e:
+            raise RuntimeError(f"Failed to download tutorial zip from {zip_url}") from e
+
+        # Use cache system to access the notebook file
+        from cognee.shared.cache import cache_file_exists, read_cache_file
+
+        notebook_file_path = f"{extracted_cache_dir}/{notebook_filename}"
+
+        # Check if the notebook file exists in cache
+        if not await cache_file_exists(notebook_file_path):
+            raise FileNotFoundError(f"Notebook file '{notebook_filename}' not found in zip")
+
+        # Read and parse the notebook using cache system
+        async with await read_cache_file(notebook_file_path, encoding="utf-8") as f:
+            notebook_content = await asyncio.to_thread(f.read)
+            notebook = cls.from_ipynb_string(notebook_content, owner_id, name, deletable)
+
+        # Update file paths in notebook cells to point to actual cached data files
+        await cls._update_file_paths_in_cells(notebook, extracted_cache_dir)
+
+        return notebook
+
+    @staticmethod
+    async def _update_file_paths_in_cells(notebook: "Notebook", cache_dir: str) -> None:
+        """
+        Update file paths in code cells to use actual cached data files.
+        Works with both local filesystem and S3 storage.
+
+        Args:
+            notebook: Parsed Notebook instance with cells to update
+            cache_dir: Path to the cached tutorial directory containing data files
+        """
+        import re
+        from cognee.shared.cache import list_cache_files, cache_file_exists
+        from cognee.shared.logging_utils import get_logger
+
+        logger = get_logger()
+
+        # Look for data files in the data subdirectory
+        data_dir = f"{cache_dir}/data"
+
+        try:
+            # Get all data files in the cache directory using cache system
+            data_files = {}
+            if await cache_file_exists(data_dir):
+                file_list = await list_cache_files(data_dir)
+            else:
+                file_list = []
+
+            for file_path in file_list:
+                # Extract just the filename
+                filename = file_path.split("/")[-1]
+                # Use the file path as provided by cache system
+                data_files[filename] = file_path
+
+        except Exception as e:
+            # If we can't list files, skip updating paths
+            logger.error(f"Error listing data files in {data_dir}: {e}")
+            return
+
+        # Pattern to match file://data/filename patterns in code cells
+        file_pattern = r'"file://data/([^"]+)"'
+
+        def replace_path(match):
+            filename = match.group(1)
+            if filename in data_files:
+                file_path = data_files[filename]
+                # For local filesystem, preserve file:// prefix
+                if not file_path.startswith("s3://"):
+                    return f'"file://{file_path}"'
+                else:
+                    # For S3, return the S3 URL as-is
+                    return f'"{file_path}"'
+            return match.group(0)  # Keep original if file not found
+
+        # Update only code cells
+        updated_cells = 0
+        for cell in notebook.cells:
+            if cell.type == "code":
+                original_content = cell.content
+                # Update file paths in the cell content
+                cell.content = re.sub(file_pattern, replace_path, cell.content)
+                if original_content != cell.content:
+                    updated_cells += 1
+
+        # Log summary of updates (useful for monitoring)
+        if updated_cells > 0:
+            logger.info(f"Updated file paths in {updated_cells} notebook cells")
+
+    @classmethod
+    def from_ipynb_string(
+        cls,
+        notebook_content: str,
+        owner_id: UUID_t,
+        name: Optional[str] = None,
+        deletable: bool = True,
+    ) -> "Notebook":
+        """
+        Create a Notebook instance from Jupyter notebook string content.
+
+        Args:
+            notebook_content: Raw Jupyter notebook content as string
+            owner_id: UUID of the notebook owner
+            name: Optional custom name for the notebook
+            deletable: Whether the notebook can be deleted
+
+        Returns:
+            Notebook instance ready to be saved to database
+        """
+        # Parse and validate the Jupyter notebook using nbformat
+        # Note: nbformat.reads() has loose typing, so we cast to NotebookNode
+        jupyter_nb = cast(
+            NotebookNode, nbformat.reads(notebook_content, as_version=nbformat.NO_CONVERT)
+        )
+
+        # Convert Jupyter cells to NotebookCell objects
+        cells = []
+        for jupyter_cell in jupyter_nb.cells:
+            # Each cell is also a NotebookNode with dynamic attributes
+            cell = cast(NotebookNode, jupyter_cell)
+            # Skip raw cells as they're not supported in our model
+            if cell.cell_type == "raw":
+                continue
+
+            # Get the source content
+            content = cell.source
+
+            # Generate a name based on content or cell index
+            cell_name = cls._generate_cell_name(cell)
+
+            # Map cell types (jupyter uses "code"/"markdown", we use same)
+            cell_type = "code" if cell.cell_type == "code" else "markdown"
+
+            cells.append(NotebookCell(id=uuid4(), type=cell_type, name=cell_name, content=content))
+
+        # Extract notebook name from metadata if not provided
+        if name is None:
+            kernelspec = jupyter_nb.metadata.get("kernelspec", {})
+            name = kernelspec.get("display_name") or kernelspec.get("name", "Imported Notebook")
+
+        return cls(id=uuid4(), owner_id=owner_id, name=name, cells=cells, deletable=deletable)
+
+    @staticmethod
+    def _generate_cell_name(jupyter_cell: NotebookNode) -> str:
+        """Generate a meaningful name for a notebook cell using nbformat cell."""
+        if jupyter_cell.cell_type == "markdown":
+            # Try to extract a title from markdown headers
+            content = jupyter_cell.source
+
+            lines = content.strip().split("\n")
+            if lines and lines[0].startswith("#"):
+                # Extract header text, clean it up
+                header = lines[0].lstrip("#").strip()
+                return header[:50] if len(header) > 50 else header
+            else:
+                return "Markdown Cell"
+        else:
+            return "Code Cell"
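
A usage sketch for the new parser, building a notebook in memory with nbformat; it assumes the Notebook model is importable outside a running server:

import uuid

import nbformat

from cognee.modules.notebooks.models.Notebook import Notebook

# Build a tiny notebook in memory and round-trip it through the new parser.
nb = nbformat.v4.new_notebook()
nb.cells = [
    nbformat.v4.new_markdown_cell("# Demo"),
    nbformat.v4.new_code_cell("print('hello')"),
]

notebook = Notebook.from_ipynb_string(
    notebook_content=nbformat.writes(nb),
    owner_id=uuid.uuid4(),
    deletable=True,
)
# The markdown cell is named from its header; the code cell gets "Code Cell".
print([(cell.type, cell.name) for cell in notebook.cells])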

cognee/modules/notebooks/operations/run_in_local_sandbox.py

@@ -5,16 +5,18 @@ import traceback
 
 def wrap_in_async_handler(user_code: str) -> str:
     return (
-        "from cognee.infrastructure.utils.run_sync import run_sync\n\n"
-        "async def __user_main__():\n"
+        "import asyncio\n"
+        + "asyncio.set_event_loop(running_loop)\n\n"
+        + "from cognee.infrastructure.utils.run_sync import run_sync\n\n"
+        + "async def __user_main__():\n"
         + "\n".join("    " + line for line in user_code.strip().split("\n"))
         + "\n"
-        "    globals().update(locals())\n\n"
-        "run_sync(__user_main__())\n"
+        + "    globals().update(locals())\n\n"
+        + "run_sync(__user_main__(), running_loop)\n"
     )
 
 
-def run_in_local_sandbox(code, environment=None):
+def run_in_local_sandbox(code, environment=None, loop=None):
     environment = environment or {}
     code = wrap_in_async_handler(code.replace("\xa0", "\n"))
 
@@ -31,6 +33,7 @@ def run_in_local_sandbox(code, environment=None):
         printOutput.append(output)
 
     environment["print"] = customPrintFunction
+    environment["running_loop"] = loop
 
     try:
         exec(code, environment)
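
To see the transformation, a sketch that prints the generated wrapper; the import path mirrors the file shown above. User code is indented into `__user_main__` and driven by `run_sync` on the loop passed in through the `running_loop` global:

from cognee.modules.notebooks.operations.run_in_local_sandbox import wrap_in_async_handler

# Prints the source that exec() will run inside the sandbox environment.
print(wrap_in_async_handler("result = 1 + 1\nprint(result)"))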

cognee/modules/observability/get_observe.py

@@ -9,3 +9,17 @@ def get_observe():
         from langfuse.decorators import observe
 
         return observe
+    elif monitoring == Observer.NONE:
+        # Return a no-op decorator that handles keyword arguments
+        def no_op_decorator(*args, **kwargs):
+            if len(args) == 1 and callable(args[0]) and not kwargs:
+                # Direct decoration: @observe
+                return args[0]
+            else:
+                # Parameterized decoration: @observe(as_type="generation")
+                def decorator(func):
+                    return func
+
+                return decorator
+
+        return no_op_decorator
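
A standalone replica of the no-op decorator showing both call shapes it must support (this mirrors the added code rather than importing it):

# Replica of the added no_op_decorator, for illustration.
def no_op_decorator(*args, **kwargs):
    if len(args) == 1 and callable(args[0]) and not kwargs:
        return args[0]  # bare usage: @observe

    def decorator(func):
        return func  # parameterized usage: @observe(as_type="generation")

    return decorator


@no_op_decorator
def direct():
    return "direct"


@no_op_decorator(as_type="generation")
def parameterized():
    return "parameterized"


assert direct() == "direct" and parameterized() == "parameterized"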

cognee/modules/ontology/base_ontology_resolver.py

@@ -0,0 +1,42 @@
+from abc import ABC, abstractmethod
+from typing import List, Tuple, Optional
+
+from cognee.modules.ontology.models import AttachedOntologyNode
+from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy
+
+
+class BaseOntologyResolver(ABC):
+    """Abstract base class for ontology resolvers."""
+
+    def __init__(self, matching_strategy: Optional[MatchingStrategy] = None):
+        """Initialize the ontology resolver with a matching strategy.
+
+        Args:
+            matching_strategy: The strategy to use for entity matching.
+                Defaults to FuzzyMatchingStrategy if None.
+        """
+        self.matching_strategy = matching_strategy or FuzzyMatchingStrategy()
+
+    @abstractmethod
+    def build_lookup(self) -> None:
+        """Build the lookup dictionary for ontology entities."""
+        pass
+
+    @abstractmethod
+    def refresh_lookup(self) -> None:
+        """Refresh the lookup dictionary."""
+        pass
+
+    @abstractmethod
+    def find_closest_match(self, name: str, category: str) -> Optional[str]:
+        """Find the closest match for a given name in the specified category."""
+        pass
+
+    @abstractmethod
+    def get_subgraph(
+        self, node_name: str, node_type: str = "individuals", directed: bool = True
+    ) -> Tuple[
+        List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
+    ]:
+        """Get a subgraph for the given node."""
+        pass
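
A hypothetical minimal subclass, to illustrate the contract; `InMemoryOntologyResolver` and its dict backing are illustrative, not part of the package:

from typing import List, Optional, Tuple

from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.modules.ontology.models import AttachedOntologyNode


class InMemoryOntologyResolver(BaseOntologyResolver):
    """Hypothetical resolver backed by a plain dict, for illustration only."""

    def __init__(self, entries: dict, matching_strategy=None):
        super().__init__(matching_strategy)
        self.entries = entries  # category -> list of known names
        self.lookup: dict = {}

    def build_lookup(self) -> None:
        self.lookup = {cat: [n.lower() for n in names] for cat, names in self.entries.items()}

    def refresh_lookup(self) -> None:
        self.build_lookup()

    def find_closest_match(self, name: str, category: str) -> Optional[str]:
        # Exact (case-insensitive) lookup; a real resolver would delegate
        # to self.matching_strategy here.
        return name.lower() if name.lower() in self.lookup.get(category, []) else None

    def get_subgraph(
        self, node_name: str, node_type: str = "individuals", directed: bool = True
    ) -> Tuple[
        List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
    ]:
        # No graph structure to traverse in this toy resolver.
        return [], [], None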

cognee/modules/ontology/get_default_ontology_resolver.py

@@ -0,0 +1,41 @@
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+
+def get_default_ontology_resolver() -> BaseOntologyResolver:
+    return RDFLibOntologyResolver(ontology_file=None, matching_strategy=FuzzyMatchingStrategy())
+
+
+def get_ontology_resolver_from_env(
+    ontology_resolver: str = "", matching_strategy: str = "", ontology_file_path: str = ""
+) -> BaseOntologyResolver:
+    """
+    Create and return an ontology resolver instance based on environment parameters.
+
+    Currently, this function supports only the RDFLib-based ontology resolver
+    with a fuzzy matching strategy.
+
+    Args:
+        ontology_resolver (str): The ontology resolver type to use.
+            Supported value: "rdflib".
+        matching_strategy (str): The matching strategy to apply.
+            Supported value: "fuzzy".
+        ontology_file_path (str): Path to the ontology file required for the resolver.
+
+    Returns:
+        BaseOntologyResolver: An instance of the requested ontology resolver.
+
+    Raises:
+        EnvironmentError: If the provided resolver or strategy is unsupported,
+            or if required parameters are missing.
+    """
+    if ontology_resolver == "rdflib" and matching_strategy == "fuzzy" and ontology_file_path:
+        return RDFLibOntologyResolver(
+            matching_strategy=FuzzyMatchingStrategy(), ontology_file=ontology_file_path
+        )
+    else:
+        raise EnvironmentError(
+            f"Unsupported ontology resolver: {ontology_resolver}. "
+            f"Supported resolvers are: RdfLib with FuzzyMatchingStrategy."
+        )
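
A usage sketch; "ontology.owl" is a placeholder path, and anything other than the ("rdflib", "fuzzy", path) combination raises EnvironmentError, so a caller can fall back to the default:

from cognee.modules.ontology.get_default_ontology_resolver import (
    get_default_ontology_resolver,
    get_ontology_resolver_from_env,
)

try:
    # "ontology.owl" is a placeholder ontology file path.
    resolver = get_ontology_resolver_from_env("rdflib", "fuzzy", "ontology.owl")
except EnvironmentError:
    # Unsupported or incomplete settings: use the fuzzy RDFLib default.
    resolver = get_default_ontology_resolver()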