cognee 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82):
  1. cognee/api/v1/cloud/routers/get_checks_router.py +1 -1
  2. cognee/api/v1/cognify/cognify.py +44 -7
  3. cognee/api/v1/cognify/routers/get_cognify_router.py +2 -1
  4. cognee/api/v1/prune/prune.py +2 -2
  5. cognee/api/v1/search/search.py +1 -1
  6. cognee/api/v1/sync/sync.py +16 -5
  7. cognee/base_config.py +19 -1
  8. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
  9. cognee/infrastructure/databases/graph/kuzu/remote_kuzu_adapter.py +4 -1
  10. cognee/infrastructure/databases/relational/ModelBase.py +2 -1
  11. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -6
  12. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +6 -5
  13. cognee/infrastructure/files/storage/LocalFileStorage.py +50 -0
  14. cognee/infrastructure/files/storage/S3FileStorage.py +56 -9
  15. cognee/infrastructure/files/storage/StorageManager.py +18 -0
  16. cognee/infrastructure/files/utils/get_file_metadata.py +6 -1
  17. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +4 -2
  18. cognee/modules/cloud/operations/check_api_key.py +4 -1
  19. cognee/modules/data/deletion/prune_system.py +5 -1
  20. cognee/modules/data/methods/create_authorized_dataset.py +9 -0
  21. cognee/modules/data/methods/get_authorized_dataset.py +1 -1
  22. cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
  23. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
  24. cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
  25. cognee/modules/notebooks/methods/create_notebook.py +34 -0
  26. cognee/modules/notebooks/methods/get_notebooks.py +27 -1
  27. cognee/modules/notebooks/models/Notebook.py +206 -1
  28. cognee/modules/observability/get_observe.py +14 -0
  29. cognee/modules/observability/observers.py +1 -0
  30. cognee/modules/ontology/base_ontology_resolver.py +42 -0
  31. cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
  32. cognee/modules/ontology/matching_strategies.py +53 -0
  33. cognee/modules/ontology/models.py +20 -0
  34. cognee/modules/ontology/ontology_config.py +24 -0
  35. cognee/modules/ontology/ontology_env_config.py +45 -0
  36. cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
  37. cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +13 -0
  38. cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +1 -1
  39. cognee/modules/pipelines/models/PipelineRunInfo.py +7 -2
  40. cognee/modules/retrieval/temporal_retriever.py +2 -2
  41. cognee/modules/search/methods/get_search_type_tools.py +7 -0
  42. cognee/modules/search/methods/search.py +12 -13
  43. cognee/modules/search/utils/prepare_search_result.py +28 -6
  44. cognee/modules/search/utils/transform_context_to_graph.py +1 -1
  45. cognee/modules/search/utils/transform_insights_to_graph.py +28 -0
  46. cognee/modules/users/methods/create_user.py +4 -24
  47. cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
  48. cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
  49. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +19 -2
  50. cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
  51. cognee/modules/users/permissions/methods/get_principal.py +9 -0
  52. cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
  53. cognee/modules/users/permissions/methods/get_role.py +10 -0
  54. cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
  55. cognee/modules/users/permissions/methods/get_tenant.py +9 -0
  56. cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
  57. cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
  58. cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
  59. cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
  60. cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
  61. cognee/modules/users/roles/methods/create_role.py +10 -0
  62. cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
  63. cognee/modules/users/tenants/methods/create_tenant.py +10 -0
  64. cognee/root_dir.py +5 -0
  65. cognee/shared/cache.py +346 -0
  66. cognee/shared/utils.py +12 -0
  67. cognee/tasks/graph/extract_graph_from_data.py +53 -10
  68. cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
  69. cognee/tasks/ingestion/save_data_item_to_storage.py +1 -0
  70. cognee/tasks/temporal_graph/models.py +11 -6
  71. cognee/tests/cli_tests/cli_unit_tests/test_cli_main.py +5 -5
  72. cognee/tests/test_cognee_server_start.py +4 -4
  73. cognee/tests/test_temporal_graph.py +6 -34
  74. cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
  75. cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +399 -0
  76. {cognee-0.3.3.dist-info → cognee-0.3.4.dist-info}/METADATA +11 -8
  77. {cognee-0.3.3.dist-info → cognee-0.3.4.dist-info}/RECORD +81 -73
  78. cognee/modules/notebooks/methods/create_tutorial_notebook.py +0 -92
  79. {cognee-0.3.3.dist-info → cognee-0.3.4.dist-info}/WHEEL +0 -0
  80. {cognee-0.3.3.dist-info → cognee-0.3.4.dist-info}/entry_points.txt +0 -0
  81. {cognee-0.3.3.dist-info → cognee-0.3.4.dist-info}/licenses/LICENSE +0 -0
  82. {cognee-0.3.3.dist-info → cognee-0.3.4.dist-info}/licenses/NOTICE.md +0 -0
@@ -7,8 +7,14 @@ from cognee.modules.engine.utils import (
7
7
  generate_node_id,
8
8
  generate_node_name,
9
9
  )
10
+ from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
11
+ from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
10
12
  from cognee.shared.data_models import KnowledgeGraph
11
- from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
13
+ from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
14
+ from cognee.modules.ontology.get_default_ontology_resolver import (
15
+ get_default_ontology_resolver,
16
+ get_ontology_resolver_from_env,
17
+ )
12
18
 
13
19
 
14
20
  def _create_node_key(node_id: str, category: str) -> str:
@@ -83,7 +89,7 @@ def _process_ontology_edges(
83
89
 
84
90
  def _create_type_node(
85
91
  node_type: str,
86
- ontology_resolver: OntologyResolver,
92
+ ontology_resolver: RDFLibOntologyResolver,
87
93
  added_nodes_map: dict,
88
94
  added_ontology_nodes_map: dict,
89
95
  name_mapping: dict,
@@ -141,7 +147,7 @@ def _create_entity_node(
141
147
  node_name: str,
142
148
  node_description: str,
143
149
  type_node: EntityType,
144
- ontology_resolver: OntologyResolver,
150
+ ontology_resolver: RDFLibOntologyResolver,
145
151
  added_nodes_map: dict,
146
152
  added_ontology_nodes_map: dict,
147
153
  name_mapping: dict,
@@ -198,7 +204,7 @@ def _create_entity_node(
198
204
  def _process_graph_nodes(
199
205
  data_chunk: DocumentChunk,
200
206
  graph: KnowledgeGraph,
201
- ontology_resolver: OntologyResolver,
207
+ ontology_resolver: RDFLibOntologyResolver,
202
208
  added_nodes_map: dict,
203
209
  added_ontology_nodes_map: dict,
204
210
  name_mapping: dict,
@@ -277,7 +283,7 @@ def _process_graph_edges(
277
283
  def expand_with_nodes_and_edges(
278
284
  data_chunks: list[DocumentChunk],
279
285
  chunk_graphs: list[KnowledgeGraph],
280
- ontology_resolver: OntologyResolver = None,
286
+ ontology_resolver: BaseOntologyResolver = None,
281
287
  existing_edges_map: Optional[dict[str, bool]] = None,
282
288
  ):
283
289
  """
@@ -296,8 +302,8 @@ def expand_with_nodes_and_edges(
296
302
  chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each
297
303
  data chunk. Each graph contains nodes (entities) and edges (relationships) extracted
298
304
  from the chunk content.
299
- ontology_resolver (OntologyResolver, optional): Resolver for validating entities and
300
- types against an ontology. If None, a default OntologyResolver is created.
305
+ ontology_resolver (BaseOntologyResolver, optional): Resolver for validating entities and
306
+ types against an ontology. If None, a default RDFLibOntologyResolver is created.
301
307
  Defaults to None.
302
308
  existing_edges_map (dict[str, bool], optional): Mapping of existing edge keys to prevent
303
309
  duplicate edge creation. Keys are formatted as "{source_id}_{target_id}_{relation}".
@@ -320,7 +326,15 @@ def expand_with_nodes_and_edges(
320
326
  existing_edges_map = {}
321
327
 
322
328
  if ontology_resolver is None:
323
- ontology_resolver = OntologyResolver()
329
+ ontology_config = get_ontology_env_config()
330
+ if (
331
+ ontology_config.ontology_file_path
332
+ and ontology_config.ontology_resolver
333
+ and ontology_config.matching_strategy
334
+ ):
335
+ ontology_resolver = get_ontology_resolver_from_env(**ontology_config.to_dict())
336
+ else:
337
+ ontology_resolver = get_default_ontology_resolver()
324
338
 
325
339
  added_nodes_map = {}
326
340
  added_ontology_nodes_map = {}
@@ -23,8 +23,6 @@ async def retrieve_existing_edges(
23
23
  chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each
24
24
  data chunk. Each graph contains nodes (entities) and edges (relationships) that
25
25
  were extracted from the chunk content.
26
- graph_engine (GraphDBInterface): Interface to the graph database that will be queried
27
- to check for existing edges. Must implement the has_edges() method.
28
26
 
29
27
  Returns:
30
28
  dict[str, bool]: A mapping of edge keys to boolean values indicating existence.
@@ -6,6 +6,40 @@ from cognee.infrastructure.databases.relational import with_async_session
6
6
 
7
7
  from ..models.Notebook import Notebook, NotebookCell
8
8
 
9
# Display name of the tutorial notebook; get_notebooks relies on this exact
# string (together with the non-deletable flag) to detect an existing tutorial.
TUTORIAL_NOTEBOOK_NAME = "Python Development with Cognee Tutorial 🧠"


async def _create_tutorial_notebook(
    user_id: UUID, session: AsyncSession, force_refresh: bool = False
) -> None:
    """
    Create the default tutorial notebook for new users.
    Dynamically fetches from: https://github.com/topoteretes/cognee/blob/notebook_tutorial/notebooks/starter_tutorial.zip

    Args:
        user_id: Owner of the notebook to create.
        session: Async database session used to persist the notebook.
        force_refresh: When True, re-download the tutorial zip even if cached.

    Raises:
        Exception: Any failure from download, parsing, or persistence is
            re-raised after being reported.
    """
    TUTORIAL_ZIP_URL = (
        "https://github.com/topoteretes/cognee/raw/notebook_tutorial/notebooks/starter_tutorial.zip"
    )

    try:
        # Create notebook from remote zip file (includes notebook + data files)
        notebook = await Notebook.from_ipynb_zip_url(
            zip_url=TUTORIAL_ZIP_URL,
            owner_id=user_id,
            notebook_filename="tutorial.ipynb",
            name=TUTORIAL_NOTEBOOK_NAME,
            deletable=False,  # tutorial notebook is protected from deletion
            force=force_refresh,
        )

        # Add to session and commit
        session.add(notebook)
        await session.commit()

    except Exception as e:
        # NOTE(review): uses print rather than the project logger (the sibling
        # get_notebooks module uses get_logger) — consider unifying; the error
        # is re-raised so callers still see the failure.
        print(f"Failed to fetch tutorial notebook from {TUTORIAL_ZIP_URL}: {e}")

        raise e
9
43
 
10
44
  @with_async_session
11
45
  async def create_notebook(
@@ -1,11 +1,16 @@
1
1
  from uuid import UUID
2
2
  from typing import List
3
- from sqlalchemy import select
3
+ from sqlalchemy import select, and_
4
4
  from sqlalchemy.ext.asyncio import AsyncSession
5
5
 
6
6
  from cognee.infrastructure.databases.relational import with_async_session
7
7
 
8
8
  from ..models.Notebook import Notebook
9
+ from .create_notebook import _create_tutorial_notebook, TUTORIAL_NOTEBOOK_NAME
10
+
11
+ from cognee.shared.logging_utils import get_logger
12
+
13
+ logger = get_logger()
9
14
 
10
15
 
11
16
@with_async_session
async def get_notebooks(
    user_id: UUID,
    session: AsyncSession,
) -> List[Notebook]:
    """
    Return all notebooks owned by ``user_id``, lazily creating the tutorial
    notebook on first access if it does not exist yet.

    Args:
        user_id: Owner whose notebooks are listed.
        session: Async database session (injected by ``with_async_session``).

    Returns:
        List[Notebook]: Every notebook owned by the user, including the
        tutorial notebook when its creation succeeded.
    """
    # Check if tutorial notebook already exists for this user: matched by
    # owner, the well-known tutorial name, and the non-deletable flag.
    tutorial_query = select(Notebook).where(
        and_(
            Notebook.owner_id == user_id,
            Notebook.name == TUTORIAL_NOTEBOOK_NAME,
            ~Notebook.deletable,
        )
    )
    tutorial_result = await session.execute(tutorial_query)
    tutorial_notebook = tutorial_result.scalar_one_or_none()

    # If tutorial notebook doesn't exist, create it
    if tutorial_notebook is None:
        logger.info(f"Tutorial notebook not found for user {user_id}, creating it")
        try:
            await _create_tutorial_notebook(user_id, session, force_refresh=False)
        except Exception as e:
            # Log the error but continue to return existing notebooks —
            # listing must not fail because the tutorial download failed.
            logger.error(f"Failed to create tutorial notebook for user {user_id}: {e}")

    # Get all notebooks for the user
    result = await session.execute(select(Notebook).where(Notebook.owner_id == user_id))

    return list(result.scalars().all())
@@ -1,13 +1,24 @@
1
1
  import json
2
- from typing import List, Literal
2
+ import nbformat
3
+ import asyncio
4
+ from nbformat.notebooknode import NotebookNode
5
+ from typing import List, Literal, Optional, cast, Tuple
3
6
  from uuid import uuid4, UUID as UUID_t
4
7
  from pydantic import BaseModel, ConfigDict
5
8
  from datetime import datetime, timezone
6
9
  from fastapi.encoders import jsonable_encoder
7
10
  from sqlalchemy import Boolean, Column, DateTime, JSON, UUID, String, TypeDecorator
8
11
  from sqlalchemy.orm import mapped_column, Mapped
12
+ from pathlib import Path
9
13
 
10
14
  from cognee.infrastructure.databases.relational import Base
15
+ from cognee.shared.cache import (
16
+ download_and_extract_zip,
17
+ get_tutorial_data_dir,
18
+ generate_content_hash,
19
+ )
20
+ from cognee.infrastructure.files.storage.get_file_storage import get_file_storage
21
+ from cognee.base_config import get_base_config
11
22
 
12
23
 
13
24
  class NotebookCell(BaseModel):
@@ -51,3 +62,197 @@ class Notebook(Base):
51
62
  deletable: Mapped[bool] = mapped_column(Boolean, default=True)
52
63
 
53
64
  created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
65
+
66
+ @classmethod
67
+ async def from_ipynb_zip_url(
68
+ cls,
69
+ zip_url: str,
70
+ owner_id: UUID_t,
71
+ notebook_filename: str = "tutorial.ipynb",
72
+ name: Optional[str] = None,
73
+ deletable: bool = True,
74
+ force: bool = False,
75
+ ) -> "Notebook":
76
+ """
77
+ Create a Notebook instance from a remote zip file containing notebook + data files.
78
+
79
+ Args:
80
+ zip_url: Remote URL to fetch the .zip file from
81
+ owner_id: UUID of the notebook owner
82
+ notebook_filename: Name of the .ipynb file within the zip
83
+ name: Optional custom name for the notebook
84
+ deletable: Whether the notebook can be deleted
85
+ force: If True, re-download even if already cached
86
+
87
+ Returns:
88
+ Notebook instance
89
+ """
90
+ # Generate a cache key based on the zip URL
91
+ content_hash = generate_content_hash(zip_url, notebook_filename)
92
+
93
+ # Download and extract the zip file to tutorial_data/{content_hash}
94
+ try:
95
+ extracted_cache_dir = await download_and_extract_zip(
96
+ url=zip_url,
97
+ cache_dir_name=f"tutorial_data/{content_hash}",
98
+ version_or_hash=content_hash,
99
+ force=force,
100
+ )
101
+ except Exception as e:
102
+ raise RuntimeError(f"Failed to download tutorial zip from {zip_url}") from e
103
+
104
+ # Use cache system to access the notebook file
105
+ from cognee.shared.cache import cache_file_exists, read_cache_file
106
+
107
+ notebook_file_path = f"{extracted_cache_dir}/{notebook_filename}"
108
+
109
+ # Check if the notebook file exists in cache
110
+ if not await cache_file_exists(notebook_file_path):
111
+ raise FileNotFoundError(f"Notebook file '{notebook_filename}' not found in zip")
112
+
113
+ # Read and parse the notebook using cache system
114
+ async with await read_cache_file(notebook_file_path, encoding="utf-8") as f:
115
+ notebook_content = await asyncio.to_thread(f.read)
116
+ notebook = cls.from_ipynb_string(notebook_content, owner_id, name, deletable)
117
+
118
+ # Update file paths in notebook cells to point to actual cached data files
119
+ await cls._update_file_paths_in_cells(notebook, extracted_cache_dir)
120
+
121
+ return notebook
122
+
123
    @staticmethod
    async def _update_file_paths_in_cells(notebook: "Notebook", cache_dir: str) -> None:
        """
        Update file paths in code cells to use actual cached data files.
        Works with both local filesystem and S3 storage.

        Rewrites ``"file://data/<name>"`` references inside code cells to the
        resolved cache location of ``<name>``. Failures to list the cache are
        logged and the method returns without modifying any cell.

        Args:
            notebook: Parsed Notebook instance with cells to update
            cache_dir: Path to the cached tutorial directory containing data files
        """
        import re
        from cognee.shared.cache import list_cache_files, cache_file_exists
        from cognee.shared.logging_utils import get_logger

        logger = get_logger()

        # Look for data files in the data subdirectory
        data_dir = f"{cache_dir}/data"

        try:
            # Get all data files in the cache directory using cache system
            data_files = {}
            # NOTE(review): cache_file_exists is called on a directory path
            # here — confirm the cache backend accepts directories.
            if await cache_file_exists(data_dir):
                file_list = await list_cache_files(data_dir)
            else:
                file_list = []

            for file_path in file_list:
                # Extract just the filename
                filename = file_path.split("/")[-1]
                # Use the file path as provided by cache system
                data_files[filename] = file_path

        except Exception as e:
            # If we can't list files, skip updating paths
            logger.error(f"Error listing data files in {data_dir}: {e}")
            return

        # Pattern to match file://data/filename patterns in code cells
        file_pattern = r'"file://data/([^"]+)"'

        def replace_path(match):
            # Map a quoted file://data/<name> reference to its cached location.
            filename = match.group(1)
            if filename in data_files:
                file_path = data_files[filename]
                # For local filesystem, preserve file:// prefix
                if not file_path.startswith("s3://"):
                    return f'"file://{file_path}"'
                else:
                    # For S3, return the S3 URL as-is
                    return f'"{file_path}"'
            return match.group(0)  # Keep original if file not found

        # Update only code cells
        updated_cells = 0
        for cell in notebook.cells:
            if cell.type == "code":
                original_content = cell.content
                # Update file paths in the cell content
                cell.content = re.sub(file_pattern, replace_path, cell.content)
                if original_content != cell.content:
                    updated_cells += 1

        # Log summary of updates (useful for monitoring)
        if updated_cells > 0:
            logger.info(f"Updated file paths in {updated_cells} notebook cells")
189
+
190
    @classmethod
    def from_ipynb_string(
        cls,
        notebook_content: str,
        owner_id: UUID_t,
        name: Optional[str] = None,
        deletable: bool = True,
    ) -> "Notebook":
        """
        Create a Notebook instance from Jupyter notebook string content.

        Raw cells are dropped; every other cell becomes a NotebookCell with a
        generated name.

        Args:
            notebook_content: Raw Jupyter notebook content as string
            owner_id: UUID of the notebook owner
            name: Optional custom name for the notebook
            deletable: Whether the notebook can be deleted

        Returns:
            Notebook instance ready to be saved to database
        """
        # Parse and validate the Jupyter notebook using nbformat
        # Note: nbformat.reads() has loose typing, so we cast to NotebookNode
        jupyter_nb = cast(
            NotebookNode, nbformat.reads(notebook_content, as_version=nbformat.NO_CONVERT)
        )

        # Convert Jupyter cells to NotebookCell objects
        cells = []
        for jupyter_cell in jupyter_nb.cells:
            # Each cell is also a NotebookNode with dynamic attributes
            cell = cast(NotebookNode, jupyter_cell)
            # Skip raw cells as they're not supported in our model
            if cell.cell_type == "raw":
                continue

            # Get the source content
            content = cell.source

            # Generate a name based on content or cell index
            cell_name = cls._generate_cell_name(cell)

            # Map cell types (jupyter uses "code"/"markdown", we use same)
            cell_type = "code" if cell.cell_type == "code" else "markdown"

            cells.append(NotebookCell(id=uuid4(), type=cell_type, name=cell_name, content=content))

        # Extract notebook name from metadata if not provided; note this falls
        # back to the kernelspec display name, not any notebook title field.
        if name is None:
            kernelspec = jupyter_nb.metadata.get("kernelspec", {})
            name = kernelspec.get("display_name") or kernelspec.get("name", "Imported Notebook")

        return cls(id=uuid4(), owner_id=owner_id, name=name, cells=cells, deletable=deletable)
242
+
243
+ @staticmethod
244
+ def _generate_cell_name(jupyter_cell: NotebookNode) -> str:
245
+ """Generate a meaningful name for a notebook cell using nbformat cell."""
246
+ if jupyter_cell.cell_type == "markdown":
247
+ # Try to extract a title from markdown headers
248
+ content = jupyter_cell.source
249
+
250
+ lines = content.strip().split("\n")
251
+ if lines and lines[0].startswith("#"):
252
+ # Extract header text, clean it up
253
+ header = lines[0].lstrip("#").strip()
254
+ return header[:50] if len(header) > 50 else header
255
+ else:
256
+ return "Markdown Cell"
257
+ else:
258
+ return "Code Cell"
@@ -9,3 +9,17 @@ def get_observe():
9
9
  from langfuse.decorators import observe
10
10
 
11
11
  return observe
12
+ elif monitoring == Observer.NONE:
13
+ # Return a no-op decorator that handles keyword arguments
14
+ def no_op_decorator(*args, **kwargs):
15
+ if len(args) == 1 and callable(args[0]) and not kwargs:
16
+ # Direct decoration: @observe
17
+ return args[0]
18
+ else:
19
+ # Parameterized decoration: @observe(as_type="generation")
20
+ def decorator(func):
21
+ return func
22
+
23
+ return decorator
24
+
25
+ return no_op_decorator
@@ -4,6 +4,7 @@ from enum import Enum
4
4
class Observer(str, Enum):
    """Monitoring tools"""

    # Explicit opt-out: get_observe() returns a no-op decorator for this value.
    NONE = "none"
    LANGFUSE = "langfuse"
    LLMLITE = "llmlite"
    LANGSMITH = "langsmith"
@@ -0,0 +1,42 @@
1
from abc import ABC, abstractmethod
from typing import List, Tuple, Optional

from cognee.modules.ontology.models import AttachedOntologyNode
from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy


class BaseOntologyResolver(ABC):
    """Common interface implemented by every ontology resolver.

    Concrete resolvers expose a lookup over ontology entities and answer
    matching and subgraph queries against it.
    """

    def __init__(self, matching_strategy: Optional[MatchingStrategy] = None):
        """Store the strategy used for entity matching.

        Args:
            matching_strategy: Strategy used to match entity names.
                Falls back to FuzzyMatchingStrategy when not provided.
        """
        self.matching_strategy = matching_strategy or FuzzyMatchingStrategy()

    @abstractmethod
    def build_lookup(self) -> None:
        """Construct the internal lookup dictionary of ontology entities."""

    @abstractmethod
    def refresh_lookup(self) -> None:
        """Rebuild the lookup dictionary, discarding previously cached state."""

    @abstractmethod
    def find_closest_match(self, name: str, category: str) -> Optional[str]:
        """Return the best ontology match for *name* in *category*, if any."""

    @abstractmethod
    def get_subgraph(
        self, node_name: str, node_type: str = "individuals", directed: bool = True
    ) -> Tuple[
        List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
    ]:
        """Return (nodes, edges, matched node) for the subgraph around *node_name*."""
@@ -0,0 +1,41 @@
1
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy


def get_default_ontology_resolver() -> BaseOntologyResolver:
    """Return the default resolver: RDFLib-backed fuzzy matching, no ontology file."""
    return RDFLibOntologyResolver(ontology_file=None, matching_strategy=FuzzyMatchingStrategy())


def get_ontology_resolver_from_env(
    ontology_resolver: str = "", matching_strategy: str = "", ontology_file_path: str = ""
) -> BaseOntologyResolver:
    """
    Create and return an ontology resolver instance based on environment parameters.

    Currently, this function supports only the RDFLib-based ontology resolver
    with a fuzzy matching strategy.

    Args:
        ontology_resolver (str): The ontology resolver type to use.
            Supported value: "rdflib".
        matching_strategy (str): The matching strategy to apply.
            Supported value: "fuzzy".
        ontology_file_path (str): Path to the ontology file required for the resolver.

    Returns:
        BaseOntologyResolver: An instance of the requested ontology resolver.

    Raises:
        EnvironmentError: If the provided resolver or strategy is unsupported,
            or if required parameters are missing.
    """
    if ontology_resolver == "rdflib" and matching_strategy == "fuzzy" and ontology_file_path:
        return RDFLibOntologyResolver(
            matching_strategy=FuzzyMatchingStrategy(), ontology_file=ontology_file_path
        )

    # Name the actual offending parameter(s) instead of always blaming the
    # resolver: the previous message claimed an "unsupported resolver" even
    # when only the file path was missing or the strategy was wrong.
    problems = []
    if ontology_resolver != "rdflib":
        problems.append(f"unsupported ontology resolver: {ontology_resolver!r}")
    if matching_strategy != "fuzzy":
        problems.append(f"unsupported matching strategy: {matching_strategy!r}")
    if not ontology_file_path:
        problems.append("ontology_file_path is not set")

    raise EnvironmentError(
        f"Cannot build ontology resolver from environment ({'; '.join(problems)}). "
        f"Supported resolvers are: RdfLib with FuzzyMatchingStrategy."
    )
@@ -0,0 +1,53 @@
1
import difflib
from abc import ABC, abstractmethod
from typing import List, Optional


class MatchingStrategy(ABC):
    """Interface for strategies that pick the best ontology candidate for a name."""

    @abstractmethod
    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
        """Return the best candidate for *name*, or None when nothing fits.

        Args:
            name: The name to match.
            candidates: Candidate names to match against.
        """


class FuzzyMatchingStrategy(MatchingStrategy):
    """Approximate string matching backed by ``difflib.get_close_matches``."""

    def __init__(self, cutoff: float = 0.8):
        """
        Args:
            cutoff: Minimum similarity ratio (0.0 to 1.0) a candidate must
                reach to count as a match.
        """
        self.cutoff = cutoff

    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
        """Return the closest candidate to *name*, or None below the cutoff.

        Args:
            name: The normalized name to match.
            candidates: List of normalized candidate names.
        """
        if not candidates:
            return None

        # Exact hits short-circuit the similarity computation.
        if name in candidates:
            return name

        matches = difflib.get_close_matches(name, candidates, n=1, cutoff=self.cutoff)
        if not matches:
            return None
        return matches[0]
@@ -0,0 +1,20 @@
1
from typing import Any


class AttachedOntologyNode:
    """Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface."""

    def __init__(self, uri: Any, category: str):
        self.uri = uri
        self.name = self._extract_name(uri)
        self.category = category

    @staticmethod
    def _extract_name(uri: Any) -> str:
        # Take the fragment after '#' when present, otherwise the last
        # non-empty path segment of the URI.
        text = str(uri)
        if "#" in text:
            return text.rpartition("#")[-1]
        return text.rstrip("/").rsplit("/", 1)[-1]

    def __repr__(self):
        return f"AttachedOntologyNode(name={self.name}, category={self.category})"
@@ -0,0 +1,24 @@
1
from typing import TypedDict, Optional

from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.modules.ontology.matching_strategies import MatchingStrategy


class OntologyConfig(TypedDict, total=False):
    """Configuration containing ontology resolver.

    All keys are optional (``total=False``).

    Attributes:
        ontology_resolver: The ontology resolver instance to use
    """

    ontology_resolver: Optional[BaseOntologyResolver]


class Config(TypedDict, total=False):
    """Top-level configuration dictionary.

    All keys are optional (``total=False``).

    Attributes:
        ontology_config: Configuration containing ontology resolver
    """

    ontology_config: Optional[OntologyConfig]
@@ -0,0 +1,45 @@
1
+ """This module contains the configuration for ontology handling."""
2
+
3
+ from functools import lru_cache
4
+ from pydantic_settings import BaseSettings, SettingsConfigDict
5
+
6
+
7
+ class OntologyEnvConfig(BaseSettings):
8
+ """
9
+ Represents the configuration for ontology handling, including parameters for
10
+ ontology file storage and resolution/matching strategies.
11
+
12
+ Public methods:
13
+ - to_dict
14
+
15
+ Instance variables:
16
+ - ontology_resolver
17
+ - ontology_matching
18
+ - ontology_file_path
19
+ - model_config
20
+ """
21
+
22
+ ontology_resolver: str = "rdflib"
23
+ matching_strategy: str = "fuzzy"
24
+ ontology_file_path: str = ""
25
+
26
+ model_config = SettingsConfigDict(env_file=".env", extra="allow", populate_by_name=True)
27
+
28
+ def to_dict(self) -> dict:
29
+ """
30
+ Return the configuration as a dictionary.
31
+ """
32
+ return {
33
+ "ontology_resolver": self.ontology_resolver,
34
+ "matching_strategy": self.matching_strategy,
35
+ "ontology_file_path": self.ontology_file_path,
36
+ }
37
+
38
+
39
+ @lru_cache
40
+ def get_ontology_env_config():
41
+ """
42
+ Retrieve the ontology configuration. This function utilizes caching to return a
43
+ singleton instance of the OntologyConfig class for efficiency.
44
+ """
45
+ return OntologyEnvConfig()