cognee 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. cognee/api/v1/cloud/routers/get_checks_router.py +1 -1
  2. cognee/api/v1/cognify/cognify.py +44 -7
  3. cognee/api/v1/cognify/routers/get_cognify_router.py +2 -1
  4. cognee/api/v1/notebooks/routers/get_notebooks_router.py +2 -1
  5. cognee/api/v1/prune/prune.py +2 -2
  6. cognee/api/v1/search/search.py +1 -1
  7. cognee/api/v1/sync/sync.py +16 -5
  8. cognee/base_config.py +19 -1
  9. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
  10. cognee/infrastructure/databases/graph/kuzu/remote_kuzu_adapter.py +4 -1
  11. cognee/infrastructure/databases/relational/ModelBase.py +2 -1
  12. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -2
  13. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -6
  14. cognee/infrastructure/databases/vector/config.py +1 -1
  15. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +6 -5
  16. cognee/infrastructure/files/storage/LocalFileStorage.py +50 -0
  17. cognee/infrastructure/files/storage/S3FileStorage.py +56 -9
  18. cognee/infrastructure/files/storage/StorageManager.py +18 -0
  19. cognee/infrastructure/files/utils/get_file_metadata.py +6 -1
  20. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +4 -2
  21. cognee/infrastructure/utils/run_async.py +9 -4
  22. cognee/infrastructure/utils/run_sync.py +4 -3
  23. cognee/modules/cloud/operations/check_api_key.py +4 -1
  24. cognee/modules/data/deletion/prune_system.py +5 -1
  25. cognee/modules/data/methods/create_authorized_dataset.py +9 -0
  26. cognee/modules/data/methods/get_authorized_dataset.py +1 -1
  27. cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
  28. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
  29. cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
  30. cognee/modules/notebooks/methods/create_notebook.py +34 -0
  31. cognee/modules/notebooks/methods/get_notebook.py +2 -2
  32. cognee/modules/notebooks/methods/get_notebooks.py +27 -1
  33. cognee/modules/notebooks/methods/update_notebook.py +0 -1
  34. cognee/modules/notebooks/models/Notebook.py +206 -1
  35. cognee/modules/notebooks/operations/run_in_local_sandbox.py +8 -5
  36. cognee/modules/observability/get_observe.py +14 -0
  37. cognee/modules/observability/observers.py +1 -0
  38. cognee/modules/ontology/base_ontology_resolver.py +42 -0
  39. cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
  40. cognee/modules/ontology/matching_strategies.py +53 -0
  41. cognee/modules/ontology/models.py +20 -0
  42. cognee/modules/ontology/ontology_config.py +24 -0
  43. cognee/modules/ontology/ontology_env_config.py +45 -0
  44. cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
  45. cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +13 -0
  46. cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +1 -1
  47. cognee/modules/pipelines/models/PipelineRunInfo.py +7 -2
  48. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +1 -1
  49. cognee/modules/retrieval/graph_completion_cot_retriever.py +1 -1
  50. cognee/modules/retrieval/graph_completion_retriever.py +1 -1
  51. cognee/modules/retrieval/temporal_retriever.py +3 -3
  52. cognee/modules/retrieval/user_qa_feedback.py +1 -1
  53. cognee/modules/search/methods/get_search_type_tools.py +7 -0
  54. cognee/modules/search/methods/search.py +12 -13
  55. cognee/modules/search/utils/prepare_search_result.py +31 -9
  56. cognee/modules/search/utils/transform_context_to_graph.py +1 -1
  57. cognee/modules/search/utils/transform_insights_to_graph.py +28 -0
  58. cognee/modules/users/methods/create_user.py +4 -24
  59. cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
  60. cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
  61. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +19 -2
  62. cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
  63. cognee/modules/users/permissions/methods/get_principal.py +9 -0
  64. cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
  65. cognee/modules/users/permissions/methods/get_role.py +10 -0
  66. cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
  67. cognee/modules/users/permissions/methods/get_tenant.py +9 -0
  68. cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
  69. cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
  70. cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
  71. cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
  72. cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
  73. cognee/modules/users/roles/methods/create_role.py +10 -0
  74. cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
  75. cognee/modules/users/tenants/methods/create_tenant.py +10 -0
  76. cognee/root_dir.py +5 -0
  77. cognee/shared/cache.py +346 -0
  78. cognee/shared/utils.py +12 -0
  79. cognee/tasks/graph/extract_graph_from_data.py +53 -10
  80. cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
  81. cognee/tasks/ingestion/save_data_item_to_storage.py +1 -0
  82. cognee/tasks/temporal_graph/models.py +11 -6
  83. cognee/tests/cli_tests/cli_unit_tests/test_cli_main.py +5 -5
  84. cognee/tests/test_cognee_server_start.py +4 -4
  85. cognee/tests/test_temporal_graph.py +6 -34
  86. cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
  87. cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +399 -0
  88. {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/METADATA +11 -8
  89. {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/RECORD +93 -86
  90. cognee-0.3.4.dist-info/entry_points.txt +2 -0
  91. cognee/api/v1/save/save.py +0 -335
  92. cognee/tests/test_save_export_path.py +0 -116
  93. cognee-0.3.2.dist-info/entry_points.txt +0 -2
  94. {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/WHEEL +0 -0
  95. {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/licenses/LICENSE +0 -0
  96. {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/licenses/NOTICE.md +0 -0
@@ -3,8 +3,14 @@ from typing import Type, List, Optional
3
3
  from pydantic import BaseModel
4
4
 
5
5
  from cognee.infrastructure.databases.graph import get_graph_engine
6
+ from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
6
7
  from cognee.tasks.storage.add_data_points import add_data_points
7
- from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
8
+ from cognee.modules.ontology.ontology_config import Config
9
+ from cognee.modules.ontology.get_default_ontology_resolver import (
10
+ get_default_ontology_resolver,
11
+ get_ontology_resolver_from_env,
12
+ )
13
+ from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
8
14
  from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
9
15
  from cognee.modules.graph.utils import (
10
16
  expand_with_nodes_and_edges,
@@ -24,9 +30,28 @@ async def integrate_chunk_graphs(
24
30
  data_chunks: list[DocumentChunk],
25
31
  chunk_graphs: list,
26
32
  graph_model: Type[BaseModel],
27
- ontology_adapter: OntologyResolver,
33
+ ontology_resolver: BaseOntologyResolver,
28
34
  ) -> List[DocumentChunk]:
29
- """Updates DocumentChunk objects, integrates data points and edges into databases."""
35
+ """Integrate chunk graphs with ontology validation and store in databases.
36
+
37
+ This function processes document chunks and their associated knowledge graphs,
38
+ validates entities against an ontology resolver, and stores the integrated
39
+ data points and edges in the configured databases.
40
+
41
+ Args:
42
+ data_chunks: List of document chunks containing source data
43
+ chunk_graphs: List of knowledge graphs corresponding to each chunk
44
+ graph_model: Pydantic model class for graph data validation
45
+ ontology_resolver: Resolver for validating entities against ontology
46
+
47
+ Returns:
48
+ List of updated DocumentChunk objects with integrated data
49
+
50
+ Raises:
51
+ InvalidChunkGraphInputError: If input validation fails
52
+ InvalidGraphModelError: If graph model validation fails
53
+ InvalidOntologyAdapterError: If ontology resolver validation fails
54
+ """
30
55
 
31
56
  if not isinstance(data_chunks, list) or not isinstance(chunk_graphs, list):
32
57
  raise InvalidChunkGraphInputError("data_chunks and chunk_graphs must be lists.")
@@ -36,9 +61,9 @@ async def integrate_chunk_graphs(
36
61
  )
37
62
  if not isinstance(graph_model, type) or not issubclass(graph_model, BaseModel):
38
63
  raise InvalidGraphModelError(graph_model)
39
- if ontology_adapter is None or not hasattr(ontology_adapter, "get_subgraph"):
64
+ if ontology_resolver is None or not hasattr(ontology_resolver, "get_subgraph"):
40
65
  raise InvalidOntologyAdapterError(
41
- type(ontology_adapter).__name__ if ontology_adapter else "None"
66
+ type(ontology_resolver).__name__ if ontology_resolver else "None"
42
67
  )
43
68
 
44
69
  graph_engine = await get_graph_engine()
@@ -55,7 +80,7 @@ async def integrate_chunk_graphs(
55
80
  )
56
81
 
57
82
  graph_nodes, graph_edges = expand_with_nodes_and_edges(
58
- data_chunks, chunk_graphs, ontology_adapter, existing_edges_map
83
+ data_chunks, chunk_graphs, ontology_resolver, existing_edges_map
59
84
  )
60
85
 
61
86
  if len(graph_nodes) > 0:
@@ -70,7 +95,7 @@ async def integrate_chunk_graphs(
70
95
  async def extract_graph_from_data(
71
96
  data_chunks: List[DocumentChunk],
72
97
  graph_model: Type[BaseModel],
73
- ontology_adapter: OntologyResolver = None,
98
+ config: Config = None,
74
99
  custom_prompt: Optional[str] = None,
75
100
  ) -> List[DocumentChunk]:
76
101
  """
@@ -101,6 +126,24 @@ async def extract_graph_from_data(
101
126
  if edge.source_node_id in valid_node_ids and edge.target_node_id in valid_node_ids
102
127
  ]
103
128
 
104
- return await integrate_chunk_graphs(
105
- data_chunks, chunk_graphs, graph_model, ontology_adapter or OntologyResolver()
106
- )
129
+ # Extract resolver from config if provided, otherwise get default
130
+ if config is None:
131
+ ontology_config = get_ontology_env_config()
132
+ if (
133
+ ontology_config.ontology_file_path
134
+ and ontology_config.ontology_resolver
135
+ and ontology_config.matching_strategy
136
+ ):
137
+ config: Config = {
138
+ "ontology_config": {
139
+ "ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict())
140
+ }
141
+ }
142
+ else:
143
+ config: Config = {
144
+ "ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
145
+ }
146
+
147
+ ontology_resolver = config["ontology_config"]["ontology_resolver"]
148
+
149
+ return await integrate_chunk_graphs(data_chunks, chunk_graphs, graph_model, ontology_resolver)
@@ -3,7 +3,7 @@ from typing import List
3
3
 
4
4
  from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
5
5
  from cognee.shared.data_models import KnowledgeGraph
6
- from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
6
+ from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
7
7
  from cognee.tasks.graph.cascade_extract.utils.extract_nodes import extract_nodes
8
8
  from cognee.tasks.graph.cascade_extract.utils.extract_content_nodes_and_relationship_names import (
9
9
  extract_content_nodes_and_relationship_names,
@@ -17,9 +17,21 @@ from cognee.tasks.graph.extract_graph_from_data import integrate_chunk_graphs
17
17
  async def extract_graph_from_data(
18
18
  data_chunks: List[DocumentChunk],
19
19
  n_rounds: int = 2,
20
- ontology_adapter: OntologyResolver = None,
20
+ ontology_adapter: BaseOntologyResolver = None,
21
21
  ) -> List[DocumentChunk]:
22
- """Extract and update graph data from document chunks in multiple steps."""
22
+ """Extract and update graph data from document chunks using cascade extraction.
23
+
24
+ This function performs multi-step graph extraction from document chunks,
25
+ using cascade extraction techniques to build comprehensive knowledge graphs.
26
+
27
+ Args:
28
+ data_chunks: List of document chunks to process
29
+ n_rounds: Number of extraction rounds to perform (default: 2)
30
+ ontology_adapter: Resolver for validating entities against ontology
31
+
32
+ Returns:
33
+ List of updated DocumentChunk objects with extracted graph data
34
+ """
23
35
  chunk_nodes = await asyncio.gather(
24
36
  *[extract_nodes(chunk.text, n_rounds) for chunk in data_chunks]
25
37
  )
@@ -44,5 +56,5 @@ async def extract_graph_from_data(
44
56
  data_chunks=data_chunks,
45
57
  chunk_graphs=chunk_graphs,
46
58
  graph_model=KnowledgeGraph,
47
- ontology_adapter=ontology_adapter or OntologyResolver(),
59
+ ontology_adapter=ontology_adapter,
48
60
  )
@@ -41,6 +41,7 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any]) -> str
41
41
  abs_path.is_file()
42
42
  except (OSError, ValueError):
43
43
  # In case file path is too long it's most likely not a relative path
44
+ abs_path = data_item
44
45
  logger.debug(f"Data item was too long to be a possible file path: {abs_path}")
45
46
  abs_path = Path("")
46
47
 
@@ -3,12 +3,17 @@ from pydantic import BaseModel, Field
3
3
 
4
4
 
5
5
  class Timestamp(BaseModel):
6
- year: int = Field(..., ge=1, le=9999)
7
- month: int = Field(..., ge=1, le=12)
8
- day: int = Field(..., ge=1, le=31)
9
- hour: int = Field(..., ge=0, le=23)
10
- minute: int = Field(..., ge=0, le=59)
11
- second: int = Field(..., ge=0, le=59)
6
+ year: int = Field(
7
+ ...,
8
+ ge=1,
9
+ le=9999,
10
+ description="Always required. If only a year is known, use it.",
11
+ )
12
+ month: int = Field(1, ge=1, le=12, description="If unknown, default to 1")
13
+ day: int = Field(1, ge=1, le=31, description="If unknown, default to 1")
14
+ hour: int = Field(0, ge=0, le=23, description="If unknown, default to 0")
15
+ minute: int = Field(0, ge=0, le=59, description="If unknown, default to 0")
16
+ second: int = Field(0, ge=0, le=59, description="If unknown, default to 0")
12
17
 
13
18
 
14
19
  class Interval(BaseModel):
@@ -49,7 +49,7 @@ class TestCliMain:
49
49
  def test_main_no_command(self, mock_create_parser):
50
50
  """Test main function when no command is provided"""
51
51
  mock_parser = MagicMock()
52
- mock_parser.parse_args.return_value = MagicMock(command=None)
52
+ mock_parser.parse_args.return_value = MagicMock(command=None, spec={})
53
53
  mock_create_parser.return_value = (mock_parser, {})
54
54
 
55
55
  result = main()
@@ -64,7 +64,7 @@ class TestCliMain:
64
64
  mock_command.execute.return_value = None
65
65
 
66
66
  mock_parser = MagicMock()
67
- mock_args = MagicMock(command="test")
67
+ mock_args = MagicMock(command="test", spec={})
68
68
  mock_parser.parse_args.return_value = mock_args
69
69
 
70
70
  mock_create_parser.return_value = (mock_parser, {"test": mock_command})
@@ -84,7 +84,7 @@ class TestCliMain:
84
84
  mock_command.execute.side_effect = CliCommandException("Test error", error_code=2)
85
85
 
86
86
  mock_parser = MagicMock()
87
- mock_args = MagicMock(command="test")
87
+ mock_args = MagicMock(command="test", spec={})
88
88
  mock_parser.parse_args.return_value = mock_args
89
89
 
90
90
  mock_create_parser.return_value = (mock_parser, {"test": mock_command})
@@ -103,7 +103,7 @@ class TestCliMain:
103
103
  mock_command.execute.side_effect = Exception("Generic error")
104
104
 
105
105
  mock_parser = MagicMock()
106
- mock_args = MagicMock(command="test")
106
+ mock_args = MagicMock(command="test", spec={})
107
107
  mock_parser.parse_args.return_value = mock_args
108
108
 
109
109
  mock_create_parser.return_value = (mock_parser, {"test": mock_command})
@@ -126,7 +126,7 @@ class TestCliMain:
126
126
  mock_command.execute.side_effect = test_exception
127
127
 
128
128
  mock_parser = MagicMock()
129
- mock_args = MagicMock(command="test")
129
+ mock_args = MagicMock(command="test", spec={})
130
130
  mock_parser.parse_args.return_value = mock_args
131
131
 
132
132
  mock_create_parser.return_value = (mock_parser, {"test": mock_command})
@@ -48,7 +48,7 @@ class TestCogneeServerStart(unittest.TestCase):
48
48
  """Test that the server is running and can accept connections."""
49
49
  # Test health endpoint
50
50
  health_response = requests.get("http://localhost:8000/health", timeout=15)
51
- self.assertIn(health_response.status_code, [200, 503])
51
+ self.assertIn(health_response.status_code, [200])
52
52
 
53
53
  # Test root endpoint
54
54
  root_response = requests.get("http://localhost:8000/", timeout=15)
@@ -88,7 +88,7 @@ class TestCogneeServerStart(unittest.TestCase):
88
88
  payload = {"datasets": [dataset_name]}
89
89
 
90
90
  add_response = requests.post(url, headers=headers, data=form_data, files=file, timeout=50)
91
- if add_response.status_code not in [200, 201, 409]:
91
+ if add_response.status_code not in [200, 201]:
92
92
  add_response.raise_for_status()
93
93
 
94
94
  # Cognify request
@@ -99,7 +99,7 @@ class TestCogneeServerStart(unittest.TestCase):
99
99
  }
100
100
 
101
101
  cognify_response = requests.post(url, headers=headers, json=payload, timeout=150)
102
- if cognify_response.status_code not in [200, 201, 409]:
102
+ if cognify_response.status_code not in [200, 201]:
103
103
  cognify_response.raise_for_status()
104
104
 
105
105
  # TODO: Add test to verify cognify pipeline is complete before testing search
@@ -115,7 +115,7 @@ class TestCogneeServerStart(unittest.TestCase):
115
115
  payload = {"searchType": "GRAPH_COMPLETION", "query": "What's in the document?"}
116
116
 
117
117
  search_response = requests.post(url, headers=headers, json=payload, timeout=50)
118
- if search_response.status_code not in [200, 201, 409]:
118
+ if search_response.status_code not in [200, 201]:
119
119
  search_response.raise_for_status()
120
120
 
121
121
 
@@ -97,7 +97,7 @@ async def main():
97
97
  f"Expected exactly one DocumentChunk, but found {type_counts.get('DocumentChunk', 0)}"
98
98
  )
99
99
 
100
- assert type_counts.get("Entity", 0) >= 20, (
100
+ assert type_counts.get("Entity", 0) >= 10, (
101
101
  f"Expected multiple entities (assert is set to 20), but found {type_counts.get('Entity', 0)}"
102
102
  )
103
103
 
@@ -105,52 +105,24 @@ async def main():
105
105
  f"Expected multiple entity types, but found {type_counts.get('EntityType', 0)}"
106
106
  )
107
107
 
108
- assert type_counts.get("Event", 0) >= 20, (
108
+ assert type_counts.get("Event", 0) >= 10, (
109
109
  f"Expected multiple events (assert is set to 20), but found {type_counts.get('Event', 0)}"
110
110
  )
111
111
 
112
- assert type_counts.get("Timestamp", 0) >= 20, (
113
- f"Expected multiple timestamps (assert is set to 20), but found {type_counts.get('Timestamp', 0)}"
112
+ assert type_counts.get("Timestamp", 0) >= 10, (
113
+ f"Expected multiple timestamps (assert is set to 10), but found {type_counts.get('Timestamp', 0)}"
114
114
  )
115
115
 
116
- assert type_counts.get("Interval", 0) >= 2, (
117
- f"Expected multiple intervals, but found {type_counts.get('Interval', 0)}"
118
- )
119
-
120
- assert edge_type_counts.get("contains", 0) >= 20, (
116
+ assert edge_type_counts.get("contains", 0) >= 10, (
121
117
  f"Expected multiple 'contains' edge, but found {edge_type_counts.get('contains', 0)}"
122
118
  )
123
119
 
124
- assert edge_type_counts.get("is_a", 0) >= 20, (
120
+ assert edge_type_counts.get("is_a", 0) >= 10, (
125
121
  f"Expected multiple 'is_a' edge, but found {edge_type_counts.get('is_a', 0)}"
126
122
  )
127
123
 
128
- assert edge_type_counts.get("during", 0) == type_counts.get("Interval", 0), (
129
- "Expected the same amount of during and interval objects in the graph"
130
- )
131
-
132
- assert edge_type_counts.get("during", 0) == type_counts.get("Interval", 0), (
133
- "Expected the same amount of during and interval objects in the graph"
134
- )
135
-
136
- assert edge_type_counts.get("time_from", 0) == type_counts.get("Interval", 0), (
137
- "Expected the same amount of time_from and interval objects in the graph"
138
- )
139
-
140
- assert edge_type_counts.get("time_to", 0) == type_counts.get("Interval", 0), (
141
- "Expected the same amount of time_to and interval objects in the graph"
142
- )
143
-
144
124
  retriever = TemporalRetriever()
145
125
 
146
- result_before = await retriever.extract_time_from_query("What happened before 1890?")
147
-
148
- assert result_before[0] is None
149
-
150
- result_after = await retriever.extract_time_from_query("What happened after 1891?")
151
-
152
- assert result_after[1] is None
153
-
154
126
  result_between = await retriever.extract_time_from_query("What happened between 1890 and 1900?")
155
127
 
156
128
  assert result_between[1]