cognee 0.5.0.dev0__py3-none-any.whl → 0.5.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +1 -5
- cognee/api/v1/add/add.py +2 -1
- cognee/api/v1/cognify/cognify.py +24 -16
- cognee/api/v1/cognify/routers/__init__.py +0 -1
- cognee/api/v1/cognify/routers/get_cognify_router.py +3 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
- cognee/api/v1/ontologies/ontologies.py +12 -37
- cognee/api/v1/ontologies/routers/get_ontology_router.py +27 -25
- cognee/api/v1/search/search.py +4 -0
- cognee/api/v1/ui/node_setup.py +360 -0
- cognee/api/v1/ui/npm_utils.py +50 -0
- cognee/api/v1/ui/ui.py +38 -68
- cognee/context_global_variables.py +61 -16
- cognee/eval_framework/Dockerfile +29 -0
- cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
- cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
- cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
- cognee/eval_framework/eval_config.py +2 -2
- cognee/eval_framework/modal_run_eval.py +16 -28
- cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
- cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
- cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/graph/config.py +3 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +1 -0
- cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
- cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
- cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
- cognee/infrastructure/databases/utils/__init__.py +3 -0
- cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +62 -48
- cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
- cognee/infrastructure/databases/vector/config.py +2 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +1 -0
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -10
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
- cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
- cognee/infrastructure/files/storage/s3_config.py +2 -0
- cognee/infrastructure/llm/LLMGateway.py +5 -2
- cognee/infrastructure/llm/config.py +35 -0
- cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +17 -16
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +40 -37
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +39 -36
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +19 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +11 -9
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +23 -21
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +42 -34
- cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/deletion/prune_system.py +52 -2
- cognee/modules/data/methods/delete_dataset.py +26 -0
- cognee/modules/engine/models/Triplet.py +9 -0
- cognee/modules/engine/models/__init__.py +1 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +85 -37
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
- cognee/modules/memify/memify.py +1 -7
- cognee/modules/pipelines/operations/pipeline.py +18 -2
- cognee/modules/retrieval/__init__.py +1 -1
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +4 -0
- cognee/modules/retrieval/graph_completion_cot_retriever.py +4 -0
- cognee/modules/retrieval/graph_completion_retriever.py +10 -0
- cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
- cognee/modules/retrieval/register_retriever.py +10 -0
- cognee/modules/retrieval/registered_community_retrievers.py +1 -0
- cognee/modules/retrieval/temporal_retriever.py +4 -0
- cognee/modules/retrieval/triplet_retriever.py +182 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +42 -10
- cognee/modules/run_custom_pipeline/run_custom_pipeline.py +8 -1
- cognee/modules/search/methods/get_search_type_tools.py +54 -8
- cognee/modules/search/methods/no_access_control_search.py +4 -0
- cognee/modules/search/methods/search.py +21 -0
- cognee/modules/search/types/SearchType.py +1 -1
- cognee/modules/settings/get_settings.py +19 -0
- cognee/modules/users/methods/get_authenticated_user.py +2 -2
- cognee/modules/users/models/DatasetDatabase.py +15 -3
- cognee/shared/logging_utils.py +4 -0
- cognee/shared/rate_limiting.py +30 -0
- cognee/tasks/documents/__init__.py +0 -1
- cognee/tasks/graph/extract_graph_from_data.py +9 -10
- cognee/tasks/memify/get_triplet_datapoints.py +289 -0
- cognee/tasks/storage/add_data_points.py +142 -2
- cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
- cognee/tests/integration/tasks/test_add_data_points.py +139 -0
- cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
- cognee/tests/test_cognee_server_start.py +2 -4
- cognee/tests/test_conversation_history.py +23 -1
- cognee/tests/test_dataset_database_handler.py +137 -0
- cognee/tests/test_dataset_delete.py +76 -0
- cognee/tests/test_edge_centered_payload.py +170 -0
- cognee/tests/test_pipeline_cache.py +164 -0
- cognee/tests/test_search_db.py +37 -1
- cognee/tests/unit/api/test_ontology_endpoint.py +77 -89
- cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
- cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
- cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/METADATA +76 -89
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/RECORD +118 -97
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/WHEEL +1 -1
- cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
- cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
- cognee/modules/retrieval/code_retriever.py +0 -232
- cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
- cognee/tasks/code/get_local_dependencies_checker.py +0 -20
- cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
- cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
- cognee/tasks/repo_processor/__init__.py +0 -2
- cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
- cognee/tasks/repo_processor/get_non_code_files.py +0 -158
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
- cognee/tests/test_delete_bmw_example.py +0 -60
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from unittest.mock import AsyncMock, patch
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
from cognee.infrastructure.engine import DataPoint
|
|
6
|
+
from cognee.modules.engine.models import Triplet
|
|
7
|
+
from cognee.tasks.storage.add_data_points import (
|
|
8
|
+
add_data_points,
|
|
9
|
+
InvalidDataPointsInAddDataPointsError,
|
|
10
|
+
_extract_embeddable_text_from_datapoint,
|
|
11
|
+
_create_triplets_from_graph,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
adp_module = sys.modules["cognee.tasks.storage.add_data_points"]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class SimplePoint(DataPoint):
|
|
18
|
+
text: str
|
|
19
|
+
metadata: dict = {"index_fields": ["text"]}
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@pytest.mark.asyncio
|
|
23
|
+
@pytest.mark.parametrize("bad_input", [None, ["not_datapoint"]])
|
|
24
|
+
async def test_add_data_points_validates_inputs(bad_input):
|
|
25
|
+
with pytest.raises(InvalidDataPointsInAddDataPointsError):
|
|
26
|
+
await add_data_points(bad_input)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@pytest.mark.asyncio
|
|
30
|
+
@patch.object(adp_module, "index_graph_edges")
|
|
31
|
+
@patch.object(adp_module, "index_data_points")
|
|
32
|
+
@patch.object(adp_module, "get_graph_engine")
|
|
33
|
+
@patch.object(adp_module, "deduplicate_nodes_and_edges")
|
|
34
|
+
@patch.object(adp_module, "get_graph_from_model")
|
|
35
|
+
async def test_add_data_points_indexes_nodes_and_edges(
|
|
36
|
+
mock_get_graph, mock_dedup, mock_get_engine, mock_index_nodes, mock_index_edges
|
|
37
|
+
):
|
|
38
|
+
dp1 = SimplePoint(text="first")
|
|
39
|
+
dp2 = SimplePoint(text="second")
|
|
40
|
+
|
|
41
|
+
edge1 = (str(dp1.id), str(dp2.id), "related_to", {"edge_text": "connects"})
|
|
42
|
+
custom_edges = [(str(dp2.id), str(dp1.id), "custom_edge", {})]
|
|
43
|
+
|
|
44
|
+
mock_get_graph.side_effect = [([dp1], [edge1]), ([dp2], [])]
|
|
45
|
+
mock_dedup.side_effect = lambda n, e: (n, e)
|
|
46
|
+
graph_engine = AsyncMock()
|
|
47
|
+
mock_get_engine.return_value = graph_engine
|
|
48
|
+
|
|
49
|
+
result = await add_data_points([dp1, dp2], custom_edges=custom_edges)
|
|
50
|
+
|
|
51
|
+
assert result == [dp1, dp2]
|
|
52
|
+
graph_engine.add_nodes.assert_awaited_once()
|
|
53
|
+
mock_index_nodes.assert_awaited_once()
|
|
54
|
+
assert graph_engine.add_edges.await_count == 2
|
|
55
|
+
assert edge1 in graph_engine.add_edges.await_args_list[0].args[0]
|
|
56
|
+
assert graph_engine.add_edges.await_args_list[1].args[0] == custom_edges
|
|
57
|
+
assert mock_index_edges.await_count == 2
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@pytest.mark.asyncio
|
|
61
|
+
@patch.object(adp_module, "index_graph_edges")
|
|
62
|
+
@patch.object(adp_module, "index_data_points")
|
|
63
|
+
@patch.object(adp_module, "get_graph_engine")
|
|
64
|
+
@patch.object(adp_module, "deduplicate_nodes_and_edges")
|
|
65
|
+
@patch.object(adp_module, "get_graph_from_model")
|
|
66
|
+
async def test_add_data_points_indexes_triplets_when_enabled(
|
|
67
|
+
mock_get_graph, mock_dedup, mock_get_engine, mock_index_nodes, mock_index_edges
|
|
68
|
+
):
|
|
69
|
+
dp1 = SimplePoint(text="source")
|
|
70
|
+
dp2 = SimplePoint(text="target")
|
|
71
|
+
|
|
72
|
+
edge1 = (str(dp1.id), str(dp2.id), "relates", {"edge_text": "describes"})
|
|
73
|
+
|
|
74
|
+
mock_get_graph.side_effect = [([dp1], [edge1]), ([dp2], [])]
|
|
75
|
+
mock_dedup.side_effect = lambda n, e: (n, e)
|
|
76
|
+
graph_engine = AsyncMock()
|
|
77
|
+
mock_get_engine.return_value = graph_engine
|
|
78
|
+
|
|
79
|
+
await add_data_points([dp1, dp2], embed_triplets=True)
|
|
80
|
+
|
|
81
|
+
assert mock_index_nodes.await_count == 2
|
|
82
|
+
nodes_arg = mock_index_nodes.await_args_list[0].args[0]
|
|
83
|
+
triplets_arg = mock_index_nodes.await_args_list[1].args[0]
|
|
84
|
+
assert nodes_arg == [dp1, dp2]
|
|
85
|
+
assert len(triplets_arg) == 1
|
|
86
|
+
assert isinstance(triplets_arg[0], Triplet)
|
|
87
|
+
mock_index_edges.assert_awaited_once()
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@pytest.mark.asyncio
|
|
91
|
+
@patch.object(adp_module, "index_graph_edges")
|
|
92
|
+
@patch.object(adp_module, "index_data_points")
|
|
93
|
+
@patch.object(adp_module, "get_graph_engine")
|
|
94
|
+
@patch.object(adp_module, "deduplicate_nodes_and_edges")
|
|
95
|
+
@patch.object(adp_module, "get_graph_from_model")
|
|
96
|
+
async def test_add_data_points_with_empty_list(
|
|
97
|
+
mock_get_graph, mock_dedup, mock_get_engine, mock_index_nodes, mock_index_edges
|
|
98
|
+
):
|
|
99
|
+
mock_dedup.side_effect = lambda n, e: (n, e)
|
|
100
|
+
graph_engine = AsyncMock()
|
|
101
|
+
mock_get_engine.return_value = graph_engine
|
|
102
|
+
|
|
103
|
+
result = await add_data_points([])
|
|
104
|
+
|
|
105
|
+
assert result == []
|
|
106
|
+
mock_get_graph.assert_not_called()
|
|
107
|
+
graph_engine.add_nodes.assert_awaited_once_with([])
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
@pytest.mark.asyncio
|
|
111
|
+
@patch.object(adp_module, "index_graph_edges")
|
|
112
|
+
@patch.object(adp_module, "index_data_points")
|
|
113
|
+
@patch.object(adp_module, "get_graph_engine")
|
|
114
|
+
@patch.object(adp_module, "deduplicate_nodes_and_edges")
|
|
115
|
+
@patch.object(adp_module, "get_graph_from_model")
|
|
116
|
+
async def test_add_data_points_with_single_datapoint(
|
|
117
|
+
mock_get_graph, mock_dedup, mock_get_engine, mock_index_nodes, mock_index_edges
|
|
118
|
+
):
|
|
119
|
+
dp = SimplePoint(text="single")
|
|
120
|
+
mock_get_graph.side_effect = [([dp], [])]
|
|
121
|
+
mock_dedup.side_effect = lambda n, e: (n, e)
|
|
122
|
+
graph_engine = AsyncMock()
|
|
123
|
+
mock_get_engine.return_value = graph_engine
|
|
124
|
+
|
|
125
|
+
result = await add_data_points([dp])
|
|
126
|
+
|
|
127
|
+
assert result == [dp]
|
|
128
|
+
mock_get_graph.assert_called_once()
|
|
129
|
+
mock_index_nodes.assert_awaited_once()
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def test_extract_embeddable_text_from_datapoint():
|
|
133
|
+
dp = SimplePoint(text="hello world")
|
|
134
|
+
text = _extract_embeddable_text_from_datapoint(dp)
|
|
135
|
+
assert text == "hello world"
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def test_extract_embeddable_text_with_multiple_fields():
|
|
139
|
+
class MultiField(DataPoint):
|
|
140
|
+
title: str
|
|
141
|
+
description: str
|
|
142
|
+
metadata: dict = {"index_fields": ["title", "description"]}
|
|
143
|
+
|
|
144
|
+
dp = MultiField(title="Test", description="Description")
|
|
145
|
+
text = _extract_embeddable_text_from_datapoint(dp)
|
|
146
|
+
assert text == "Test Description"
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def test_extract_embeddable_text_with_no_index_fields():
|
|
150
|
+
class NoIndex(DataPoint):
|
|
151
|
+
text: str
|
|
152
|
+
metadata: dict = {"index_fields": []}
|
|
153
|
+
|
|
154
|
+
dp = NoIndex(text="ignored")
|
|
155
|
+
text = _extract_embeddable_text_from_datapoint(dp)
|
|
156
|
+
assert text == ""
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def test_create_triplets_from_graph():
|
|
160
|
+
dp1 = SimplePoint(text="source node")
|
|
161
|
+
dp2 = SimplePoint(text="target node")
|
|
162
|
+
edge = (str(dp1.id), str(dp2.id), "connects_to", {"edge_text": "links"})
|
|
163
|
+
|
|
164
|
+
triplets = _create_triplets_from_graph([dp1, dp2], [edge])
|
|
165
|
+
|
|
166
|
+
assert len(triplets) == 1
|
|
167
|
+
assert isinstance(triplets[0], Triplet)
|
|
168
|
+
assert triplets[0].from_node_id == str(dp1.id)
|
|
169
|
+
assert triplets[0].to_node_id == str(dp2.id)
|
|
170
|
+
assert "source node" in triplets[0].text
|
|
171
|
+
assert "target node" in triplets[0].text
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def test_extract_embeddable_text_with_none_datapoint():
|
|
175
|
+
text = _extract_embeddable_text_from_datapoint(None)
|
|
176
|
+
assert text == ""
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def test_extract_embeddable_text_without_metadata():
|
|
180
|
+
class NoMetadata(DataPoint):
|
|
181
|
+
text: str
|
|
182
|
+
|
|
183
|
+
dp = NoMetadata(text="test")
|
|
184
|
+
delattr(dp, "metadata")
|
|
185
|
+
text = _extract_embeddable_text_from_datapoint(dp)
|
|
186
|
+
assert text == ""
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def test_extract_embeddable_text_with_whitespace_only():
|
|
190
|
+
class WhitespaceField(DataPoint):
|
|
191
|
+
text: str
|
|
192
|
+
metadata: dict = {"index_fields": ["text"]}
|
|
193
|
+
|
|
194
|
+
dp = WhitespaceField(text=" ")
|
|
195
|
+
text = _extract_embeddable_text_from_datapoint(dp)
|
|
196
|
+
assert text == ""
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def test_create_triplets_skips_short_edge_tuples():
|
|
200
|
+
dp = SimplePoint(text="node")
|
|
201
|
+
incomplete_edge = (str(dp.id), str(dp.id))
|
|
202
|
+
|
|
203
|
+
triplets = _create_triplets_from_graph([dp], [incomplete_edge])
|
|
204
|
+
|
|
205
|
+
assert len(triplets) == 0
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def test_create_triplets_skips_missing_source_node():
|
|
209
|
+
dp1 = SimplePoint(text="target")
|
|
210
|
+
edge = ("missing_id", str(dp1.id), "relates", {})
|
|
211
|
+
|
|
212
|
+
triplets = _create_triplets_from_graph([dp1], [edge])
|
|
213
|
+
|
|
214
|
+
assert len(triplets) == 0
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def test_create_triplets_skips_missing_target_node():
|
|
218
|
+
dp1 = SimplePoint(text="source")
|
|
219
|
+
edge = (str(dp1.id), "missing_id", "relates", {})
|
|
220
|
+
|
|
221
|
+
triplets = _create_triplets_from_graph([dp1], [edge])
|
|
222
|
+
|
|
223
|
+
assert len(triplets) == 0
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def test_create_triplets_skips_none_relationship():
|
|
227
|
+
dp1 = SimplePoint(text="source")
|
|
228
|
+
dp2 = SimplePoint(text="target")
|
|
229
|
+
edge = (str(dp1.id), str(dp2.id), None, {})
|
|
230
|
+
|
|
231
|
+
triplets = _create_triplets_from_graph([dp1, dp2], [edge])
|
|
232
|
+
|
|
233
|
+
assert len(triplets) == 0
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def test_create_triplets_uses_relationship_name_when_no_edge_text():
|
|
237
|
+
dp1 = SimplePoint(text="source")
|
|
238
|
+
dp2 = SimplePoint(text="target")
|
|
239
|
+
edge = (str(dp1.id), str(dp2.id), "connects_to", {})
|
|
240
|
+
|
|
241
|
+
triplets = _create_triplets_from_graph([dp1, dp2], [edge])
|
|
242
|
+
|
|
243
|
+
assert len(triplets) == 1
|
|
244
|
+
assert "connects_to" in triplets[0].text
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def test_create_triplets_prevents_duplicates():
|
|
248
|
+
dp1 = SimplePoint(text="source")
|
|
249
|
+
dp2 = SimplePoint(text="target")
|
|
250
|
+
edge = (str(dp1.id), str(dp2.id), "relates", {"edge_text": "links"})
|
|
251
|
+
|
|
252
|
+
triplets = _create_triplets_from_graph([dp1, dp2], [edge, edge])
|
|
253
|
+
|
|
254
|
+
assert len(triplets) == 1
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def test_create_triplets_skips_nodes_without_id():
|
|
258
|
+
class NodeNoId:
|
|
259
|
+
pass
|
|
260
|
+
|
|
261
|
+
dp = SimplePoint(text="valid")
|
|
262
|
+
node_no_id = NodeNoId()
|
|
263
|
+
edge = (str(dp.id), "some_id", "relates", {})
|
|
264
|
+
|
|
265
|
+
triplets = _create_triplets_from_graph([dp, node_no_id], [edge])
|
|
266
|
+
|
|
267
|
+
assert len(triplets) == 0
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
@pytest.mark.asyncio
|
|
271
|
+
@patch.object(adp_module, "index_graph_edges")
|
|
272
|
+
@patch.object(adp_module, "index_data_points")
|
|
273
|
+
@patch.object(adp_module, "get_graph_engine")
|
|
274
|
+
@patch.object(adp_module, "deduplicate_nodes_and_edges")
|
|
275
|
+
@patch.object(adp_module, "get_graph_from_model")
|
|
276
|
+
async def test_add_data_points_with_empty_custom_edges(
|
|
277
|
+
mock_get_graph, mock_dedup, mock_get_engine, mock_index_nodes, mock_index_edges
|
|
278
|
+
):
|
|
279
|
+
dp = SimplePoint(text="test")
|
|
280
|
+
mock_get_graph.side_effect = [([dp], [])]
|
|
281
|
+
mock_dedup.side_effect = lambda n, e: (n, e)
|
|
282
|
+
graph_engine = AsyncMock()
|
|
283
|
+
mock_get_engine.return_value = graph_engine
|
|
284
|
+
|
|
285
|
+
result = await add_data_points([dp], custom_edges=[])
|
|
286
|
+
|
|
287
|
+
assert result == [dp]
|
|
288
|
+
assert graph_engine.add_edges.await_count == 1
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cognee
|
|
3
|
-
Version: 0.5.0.dev0
|
|
3
|
+
Version: 0.5.0.dev1
|
|
4
4
|
Summary: Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning.
|
|
5
5
|
Project-URL: Homepage, https://www.cognee.ai
|
|
6
6
|
Project-URL: Repository, https://github.com/topoteretes/cognee
|
|
@@ -16,8 +16,9 @@ Classifier: Operating System :: Microsoft :: Windows
|
|
|
16
16
|
Classifier: Operating System :: POSIX :: Linux
|
|
17
17
|
Classifier: Topic :: Software Development :: Libraries
|
|
18
18
|
Requires-Python: <3.14,>=3.10
|
|
19
|
-
Requires-Dist: aiofiles
|
|
19
|
+
Requires-Dist: aiofiles>=23.2.1
|
|
20
20
|
Requires-Dist: aiohttp<4.0.0,>=3.11.14
|
|
21
|
+
Requires-Dist: aiolimiter>=1.2.1
|
|
21
22
|
Requires-Dist: aiosqlite<1.0.0,>=0.20.0
|
|
22
23
|
Requires-Dist: alembic<2,>=1.13.3
|
|
23
24
|
Requires-Dist: diskcache>=5.6.3
|
|
@@ -40,7 +41,7 @@ Requires-Dist: numpy<=4.0.0,>=1.26.4
|
|
|
40
41
|
Requires-Dist: onnxruntime<=1.22.1
|
|
41
42
|
Requires-Dist: openai>=1.80.1
|
|
42
43
|
Requires-Dist: pydantic-settings<3,>=2.2.1
|
|
43
|
-
Requires-Dist: pydantic<
|
|
44
|
+
Requires-Dist: pydantic<2.12.0,>=2.10.5
|
|
44
45
|
Requires-Dist: pylance<=0.36.0,>=0.22.0
|
|
45
46
|
Requires-Dist: pympler<2.0.0,>=1.1
|
|
46
47
|
Requires-Dist: pypdf<7.0.0,>=4.1.0
|
|
@@ -98,7 +99,8 @@ Provides-Extra: docling
|
|
|
98
99
|
Requires-Dist: docling>=2.54; extra == 'docling'
|
|
99
100
|
Requires-Dist: transformers>=4.55; extra == 'docling'
|
|
100
101
|
Provides-Extra: docs
|
|
101
|
-
Requires-Dist: lxml<
|
|
102
|
+
Requires-Dist: lxml<5,>=4.9.3; (python_version < '3.13') and extra == 'docs'
|
|
103
|
+
Requires-Dist: lxml<6,>=5; (python_version >= '3.13') and extra == 'docs'
|
|
102
104
|
Requires-Dist: unstructured[csv,doc,docx,epub,md,odt,org,pdf,ppt,pptx,rst,rtf,tsv,xlsx]<19,>=0.18.1; extra == 'docs'
|
|
103
105
|
Provides-Extra: evals
|
|
104
106
|
Requires-Dist: gdown<6,>=5.2.0; extra == 'evals'
|
|
@@ -145,7 +147,8 @@ Requires-Dist: redis<6.0.0,>=5.0.3; extra == 'redis'
|
|
|
145
147
|
Provides-Extra: scraping
|
|
146
148
|
Requires-Dist: apscheduler<=3.11.0,>=3.10.0; extra == 'scraping'
|
|
147
149
|
Requires-Dist: beautifulsoup4>=4.13.1; extra == 'scraping'
|
|
148
|
-
Requires-Dist: lxml
|
|
150
|
+
Requires-Dist: lxml<5,>=4.9.3; (python_version < '3.13') and extra == 'scraping'
|
|
151
|
+
Requires-Dist: lxml<6,>=5; (python_version >= '3.13') and extra == 'scraping'
|
|
149
152
|
Requires-Dist: playwright>=1.9.0; extra == 'scraping'
|
|
150
153
|
Requires-Dist: protego>=0.1; extra == 'scraping'
|
|
151
154
|
Requires-Dist: tavily-python>=0.7.12; extra == 'scraping'
|
|
@@ -158,27 +161,27 @@ Description-Content-Type: text/markdown
|
|
|
158
161
|
|
|
159
162
|
<br />
|
|
160
163
|
|
|
161
|
-
|
|
164
|
+
Cognee - Accurate and Persistent AI Memory
|
|
162
165
|
|
|
163
166
|
<p align="center">
|
|
164
167
|
<a href="https://www.youtube.com/watch?v=1bezuvLwJmw&t=2s">Demo</a>
|
|
165
168
|
.
|
|
166
|
-
<a href="https://cognee.ai">Learn more</a>
|
|
169
|
+
<a href="https://docs.cognee.ai/">Docs</a>
|
|
170
|
+
.
|
|
171
|
+
<a href="https://cognee.ai">Learn More</a>
|
|
167
172
|
·
|
|
168
173
|
<a href="https://discord.gg/NQPKmU5CCg">Join Discord</a>
|
|
169
174
|
·
|
|
170
175
|
<a href="https://www.reddit.com/r/AIMemory/">Join r/AIMemory</a>
|
|
171
176
|
.
|
|
172
|
-
<a href="https://
|
|
173
|
-
.
|
|
174
|
-
<a href="https://github.com/topoteretes/cognee-community">cognee community repo</a>
|
|
177
|
+
<a href="https://github.com/topoteretes/cognee-community">Community Plugins & Add-ons</a>
|
|
175
178
|
</p>
|
|
176
179
|
|
|
177
180
|
|
|
178
181
|
[GitHub forks](https://GitHub.com/topoteretes/cognee/network/)
|
|
179
182
|
[GitHub stars](https://GitHub.com/topoteretes/cognee/stargazers/)
|
|
180
183
|
[GitHub commits](https://GitHub.com/topoteretes/cognee/commit/)
|
|
181
|
-
[GitHub tag](https://github.com/topoteretes/cognee/tags/)
|
|
182
185
|
[Downloads](https://pepy.tech/project/cognee)
|
|
183
186
|
[License](https://github.com/topoteretes/cognee/blob/main/LICENSE)
|
|
184
187
|
[Contributors](https://github.com/topoteretes/cognee/graphs/contributors)
|
|
@@ -194,11 +197,7 @@ Description-Content-Type: text/markdown
|
|
|
194
197
|
</a>
|
|
195
198
|
</p>
|
|
196
199
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
Build dynamic memory for Agents and replace RAG using scalable, modular ECL (Extract, Cognify, Load) pipelines.
|
|
200
|
+
Use your data to build personalized and dynamic memory for AI Agents. Cognee lets you replace RAG with scalable and modular ECL (Extract, Cognify, Load) pipelines.
|
|
202
201
|
|
|
203
202
|
<p align="center">
|
|
204
203
|
🌐 Available Languages
|
|
@@ -206,7 +205,7 @@ Build dynamic memory for Agents and replace RAG using scalable, modular ECL (Ext
|
|
|
206
205
|
<!-- Keep these links. Translations will automatically update with the README. -->
|
|
207
206
|
<a href="https://www.readme-i18n.com/topoteretes/cognee?lang=de">Deutsch</a> |
|
|
208
207
|
<a href="https://www.readme-i18n.com/topoteretes/cognee?lang=es">Español</a> |
|
|
209
|
-
<a href="https://www.readme-i18n.com/topoteretes/cognee?lang=fr">
|
|
208
|
+
<a href="https://www.readme-i18n.com/topoteretes/cognee?lang=fr">Français</a> |
|
|
210
209
|
<a href="https://www.readme-i18n.com/topoteretes/cognee?lang=ja">日本語</a> |
|
|
211
210
|
<a href="https://www.readme-i18n.com/topoteretes/cognee?lang=ko">한국어</a> |
|
|
212
211
|
<a href="https://www.readme-i18n.com/topoteretes/cognee?lang=pt">Português</a> |
|
|
@@ -220,69 +219,65 @@ Build dynamic memory for Agents and replace RAG using scalable, modular ECL (Ext
|
|
|
220
219
|
</div>
|
|
221
220
|
</div>
|
|
222
221
|
|
|
222
|
+
## About Cognee
|
|
223
223
|
|
|
224
|
+
Cognee is an open-source tool and platform that transforms your raw data into persistent and dynamic AI memory for Agents. It combines vector search with graph databases to make your documents both searchable by meaning and connected by relationships.
|
|
224
225
|
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
Get started quickly with a Google Colab <a href="https://colab.research.google.com/drive/12Vi9zID-M3fpKpKiaqDBvkk98ElkRPWy?usp=sharing">notebook</a> , <a href="https://deepnote.com/workspace/cognee-382213d0-0444-4c89-8265-13770e333c02/project/cognee-demo-78ffacb9-5832-4611-bb1a-560386068b30/notebook/Notebook-1-75b24cda566d4c24ab348f7150792601?utm_source=share-modal&utm_medium=product-shared-content&utm_campaign=notebook&utm_content=78ffacb9-5832-4611-bb1a-560386068b30">Deepnote notebook</a> or <a href="https://github.com/topoteretes/cognee/tree/main/cognee-starter-kit">starter repo</a>
|
|
228
|
-
|
|
226
|
+
You can use Cognee in two ways:
|
|
229
227
|
|
|
230
|
-
|
|
228
|
+
1. [Self-host Cognee Open Source](https://docs.cognee.ai/getting-started/installation), which stores all data locally by default.
|
|
229
|
+
2. [Connect to Cognee Cloud](https://platform.cognee.ai/), and get the same OSS stack on managed infrastructure for easier development and productionization.
|
|
231
230
|
|
|
232
|
-
|
|
233
|
-
Our hosted solution is just our deployment of OSS cognee on Modal, with the goal of making development and productionization easier.
|
|
231
|
+
### Cognee Open Source (self-hosted):
|
|
234
232
|
|
|
235
|
-
|
|
233
|
+
- Interconnects any type of data — including past conversations, files, images, and audio transcriptions
|
|
234
|
+
- Replaces traditional RAG systems with a unified memory layer built on graphs and vectors
|
|
235
|
+
- Reduces developer effort and infrastructure cost while improving quality and precision
|
|
236
|
+
- Provides Pythonic data pipelines for ingestion from 30+ data sources
|
|
237
|
+
- Offers high customizability through user-defined tasks, modular pipelines, and built-in search endpoints
|
|
236
238
|
|
|
237
|
-
|
|
238
|
-
-
|
|
239
|
-
-
|
|
240
|
-
-
|
|
241
|
-
-
|
|
239
|
+
### Cognee Cloud (managed):
|
|
240
|
+
- Hosted web UI dashboard
|
|
241
|
+
- Automatic version updates
|
|
242
|
+
- Resource usage analytics
|
|
243
|
+
- GDPR compliant, enterprise-grade security
|
|
242
244
|
|
|
243
|
-
|
|
244
|
-
- Includes a managed UI and a [hosted solution](https://www.cognee.ai)
|
|
245
|
+
## Basic Usage & Feature Guide
|
|
245
246
|
|
|
247
|
+
To learn more, [check out this short, end-to-end Colab walkthrough](https://colab.research.google.com/drive/12Vi9zID-M3fpKpKiaqDBvkk98ElkRPWy?usp=sharing) of Cognee's core features.
|
|
246
248
|
|
|
249
|
+
[Open In Colab](https://colab.research.google.com/drive/12Vi9zID-M3fpKpKiaqDBvkk98ElkRPWy?usp=sharing)
|
|
247
250
|
|
|
248
|
-
##
|
|
251
|
+
## Quickstart
|
|
249
252
|
|
|
253
|
+
Let’s try Cognee in just a few lines of code. For detailed setup and configuration, see the [Cognee Docs](https://docs.cognee.ai/getting-started/installation#environment-configuration).
|
|
250
254
|
|
|
251
|
-
###
|
|
255
|
+
### Prerequisites
|
|
252
256
|
|
|
253
|
-
|
|
257
|
+
- Python 3.10 to 3.13
|
|
254
258
|
|
|
255
|
-
|
|
259
|
+
### Step 1: Install Cognee
|
|
256
260
|
|
|
257
|
-
|
|
261
|
+
You can install Cognee with **pip**, **poetry**, **uv**, or your preferred Python package manager.
|
|
258
262
|
|
|
259
263
|
```bash
|
|
260
264
|
uv pip install cognee
|
|
261
265
|
```
|
|
262
266
|
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
### 💻 Basic Usage
|
|
266
|
-
|
|
267
|
-
#### Setup
|
|
268
|
-
|
|
269
|
-
```
|
|
267
|
+
### Step 2: Configure the LLM
|
|
268
|
+
```python
|
|
270
269
|
import os
|
|
271
270
|
os.environ["LLM_API_KEY"] = "YOUR OPENAI_API_KEY"
|
|
272
|
-
|
|
273
271
|
```
|
|
272
|
+
Alternatively, create a `.env` file using our [template](https://github.com/topoteretes/cognee/blob/main/.env.template).
|
|
274
273
|
|
|
275
|
-
|
|
276
|
-
To use different LLM providers, for more info check out our <a href="https://docs.cognee.ai/setup-configuration/llm-providers">documentation</a>
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
#### Simple example
|
|
274
|
+
To integrate other LLM providers, see our [LLM Provider Documentation](https://docs.cognee.ai/setup-configuration/llm-providers).
|
|
280
275
|
|
|
276
|
+
### Step 3: Run the Pipeline
|
|
281
277
|
|
|
278
|
+
Cognee will take your documents, generate a knowledge graph from them and then query the graph based on combined relationships.
|
|
282
279
|
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
This script will run the default pipeline:
|
|
280
|
+
Now, run a minimal pipeline:
|
|
286
281
|
|
|
287
282
|
```python
|
|
288
283
|
import cognee
|
|
@@ -300,7 +295,7 @@ async def main():
|
|
|
300
295
|
await cognee.memify()
|
|
301
296
|
|
|
302
297
|
# Query the knowledge graph
|
|
303
|
-
results = await cognee.search("What does
|
|
298
|
+
results = await cognee.search("What does Cognee do?")
|
|
304
299
|
|
|
305
300
|
# Display the results
|
|
306
301
|
for result in results:
|
|
@@ -311,69 +306,61 @@ if __name__ == '__main__':
|
|
|
311
306
|
asyncio.run(main())
|
|
312
307
|
|
|
313
308
|
```
|
|
314
|
-
Example output:
|
|
315
|
-
```
|
|
316
|
-
Cognee turns documents into AI memory.
|
|
317
309
|
|
|
310
|
+
As you can see, the output is generated from the document we previously stored in Cognee:
|
|
311
|
+
|
|
312
|
+
```bash
|
|
313
|
+
Cognee turns documents into AI memory.
|
|
318
314
|
```
|
|
319
|
-
##### Via CLI
|
|
320
315
|
|
|
321
|
-
|
|
316
|
+
### Use the Cognee CLI
|
|
322
317
|
|
|
323
|
-
|
|
318
|
+
As an alternative, you can get started with these essential commands:
|
|
319
|
+
|
|
320
|
+
```bash
|
|
324
321
|
cognee-cli add "Cognee turns documents into AI memory."
|
|
325
322
|
|
|
326
323
|
cognee-cli cognify
|
|
327
324
|
|
|
328
|
-
cognee-cli search "What does
|
|
325
|
+
cognee-cli search "What does Cognee do?"
|
|
329
326
|
cognee-cli delete --all
|
|
330
327
|
|
|
331
328
|
```
|
|
332
|
-
|
|
333
|
-
|
|
329
|
+
|
|
330
|
+
To open the local UI, run:
|
|
331
|
+
```bash
|
|
334
332
|
cognee-cli -ui
|
|
335
333
|
```
|
|
336
334
|
|
|
335
|
+
## Demos & Examples
|
|
337
336
|
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
### Hosted Platform
|
|
342
|
-
|
|
343
|
-
Get up and running in minutes with automatic updates, analytics, and enterprise security.
|
|
344
|
-
|
|
345
|
-
1. Sign up on [cogwit](https://www.cognee.ai)
|
|
346
|
-
2. Add your API key to local UI and sync your data to Cogwit
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
## Demos
|
|
337
|
+
See Cognee in action:
|
|
352
338
|
|
|
353
|
-
|
|
339
|
+
### Persistent Agent Memory
|
|
354
340
|
|
|
355
|
-
[
|
|
341
|
+
[Cognee Memory for LangGraph Agents](https://github.com/user-attachments/assets/e113b628-7212-4a2b-b288-0be39a93a1c3)
|
|
356
342
|
|
|
357
|
-
|
|
343
|
+
### Simple GraphRAG
|
|
358
344
|
|
|
359
|
-
[
|
|
345
|
+
[Watch Demo](https://github.com/user-attachments/assets/f2186b2e-305a-42b0-9c2d-9f4473f15df8)
|
|
360
346
|
|
|
361
|
-
|
|
347
|
+
### Cognee with Ollama
|
|
362
348
|
|
|
363
|
-
[
|
|
349
|
+
[Watch Demo](https://github.com/user-attachments/assets/39672858-f774-4136-b957-1e2de67b8981)
|
|
364
350
|
|
|
365
351
|
|
|
366
|
-
##
|
|
367
|
-
Your contributions are at the core of making this a true open source project. Any contributions you make are **greatly appreciated**. See [`CONTRIBUTING.md`](CONTRIBUTING.md) for more information.
|
|
352
|
+
## Community & Support
|
|
368
353
|
|
|
354
|
+
### Contributing
|
|
355
|
+
We welcome contributions from the community! Your input helps make Cognee better for everyone. See [`CONTRIBUTING.md`](CONTRIBUTING.md) to get started.
|
|
369
356
|
|
|
370
|
-
|
|
357
|
+
### Code of Conduct
|
|
371
358
|
|
|
372
|
-
We
|
|
359
|
+
We're committed to fostering an inclusive and respectful community. Read our [Code of Conduct](https://github.com/topoteretes/cognee/blob/main/CODE_OF_CONDUCT.md) for guidelines.
|
|
373
360
|
|
|
374
|
-
## Citation
|
|
361
|
+
## Research & Citation
|
|
375
362
|
|
|
376
|
-
We
|
|
363
|
+
We recently published a research paper on optimizing knowledge graphs for LLM reasoning:
|
|
377
364
|
|
|
378
365
|
```bibtex
|
|
379
366
|
@misc{markovic2025optimizinginterfaceknowledgegraphs,
|