cognee 0.5.0__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +5 -1
- cognee/api/v1/add/add.py +1 -2
- cognee/api/v1/cognify/code_graph_pipeline.py +119 -0
- cognee/api/v1/cognify/cognify.py +16 -24
- cognee/api/v1/cognify/routers/__init__.py +1 -0
- cognee/api/v1/cognify/routers/get_code_pipeline_router.py +90 -0
- cognee/api/v1/cognify/routers/get_cognify_router.py +1 -3
- cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
- cognee/api/v1/ontologies/ontologies.py +37 -12
- cognee/api/v1/ontologies/routers/get_ontology_router.py +25 -27
- cognee/api/v1/search/search.py +0 -4
- cognee/api/v1/ui/ui.py +68 -38
- cognee/context_global_variables.py +16 -61
- cognee/eval_framework/answer_generation/answer_generation_executor.py +0 -10
- cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
- cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +2 -0
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
- cognee/eval_framework/eval_config.py +2 -2
- cognee/eval_framework/modal_run_eval.py +28 -16
- cognee/infrastructure/databases/graph/config.py +0 -3
- cognee/infrastructure/databases/graph/get_graph_engine.py +0 -1
- cognee/infrastructure/databases/graph/graph_db_interface.py +0 -15
- cognee/infrastructure/databases/graph/kuzu/adapter.py +0 -228
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +1 -80
- cognee/infrastructure/databases/utils/__init__.py +0 -3
- cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +48 -62
- cognee/infrastructure/databases/vector/config.py +0 -2
- cognee/infrastructure/databases/vector/create_vector_engine.py +0 -1
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +6 -8
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +7 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +10 -11
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +544 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +0 -35
- cognee/infrastructure/files/storage/s3_config.py +0 -2
- cognee/infrastructure/llm/LLMGateway.py +2 -5
- cognee/infrastructure/llm/config.py +0 -35
- cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +8 -23
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +16 -17
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +37 -40
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +36 -39
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +1 -19
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +9 -11
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +21 -23
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +34 -42
- cognee/modules/cognify/config.py +0 -2
- cognee/modules/data/deletion/prune_system.py +2 -52
- cognee/modules/data/methods/delete_dataset.py +0 -26
- cognee/modules/engine/models/__init__.py +0 -1
- cognee/modules/graph/cognee_graph/CogneeGraph.py +37 -85
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +3 -8
- cognee/modules/memify/memify.py +7 -1
- cognee/modules/pipelines/operations/pipeline.py +2 -18
- cognee/modules/retrieval/__init__.py +1 -1
- cognee/modules/retrieval/code_retriever.py +232 -0
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -4
- cognee/modules/retrieval/graph_completion_cot_retriever.py +0 -4
- cognee/modules/retrieval/graph_completion_retriever.py +0 -10
- cognee/modules/retrieval/graph_summary_completion_retriever.py +0 -4
- cognee/modules/retrieval/temporal_retriever.py +0 -4
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +10 -42
- cognee/modules/run_custom_pipeline/run_custom_pipeline.py +1 -8
- cognee/modules/search/methods/get_search_type_tools.py +8 -54
- cognee/modules/search/methods/no_access_control_search.py +0 -4
- cognee/modules/search/methods/search.py +0 -21
- cognee/modules/search/types/SearchType.py +1 -1
- cognee/modules/settings/get_settings.py +0 -19
- cognee/modules/users/methods/get_authenticated_user.py +2 -2
- cognee/modules/users/models/DatasetDatabase.py +3 -15
- cognee/shared/logging_utils.py +0 -4
- cognee/tasks/code/enrich_dependency_graph_checker.py +35 -0
- cognee/tasks/code/get_local_dependencies_checker.py +20 -0
- cognee/tasks/code/get_repo_dependency_graph_checker.py +35 -0
- cognee/tasks/documents/__init__.py +1 -0
- cognee/tasks/documents/check_permissions_on_dataset.py +26 -0
- cognee/tasks/graph/extract_graph_from_data.py +10 -9
- cognee/tasks/repo_processor/__init__.py +2 -0
- cognee/tasks/repo_processor/get_local_dependencies.py +335 -0
- cognee/tasks/repo_processor/get_non_code_files.py +158 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +243 -0
- cognee/tasks/storage/add_data_points.py +2 -142
- cognee/tests/test_cognee_server_start.py +4 -2
- cognee/tests/test_conversation_history.py +1 -23
- cognee/tests/test_delete_bmw_example.py +60 -0
- cognee/tests/test_search_db.py +1 -37
- cognee/tests/unit/api/test_ontology_endpoint.py +89 -77
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +7 -3
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +0 -406
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/METADATA +89 -76
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/RECORD +97 -118
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/WHEEL +1 -1
- cognee/api/v1/ui/node_setup.py +0 -360
- cognee/api/v1/ui/npm_utils.py +0 -50
- cognee/eval_framework/Dockerfile +0 -29
- cognee/infrastructure/databases/dataset_database_handler/__init__.py +0 -3
- cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +0 -80
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +0 -18
- cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +0 -81
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +0 -168
- cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +0 -30
- cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +0 -50
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +0 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +0 -153
- cognee/memify_pipelines/create_triplet_embeddings.py +0 -53
- cognee/modules/engine/models/Triplet.py +0 -9
- cognee/modules/retrieval/register_retriever.py +0 -10
- cognee/modules/retrieval/registered_community_retrievers.py +0 -1
- cognee/modules/retrieval/triplet_retriever.py +0 -182
- cognee/shared/rate_limiting.py +0 -30
- cognee/tasks/memify/get_triplet_datapoints.py +0 -289
- cognee/tests/integration/retrieval/test_triplet_retriever.py +0 -84
- cognee/tests/integration/tasks/test_add_data_points.py +0 -139
- cognee/tests/integration/tasks/test_get_triplet_datapoints.py +0 -69
- cognee/tests/test_dataset_database_handler.py +0 -137
- cognee/tests/test_dataset_delete.py +0 -76
- cognee/tests/test_edge_centered_payload.py +0 -170
- cognee/tests/test_pipeline_cache.py +0 -164
- cognee/tests/unit/infrastructure/llm/test_llm_config.py +0 -46
- cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +0 -214
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +0 -608
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +0 -83
- cognee/tests/unit/tasks/storage/test_add_data_points.py +0 -288
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -1,139 +0,0 @@
|
|
|
1
|
-
import pathlib
|
|
2
|
-
import pytest
|
|
3
|
-
import pytest_asyncio
|
|
4
|
-
|
|
5
|
-
import cognee
|
|
6
|
-
from cognee.low_level import setup
|
|
7
|
-
from cognee.infrastructure.engine import DataPoint
|
|
8
|
-
from cognee.tasks.storage.add_data_points import add_data_points
|
|
9
|
-
from cognee.tasks.storage.exceptions import InvalidDataPointsInAddDataPointsError
|
|
10
|
-
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class Person(DataPoint):
|
|
14
|
-
name: str
|
|
15
|
-
age: int
|
|
16
|
-
metadata: dict = {"index_fields": ["name"]}
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class Company(DataPoint):
|
|
20
|
-
name: str
|
|
21
|
-
industry: str
|
|
22
|
-
metadata: dict = {"index_fields": ["name", "industry"]}
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
@pytest_asyncio.fixture
|
|
26
|
-
async def clean_test_environment():
|
|
27
|
-
"""Set up a clean test environment for add_data_points tests."""
|
|
28
|
-
base_dir = pathlib.Path(__file__).parent.parent.parent.parent
|
|
29
|
-
system_directory_path = str(base_dir / ".cognee_system/test_add_data_points_integration")
|
|
30
|
-
data_directory_path = str(base_dir / ".data_storage/test_add_data_points_integration")
|
|
31
|
-
|
|
32
|
-
cognee.config.system_root_directory(system_directory_path)
|
|
33
|
-
cognee.config.data_root_directory(data_directory_path)
|
|
34
|
-
|
|
35
|
-
await cognee.prune.prune_data()
|
|
36
|
-
await cognee.prune.prune_system(metadata=True)
|
|
37
|
-
await setup()
|
|
38
|
-
|
|
39
|
-
yield
|
|
40
|
-
|
|
41
|
-
try:
|
|
42
|
-
await cognee.prune.prune_data()
|
|
43
|
-
await cognee.prune.prune_system(metadata=True)
|
|
44
|
-
except Exception:
|
|
45
|
-
pass
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
@pytest.mark.asyncio
|
|
49
|
-
async def test_add_data_points_comprehensive(clean_test_environment):
|
|
50
|
-
"""Comprehensive integration test for add_data_points functionality."""
|
|
51
|
-
|
|
52
|
-
person1 = Person(name="Alice", age=30)
|
|
53
|
-
person2 = Person(name="Bob", age=25)
|
|
54
|
-
result = await add_data_points([person1, person2])
|
|
55
|
-
|
|
56
|
-
assert result == [person1, person2]
|
|
57
|
-
assert len(result) == 2
|
|
58
|
-
|
|
59
|
-
graph_engine = await get_graph_engine()
|
|
60
|
-
nodes, edges = await graph_engine.get_graph_data()
|
|
61
|
-
assert len(nodes) >= 2
|
|
62
|
-
|
|
63
|
-
result_empty = await add_data_points([])
|
|
64
|
-
assert result_empty == []
|
|
65
|
-
|
|
66
|
-
person3 = Person(name="Charlie", age=35)
|
|
67
|
-
person4 = Person(name="Diana", age=32)
|
|
68
|
-
custom_edge = (str(person3.id), str(person4.id), "knows", {"edge_text": "friends with"})
|
|
69
|
-
|
|
70
|
-
result_custom = await add_data_points([person3, person4], custom_edges=[custom_edge])
|
|
71
|
-
assert len(result_custom) == 2
|
|
72
|
-
|
|
73
|
-
nodes, edges = await graph_engine.get_graph_data()
|
|
74
|
-
assert len(edges) == 1
|
|
75
|
-
assert len(nodes) == 4
|
|
76
|
-
|
|
77
|
-
class Employee(DataPoint):
|
|
78
|
-
name: str
|
|
79
|
-
works_at: Company
|
|
80
|
-
metadata: dict = {"index_fields": ["name"]}
|
|
81
|
-
|
|
82
|
-
company = Company(name="TechCorp", industry="Technology")
|
|
83
|
-
employee = Employee(name="Eve", works_at=company)
|
|
84
|
-
|
|
85
|
-
result_rel = await add_data_points([employee])
|
|
86
|
-
assert len(result_rel) == 1
|
|
87
|
-
|
|
88
|
-
nodes, edges = await graph_engine.get_graph_data()
|
|
89
|
-
assert len(nodes) == 6
|
|
90
|
-
assert len(edges) == 2
|
|
91
|
-
|
|
92
|
-
person5 = Person(name="Frank", age=40)
|
|
93
|
-
person6 = Person(name="Grace", age=38)
|
|
94
|
-
triplet_edge = (str(person5.id), str(person6.id), "married_to", {"edge_text": "is married to"})
|
|
95
|
-
|
|
96
|
-
result_triplet = await add_data_points(
|
|
97
|
-
[person5, person6], custom_edges=[triplet_edge], embed_triplets=True
|
|
98
|
-
)
|
|
99
|
-
assert len(result_triplet) == 2
|
|
100
|
-
|
|
101
|
-
nodes, edges = await graph_engine.get_graph_data()
|
|
102
|
-
assert len(nodes) == 8
|
|
103
|
-
assert len(edges) == 3
|
|
104
|
-
|
|
105
|
-
batch1 = [Person(name="Leo", age=25), Person(name="Mia", age=30)]
|
|
106
|
-
batch2 = [Person(name="Noah", age=35), Person(name="Olivia", age=40)]
|
|
107
|
-
|
|
108
|
-
result_batch1 = await add_data_points(batch1)
|
|
109
|
-
result_batch2 = await add_data_points(batch2)
|
|
110
|
-
|
|
111
|
-
assert len(result_batch1) == 2
|
|
112
|
-
assert len(result_batch2) == 2
|
|
113
|
-
|
|
114
|
-
nodes, edges = await graph_engine.get_graph_data()
|
|
115
|
-
assert len(nodes) == 12
|
|
116
|
-
assert len(edges) == 3
|
|
117
|
-
|
|
118
|
-
person7 = Person(name="Paul", age=33)
|
|
119
|
-
person8 = Person(name="Quinn", age=31)
|
|
120
|
-
edge1 = (str(person7.id), str(person8.id), "colleague_of", {"edge_text": "works with"})
|
|
121
|
-
edge2 = (str(person8.id), str(person7.id), "colleague_of", {"edge_text": "works with"})
|
|
122
|
-
|
|
123
|
-
result_bi = await add_data_points([person7, person8], custom_edges=[edge1, edge2])
|
|
124
|
-
assert len(result_bi) == 2
|
|
125
|
-
|
|
126
|
-
nodes, edges = await graph_engine.get_graph_data()
|
|
127
|
-
assert len(nodes) == 14
|
|
128
|
-
assert len(edges) == 5
|
|
129
|
-
|
|
130
|
-
person_invalid = Person(name="Invalid", age=50)
|
|
131
|
-
with pytest.raises(InvalidDataPointsInAddDataPointsError, match="must be a list"):
|
|
132
|
-
await add_data_points(person_invalid)
|
|
133
|
-
|
|
134
|
-
with pytest.raises(InvalidDataPointsInAddDataPointsError, match="must be a DataPoint"):
|
|
135
|
-
await add_data_points(["not", "datapoints"])
|
|
136
|
-
|
|
137
|
-
final_nodes, final_edges = await graph_engine.get_graph_data()
|
|
138
|
-
assert len(final_nodes) == 14
|
|
139
|
-
assert len(final_edges) == 5
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import pathlib
|
|
3
|
-
import pytest
|
|
4
|
-
import pytest_asyncio
|
|
5
|
-
from unittest.mock import AsyncMock, patch
|
|
6
|
-
|
|
7
|
-
import cognee
|
|
8
|
-
from cognee.tasks.memify.get_triplet_datapoints import get_triplet_datapoints
|
|
9
|
-
from cognee.modules.engine.models import Triplet
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
@pytest_asyncio.fixture
|
|
13
|
-
async def setup_test_environment():
|
|
14
|
-
"""Set up a clean test environment with a simple graph."""
|
|
15
|
-
base_dir = pathlib.Path(__file__).parent.parent.parent.parent
|
|
16
|
-
data_directory_path = str(base_dir / ".data_storage/test_get_triplet_datapoints_integration")
|
|
17
|
-
cognee_directory_path = str(base_dir / ".cognee_system/test_get_triplet_datapoints_integration")
|
|
18
|
-
|
|
19
|
-
cognee.config.data_root_directory(data_directory_path)
|
|
20
|
-
cognee.config.system_root_directory(cognee_directory_path)
|
|
21
|
-
|
|
22
|
-
await cognee.prune.prune_data()
|
|
23
|
-
await cognee.prune.prune_system(metadata=True)
|
|
24
|
-
|
|
25
|
-
dataset_name = "test_triplets"
|
|
26
|
-
|
|
27
|
-
text = "Volkswagen is a german car manufacturer from Wolfsburg. They produce different models such as Golf, Polo and Touareg."
|
|
28
|
-
await cognee.add(text, dataset_name)
|
|
29
|
-
await cognee.cognify([dataset_name])
|
|
30
|
-
|
|
31
|
-
yield dataset_name
|
|
32
|
-
|
|
33
|
-
await cognee.prune.prune_data()
|
|
34
|
-
await cognee.prune.prune_system(metadata=True)
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
@pytest.mark.asyncio
|
|
38
|
-
async def test_get_triplet_datapoints_integration(setup_test_environment):
|
|
39
|
-
"""Integration test: verify get_triplet_datapoints works with real graph data."""
|
|
40
|
-
|
|
41
|
-
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
42
|
-
|
|
43
|
-
graph_engine = await get_graph_engine()
|
|
44
|
-
|
|
45
|
-
if not hasattr(graph_engine, "get_triplets_batch"):
|
|
46
|
-
pytest.skip("Graph engine does not support get_triplets_batch")
|
|
47
|
-
|
|
48
|
-
triplets = []
|
|
49
|
-
with patch(
|
|
50
|
-
"cognee.tasks.memify.get_triplet_datapoints.index_data_points", new_callable=AsyncMock
|
|
51
|
-
):
|
|
52
|
-
async for triplet in get_triplet_datapoints([{}], triplets_batch_size=10):
|
|
53
|
-
triplets.append(triplet)
|
|
54
|
-
|
|
55
|
-
nodes, edges = await graph_engine.get_graph_data()
|
|
56
|
-
|
|
57
|
-
if len(edges) > 0 and len(triplets) == 0:
|
|
58
|
-
test_triplets = await graph_engine.get_triplets_batch(offset=0, limit=10)
|
|
59
|
-
if len(test_triplets) == 0:
|
|
60
|
-
pytest.fail(
|
|
61
|
-
f"Edges exist in graph ({len(edges)} edges) but get_triplets_batch found none. "
|
|
62
|
-
f"This indicates the query pattern may not match the graph structure."
|
|
63
|
-
)
|
|
64
|
-
|
|
65
|
-
for triplet in triplets:
|
|
66
|
-
assert isinstance(triplet, Triplet), "Each item should be a Triplet instance"
|
|
67
|
-
assert triplet.from_node_id, "Triplet should have from_node_id"
|
|
68
|
-
assert triplet.to_node_id, "Triplet should have to_node_id"
|
|
69
|
-
assert triplet.text, "Triplet should have embeddable text"
|
|
@@ -1,137 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
import os
|
|
3
|
-
|
|
4
|
-
# Set custom dataset database handler environment variable
|
|
5
|
-
os.environ["VECTOR_DATASET_DATABASE_HANDLER"] = "custom_lancedb_handler"
|
|
6
|
-
os.environ["GRAPH_DATASET_DATABASE_HANDLER"] = "custom_kuzu_handler"
|
|
7
|
-
|
|
8
|
-
import cognee
|
|
9
|
-
from cognee.modules.users.methods import get_default_user
|
|
10
|
-
from cognee.infrastructure.databases.dataset_database_handler import DatasetDatabaseHandlerInterface
|
|
11
|
-
from cognee.shared.logging_utils import setup_logging, ERROR
|
|
12
|
-
from cognee.api.v1.search import SearchType
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class LanceDBTestDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
|
|
16
|
-
@classmethod
|
|
17
|
-
async def create_dataset(cls, dataset_id, user):
|
|
18
|
-
import pathlib
|
|
19
|
-
|
|
20
|
-
cognee_directory_path = str(
|
|
21
|
-
pathlib.Path(
|
|
22
|
-
os.path.join(
|
|
23
|
-
pathlib.Path(__file__).parent, ".cognee_system/test_dataset_database_handler"
|
|
24
|
-
)
|
|
25
|
-
).resolve()
|
|
26
|
-
)
|
|
27
|
-
databases_directory_path = os.path.join(cognee_directory_path, "databases", str(user.id))
|
|
28
|
-
os.makedirs(databases_directory_path, exist_ok=True)
|
|
29
|
-
|
|
30
|
-
vector_db_name = "test.lance.db"
|
|
31
|
-
|
|
32
|
-
return {
|
|
33
|
-
"vector_dataset_database_handler": "custom_lancedb_handler",
|
|
34
|
-
"vector_database_name": vector_db_name,
|
|
35
|
-
"vector_database_url": os.path.join(databases_directory_path, vector_db_name),
|
|
36
|
-
"vector_database_provider": "lancedb",
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
class KuzuTestDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
|
|
41
|
-
@classmethod
|
|
42
|
-
async def create_dataset(cls, dataset_id, user):
|
|
43
|
-
databases_directory_path = os.path.join("databases", str(user.id))
|
|
44
|
-
os.makedirs(databases_directory_path, exist_ok=True)
|
|
45
|
-
|
|
46
|
-
graph_db_name = "test.kuzu"
|
|
47
|
-
return {
|
|
48
|
-
"graph_dataset_database_handler": "custom_kuzu_handler",
|
|
49
|
-
"graph_database_name": graph_db_name,
|
|
50
|
-
"graph_database_url": os.path.join(databases_directory_path, graph_db_name),
|
|
51
|
-
"graph_database_provider": "kuzu",
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
async def main():
|
|
56
|
-
import pathlib
|
|
57
|
-
|
|
58
|
-
data_directory_path = str(
|
|
59
|
-
pathlib.Path(
|
|
60
|
-
os.path.join(
|
|
61
|
-
pathlib.Path(__file__).parent, ".data_storage/test_dataset_database_handler"
|
|
62
|
-
)
|
|
63
|
-
).resolve()
|
|
64
|
-
)
|
|
65
|
-
cognee.config.data_root_directory(data_directory_path)
|
|
66
|
-
cognee_directory_path = str(
|
|
67
|
-
pathlib.Path(
|
|
68
|
-
os.path.join(
|
|
69
|
-
pathlib.Path(__file__).parent, ".cognee_system/test_dataset_database_handler"
|
|
70
|
-
)
|
|
71
|
-
).resolve()
|
|
72
|
-
)
|
|
73
|
-
cognee.config.system_root_directory(cognee_directory_path)
|
|
74
|
-
|
|
75
|
-
# Add custom dataset database handler
|
|
76
|
-
from cognee.infrastructure.databases.dataset_database_handler.use_dataset_database_handler import (
|
|
77
|
-
use_dataset_database_handler,
|
|
78
|
-
)
|
|
79
|
-
|
|
80
|
-
use_dataset_database_handler(
|
|
81
|
-
"custom_lancedb_handler", LanceDBTestDatasetDatabaseHandler, "lancedb"
|
|
82
|
-
)
|
|
83
|
-
use_dataset_database_handler("custom_kuzu_handler", KuzuTestDatasetDatabaseHandler, "kuzu")
|
|
84
|
-
|
|
85
|
-
# Create a clean slate for cognee -- reset data and system state
|
|
86
|
-
print("Resetting cognee data...")
|
|
87
|
-
await cognee.prune.prune_data()
|
|
88
|
-
await cognee.prune.prune_system(metadata=True)
|
|
89
|
-
print("Data reset complete.\n")
|
|
90
|
-
|
|
91
|
-
# cognee knowledge graph will be created based on this text
|
|
92
|
-
text = """
|
|
93
|
-
Natural language processing (NLP) is an interdisciplinary
|
|
94
|
-
subfield of computer science and information retrieval.
|
|
95
|
-
"""
|
|
96
|
-
|
|
97
|
-
print("Adding text to cognee:")
|
|
98
|
-
print(text.strip())
|
|
99
|
-
|
|
100
|
-
# Add the text, and make it available for cognify
|
|
101
|
-
await cognee.add(text)
|
|
102
|
-
print("Text added successfully.\n")
|
|
103
|
-
|
|
104
|
-
# Use LLMs and cognee to create knowledge graph
|
|
105
|
-
await cognee.cognify()
|
|
106
|
-
print("Cognify process complete.\n")
|
|
107
|
-
|
|
108
|
-
query_text = "Tell me about NLP"
|
|
109
|
-
print(f"Searching cognee for insights with query: '{query_text}'")
|
|
110
|
-
# Query cognee for insights on the added text
|
|
111
|
-
search_results = await cognee.search(
|
|
112
|
-
query_type=SearchType.GRAPH_COMPLETION, query_text=query_text
|
|
113
|
-
)
|
|
114
|
-
|
|
115
|
-
print("Search results:")
|
|
116
|
-
# Display results
|
|
117
|
-
for result_text in search_results:
|
|
118
|
-
print(result_text)
|
|
119
|
-
|
|
120
|
-
default_user = await get_default_user()
|
|
121
|
-
# Assert that the custom database files were created based on the custom dataset database handlers
|
|
122
|
-
assert os.path.exists(
|
|
123
|
-
os.path.join(cognee_directory_path, "databases", str(default_user.id), "test.kuzu")
|
|
124
|
-
), "Graph database file not found."
|
|
125
|
-
assert os.path.exists(
|
|
126
|
-
os.path.join(cognee_directory_path, "databases", str(default_user.id), "test.lance.db")
|
|
127
|
-
), "Vector database file not found."
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
if __name__ == "__main__":
|
|
131
|
-
logger = setup_logging(log_level=ERROR)
|
|
132
|
-
loop = asyncio.new_event_loop()
|
|
133
|
-
asyncio.set_event_loop(loop)
|
|
134
|
-
try:
|
|
135
|
-
loop.run_until_complete(main())
|
|
136
|
-
finally:
|
|
137
|
-
loop.run_until_complete(loop.shutdown_asyncgens())
|
|
@@ -1,76 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import asyncio
|
|
3
|
-
import pathlib
|
|
4
|
-
from uuid import UUID
|
|
5
|
-
|
|
6
|
-
import cognee
|
|
7
|
-
from cognee.shared.logging_utils import setup_logging, ERROR
|
|
8
|
-
from cognee.modules.data.methods.delete_dataset import delete_dataset
|
|
9
|
-
from cognee.modules.data.methods.get_dataset import get_dataset
|
|
10
|
-
from cognee.modules.users.methods import get_default_user
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
async def main():
|
|
14
|
-
# Set data and system directory paths
|
|
15
|
-
data_directory_path = str(
|
|
16
|
-
pathlib.Path(
|
|
17
|
-
os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_dataset_delete")
|
|
18
|
-
).resolve()
|
|
19
|
-
)
|
|
20
|
-
cognee.config.data_root_directory(data_directory_path)
|
|
21
|
-
cognee_directory_path = str(
|
|
22
|
-
pathlib.Path(
|
|
23
|
-
os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_dataset_delete")
|
|
24
|
-
).resolve()
|
|
25
|
-
)
|
|
26
|
-
cognee.config.system_root_directory(cognee_directory_path)
|
|
27
|
-
|
|
28
|
-
# Create a clean slate for cognee -- reset data and system state
|
|
29
|
-
print("Resetting cognee data...")
|
|
30
|
-
await cognee.prune.prune_data()
|
|
31
|
-
await cognee.prune.prune_system(metadata=True)
|
|
32
|
-
print("Data reset complete.\n")
|
|
33
|
-
|
|
34
|
-
# cognee knowledge graph will be created based on this text
|
|
35
|
-
text = """
|
|
36
|
-
Natural language processing (NLP) is an interdisciplinary
|
|
37
|
-
subfield of computer science and information retrieval.
|
|
38
|
-
"""
|
|
39
|
-
|
|
40
|
-
# Add the text, and make it available for cognify
|
|
41
|
-
await cognee.add(text, "nlp_dataset")
|
|
42
|
-
await cognee.add("Quantum computing is the study of quantum computers.", "quantum_dataset")
|
|
43
|
-
|
|
44
|
-
# Use LLMs and cognee to create knowledge graph
|
|
45
|
-
ret_val = await cognee.cognify()
|
|
46
|
-
user = await get_default_user()
|
|
47
|
-
|
|
48
|
-
for val in ret_val:
|
|
49
|
-
dataset_id = str(val)
|
|
50
|
-
vector_db_path = os.path.join(
|
|
51
|
-
cognee_directory_path, "databases", str(user.id), dataset_id + ".lance.db"
|
|
52
|
-
)
|
|
53
|
-
graph_db_path = os.path.join(
|
|
54
|
-
cognee_directory_path, "databases", str(user.id), dataset_id + ".pkl"
|
|
55
|
-
)
|
|
56
|
-
|
|
57
|
-
# Check if databases are properly created and exist before deletion
|
|
58
|
-
assert os.path.exists(graph_db_path), "Graph database file not found."
|
|
59
|
-
assert os.path.exists(vector_db_path), "Vector database file not found."
|
|
60
|
-
|
|
61
|
-
dataset = await get_dataset(user_id=user.id, dataset_id=UUID(dataset_id))
|
|
62
|
-
await delete_dataset(dataset)
|
|
63
|
-
|
|
64
|
-
# Confirm databases have been deleted
|
|
65
|
-
assert not os.path.exists(graph_db_path), "Graph database file found."
|
|
66
|
-
assert not os.path.exists(vector_db_path), "Vector database file found."
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
if __name__ == "__main__":
|
|
70
|
-
logger = setup_logging(log_level=ERROR)
|
|
71
|
-
loop = asyncio.new_event_loop()
|
|
72
|
-
asyncio.set_event_loop(loop)
|
|
73
|
-
try:
|
|
74
|
-
loop.run_until_complete(main())
|
|
75
|
-
finally:
|
|
76
|
-
loop.run_until_complete(loop.shutdown_asyncgens())
|
|
@@ -1,170 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
End-to-end integration test for edge-centered payload and triplet embeddings.
|
|
3
|
-
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
import os
|
|
7
|
-
import pathlib
|
|
8
|
-
import cognee
|
|
9
|
-
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
10
|
-
from cognee.infrastructure.databases.vector import get_vector_engine
|
|
11
|
-
from cognee.modules.search.types import SearchType
|
|
12
|
-
from cognee.shared.logging_utils import get_logger
|
|
13
|
-
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
|
|
14
|
-
from cognee.modules.ontology.ontology_config import Config
|
|
15
|
-
|
|
16
|
-
logger = get_logger()
|
|
17
|
-
|
|
18
|
-
text_data = """
|
|
19
|
-
Apple is a technology company that produces the iPhone, iPad, and Mac computers.
|
|
20
|
-
The company is known for its innovative products and ecosystem integration.
|
|
21
|
-
|
|
22
|
-
Microsoft develops the Windows operating system and Office productivity suite.
|
|
23
|
-
They are also major players in cloud computing with Azure.
|
|
24
|
-
|
|
25
|
-
Google created the Android operating system and provides search engine services.
|
|
26
|
-
The company is a leader in artificial intelligence and machine learning.
|
|
27
|
-
"""
|
|
28
|
-
|
|
29
|
-
ontology_content = """<?xml version="1.0"?>
|
|
30
|
-
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
|
31
|
-
xmlns:owl="http://www.w3.org/2002/07/owl#"
|
|
32
|
-
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
|
|
33
|
-
xmlns="http://example.org/tech#"
|
|
34
|
-
xml:base="http://example.org/tech">
|
|
35
|
-
|
|
36
|
-
<owl:Ontology rdf:about="http://example.org/tech"/>
|
|
37
|
-
|
|
38
|
-
<!-- Classes -->
|
|
39
|
-
<owl:Class rdf:ID="Company"/>
|
|
40
|
-
<owl:Class rdf:ID="TechnologyCompany"/>
|
|
41
|
-
<owl:Class rdf:ID="Product"/>
|
|
42
|
-
<owl:Class rdf:ID="Software"/>
|
|
43
|
-
<owl:Class rdf:ID="Hardware"/>
|
|
44
|
-
<owl:Class rdf:ID="Service"/>
|
|
45
|
-
|
|
46
|
-
<rdf:Description rdf:about="#TechnologyCompany">
|
|
47
|
-
<rdfs:subClassOf rdf:resource="#Company"/>
|
|
48
|
-
<rdfs:comment>A company operating in the technology sector.</rdfs:comment>
|
|
49
|
-
</rdf:Description>
|
|
50
|
-
|
|
51
|
-
<rdf:Description rdf:about="#Software">
|
|
52
|
-
<rdfs:subClassOf rdf:resource="#Product"/>
|
|
53
|
-
<rdfs:comment>Software products and applications.</rdfs:comment>
|
|
54
|
-
</rdf:Description>
|
|
55
|
-
|
|
56
|
-
<rdf:Description rdf:about="#Hardware">
|
|
57
|
-
<rdfs:subClassOf rdf:resource="#Product"/>
|
|
58
|
-
<rdfs:comment>Physical hardware products.</rdfs:comment>
|
|
59
|
-
</rdf:Description>
|
|
60
|
-
|
|
61
|
-
<!-- Individuals -->
|
|
62
|
-
<TechnologyCompany rdf:ID="apple">
|
|
63
|
-
<rdfs:label>Apple</rdfs:label>
|
|
64
|
-
</TechnologyCompany>
|
|
65
|
-
|
|
66
|
-
<TechnologyCompany rdf:ID="microsoft">
|
|
67
|
-
<rdfs:label>Microsoft</rdfs:label>
|
|
68
|
-
</TechnologyCompany>
|
|
69
|
-
|
|
70
|
-
<TechnologyCompany rdf:ID="google">
|
|
71
|
-
<rdfs:label>Google</rdfs:label>
|
|
72
|
-
</TechnologyCompany>
|
|
73
|
-
|
|
74
|
-
<Hardware rdf:ID="iphone">
|
|
75
|
-
<rdfs:label>iPhone</rdfs:label>
|
|
76
|
-
</Hardware>
|
|
77
|
-
|
|
78
|
-
<Software rdf:ID="windows">
|
|
79
|
-
<rdfs:label>Windows</rdfs:label>
|
|
80
|
-
</Software>
|
|
81
|
-
|
|
82
|
-
<Software rdf:ID="android">
|
|
83
|
-
<rdfs:label>Android</rdfs:label>
|
|
84
|
-
</Software>
|
|
85
|
-
|
|
86
|
-
</rdf:RDF>"""
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
async def main():
|
|
90
|
-
data_directory_path = str(
|
|
91
|
-
pathlib.Path(
|
|
92
|
-
os.path.join(
|
|
93
|
-
pathlib.Path(__file__).parent,
|
|
94
|
-
".data_storage/test_edge_centered_payload",
|
|
95
|
-
)
|
|
96
|
-
).resolve()
|
|
97
|
-
)
|
|
98
|
-
cognee_directory_path = str(
|
|
99
|
-
pathlib.Path(
|
|
100
|
-
os.path.join(
|
|
101
|
-
pathlib.Path(__file__).parent,
|
|
102
|
-
".cognee_system/test_edge_centered_payload",
|
|
103
|
-
)
|
|
104
|
-
).resolve()
|
|
105
|
-
)
|
|
106
|
-
|
|
107
|
-
cognee.config.data_root_directory(data_directory_path)
|
|
108
|
-
cognee.config.system_root_directory(cognee_directory_path)
|
|
109
|
-
|
|
110
|
-
dataset_name = "tech_companies"
|
|
111
|
-
|
|
112
|
-
await cognee.prune.prune_data()
|
|
113
|
-
await cognee.prune.prune_system(metadata=True)
|
|
114
|
-
|
|
115
|
-
await cognee.add(data=text_data, dataset_name=dataset_name)
|
|
116
|
-
|
|
117
|
-
import tempfile
|
|
118
|
-
|
|
119
|
-
with tempfile.NamedTemporaryFile(mode="w", suffix=".owl", delete=False) as f:
|
|
120
|
-
f.write(ontology_content)
|
|
121
|
-
ontology_file_path = f.name
|
|
122
|
-
|
|
123
|
-
try:
|
|
124
|
-
logger.info(f"Loading ontology from: {ontology_file_path}")
|
|
125
|
-
config: Config = {
|
|
126
|
-
"ontology_config": {
|
|
127
|
-
"ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_file_path)
|
|
128
|
-
}
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
await cognee.cognify(datasets=[dataset_name], config=config)
|
|
132
|
-
graph_engine = await get_graph_engine()
|
|
133
|
-
nodes_phase2, edges_phase2 = await graph_engine.get_graph_data()
|
|
134
|
-
|
|
135
|
-
vector_engine = get_vector_engine()
|
|
136
|
-
triplets_phase2 = await vector_engine.search(
|
|
137
|
-
query_text="technology", limit=None, collection_name="Triplet_text"
|
|
138
|
-
)
|
|
139
|
-
|
|
140
|
-
assert len(triplets_phase2) == len(edges_phase2), (
|
|
141
|
-
f"Triplet embeddings and number of edges do not match. Vector db contains {len(triplets_phase2)} edge triplets while graph db contains {len(edges_phase2)} edges."
|
|
142
|
-
)
|
|
143
|
-
|
|
144
|
-
search_results_phase2 = await cognee.search(
|
|
145
|
-
query_type=SearchType.TRIPLET_COMPLETION,
|
|
146
|
-
query_text="What products does Apple make?",
|
|
147
|
-
)
|
|
148
|
-
|
|
149
|
-
assert search_results_phase2 is not None, (
|
|
150
|
-
"Search should return results for triplet embeddings in simple ontology use case."
|
|
151
|
-
)
|
|
152
|
-
|
|
153
|
-
finally:
|
|
154
|
-
if os.path.exists(ontology_file_path):
|
|
155
|
-
os.unlink(ontology_file_path)
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
if __name__ == "__main__":
|
|
159
|
-
import asyncio
|
|
160
|
-
from cognee.shared.logging_utils import setup_logging
|
|
161
|
-
|
|
162
|
-
setup_logging()
|
|
163
|
-
|
|
164
|
-
loop = asyncio.new_event_loop()
|
|
165
|
-
asyncio.set_event_loop(loop)
|
|
166
|
-
try:
|
|
167
|
-
loop.run_until_complete(main())
|
|
168
|
-
finally:
|
|
169
|
-
loop.run_until_complete(loop.shutdown_asyncgens())
|
|
170
|
-
loop.close()
|