cognee 0.5.0.dev0__py3-none-any.whl → 0.5.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131):
  1. cognee/api/client.py +1 -5
  2. cognee/api/v1/add/add.py +2 -1
  3. cognee/api/v1/cognify/cognify.py +24 -16
  4. cognee/api/v1/cognify/routers/__init__.py +0 -1
  5. cognee/api/v1/cognify/routers/get_cognify_router.py +3 -1
  6. cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
  7. cognee/api/v1/ontologies/ontologies.py +12 -37
  8. cognee/api/v1/ontologies/routers/get_ontology_router.py +27 -25
  9. cognee/api/v1/search/search.py +4 -0
  10. cognee/api/v1/ui/node_setup.py +360 -0
  11. cognee/api/v1/ui/npm_utils.py +50 -0
  12. cognee/api/v1/ui/ui.py +38 -68
  13. cognee/context_global_variables.py +61 -16
  14. cognee/eval_framework/Dockerfile +29 -0
  15. cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
  16. cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
  17. cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
  18. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
  19. cognee/eval_framework/eval_config.py +2 -2
  20. cognee/eval_framework/modal_run_eval.py +16 -28
  21. cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
  22. cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
  23. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
  24. cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
  25. cognee/infrastructure/databases/graph/config.py +3 -0
  26. cognee/infrastructure/databases/graph/get_graph_engine.py +1 -0
  27. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
  28. cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
  29. cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
  30. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
  31. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
  32. cognee/infrastructure/databases/utils/__init__.py +3 -0
  33. cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
  34. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +62 -48
  35. cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
  36. cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
  37. cognee/infrastructure/databases/vector/config.py +2 -0
  38. cognee/infrastructure/databases/vector/create_vector_engine.py +1 -0
  39. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
  40. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
  41. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -10
  42. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
  43. cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
  44. cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
  45. cognee/infrastructure/files/storage/s3_config.py +2 -0
  46. cognee/infrastructure/llm/LLMGateway.py +5 -2
  47. cognee/infrastructure/llm/config.py +35 -0
  48. cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
  49. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
  50. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +17 -16
  51. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
  52. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
  53. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +40 -37
  54. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +39 -36
  55. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +19 -1
  56. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +11 -9
  57. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +23 -21
  58. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +42 -34
  59. cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
  60. cognee/modules/cognify/config.py +2 -0
  61. cognee/modules/data/deletion/prune_system.py +52 -2
  62. cognee/modules/data/methods/delete_dataset.py +26 -0
  63. cognee/modules/engine/models/Triplet.py +9 -0
  64. cognee/modules/engine/models/__init__.py +1 -0
  65. cognee/modules/graph/cognee_graph/CogneeGraph.py +85 -37
  66. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
  67. cognee/modules/memify/memify.py +1 -7
  68. cognee/modules/pipelines/operations/pipeline.py +18 -2
  69. cognee/modules/retrieval/__init__.py +1 -1
  70. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +4 -0
  71. cognee/modules/retrieval/graph_completion_cot_retriever.py +4 -0
  72. cognee/modules/retrieval/graph_completion_retriever.py +10 -0
  73. cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
  74. cognee/modules/retrieval/register_retriever.py +10 -0
  75. cognee/modules/retrieval/registered_community_retrievers.py +1 -0
  76. cognee/modules/retrieval/temporal_retriever.py +4 -0
  77. cognee/modules/retrieval/triplet_retriever.py +182 -0
  78. cognee/modules/retrieval/utils/brute_force_triplet_search.py +42 -10
  79. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +8 -1
  80. cognee/modules/search/methods/get_search_type_tools.py +54 -8
  81. cognee/modules/search/methods/no_access_control_search.py +4 -0
  82. cognee/modules/search/methods/search.py +21 -0
  83. cognee/modules/search/types/SearchType.py +1 -1
  84. cognee/modules/settings/get_settings.py +19 -0
  85. cognee/modules/users/methods/get_authenticated_user.py +2 -2
  86. cognee/modules/users/models/DatasetDatabase.py +15 -3
  87. cognee/shared/logging_utils.py +4 -0
  88. cognee/shared/rate_limiting.py +30 -0
  89. cognee/tasks/documents/__init__.py +0 -1
  90. cognee/tasks/graph/extract_graph_from_data.py +9 -10
  91. cognee/tasks/memify/get_triplet_datapoints.py +289 -0
  92. cognee/tasks/storage/add_data_points.py +142 -2
  93. cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
  94. cognee/tests/integration/tasks/test_add_data_points.py +139 -0
  95. cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
  96. cognee/tests/test_cognee_server_start.py +2 -4
  97. cognee/tests/test_conversation_history.py +23 -1
  98. cognee/tests/test_dataset_database_handler.py +137 -0
  99. cognee/tests/test_dataset_delete.py +76 -0
  100. cognee/tests/test_edge_centered_payload.py +170 -0
  101. cognee/tests/test_pipeline_cache.py +164 -0
  102. cognee/tests/test_search_db.py +37 -1
  103. cognee/tests/unit/api/test_ontology_endpoint.py +77 -89
  104. cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
  105. cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
  106. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
  107. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
  108. cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
  109. cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
  110. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
  111. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
  112. cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
  113. {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/METADATA +76 -89
  114. {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/RECORD +118 -97
  115. {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/WHEEL +1 -1
  116. cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
  117. cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
  118. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
  119. cognee/modules/retrieval/code_retriever.py +0 -232
  120. cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
  121. cognee/tasks/code/get_local_dependencies_checker.py +0 -20
  122. cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
  123. cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
  124. cognee/tasks/repo_processor/__init__.py +0 -2
  125. cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
  126. cognee/tasks/repo_processor/get_non_code_files.py +0 -158
  127. cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
  128. cognee/tests/test_delete_bmw_example.py +0 -60
  129. {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/entry_points.txt +0 -0
  130. {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/licenses/LICENSE +0 -0
  131. {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,84 @@
1
+ import os
2
+ import pytest
3
+ import pathlib
4
+ import pytest_asyncio
5
+ import cognee
6
+
7
+ from cognee.low_level import setup
8
+ from cognee.tasks.storage import add_data_points
9
+ from cognee.modules.retrieval.exceptions.exceptions import NoDataError
10
+ from cognee.modules.retrieval.triplet_retriever import TripletRetriever
11
+ from cognee.modules.engine.models import Triplet
12
+
13
+
14
@pytest_asyncio.fixture
async def setup_test_environment_with_triplets():
    """Provision an isolated cognee environment seeded with two triplets.

    Points cognee's system/data roots at test-local directories, wipes any
    previous state, runs setup(), and stores two Triplet data points before
    yielding to the test. Teardown is best-effort: prune failures during
    cleanup are deliberately ignored so they cannot mask the test result.
    """
    root = pathlib.Path(__file__).parent.parent.parent.parent
    cognee.config.system_root_directory(
        str(root / ".cognee_system/test_triplet_retriever_context_simple")
    )
    cognee.config.data_root_directory(
        str(root / ".data_storage/test_triplet_retriever_context_simple")
    )

    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await setup()

    seed_triplets = [
        Triplet(
            from_node_id="node1",
            to_node_id="node2",
            text="Alice knows Bob",
        ),
        Triplet(
            from_node_id="node2",
            to_node_id="node3",
            text="Bob works at Tech Corp",
        ),
    ]
    await add_data_points(seed_triplets)

    yield

    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception:
        # Best-effort teardown; never fail the test on cleanup errors.
        pass
49
+
50
+
51
@pytest_asyncio.fixture
async def setup_test_environment_empty():
    """Provision an isolated cognee environment that contains no triplets.

    Redirects cognee's system and data roots to dedicated test directories
    and prunes all prior state before yielding. Teardown prunes again on a
    best-effort basis so cleanup errors never fail the test.
    """
    root = pathlib.Path(__file__).parent.parent.parent.parent
    cognee.config.system_root_directory(
        str(root / ".cognee_system/test_triplet_retriever_context_empty_collection")
    )
    cognee.config.data_root_directory(
        str(root / ".data_storage/test_triplet_retriever_context_empty_collection")
    )

    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    yield

    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception:
        # Best-effort teardown; ignore cleanup failures.
        pass
75
+
76
+
77
@pytest.mark.asyncio
async def test_triplet_retriever_context_simple(setup_test_environment_with_triplets):
    """Integration test: TripletRetriever surfaces stored triplet text as context."""
    # The fixture seeded "Alice knows Bob"; a query for Alice must retrieve it.
    context = await TripletRetriever(top_k=5).get_context("Alice")

    assert "Alice knows Bob" in context, "Failed to get Alice triplet"
@@ -0,0 +1,139 @@
1
+ import pathlib
2
+ import pytest
3
+ import pytest_asyncio
4
+
5
+ import cognee
6
+ from cognee.low_level import setup
7
+ from cognee.infrastructure.engine import DataPoint
8
+ from cognee.tasks.storage.add_data_points import add_data_points
9
+ from cognee.tasks.storage.exceptions import InvalidDataPointsInAddDataPointsError
10
+ from cognee.infrastructure.databases.graph import get_graph_engine
11
+
12
+
13
class Person(DataPoint):
    """Minimal test DataPoint used as a graph node.

    Only `name` participates in vector indexing (per `index_fields`).
    """

    name: str
    age: int
    metadata: dict = {"index_fields": ["name"]}
17
+
18
+
19
class Company(DataPoint):
    """Test DataPoint indexed on both of its fields.

    Both `name` and `industry` are listed in `index_fields`, so both are
    embedded for vector search.
    """

    name: str
    industry: str
    metadata: dict = {"index_fields": ["name", "industry"]}
23
+
24
+
25
@pytest_asyncio.fixture
async def clean_test_environment():
    """Provide a pristine cognee environment for add_data_points tests.

    Redirects the system/data roots to test-local directories, prunes all
    prior state, and runs setup() before yielding. Teardown prunes again,
    swallowing any cleanup error so it cannot obscure the test outcome.
    """
    root = pathlib.Path(__file__).parent.parent.parent.parent
    cognee.config.system_root_directory(
        str(root / ".cognee_system/test_add_data_points_integration")
    )
    cognee.config.data_root_directory(
        str(root / ".data_storage/test_add_data_points_integration")
    )

    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    await setup()

    yield

    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception:
        # Best-effort teardown; ignore cleanup failures.
        pass
46
+
47
+
48
@pytest.mark.asyncio
async def test_add_data_points_comprehensive(clean_test_environment):
    """Comprehensive integration test for add_data_points.

    Walks through the main behaviors in one cumulative session: plain
    insertion, empty input, custom edges, nested-DataPoint edges, triplet
    embedding, sequential batches, bidirectional edges, and input validation.
    Node/edge counts are asserted cumulatively after each stage.
    """

    # --- Plain insertion of data points ------------------------------------
    alice = Person(name="Alice", age=30)
    bob = Person(name="Bob", age=25)
    inserted = await add_data_points([alice, bob])

    assert inserted == [alice, bob]
    assert len(inserted) == 2

    graph_engine = await get_graph_engine()
    nodes, edges = await graph_engine.get_graph_data()
    assert len(nodes) >= 2

    # --- Empty input is a no-op --------------------------------------------
    assert await add_data_points([]) == []

    # --- Explicit custom edge between two nodes ----------------------------
    charlie = Person(name="Charlie", age=35)
    diana = Person(name="Diana", age=32)
    knows_edge = (str(charlie.id), str(diana.id), "knows", {"edge_text": "friends with"})

    inserted_custom = await add_data_points([charlie, diana], custom_edges=[knows_edge])
    assert len(inserted_custom) == 2

    nodes, edges = await graph_engine.get_graph_data()
    assert len(edges) == 1
    assert len(nodes) == 4

    # --- Nested DataPoint field produces an implicit relationship edge -----
    class Employee(DataPoint):
        name: str
        works_at: Company
        metadata: dict = {"index_fields": ["name"]}

    company = Company(name="TechCorp", industry="Technology")
    employee = Employee(name="Eve", works_at=company)

    inserted_rel = await add_data_points([employee])
    assert len(inserted_rel) == 1

    nodes, edges = await graph_engine.get_graph_data()
    assert len(nodes) == 6
    assert len(edges) == 2

    # --- Custom edge with triplet embedding enabled ------------------------
    frank = Person(name="Frank", age=40)
    grace = Person(name="Grace", age=38)
    married_edge = (str(frank.id), str(grace.id), "married_to", {"edge_text": "is married to"})

    inserted_triplet = await add_data_points(
        [frank, grace], custom_edges=[married_edge], embed_triplets=True
    )
    assert len(inserted_triplet) == 2

    nodes, edges = await graph_engine.get_graph_data()
    assert len(nodes) == 8
    assert len(edges) == 3

    # --- Sequential batches accumulate nodes but add no edges --------------
    first_batch = [Person(name="Leo", age=25), Person(name="Mia", age=30)]
    second_batch = [Person(name="Noah", age=35), Person(name="Olivia", age=40)]

    assert len(await add_data_points(first_batch)) == 2
    assert len(await add_data_points(second_batch)) == 2

    nodes, edges = await graph_engine.get_graph_data()
    assert len(nodes) == 12
    assert len(edges) == 3

    # --- Bidirectional custom edges are stored as two distinct edges -------
    paul = Person(name="Paul", age=33)
    quinn = Person(name="Quinn", age=31)
    forward = (str(paul.id), str(quinn.id), "colleague_of", {"edge_text": "works with"})
    backward = (str(quinn.id), str(paul.id), "colleague_of", {"edge_text": "works with"})

    inserted_bi = await add_data_points([paul, quinn], custom_edges=[forward, backward])
    assert len(inserted_bi) == 2

    nodes, edges = await graph_engine.get_graph_data()
    assert len(nodes) == 14
    assert len(edges) == 5

    # --- Invalid inputs raise and leave the graph untouched ----------------
    stray_person = Person(name="Invalid", age=50)
    with pytest.raises(InvalidDataPointsInAddDataPointsError, match="must be a list"):
        await add_data_points(stray_person)

    with pytest.raises(InvalidDataPointsInAddDataPointsError, match="must be a DataPoint"):
        await add_data_points(["not", "datapoints"])

    final_nodes, final_edges = await graph_engine.get_graph_data()
    assert len(final_nodes) == 14
    assert len(final_edges) == 5
@@ -0,0 +1,69 @@
1
+ import os
2
+ import pathlib
3
+ import pytest
4
+ import pytest_asyncio
5
+ from unittest.mock import AsyncMock, patch
6
+
7
+ import cognee
8
+ from cognee.tasks.memify.get_triplet_datapoints import get_triplet_datapoints
9
+ from cognee.modules.engine.models import Triplet
10
+
11
+
12
@pytest_asyncio.fixture
async def setup_test_environment():
    """Set up a clean test environment with a simple graph.

    Points cognee's data/system roots at test-local directories, prunes all
    previous state, then builds a small knowledge graph from one paragraph of
    text via add() + cognify(). Yields the dataset name to the test.

    Teardown is best-effort (try/except), consistent with the other
    integration fixtures in this suite, so a cleanup failure cannot mask
    the actual test outcome.
    """
    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
    data_directory_path = str(base_dir / ".data_storage/test_get_triplet_datapoints_integration")
    cognee_directory_path = str(base_dir / ".cognee_system/test_get_triplet_datapoints_integration")

    cognee.config.data_root_directory(data_directory_path)
    cognee.config.system_root_directory(cognee_directory_path)

    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    dataset_name = "test_triplets"

    text = "Volkswagen is a german car manufacturer from Wolfsburg. They produce different models such as Golf, Polo and Touareg."
    await cognee.add(text, dataset_name)
    await cognee.cognify([dataset_name])

    yield dataset_name

    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception:
        # Best-effort teardown; ignore cleanup failures.
        pass
35
+
36
+
37
@pytest.mark.asyncio
async def test_get_triplet_datapoints_integration(setup_test_environment):
    """Integration test: verify get_triplet_datapoints works with real graph data."""

    from cognee.infrastructure.databases.graph import get_graph_engine

    graph_engine = await get_graph_engine()

    if not hasattr(graph_engine, "get_triplets_batch"):
        pytest.skip("Graph engine does not support get_triplets_batch")

    # Collect generated triplets while suppressing vector indexing, which is
    # not under test here.
    collected = []
    with patch(
        "cognee.tasks.memify.get_triplet_datapoints.index_data_points", new_callable=AsyncMock
    ):
        async for datapoint in get_triplet_datapoints([{}], triplets_batch_size=10):
            collected.append(datapoint)

    nodes, edges = await graph_engine.get_graph_data()

    # If the graph has edges but the generator yielded nothing, probe the
    # batch query directly to distinguish "empty graph" from "broken query".
    if len(edges) > 0 and len(collected) == 0:
        probe = await graph_engine.get_triplets_batch(offset=0, limit=10)
        if len(probe) == 0:
            pytest.fail(
                f"Edges exist in graph ({len(edges)} edges) but get_triplets_batch found none. "
                f"This indicates the query pattern may not match the graph structure."
            )

    for datapoint in collected:
        assert isinstance(datapoint, Triplet), "Each item should be a Triplet instance"
        assert datapoint.from_node_id, "Triplet should have from_node_id"
        assert datapoint.to_node_id, "Triplet should have to_node_id"
        assert datapoint.text, "Triplet should have embeddable text"
@@ -25,8 +25,6 @@ class TestCogneeServerStart(unittest.TestCase):
25
25
  "--port",
26
26
  "8000",
27
27
  ],
28
- stdout=subprocess.PIPE,
29
- stderr=subprocess.PIPE,
30
28
  preexec_fn=os.setsid,
31
29
  )
32
30
  # Give the server some time to start
@@ -150,8 +148,8 @@ class TestCogneeServerStart(unittest.TestCase):
150
148
  headers=headers,
151
149
  files=[("ontology_file", ("test.owl", ontology_content, "application/xml"))],
152
150
  data={
153
- "ontology_key": json.dumps([ontology_key]),
154
- "description": json.dumps(["Test ontology"]),
151
+ "ontology_key": ontology_key,
152
+ "description": "Test ontology",
155
153
  },
156
154
  )
157
155
  self.assertEqual(ontology_response.status_code, 200)
@@ -8,10 +8,10 @@ Tests all retrievers that save conversation history to Redis cache:
8
8
  4. GRAPH_COMPLETION_CONTEXT_EXTENSION
9
9
  5. GRAPH_SUMMARY_COMPLETION
10
10
  6. TEMPORAL
11
+ 7. TRIPLET_COMPLETION
11
12
  """
12
13
 
13
14
  import os
14
- import shutil
15
15
  import cognee
16
16
  import pathlib
17
17
 
@@ -63,6 +63,10 @@ async def main():
63
63
 
64
64
  user = await get_default_user()
65
65
 
66
+ from cognee.memify_pipelines.create_triplet_embeddings import create_triplet_embeddings
67
+
68
+ await create_triplet_embeddings(user=user, dataset=dataset_name)
69
+
66
70
  cache_engine = get_cache_engine()
67
71
  assert cache_engine is not None, "Cache engine should be available for testing"
68
72
 
@@ -216,6 +220,24 @@ async def main():
216
220
  ]
217
221
  assert len(our_qa_temporal) == 1, "Should find Temporal question in history"
218
222
 
223
+ session_id_triplet = "test_session_triplet"
224
+
225
+ result_triplet = await cognee.search(
226
+ query_type=SearchType.TRIPLET_COMPLETION,
227
+ query_text="What companies are mentioned?",
228
+ session_id=session_id_triplet,
229
+ )
230
+
231
+ assert isinstance(result_triplet, list) and len(result_triplet) > 0, (
232
+ f"TRIPLET_COMPLETION should return non-empty list, got: {result_triplet!r}"
233
+ )
234
+
235
+ history_triplet = await cache_engine.get_latest_qa(str(user.id), session_id_triplet, last_n=10)
236
+ our_qa_triplet = [
237
+ h for h in history_triplet if h["question"] == "What companies are mentioned?"
238
+ ]
239
+ assert len(our_qa_triplet) == 1, "Should find Triplet question in history"
240
+
219
241
  from cognee.modules.retrieval.utils.session_cache import (
220
242
  get_conversation_history,
221
243
  )
@@ -0,0 +1,137 @@
1
+ import asyncio
2
+ import os
3
+
4
+ # Set custom dataset database handler environment variable
5
+ os.environ["VECTOR_DATASET_DATABASE_HANDLER"] = "custom_lancedb_handler"
6
+ os.environ["GRAPH_DATASET_DATABASE_HANDLER"] = "custom_kuzu_handler"
7
+
8
+ import cognee
9
+ from cognee.modules.users.methods import get_default_user
10
+ from cognee.infrastructure.databases.dataset_database_handler import DatasetDatabaseHandlerInterface
11
+ from cognee.shared.logging_utils import setup_logging, ERROR
12
+ from cognee.api.v1.search import SearchType
13
+
14
+
15
class LanceDBTestDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
    """Test handler that places a dataset's LanceDB vector database under
    this test's .cognee_system directory and returns its connection info."""

    @classmethod
    async def create_dataset(cls, dataset_id, user):
        import pathlib

        system_root = str(
            pathlib.Path(
                os.path.join(
                    pathlib.Path(__file__).parent, ".cognee_system/test_dataset_database_handler"
                )
            ).resolve()
        )
        db_dir = os.path.join(system_root, "databases", str(user.id))
        os.makedirs(db_dir, exist_ok=True)

        vector_db_name = "test.lance.db"

        # Connection info consumed by cognee when it opens this dataset's DB.
        return {
            "vector_dataset_database_handler": "custom_lancedb_handler",
            "vector_database_name": vector_db_name,
            "vector_database_url": os.path.join(db_dir, vector_db_name),
            "vector_database_provider": "lancedb",
        }
38
+
39
+
40
class KuzuTestDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
    """Test handler that resolves a dataset's Kuzu graph database location
    and returns its connection info."""

    @classmethod
    async def create_dataset(cls, dataset_id, user):
        # NOTE(review): this path is relative to the current working
        # directory, unlike the LanceDB handler's absolute path — presumably
        # intentional for this test; confirm against the caller.
        db_dir = os.path.join("databases", str(user.id))
        os.makedirs(db_dir, exist_ok=True)

        graph_db_name = "test.kuzu"
        return {
            "graph_dataset_database_handler": "custom_kuzu_handler",
            "graph_database_name": graph_db_name,
            "graph_database_url": os.path.join(db_dir, graph_db_name),
            "graph_database_provider": "kuzu",
        }
53
+
54
+
55
async def main():
    """End-to-end check that custom dataset database handlers are honored.

    Registers the LanceDB/Kuzu test handlers, runs an add -> cognify ->
    search cycle, and asserts that the handler-specified database files
    were actually created on disk.
    """
    import pathlib

    here = pathlib.Path(__file__).parent
    data_directory_path = str(
        pathlib.Path(os.path.join(here, ".data_storage/test_dataset_database_handler")).resolve()
    )
    cognee.config.data_root_directory(data_directory_path)
    cognee_directory_path = str(
        pathlib.Path(os.path.join(here, ".cognee_system/test_dataset_database_handler")).resolve()
    )
    cognee.config.system_root_directory(cognee_directory_path)

    # Register the custom dataset database handlers declared above.
    from cognee.infrastructure.databases.dataset_database_handler.use_dataset_database_handler import (
        use_dataset_database_handler,
    )

    use_dataset_database_handler(
        "custom_lancedb_handler", LanceDBTestDatasetDatabaseHandler, "lancedb"
    )
    use_dataset_database_handler("custom_kuzu_handler", KuzuTestDatasetDatabaseHandler, "kuzu")

    # Start from a clean slate: reset cognee data and system state.
    print("Resetting cognee data...")
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    print("Data reset complete.\n")

    # The knowledge graph will be built from this text.
    text = """
    Natural language processing (NLP) is an interdisciplinary
    subfield of computer science and information retrieval.
    """

    print("Adding text to cognee:")
    print(text.strip())

    # Ingest the text so cognify can process it.
    await cognee.add(text)
    print("Text added successfully.\n")

    # Build the knowledge graph with LLM assistance.
    await cognee.cognify()
    print("Cognify process complete.\n")

    query_text = "Tell me about NLP"
    print(f"Searching cognee for insights with query: '{query_text}'")
    # Query cognee for insights on the added text.
    search_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION, query_text=query_text
    )

    print("Search results:")
    for result_text in search_results:
        print(result_text)

    default_user = await get_default_user()
    # The custom handlers must have produced these database files.
    assert os.path.exists(
        os.path.join(cognee_directory_path, "databases", str(default_user.id), "test.kuzu")
    ), "Graph database file not found."
    assert os.path.exists(
        os.path.join(cognee_directory_path, "databases", str(default_user.id), "test.lance.db")
    ), "Vector database file not found."
128
+
129
+
130
if __name__ == "__main__":
    # Script entry point: run main() on a dedicated event loop, drain any
    # async generators on exit, and close the loop to release its resources
    # (the loop was previously left open, leaking the selector/FDs).
    logger = setup_logging(log_level=ERROR)
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(main())
    finally:
        loop.run_until_complete(loop.shutdown_asyncgens())
        loop.close()
@@ -0,0 +1,76 @@
1
+ import os
2
+ import asyncio
3
+ import pathlib
4
+ from uuid import UUID
5
+
6
+ import cognee
7
+ from cognee.shared.logging_utils import setup_logging, ERROR
8
+ from cognee.modules.data.methods.delete_dataset import delete_dataset
9
+ from cognee.modules.data.methods.get_dataset import get_dataset
10
+ from cognee.modules.users.methods import get_default_user
11
+
12
+
13
async def main():
    """Verify that deleting a dataset removes its per-dataset database files."""
    here = pathlib.Path(__file__).parent

    # Route cognee's data and system storage into test-local directories.
    data_directory_path = str(
        pathlib.Path(os.path.join(here, ".data_storage/test_dataset_delete")).resolve()
    )
    cognee.config.data_root_directory(data_directory_path)
    cognee_directory_path = str(
        pathlib.Path(os.path.join(here, ".cognee_system/test_dataset_delete")).resolve()
    )
    cognee.config.system_root_directory(cognee_directory_path)

    # Start from a clean slate: reset cognee data and system state.
    print("Resetting cognee data...")
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    print("Data reset complete.\n")

    # The knowledge graph will be built from this text.
    text = """
    Natural language processing (NLP) is an interdisciplinary
    subfield of computer science and information retrieval.
    """

    # Two datasets so deletion is exercised for more than one dataset id.
    await cognee.add(text, "nlp_dataset")
    await cognee.add("Quantum computing is the study of quantum computers.", "quantum_dataset")

    # cognify returns the ids of the processed datasets.
    ret_val = await cognee.cognify()
    user = await get_default_user()

    for val in ret_val:
        dataset_id = str(val)
        db_root = os.path.join(cognee_directory_path, "databases", str(user.id))
        vector_db_path = os.path.join(db_root, dataset_id + ".lance.db")
        graph_db_path = os.path.join(db_root, dataset_id + ".pkl")

        # Both database files must exist before deletion.
        assert os.path.exists(graph_db_path), "Graph database file not found."
        assert os.path.exists(vector_db_path), "Vector database file not found."

        dataset = await get_dataset(user_id=user.id, dataset_id=UUID(dataset_id))
        await delete_dataset(dataset)

        # And both must be gone afterwards.
        assert not os.path.exists(graph_db_path), "Graph database file found."
        assert not os.path.exists(vector_db_path), "Vector database file found."
+
68
+
69
+ if __name__ == "__main__":
70
+ logger = setup_logging(log_level=ERROR)
71
+ loop = asyncio.new_event_loop()
72
+ asyncio.set_event_loop(loop)
73
+ try:
74
+ loop.run_until_complete(main())
75
+ finally:
76
+ loop.run_until_complete(loop.shutdown_asyncgens())