cognee 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +1 -0
- cognee/api/client.py +9 -5
- cognee/api/v1/add/add.py +2 -1
- cognee/api/v1/add/routers/get_add_router.py +3 -1
- cognee/api/v1/cognify/cognify.py +24 -16
- cognee/api/v1/cognify/routers/__init__.py +0 -1
- cognee/api/v1/cognify/routers/get_cognify_router.py +30 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
- cognee/api/v1/ontologies/__init__.py +4 -0
- cognee/api/v1/ontologies/ontologies.py +158 -0
- cognee/api/v1/ontologies/routers/__init__.py +0 -0
- cognee/api/v1/ontologies/routers/get_ontology_router.py +109 -0
- cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
- cognee/api/v1/search/search.py +4 -0
- cognee/api/v1/ui/node_setup.py +360 -0
- cognee/api/v1/ui/npm_utils.py +50 -0
- cognee/api/v1/ui/ui.py +38 -68
- cognee/cli/commands/cognify_command.py +8 -1
- cognee/cli/config.py +1 -1
- cognee/context_global_variables.py +86 -9
- cognee/eval_framework/Dockerfile +29 -0
- cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
- cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
- cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
- cognee/eval_framework/eval_config.py +2 -2
- cognee/eval_framework/modal_run_eval.py +16 -28
- cognee/infrastructure/databases/cache/config.py +3 -1
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
- cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
- cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
- cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
- cognee/infrastructure/databases/graph/config.py +7 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +3 -0
- cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
- cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
- cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
- cognee/infrastructure/databases/utils/__init__.py +3 -0
- cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +66 -18
- cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
- cognee/infrastructure/databases/vector/config.py +5 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +6 -1
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -13
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
- cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
- cognee/infrastructure/engine/models/Edge.py +13 -1
- cognee/infrastructure/files/storage/s3_config.py +2 -0
- cognee/infrastructure/files/utils/guess_file_type.py +4 -0
- cognee/infrastructure/llm/LLMGateway.py +5 -2
- cognee/infrastructure/llm/config.py +37 -0
- cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +22 -18
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +47 -38
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +46 -37
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +20 -10
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +23 -11
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +36 -23
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +47 -36
- cognee/infrastructure/loaders/LoaderEngine.py +1 -0
- cognee/infrastructure/loaders/core/__init__.py +2 -1
- cognee/infrastructure/loaders/core/csv_loader.py +93 -0
- cognee/infrastructure/loaders/core/text_loader.py +1 -2
- cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
- cognee/infrastructure/loaders/supported_loaders.py +2 -1
- cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
- cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
- cognee/modules/chunking/CsvChunker.py +35 -0
- cognee/modules/chunking/models/DocumentChunk.py +2 -1
- cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/deletion/prune_system.py +52 -2
- cognee/modules/data/methods/__init__.py +1 -0
- cognee/modules/data/methods/create_dataset.py +4 -2
- cognee/modules/data/methods/delete_dataset.py +26 -0
- cognee/modules/data/methods/get_dataset_ids.py +5 -1
- cognee/modules/data/methods/get_unique_data_id.py +68 -0
- cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
- cognee/modules/data/models/Dataset.py +2 -0
- cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
- cognee/modules/data/processing/document_types/__init__.py +1 -0
- cognee/modules/engine/models/Triplet.py +9 -0
- cognee/modules/engine/models/__init__.py +1 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +89 -39
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
- cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
- cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
- cognee/modules/ingestion/identify.py +4 -4
- cognee/modules/memify/memify.py +1 -7
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
- cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
- cognee/modules/pipelines/operations/pipeline.py +18 -2
- cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
- cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
- cognee/modules/retrieval/__init__.py +1 -1
- cognee/modules/retrieval/base_graph_retriever.py +7 -3
- cognee/modules/retrieval/base_retriever.py +7 -3
- cognee/modules/retrieval/completion_retriever.py +11 -4
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +10 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +18 -51
- cognee/modules/retrieval/graph_completion_retriever.py +14 -1
- cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
- cognee/modules/retrieval/register_retriever.py +10 -0
- cognee/modules/retrieval/registered_community_retrievers.py +1 -0
- cognee/modules/retrieval/temporal_retriever.py +13 -2
- cognee/modules/retrieval/triplet_retriever.py +182 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +43 -11
- cognee/modules/retrieval/utils/completion.py +2 -22
- cognee/modules/run_custom_pipeline/__init__.py +1 -0
- cognee/modules/run_custom_pipeline/run_custom_pipeline.py +76 -0
- cognee/modules/search/methods/get_search_type_tools.py +54 -8
- cognee/modules/search/methods/no_access_control_search.py +4 -0
- cognee/modules/search/methods/search.py +26 -3
- cognee/modules/search/types/SearchType.py +1 -1
- cognee/modules/settings/get_settings.py +19 -0
- cognee/modules/users/methods/create_user.py +12 -27
- cognee/modules/users/methods/get_authenticated_user.py +3 -2
- cognee/modules/users/methods/get_default_user.py +4 -2
- cognee/modules/users/methods/get_user.py +1 -1
- cognee/modules/users/methods/get_user_by_email.py +1 -1
- cognee/modules/users/models/DatasetDatabase.py +24 -3
- cognee/modules/users/models/Tenant.py +6 -7
- cognee/modules/users/models/User.py +6 -5
- cognee/modules/users/models/UserTenant.py +12 -0
- cognee/modules/users/models/__init__.py +1 -0
- cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
- cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
- cognee/modules/users/tenants/methods/__init__.py +1 -0
- cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
- cognee/modules/users/tenants/methods/create_tenant.py +22 -8
- cognee/modules/users/tenants/methods/select_tenant.py +62 -0
- cognee/shared/logging_utils.py +6 -0
- cognee/shared/rate_limiting.py +30 -0
- cognee/tasks/chunks/__init__.py +1 -0
- cognee/tasks/chunks/chunk_by_row.py +94 -0
- cognee/tasks/documents/__init__.py +0 -1
- cognee/tasks/documents/classify_documents.py +2 -0
- cognee/tasks/feedback/generate_improved_answers.py +3 -3
- cognee/tasks/graph/extract_graph_from_data.py +9 -10
- cognee/tasks/ingestion/ingest_data.py +1 -1
- cognee/tasks/memify/__init__.py +2 -0
- cognee/tasks/memify/cognify_session.py +41 -0
- cognee/tasks/memify/extract_user_sessions.py +73 -0
- cognee/tasks/memify/get_triplet_datapoints.py +289 -0
- cognee/tasks/storage/add_data_points.py +142 -2
- cognee/tasks/storage/index_data_points.py +33 -22
- cognee/tasks/storage/index_graph_edges.py +37 -57
- cognee/tests/integration/documents/CsvDocument_test.py +70 -0
- cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
- cognee/tests/integration/tasks/test_add_data_points.py +139 -0
- cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
- cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
- cognee/tests/test_add_docling_document.py +2 -2
- cognee/tests/test_cognee_server_start.py +84 -3
- cognee/tests/test_conversation_history.py +68 -5
- cognee/tests/test_data/example_with_header.csv +3 -0
- cognee/tests/test_dataset_database_handler.py +137 -0
- cognee/tests/test_dataset_delete.py +76 -0
- cognee/tests/test_edge_centered_payload.py +170 -0
- cognee/tests/test_edge_ingestion.py +27 -0
- cognee/tests/test_feedback_enrichment.py +1 -1
- cognee/tests/test_library.py +6 -4
- cognee/tests/test_load.py +62 -0
- cognee/tests/test_multi_tenancy.py +165 -0
- cognee/tests/test_parallel_databases.py +2 -0
- cognee/tests/test_pipeline_cache.py +164 -0
- cognee/tests/test_relational_db_migration.py +54 -2
- cognee/tests/test_search_db.py +44 -2
- cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
- cognee/tests/unit/api/test_ontology_endpoint.py +252 -0
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
- cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
- cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
- cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
- cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
- cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
- cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
- cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
- cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
- cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
- cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
- cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
- cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/METADATA +11 -7
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/RECORD +212 -160
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/entry_points.txt +0 -1
- cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
- cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
- cognee/modules/retrieval/code_retriever.py +0 -232
- cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
- cognee/tasks/code/get_local_dependencies_checker.py +0 -20
- cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
- cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
- cognee/tasks/repo_processor/__init__.py +0 -2
- cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
- cognee/tasks/repo_processor/get_non_code_files.py +0 -158
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/WHEEL +0 -0
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/licenses/NOTICE.md +0 -0
cognee/tests/test_cognee_server_start.py

@@ -7,6 +7,7 @@ import requests
 from pathlib import Path
 import sys
 import uuid
+import json
 
 
 class TestCogneeServerStart(unittest.TestCase):
@@ -24,8 +25,6 @@ class TestCogneeServerStart(unittest.TestCase):
                 "--port",
                 "8000",
             ],
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
             preexec_fn=os.setsid,
         )
         # Give the server some time to start
@@ -90,12 +89,71 @@ class TestCogneeServerStart(unittest.TestCase):
             )
         }
 
-
+        ontology_key = f"test_ontology_{uuid.uuid4().hex[:8]}"
+        payload = {"datasets": [dataset_name], "ontology_key": [ontology_key]}
 
         add_response = requests.post(url, headers=headers, data=form_data, files=file, timeout=50)
         if add_response.status_code not in [200, 201]:
            add_response.raise_for_status()
 
+        ontology_content = b"""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+            xmlns:owl="http://www.w3.org/2002/07/owl#"
+            xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+            xmlns="http://example.org/ontology#"
+            xml:base="http://example.org/ontology">
+
+            <owl:Ontology rdf:about="http://example.org/ontology"/>
+
+            <!-- Classes -->
+            <owl:Class rdf:ID="Problem"/>
+            <owl:Class rdf:ID="HardwareProblem"/>
+            <owl:Class rdf:ID="SoftwareProblem"/>
+            <owl:Class rdf:ID="Concept"/>
+            <owl:Class rdf:ID="Object"/>
+            <owl:Class rdf:ID="Joke"/>
+            <owl:Class rdf:ID="Image"/>
+            <owl:Class rdf:ID="Person"/>
+
+            <rdf:Description rdf:about="#HardwareProblem">
+                <rdfs:subClassOf rdf:resource="#Problem"/>
+                <rdfs:comment>A failure caused by physical components.</rdfs:comment>
+            </rdf:Description>
+
+            <rdf:Description rdf:about="#SoftwareProblem">
+                <rdfs:subClassOf rdf:resource="#Problem"/>
+                <rdfs:comment>An error caused by software logic or configuration.</rdfs:comment>
+            </rdf:Description>
+
+            <rdf:Description rdf:about="#Person">
+                <rdfs:comment>A human being or individual.</rdfs:comment>
+            </rdf:Description>
+
+            <!-- Individuals -->
+            <Person rdf:ID="programmers">
+                <rdfs:label>Programmers</rdfs:label>
+            </Person>
+
+            <Object rdf:ID="light_bulb">
+                <rdfs:label>Light Bulb</rdfs:label>
+            </Object>
+
+            <HardwareProblem rdf:ID="hardware_problem">
+                <rdfs:label>Hardware Problem</rdfs:label>
+            </HardwareProblem>
+
+        </rdf:RDF>"""
+
+        ontology_response = requests.post(
+            "http://127.0.0.1:8000/api/v1/ontologies",
+            headers=headers,
+            files=[("ontology_file", ("test.owl", ontology_content, "application/xml"))],
+            data={
+                "ontology_key": ontology_key,
+                "description": "Test ontology",
+            },
+        )
+        self.assertEqual(ontology_response.status_code, 200)
+
         # Cognify request
         url = "http://127.0.0.1:8000/api/v1/cognify"
         headers = {
@@ -107,6 +165,29 @@ class TestCogneeServerStart(unittest.TestCase):
         if cognify_response.status_code not in [200, 201]:
             cognify_response.raise_for_status()
 
+        datasets_response = requests.get("http://127.0.0.1:8000/api/v1/datasets", headers=headers)
+
+        datasets = datasets_response.json()
+        dataset_id = None
+        for dataset in datasets:
+            if dataset["name"] == dataset_name:
+                dataset_id = dataset["id"]
+                break
+
+        graph_response = requests.get(
+            f"http://127.0.0.1:8000/api/v1/datasets/{dataset_id}/graph", headers=headers
+        )
+        self.assertEqual(graph_response.status_code, 200)
+
+        graph_data = graph_response.json()
+        ontology_nodes = [
+            node for node in graph_data.get("nodes") if node.get("properties").get("ontology_valid")
+        ]
+
+        self.assertGreater(
+            len(ontology_nodes), 0, "No ontology nodes found - ontology was not integrated"
+        )
+
         # TODO: Add test to verify cognify pipeline is complete before testing search
 
         # Search request
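The hunks above capture the release's new ontology workflow end to end: register an OWL file under a key via the new /api/v1/ontologies endpoint, then reference that key when calling cognify. Distilled into a standalone client sketch (a sketch only: the base URL and empty auth headers are placeholders, and sending the cognify payload as JSON mirrors the test's new json import):

    # Minimal sketch of the new ontology flow against a local cognee server.
    import requests

    BASE = "http://127.0.0.1:8000/api/v1"
    headers = {}  # placeholder: add whatever auth your deployment requires

    # 1. Register an ontology file under a caller-chosen key.
    with open("my_ontology.owl", "rb") as owl_file:
        response = requests.post(
            f"{BASE}/ontologies",
            headers=headers,
            files=[("ontology_file", ("my_ontology.owl", owl_file, "application/xml"))],
            data={"ontology_key": "my_ontology", "description": "Example ontology"},
        )
    response.raise_for_status()

    # 2. Reference the key (as a list) when cognifying a dataset.
    payload = {"datasets": ["my_dataset"], "ontology_key": ["my_ontology"]}
    response = requests.post(f"{BASE}/cognify", headers=headers, json=payload)
    response.raise_for_status()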
cognee/tests/test_conversation_history.py

@@ -8,17 +8,19 @@ Tests all retrievers that save conversation history to Redis cache:
 4. GRAPH_COMPLETION_CONTEXT_EXTENSION
 5. GRAPH_SUMMARY_COMPLETION
 6. TEMPORAL
+7. TRIPLET_COMPLETION
 """
 
 import os
-import shutil
 import cognee
 import pathlib
 
 from cognee.infrastructure.databases.cache import get_cache_engine
+from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.modules.search.types import SearchType
 from cognee.shared.logging_utils import get_logger
 from cognee.modules.users.methods import get_default_user
+from collections import Counter
 
 logger = get_logger()
 
@@ -54,13 +56,17 @@ async def main():
         """DataCo is a data analytics company. They help businesses make sense of their data."""
     )
 
-    await cognee.add(text_1, dataset_name)
-    await cognee.add(text_2, dataset_name)
+    await cognee.add(data=text_1, dataset_name=dataset_name)
+    await cognee.add(data=text_2, dataset_name=dataset_name)
 
-    await cognee.cognify([dataset_name])
+    await cognee.cognify(datasets=[dataset_name])
 
     user = await get_default_user()
 
+    from cognee.memify_pipelines.create_triplet_embeddings import create_triplet_embeddings
+
+    await create_triplet_embeddings(user=user, dataset=dataset_name)
+
     cache_engine = get_cache_engine()
     assert cache_engine is not None, "Cache engine should be available for testing"
 
@@ -188,7 +194,6 @@ async def main():
         f"GRAPH_SUMMARY_COMPLETION should return non-empty list, got: {result_summary!r}"
     )
 
-    # Verify saved
     history_summary = await cache_engine.get_latest_qa(str(user.id), session_id_summary, last_n=10)
     our_qa_summary = [
         h for h in history_summary if h["question"] == "What are the key points about TechCorp?"
@@ -215,6 +220,24 @@ async def main():
     ]
     assert len(our_qa_temporal) == 1, "Should find Temporal question in history"
 
+    session_id_triplet = "test_session_triplet"
+
+    result_triplet = await cognee.search(
+        query_type=SearchType.TRIPLET_COMPLETION,
+        query_text="What companies are mentioned?",
+        session_id=session_id_triplet,
+    )
+
+    assert isinstance(result_triplet, list) and len(result_triplet) > 0, (
+        f"TRIPLET_COMPLETION should return non-empty list, got: {result_triplet!r}"
+    )
+
+    history_triplet = await cache_engine.get_latest_qa(str(user.id), session_id_triplet, last_n=10)
+    our_qa_triplet = [
+        h for h in history_triplet if h["question"] == "What companies are mentioned?"
+    ]
+    assert len(our_qa_triplet) == 1, "Should find Triplet question in history"
+
     from cognee.modules.retrieval.utils.session_cache import (
         get_conversation_history,
     )
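This hunk is also a usable reference for the new search type. A minimal standalone sketch, assuming a dataset has already been added and cognified and triplet embeddings exist:

    # Sketch: query the new TRIPLET_COMPLETION search type.
    import asyncio
    import cognee
    from cognee.modules.search.types import SearchType

    async def ask():
        results = await cognee.search(
            query_type=SearchType.TRIPLET_COMPLETION,
            query_text="What companies are mentioned?",
            session_id="my_session",  # optional; enables conversation history
        )
        for result in results:
            print(result)

    asyncio.run(ask())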
@@ -228,6 +251,46 @@ async def main():
     assert "CONTEXT:" in formatted_history, "Formatted history should contain 'CONTEXT:' prefix"
     assert "ANSWER:" in formatted_history, "Formatted history should contain 'ANSWER:' prefix"
 
+    from cognee.memify_pipelines.persist_sessions_in_knowledge_graph import (
+        persist_sessions_in_knowledge_graph_pipeline,
+    )
+
+    logger.info("Starting persist_sessions_in_knowledge_graph tests")
+
+    await persist_sessions_in_knowledge_graph_pipeline(
+        user=user,
+        session_ids=[session_id_1, session_id_2],
+        dataset=dataset_name,
+        run_in_background=False,
+    )
+
+    graph_engine = await get_graph_engine()
+    graph = await graph_engine.get_graph_data()
+
+    type_counts = Counter(node_data[1].get("type", {}) for node_data in graph[0])
+
+    "Tests the correct number of NodeSet nodes after session persistence"
+    assert type_counts.get("NodeSet", 0) == 1, (
+        f"Number of NodeSets in the graph is incorrect, found {type_counts.get('NodeSet', 0)} but there should be exactly 1."
+    )
+
+    "Tests the correct number of DocumentChunk nodes after session persistence"
+    assert type_counts.get("DocumentChunk", 0) == 4, (
+        f"Number of DocumentChunk ndoes in the graph is incorrect, found {type_counts.get('DocumentChunk', 0)} but there should be exactly 4 (2 original documents, 2 sessions)."
+    )
+
+    from cognee.infrastructure.databases.vector.get_vector_engine import get_vector_engine
+
+    vector_engine = get_vector_engine()
+    collection_size = await vector_engine.search(
+        collection_name="DocumentChunk_text",
+        query_text="test",
+        limit=1000,
+    )
+    assert len(collection_size) == 4, (
+        f"DocumentChunk_text collection should have exactly 4 embeddings, found {len(collection_size)}"
+    )
+
     await cognee.prune.prune_data()
     await cognee.prune.prune_system(metadata=True)
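A distilled sketch of the session-persistence call exercised above, assuming the session ids were previously populated by cognee.search calls:

    # Sketch: fold cached Q&A sessions back into the knowledge graph.
    import asyncio
    from cognee.memify_pipelines.persist_sessions_in_knowledge_graph import (
        persist_sessions_in_knowledge_graph_pipeline,
    )
    from cognee.modules.users.methods import get_default_user

    async def persist():
        user = await get_default_user()
        await persist_sessions_in_knowledge_graph_pipeline(
            user=user,
            session_ids=["my_session"],   # placeholder session id
            dataset="my_dataset",         # placeholder dataset name
            run_in_background=False,
        )

    asyncio.run(persist())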
cognee/tests/test_dataset_database_handler.py

@@ -0,0 +1,137 @@
+import asyncio
+import os
+
+# Set custom dataset database handler environment variable
+os.environ["VECTOR_DATASET_DATABASE_HANDLER"] = "custom_lancedb_handler"
+os.environ["GRAPH_DATASET_DATABASE_HANDLER"] = "custom_kuzu_handler"
+
+import cognee
+from cognee.modules.users.methods import get_default_user
+from cognee.infrastructure.databases.dataset_database_handler import DatasetDatabaseHandlerInterface
+from cognee.shared.logging_utils import setup_logging, ERROR
+from cognee.api.v1.search import SearchType
+
+
+class LanceDBTestDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
+    @classmethod
+    async def create_dataset(cls, dataset_id, user):
+        import pathlib
+
+        cognee_directory_path = str(
+            pathlib.Path(
+                os.path.join(
+                    pathlib.Path(__file__).parent, ".cognee_system/test_dataset_database_handler"
+                )
+            ).resolve()
+        )
+        databases_directory_path = os.path.join(cognee_directory_path, "databases", str(user.id))
+        os.makedirs(databases_directory_path, exist_ok=True)
+
+        vector_db_name = "test.lance.db"
+
+        return {
+            "vector_dataset_database_handler": "custom_lancedb_handler",
+            "vector_database_name": vector_db_name,
+            "vector_database_url": os.path.join(databases_directory_path, vector_db_name),
+            "vector_database_provider": "lancedb",
+        }
+
+
+class KuzuTestDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
+    @classmethod
+    async def create_dataset(cls, dataset_id, user):
+        databases_directory_path = os.path.join("databases", str(user.id))
+        os.makedirs(databases_directory_path, exist_ok=True)
+
+        graph_db_name = "test.kuzu"
+        return {
+            "graph_dataset_database_handler": "custom_kuzu_handler",
+            "graph_database_name": graph_db_name,
+            "graph_database_url": os.path.join(databases_directory_path, graph_db_name),
+            "graph_database_provider": "kuzu",
+        }
+
+
+async def main():
+    import pathlib
+
+    data_directory_path = str(
+        pathlib.Path(
+            os.path.join(
+                pathlib.Path(__file__).parent, ".data_storage/test_dataset_database_handler"
+            )
+        ).resolve()
+    )
+    cognee.config.data_root_directory(data_directory_path)
+    cognee_directory_path = str(
+        pathlib.Path(
+            os.path.join(
+                pathlib.Path(__file__).parent, ".cognee_system/test_dataset_database_handler"
+            )
+        ).resolve()
+    )
+    cognee.config.system_root_directory(cognee_directory_path)
+
+    # Add custom dataset database handler
+    from cognee.infrastructure.databases.dataset_database_handler.use_dataset_database_handler import (
+        use_dataset_database_handler,
+    )
+
+    use_dataset_database_handler(
+        "custom_lancedb_handler", LanceDBTestDatasetDatabaseHandler, "lancedb"
+    )
+    use_dataset_database_handler("custom_kuzu_handler", KuzuTestDatasetDatabaseHandler, "kuzu")
+
+    # Create a clean slate for cognee -- reset data and system state
+    print("Resetting cognee data...")
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    print("Data reset complete.\n")
+
+    # cognee knowledge graph will be created based on this text
+    text = """
+    Natural language processing (NLP) is an interdisciplinary
+    subfield of computer science and information retrieval.
+    """
+
+    print("Adding text to cognee:")
+    print(text.strip())
+
+    # Add the text, and make it available for cognify
+    await cognee.add(text)
+    print("Text added successfully.\n")
+
+    # Use LLMs and cognee to create knowledge graph
+    await cognee.cognify()
+    print("Cognify process complete.\n")
+
+    query_text = "Tell me about NLP"
+    print(f"Searching cognee for insights with query: '{query_text}'")
+    # Query cognee for insights on the added text
+    search_results = await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION, query_text=query_text
+    )
+
+    print("Search results:")
+    # Display results
+    for result_text in search_results:
+        print(result_text)
+
+    default_user = await get_default_user()
+    # Assert that the custom database files were created based on the custom dataset database handlers
+    assert os.path.exists(
+        os.path.join(cognee_directory_path, "databases", str(default_user.id), "test.kuzu")
+    ), "Graph database file not found."
+    assert os.path.exists(
+        os.path.join(cognee_directory_path, "databases", str(default_user.id), "test.lance.db")
+    ), "Vector database file not found."
+
+
+if __name__ == "__main__":
+    logger = setup_logging(log_level=ERROR)
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    try:
+        loop.run_until_complete(main())
+    finally:
+        loop.run_until_complete(loop.shutdown_asyncgens())
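This new test doubles as reference usage for the 0.5.0 dataset database handler extension point: subclass DatasetDatabaseHandlerInterface, return per-dataset connection info from create_dataset, and register the handler under a name. A stripped-down sketch with placeholder names and paths:

    # Sketch: register a custom per-dataset vector database handler.
    # Handler/provider names and paths are placeholders; the registration
    # API mirrors the test above.
    import os

    # The test sets this before importing cognee so the handler is picked up.
    os.environ["VECTOR_DATASET_DATABASE_HANDLER"] = "my_lancedb_handler"

    from cognee.infrastructure.databases.dataset_database_handler import (
        DatasetDatabaseHandlerInterface,
    )
    from cognee.infrastructure.databases.dataset_database_handler.use_dataset_database_handler import (
        use_dataset_database_handler,
    )

    class MyLanceDBHandler(DatasetDatabaseHandlerInterface):
        @classmethod
        async def create_dataset(cls, dataset_id, user):
            # Return the connection info cognee should use for this dataset.
            return {
                "vector_dataset_database_handler": "my_lancedb_handler",
                "vector_database_name": "my.lance.db",
                "vector_database_url": os.path.join("databases", str(user.id), "my.lance.db"),
                "vector_database_provider": "lancedb",
            }

    use_dataset_database_handler("my_lancedb_handler", MyLanceDBHandler, "lancedb")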
cognee/tests/test_dataset_delete.py

@@ -0,0 +1,76 @@
+import os
+import asyncio
+import pathlib
+from uuid import UUID
+
+import cognee
+from cognee.shared.logging_utils import setup_logging, ERROR
+from cognee.modules.data.methods.delete_dataset import delete_dataset
+from cognee.modules.data.methods.get_dataset import get_dataset
+from cognee.modules.users.methods import get_default_user
+
+
+async def main():
+    # Set data and system directory paths
+    data_directory_path = str(
+        pathlib.Path(
+            os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_dataset_delete")
+        ).resolve()
+    )
+    cognee.config.data_root_directory(data_directory_path)
+    cognee_directory_path = str(
+        pathlib.Path(
+            os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_dataset_delete")
+        ).resolve()
+    )
+    cognee.config.system_root_directory(cognee_directory_path)
+
+    # Create a clean slate for cognee -- reset data and system state
+    print("Resetting cognee data...")
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    print("Data reset complete.\n")
+
+    # cognee knowledge graph will be created based on this text
+    text = """
+    Natural language processing (NLP) is an interdisciplinary
+    subfield of computer science and information retrieval.
+    """
+
+    # Add the text, and make it available for cognify
+    await cognee.add(text, "nlp_dataset")
+    await cognee.add("Quantum computing is the study of quantum computers.", "quantum_dataset")
+
+    # Use LLMs and cognee to create knowledge graph
+    ret_val = await cognee.cognify()
+    user = await get_default_user()
+
+    for val in ret_val:
+        dataset_id = str(val)
+        vector_db_path = os.path.join(
+            cognee_directory_path, "databases", str(user.id), dataset_id + ".lance.db"
+        )
+        graph_db_path = os.path.join(
+            cognee_directory_path, "databases", str(user.id), dataset_id + ".pkl"
+        )
+
+        # Check if databases are properly created and exist before deletion
+        assert os.path.exists(graph_db_path), "Graph database file not found."
+        assert os.path.exists(vector_db_path), "Vector database file not found."
+
+        dataset = await get_dataset(user_id=user.id, dataset_id=UUID(dataset_id))
+        await delete_dataset(dataset)
+
+        # Confirm databases have been deleted
+        assert not os.path.exists(graph_db_path), "Graph database file found."
+        assert not os.path.exists(vector_db_path), "Vector database file found."
+
+
+if __name__ == "__main__":
+    logger = setup_logging(log_level=ERROR)
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    try:
+        loop.run_until_complete(main())
+    finally:
+        loop.run_until_complete(loop.shutdown_asyncgens())
cognee/tests/test_edge_centered_payload.py

@@ -0,0 +1,170 @@
+"""
+End-to-end integration test for edge-centered payload and triplet embeddings.
+
+"""
+
+import os
+import pathlib
+import cognee
+from cognee.infrastructure.databases.graph import get_graph_engine
+from cognee.infrastructure.databases.vector import get_vector_engine
+from cognee.modules.search.types import SearchType
+from cognee.shared.logging_utils import get_logger
+from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+from cognee.modules.ontology.ontology_config import Config
+
+logger = get_logger()
+
+text_data = """
+Apple is a technology company that produces the iPhone, iPad, and Mac computers.
+The company is known for its innovative products and ecosystem integration.
+
+Microsoft develops the Windows operating system and Office productivity suite.
+They are also major players in cloud computing with Azure.
+
+Google created the Android operating system and provides search engine services.
+The company is a leader in artificial intelligence and machine learning.
+"""
+
+ontology_content = """<?xml version="1.0"?>
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+         xmlns:owl="http://www.w3.org/2002/07/owl#"
+         xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+         xmlns="http://example.org/tech#"
+         xml:base="http://example.org/tech">
+
+    <owl:Ontology rdf:about="http://example.org/tech"/>
+
+    <!-- Classes -->
+    <owl:Class rdf:ID="Company"/>
+    <owl:Class rdf:ID="TechnologyCompany"/>
+    <owl:Class rdf:ID="Product"/>
+    <owl:Class rdf:ID="Software"/>
+    <owl:Class rdf:ID="Hardware"/>
+    <owl:Class rdf:ID="Service"/>
+
+    <rdf:Description rdf:about="#TechnologyCompany">
+        <rdfs:subClassOf rdf:resource="#Company"/>
+        <rdfs:comment>A company operating in the technology sector.</rdfs:comment>
+    </rdf:Description>
+
+    <rdf:Description rdf:about="#Software">
+        <rdfs:subClassOf rdf:resource="#Product"/>
+        <rdfs:comment>Software products and applications.</rdfs:comment>
+    </rdf:Description>
+
+    <rdf:Description rdf:about="#Hardware">
+        <rdfs:subClassOf rdf:resource="#Product"/>
+        <rdfs:comment>Physical hardware products.</rdfs:comment>
+    </rdf:Description>
+
+    <!-- Individuals -->
+    <TechnologyCompany rdf:ID="apple">
+        <rdfs:label>Apple</rdfs:label>
+    </TechnologyCompany>
+
+    <TechnologyCompany rdf:ID="microsoft">
+        <rdfs:label>Microsoft</rdfs:label>
+    </TechnologyCompany>
+
+    <TechnologyCompany rdf:ID="google">
+        <rdfs:label>Google</rdfs:label>
+    </TechnologyCompany>
+
+    <Hardware rdf:ID="iphone">
+        <rdfs:label>iPhone</rdfs:label>
+    </Hardware>
+
+    <Software rdf:ID="windows">
+        <rdfs:label>Windows</rdfs:label>
+    </Software>
+
+    <Software rdf:ID="android">
+        <rdfs:label>Android</rdfs:label>
+    </Software>
+
+</rdf:RDF>"""
+
+
+async def main():
+    data_directory_path = str(
+        pathlib.Path(
+            os.path.join(
+                pathlib.Path(__file__).parent,
+                ".data_storage/test_edge_centered_payload",
+            )
+        ).resolve()
+    )
+    cognee_directory_path = str(
+        pathlib.Path(
+            os.path.join(
+                pathlib.Path(__file__).parent,
+                ".cognee_system/test_edge_centered_payload",
+            )
+        ).resolve()
+    )
+
+    cognee.config.data_root_directory(data_directory_path)
+    cognee.config.system_root_directory(cognee_directory_path)
+
+    dataset_name = "tech_companies"
+
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    await cognee.add(data=text_data, dataset_name=dataset_name)
+
+    import tempfile
+
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".owl", delete=False) as f:
+        f.write(ontology_content)
+        ontology_file_path = f.name
+
+    try:
+        logger.info(f"Loading ontology from: {ontology_file_path}")
+        config: Config = {
+            "ontology_config": {
+                "ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_file_path)
+            }
+        }
+
+        await cognee.cognify(datasets=[dataset_name], config=config)
+        graph_engine = await get_graph_engine()
+        nodes_phase2, edges_phase2 = await graph_engine.get_graph_data()
+
+        vector_engine = get_vector_engine()
+        triplets_phase2 = await vector_engine.search(
+            query_text="technology", limit=None, collection_name="Triplet_text"
+        )
+
+        assert len(triplets_phase2) == len(edges_phase2), (
+            f"Triplet embeddings and number of edges do not match. Vector db contains {len(triplets_phase2)} edge triplets while graph db contains {len(edges_phase2)} edges."
+        )
+
+        search_results_phase2 = await cognee.search(
+            query_type=SearchType.TRIPLET_COMPLETION,
+            query_text="What products does Apple make?",
+        )
+
+        assert search_results_phase2 is not None, (
+            "Search should return results for triplet embeddings in simple ontology use case."
+        )
+
+    finally:
+        if os.path.exists(ontology_file_path):
+            os.unlink(ontology_file_path)
+
+
+if __name__ == "__main__":
+    import asyncio
+    from cognee.shared.logging_utils import setup_logging
+
+    setup_logging()
+
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    try:
+        loop.run_until_complete(main())
+    finally:
+        loop.run_until_complete(loop.shutdown_asyncgens())
+    loop.close()
cognee/tests/test_edge_ingestion.py

@@ -52,6 +52,33 @@ async def test_edge_ingestion():
 
     edge_type_counts = Counter(edge_type[2] for edge_type in graph[1])
 
+    "Tests edge_text presence and format"
+    contains_edges = [edge for edge in graph[1] if edge[2] == "contains"]
+    assert len(contains_edges) > 0, "Expected at least one contains edge for edge_text verification"
+
+    edge_properties = contains_edges[0][3]
+    assert "edge_text" in edge_properties, "Expected edge_text in edge properties"
+
+    edge_text = edge_properties["edge_text"]
+    assert "relationship_name: contains" in edge_text, (
+        f"Expected 'relationship_name: contains' in edge_text, got: {edge_text}"
+    )
+    assert "entity_name:" in edge_text, f"Expected 'entity_name:' in edge_text, got: {edge_text}"
+    assert "entity_description:" in edge_text, (
+        f"Expected 'entity_description:' in edge_text, got: {edge_text}"
+    )
+
+    all_edge_texts = [
+        edge[3].get("edge_text", "") for edge in contains_edges if "edge_text" in edge[3]
+    ]
+    expected_entities = ["dave", "ana", "bob", "dexter", "apples", "cognee"]
+    found_entity = any(
+        any(entity in text.lower() for entity in expected_entities) for text in all_edge_texts
+    )
+    assert found_entity, (
+        f"Expected to find at least one entity name in edge_text: {all_edge_texts[:3]}"
+    )
+
     "Tests the presence of basic nested edges"
     for basic_nested_edge in basic_nested_edges:
         assert edge_type_counts.get(basic_nested_edge, 0) >= 1, (
@@ -133,7 +133,7 @@ async def main():
         extraction_tasks=extraction_tasks,
         enrichment_tasks=enrichment_tasks,
         data=[{}],
-        dataset=
+        dataset=dataset_name,
     )
 
     nodes_after, edges_after = await graph_engine.get_graph_data()
|