cognee 0.5.1__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +2 -0
- cognee/alembic/README +1 -0
- cognee/alembic/env.py +107 -0
- cognee/alembic/script.py.mako +26 -0
- cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
- cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
- cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
- cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
- cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
- cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
- cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
- cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
- cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
- cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
- cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
- cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
- cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
- cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
- cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
- cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
- cognee/alembic.ini +117 -0
- cognee/api/v1/add/add.py +2 -1
- cognee/api/v1/add/routers/get_add_router.py +2 -0
- cognee/api/v1/cognify/cognify.py +11 -6
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
- cognee/api/v1/config/config.py +60 -0
- cognee/api/v1/datasets/routers/get_datasets_router.py +46 -3
- cognee/api/v1/memify/routers/get_memify_router.py +3 -0
- cognee/api/v1/search/routers/get_search_router.py +21 -6
- cognee/api/v1/search/search.py +21 -5
- cognee/api/v1/sync/routers/get_sync_router.py +3 -3
- cognee/cli/commands/add_command.py +1 -1
- cognee/cli/commands/cognify_command.py +6 -0
- cognee/cli/commands/config_command.py +1 -1
- cognee/context_global_variables.py +5 -1
- cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
- cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
- cognee/infrastructure/databases/cache/config.py +6 -0
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
- cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
- cognee/infrastructure/databases/relational/config.py +16 -1
- cognee/infrastructure/databases/relational/create_relational_engine.py +13 -3
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +26 -3
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/config.py +6 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +70 -16
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
- cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
- cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
- cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
- cognee/infrastructure/llm/LLMGateway.py +0 -13
- cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
- cognee/infrastructure/llm/prompts/test.txt +1 -1
- cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +17 -12
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +31 -25
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +132 -7
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +29 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +2 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +58 -13
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +0 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +25 -131
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/types.py +10 -0
- cognee/modules/chunking/models/DocumentChunk.py +0 -1
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/models/Data.py +3 -1
- cognee/modules/engine/models/Entity.py +0 -1
- cognee/modules/engine/operations/setup.py +6 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
- cognee/modules/notebooks/methods/__init__.py +1 -0
- cognee/modules/notebooks/methods/create_notebook.py +0 -34
- cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
- cognee/modules/notebooks/methods/get_notebooks.py +12 -8
- cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
- cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
- cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
- cognee/modules/retrieval/__init__.py +0 -1
- cognee/modules/retrieval/base_retriever.py +66 -10
- cognee/modules/retrieval/chunks_retriever.py +57 -49
- cognee/modules/retrieval/coding_rules_retriever.py +12 -5
- cognee/modules/retrieval/completion_retriever.py +29 -28
- cognee/modules/retrieval/cypher_search_retriever.py +25 -20
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
- cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
- cognee/modules/retrieval/graph_completion_retriever.py +78 -63
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/lexical_retriever.py +34 -12
- cognee/modules/retrieval/natural_language_retriever.py +18 -15
- cognee/modules/retrieval/summaries_retriever.py +51 -34
- cognee/modules/retrieval/temporal_retriever.py +59 -49
- cognee/modules/retrieval/triplet_retriever.py +32 -33
- cognee/modules/retrieval/utils/access_tracking.py +88 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -103
- cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
- cognee/modules/search/methods/__init__.py +1 -0
- cognee/modules/search/methods/get_retriever_output.py +53 -0
- cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
- cognee/modules/search/methods/search.py +90 -222
- cognee/modules/search/models/SearchResultPayload.py +67 -0
- cognee/modules/search/types/SearchResult.py +1 -8
- cognee/modules/search/types/SearchType.py +1 -2
- cognee/modules/search/types/__init__.py +1 -1
- cognee/modules/search/utils/__init__.py +1 -2
- cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
- cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
- cognee/modules/users/authentication/default/default_transport.py +11 -1
- cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
- cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
- cognee/modules/users/methods/create_user.py +0 -9
- cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
- cognee/modules/visualization/cognee_network_visualization.py +1 -1
- cognee/run_migrations.py +48 -0
- cognee/shared/exceptions/__init__.py +1 -3
- cognee/shared/exceptions/exceptions.py +11 -1
- cognee/shared/usage_logger.py +332 -0
- cognee/shared/utils.py +12 -5
- cognee/tasks/chunks/__init__.py +9 -0
- cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
- cognee/tasks/graph/__init__.py +7 -0
- cognee/tasks/ingestion/data_item.py +8 -0
- cognee/tasks/ingestion/ingest_data.py +12 -1
- cognee/tasks/ingestion/save_data_item_to_storage.py +5 -0
- cognee/tasks/memify/__init__.py +8 -0
- cognee/tasks/memify/extract_usage_frequency.py +613 -0
- cognee/tasks/summarization/models.py +0 -2
- cognee/tasks/temporal_graph/__init__.py +0 -1
- cognee/tasks/translation/__init__.py +96 -0
- cognee/tasks/translation/config.py +110 -0
- cognee/tasks/translation/detect_language.py +190 -0
- cognee/tasks/translation/exceptions.py +62 -0
- cognee/tasks/translation/models.py +72 -0
- cognee/tasks/translation/providers/__init__.py +44 -0
- cognee/tasks/translation/providers/azure_provider.py +192 -0
- cognee/tasks/translation/providers/base.py +85 -0
- cognee/tasks/translation/providers/google_provider.py +158 -0
- cognee/tasks/translation/providers/llm_provider.py +143 -0
- cognee/tasks/translation/translate_content.py +282 -0
- cognee/tasks/web_scraper/default_url_crawler.py +6 -2
- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
- cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
- cognee/tests/integration/retrieval/test_chunks_retriever.py +351 -0
- cognee/tests/integration/retrieval/test_graph_completion_retriever.py +276 -0
- cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +228 -0
- cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +217 -0
- cognee/tests/integration/retrieval/test_rag_completion_retriever.py +319 -0
- cognee/tests/integration/retrieval/test_structured_output.py +258 -0
- cognee/tests/integration/retrieval/test_summaries_retriever.py +195 -0
- cognee/tests/integration/retrieval/test_temporal_retriever.py +336 -0
- cognee/tests/integration/retrieval/test_triplet_retriever.py +45 -1
- cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
- cognee/tests/tasks/translation/README.md +147 -0
- cognee/tests/tasks/translation/__init__.py +1 -0
- cognee/tests/tasks/translation/config_test.py +93 -0
- cognee/tests/tasks/translation/detect_language_test.py +118 -0
- cognee/tests/tasks/translation/providers_test.py +151 -0
- cognee/tests/tasks/translation/translate_content_test.py +213 -0
- cognee/tests/test_chromadb.py +1 -1
- cognee/tests/test_cleanup_unused_data.py +165 -0
- cognee/tests/test_custom_data_label.py +68 -0
- cognee/tests/test_delete_by_id.py +6 -6
- cognee/tests/test_extract_usage_frequency.py +308 -0
- cognee/tests/test_kuzu.py +17 -7
- cognee/tests/test_lancedb.py +3 -1
- cognee/tests/test_library.py +1 -1
- cognee/tests/test_neo4j.py +17 -7
- cognee/tests/test_neptune_analytics_vector.py +3 -1
- cognee/tests/test_permissions.py +172 -187
- cognee/tests/test_pgvector.py +3 -1
- cognee/tests/test_relational_db_migration.py +15 -1
- cognee/tests/test_remote_kuzu.py +3 -1
- cognee/tests/test_s3_file_storage.py +1 -1
- cognee/tests/test_search_db.py +345 -205
- cognee/tests/test_usage_logger_e2e.py +268 -0
- cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
- cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
- cognee/tests/unit/eval_framework/benchmark_adapters_test.py +25 -0
- cognee/tests/unit/eval_framework/corpus_builder_test.py +33 -4
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
- cognee/tests/unit/infrastructure/databases/relational/test_RelationalConfig.py +69 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +122 -168
- cognee/tests/unit/modules/retrieval/conversation_history_test.py +338 -0
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +486 -157
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +693 -155
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +619 -200
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +300 -171
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +184 -155
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +544 -79
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +476 -28
- cognee/tests/unit/modules/retrieval/test_completion.py +343 -0
- cognee/tests/unit/modules/retrieval/test_graph_summary_completion_retriever.py +157 -0
- cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
- cognee/tests/unit/modules/retrieval/test_user_qa_feedback.py +312 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +267 -7
- cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
- cognee/tests/unit/modules/search/test_search.py +96 -20
- cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
- cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
- cognee/tests/unit/shared/test_usage_logger.py +241 -0
- cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/METADATA +22 -17
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/RECORD +258 -157
- cognee/api/.env.example +0 -5
- cognee/modules/retrieval/base_graph_retriever.py +0 -24
- cognee/modules/search/methods/get_search_type_tools.py +0 -223
- cognee/modules/search/methods/no_access_control_search.py +0 -62
- cognee/modules/search/utils/prepare_search_result.py +0 -63
- cognee/tests/test_feedback_enrichment.py +0 -174
- cognee/tests/unit/modules/retrieval/structured_output_test.py +0 -204
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/WHEEL +0 -0
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/licenses/NOTICE.md +0 -0
cognee/tests/test_search_db.py
CHANGED
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
import pathlib
|
|
2
2
|
import os
|
|
3
|
+
import asyncio
|
|
4
|
+
import pytest
|
|
5
|
+
import pytest_asyncio
|
|
6
|
+
from collections import Counter
|
|
7
|
+
|
|
3
8
|
import cognee
|
|
4
9
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
5
10
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
|
6
11
|
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
|
|
7
|
-
from cognee.modules.graph.utils import resolve_edges_to_text
|
|
8
12
|
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
|
|
9
13
|
from cognee.modules.retrieval.graph_completion_context_extension_retriever import (
|
|
10
14
|
GraphCompletionContextExtensionRetriever,
|
|
@@ -13,292 +17,428 @@ from cognee.modules.retrieval.graph_completion_cot_retriever import GraphComplet
|
|
|
13
17
|
from cognee.modules.retrieval.graph_summary_completion_retriever import (
|
|
14
18
|
GraphSummaryCompletionRetriever,
|
|
15
19
|
)
|
|
20
|
+
from cognee.modules.retrieval.chunks_retriever import ChunksRetriever
|
|
21
|
+
from cognee.modules.retrieval.summaries_retriever import SummariesRetriever
|
|
22
|
+
from cognee.modules.retrieval.completion_retriever import CompletionRetriever
|
|
23
|
+
from cognee.modules.retrieval.temporal_retriever import TemporalRetriever
|
|
16
24
|
from cognee.modules.retrieval.triplet_retriever import TripletRetriever
|
|
17
25
|
from cognee.shared.logging_utils import get_logger
|
|
18
26
|
from cognee.modules.search.types import SearchType
|
|
19
27
|
from cognee.modules.users.methods import get_default_user
|
|
20
|
-
from collections import Counter
|
|
21
28
|
|
|
22
29
|
logger = get_logger()
|
|
23
30
|
|
|
24
31
|
|
|
25
|
-
async def
|
|
26
|
-
|
|
32
|
+
async def _reset_engines_and_prune() -> None:
|
|
33
|
+
"""Reset db engine caches and prune data/system.
|
|
34
|
+
|
|
35
|
+
Kept intentionally identical to the inlined setup logic to avoid event loop issues when
|
|
36
|
+
using deployed databases (Neo4j, PostgreSQL) and to ensure fresh instances per run.
|
|
37
|
+
"""
|
|
38
|
+
# Dispose of existing engines and clear caches to ensure fresh instances for each test
|
|
39
|
+
try:
|
|
40
|
+
from cognee.infrastructure.databases.vector import get_vector_engine
|
|
41
|
+
|
|
42
|
+
vector_engine = get_vector_engine()
|
|
43
|
+
# Dispose SQLAlchemy engine connection pool if it exists
|
|
44
|
+
if hasattr(vector_engine, "engine") and hasattr(vector_engine.engine, "dispose"):
|
|
45
|
+
await vector_engine.engine.dispose(close=True)
|
|
46
|
+
except Exception:
|
|
47
|
+
# Engine might not exist yet
|
|
48
|
+
pass
|
|
49
|
+
|
|
50
|
+
from cognee.infrastructure.databases.graph.get_graph_engine import _create_graph_engine
|
|
51
|
+
from cognee.infrastructure.databases.vector.create_vector_engine import _create_vector_engine
|
|
52
|
+
from cognee.infrastructure.databases.relational.create_relational_engine import (
|
|
53
|
+
create_relational_engine,
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
_create_graph_engine.cache_clear()
|
|
57
|
+
_create_vector_engine.cache_clear()
|
|
58
|
+
create_relational_engine.cache_clear()
|
|
59
|
+
|
|
27
60
|
await cognee.prune.prune_data()
|
|
28
61
|
await cognee.prune.prune_system(metadata=True)
|
|
29
62
|
|
|
30
|
-
dataset_name = "test_dataset"
|
|
31
63
|
|
|
64
|
+
async def _seed_default_dataset(dataset_name: str) -> dict:
|
|
65
|
+
"""Add the shared test dataset contents and run cognify (same steps/order as before)."""
|
|
32
66
|
text_1 = """Germany is located in europe right next to the Netherlands"""
|
|
67
|
+
|
|
68
|
+
logger.info(f"Adding text data to dataset: {dataset_name}")
|
|
33
69
|
await cognee.add(text_1, dataset_name)
|
|
34
70
|
|
|
35
71
|
explanation_file_path_quantum = os.path.join(
|
|
36
72
|
pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
|
|
37
73
|
)
|
|
38
74
|
|
|
75
|
+
logger.info(f"Adding file data to dataset: {dataset_name}")
|
|
39
76
|
await cognee.add([explanation_file_path_quantum], dataset_name)
|
|
40
77
|
|
|
78
|
+
logger.info(f"Running cognify on dataset: {dataset_name}")
|
|
41
79
|
await cognee.cognify([dataset_name])
|
|
42
80
|
|
|
81
|
+
return {
|
|
82
|
+
"dataset_name": dataset_name,
|
|
83
|
+
"text_1": text_1,
|
|
84
|
+
"explanation_file_path_quantum": explanation_file_path_quantum,
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@pytest.fixture(scope="session")
|
|
89
|
+
def event_loop():
|
|
90
|
+
"""Use a single asyncio event loop for this test module.
|
|
91
|
+
|
|
92
|
+
This helps avoid "Future attached to a different loop" when running multiple async
|
|
93
|
+
tests that share clients/engines.
|
|
94
|
+
"""
|
|
95
|
+
loop = asyncio.new_event_loop()
|
|
96
|
+
try:
|
|
97
|
+
yield loop
|
|
98
|
+
finally:
|
|
99
|
+
loop.close()
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
async def setup_test_environment():
|
|
103
|
+
"""Helper function to set up test environment with data, cognify, and triplet embeddings."""
|
|
104
|
+
# This test runs for multiple db settings, to run this locally set the corresponding db envs
|
|
105
|
+
|
|
106
|
+
dataset_name = "test_dataset"
|
|
107
|
+
logger.info("Starting test setup: pruning data and system")
|
|
108
|
+
await _reset_engines_and_prune()
|
|
109
|
+
state = await _seed_default_dataset(dataset_name=dataset_name)
|
|
110
|
+
|
|
43
111
|
user = await get_default_user()
|
|
44
112
|
from cognee.memify_pipelines.create_triplet_embeddings import create_triplet_embeddings
|
|
45
113
|
|
|
114
|
+
logger.info("Creating triplet embeddings")
|
|
46
115
|
await create_triplet_embeddings(user=user, dataset=dataset_name, triplets_batch_size=5)
|
|
47
116
|
|
|
48
|
-
|
|
49
|
-
nodes, edges = await graph_engine.get_graph_data()
|
|
50
|
-
|
|
117
|
+
# Check if Triplet_text collection was created
|
|
51
118
|
vector_engine = get_vector_engine()
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
)
|
|
119
|
+
has_collection = await vector_engine.has_collection(collection_name="Triplet_text")
|
|
120
|
+
logger.info(f"Triplet_text collection exists after creation: {has_collection}")
|
|
55
121
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
122
|
+
if has_collection:
|
|
123
|
+
collection = await vector_engine.get_collection("Triplet_text")
|
|
124
|
+
count = await collection.count_rows() if hasattr(collection, "count_rows") else "unknown"
|
|
125
|
+
logger.info(f"Triplet_text collection row count: {count}")
|
|
59
126
|
|
|
60
|
-
|
|
61
|
-
query="Next to which country is Germany located?"
|
|
62
|
-
)
|
|
63
|
-
context_gk_cot = await GraphCompletionCotRetriever().get_context(
|
|
64
|
-
query="Next to which country is Germany located?"
|
|
65
|
-
)
|
|
66
|
-
context_gk_ext = await GraphCompletionContextExtensionRetriever().get_context(
|
|
67
|
-
query="Next to which country is Germany located?"
|
|
68
|
-
)
|
|
69
|
-
context_gk_sum = await GraphSummaryCompletionRetriever().get_context(
|
|
70
|
-
query="Next to which country is Germany located?"
|
|
71
|
-
)
|
|
72
|
-
context_triplet = await TripletRetriever().get_context(
|
|
73
|
-
query="Next to which country is Germany located?"
|
|
74
|
-
)
|
|
127
|
+
return state
|
|
75
128
|
|
|
76
|
-
for name, context in [
|
|
77
|
-
("GraphCompletionRetriever", context_gk),
|
|
78
|
-
("GraphCompletionCotRetriever", context_gk_cot),
|
|
79
|
-
("GraphCompletionContextExtensionRetriever", context_gk_ext),
|
|
80
|
-
("GraphSummaryCompletionRetriever", context_gk_sum),
|
|
81
|
-
]:
|
|
82
|
-
assert isinstance(context, list), f"{name}: Context should be a list"
|
|
83
|
-
assert len(context) > 0, f"{name}: Context should not be empty"
|
|
84
|
-
|
|
85
|
-
context_text = await resolve_edges_to_text(context)
|
|
86
|
-
lower = context_text.lower()
|
|
87
|
-
assert "germany" in lower or "netherlands" in lower, (
|
|
88
|
-
f"{name}: Context did not contain 'germany' or 'netherlands'; got: {context!r}"
|
|
89
|
-
)
|
|
90
|
-
|
|
91
|
-
assert isinstance(context_triplet, str), "TripletRetriever: Context should be a string"
|
|
92
|
-
assert len(context_triplet) > 0, "TripletRetriever: Context should not be empty"
|
|
93
|
-
lower_triplet = context_triplet.lower()
|
|
94
|
-
assert "germany" in lower_triplet or "netherlands" in lower_triplet, (
|
|
95
|
-
f"TripletRetriever: Context did not contain 'germany' or 'netherlands'; got: {context_triplet!r}"
|
|
96
|
-
)
|
|
97
129
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
)
|
|
101
|
-
|
|
102
|
-
query="Next to which country is Germany located?"
|
|
103
|
-
)
|
|
104
|
-
triplets_gk_ext = await GraphCompletionContextExtensionRetriever().get_triplets(
|
|
105
|
-
query="Next to which country is Germany located?"
|
|
106
|
-
)
|
|
107
|
-
triplets_gk_sum = await GraphSummaryCompletionRetriever().get_triplets(
|
|
108
|
-
query="Next to which country is Germany located?"
|
|
109
|
-
)
|
|
130
|
+
async def _get_retriever_context(retriever, query: str):
|
|
131
|
+
"""Retrieve objects and resolve context via the retriever API."""
|
|
132
|
+
retrieved_objects = await retriever.get_retrieved_objects(query)
|
|
133
|
+
return await retriever.get_context_from_objects(query, retrieved_objects)
|
|
110
134
|
|
|
111
|
-
for name, triplets in [
|
|
112
|
-
("GraphCompletionRetriever", triplets_gk),
|
|
113
|
-
("GraphCompletionCotRetriever", triplets_gk_cot),
|
|
114
|
-
("GraphCompletionContextExtensionRetriever", triplets_gk_ext),
|
|
115
|
-
("GraphSummaryCompletionRetriever", triplets_gk_sum),
|
|
116
|
-
]:
|
|
117
|
-
assert isinstance(triplets, list), f"{name}: Triplets should be a list"
|
|
118
|
-
assert triplets, f"{name}: Triplets list should not be empty"
|
|
119
|
-
for edge in triplets:
|
|
120
|
-
assert isinstance(edge, Edge), f"{name}: Elements should be Edge instances"
|
|
121
|
-
distance = edge.attributes.get("vector_distance")
|
|
122
|
-
node1_distance = edge.node1.attributes.get("vector_distance")
|
|
123
|
-
node2_distance = edge.node2.attributes.get("vector_distance")
|
|
124
|
-
assert isinstance(distance, float), (
|
|
125
|
-
f"{name}: vector_distance should be float, got {type(distance)}"
|
|
126
|
-
)
|
|
127
|
-
assert 0 <= distance <= 1, (
|
|
128
|
-
f"{name}: edge vector_distance {distance} out of [0,1], this shouldn't happen"
|
|
129
|
-
)
|
|
130
|
-
assert 0 <= node1_distance <= 1, (
|
|
131
|
-
f"{name}: node_1 vector_distance {distance} out of [0,1], this shouldn't happen"
|
|
132
|
-
)
|
|
133
|
-
assert 0 <= node2_distance <= 1, (
|
|
134
|
-
f"{name}: node_2 vector_distance {distance} out of [0,1], this shouldn't happen"
|
|
135
|
-
)
|
|
136
135
|
|
|
136
|
+
@pytest_asyncio.fixture(scope="session")
|
|
137
|
+
async def e2e_state():
|
|
138
|
+
"""Compute E2E artifacts once; tests only assert.
|
|
139
|
+
|
|
140
|
+
This avoids repeating expensive setup and LLM calls across multiple tests.
|
|
141
|
+
"""
|
|
142
|
+
await setup_test_environment()
|
|
143
|
+
|
|
144
|
+
# --- Graph/vector engine consistency ---
|
|
145
|
+
graph_engine = await get_graph_engine()
|
|
146
|
+
_nodes, edges = await graph_engine.get_graph_data()
|
|
147
|
+
|
|
148
|
+
vector_engine = get_vector_engine()
|
|
149
|
+
collection = await vector_engine.search(
|
|
150
|
+
collection_name="Triplet_text",
|
|
151
|
+
query_text="Test",
|
|
152
|
+
limit=None,
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
# --- Retriever contexts ---
|
|
156
|
+
query = "Next to which country is Germany located?"
|
|
157
|
+
|
|
158
|
+
contexts = {
|
|
159
|
+
"graph_completion": await _get_retriever_context(GraphCompletionRetriever(), query=query),
|
|
160
|
+
"graph_completion_cot": await _get_retriever_context(
|
|
161
|
+
GraphCompletionCotRetriever(), query=query
|
|
162
|
+
),
|
|
163
|
+
"graph_completion_context_extension": await _get_retriever_context(
|
|
164
|
+
GraphCompletionContextExtensionRetriever(), query=query
|
|
165
|
+
),
|
|
166
|
+
"graph_summary_completion": await _get_retriever_context(
|
|
167
|
+
GraphSummaryCompletionRetriever(), query=query
|
|
168
|
+
),
|
|
169
|
+
"chunks": await _get_retriever_context(ChunksRetriever(top_k=5), query=query),
|
|
170
|
+
"summaries": await _get_retriever_context(SummariesRetriever(top_k=5), query=query),
|
|
171
|
+
"rag_completion": await _get_retriever_context(CompletionRetriever(top_k=3), query=query),
|
|
172
|
+
"temporal": await _get_retriever_context(TemporalRetriever(top_k=5), query=query),
|
|
173
|
+
"triplet": await _get_retriever_context(TripletRetriever(), query=query),
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
# --- Retriever triplets + vector distance validation ---
|
|
177
|
+
triplets = {
|
|
178
|
+
"graph_completion": await GraphCompletionRetriever().get_triplets(query=query),
|
|
179
|
+
"graph_completion_cot": await GraphCompletionCotRetriever().get_triplets(query=query),
|
|
180
|
+
"graph_completion_context_extension": await GraphCompletionContextExtensionRetriever().get_triplets(
|
|
181
|
+
query=query
|
|
182
|
+
),
|
|
183
|
+
"graph_summary_completion": await GraphSummaryCompletionRetriever().get_triplets(
|
|
184
|
+
query=query
|
|
185
|
+
),
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
# --- Search operations + graph side effects ---
|
|
137
189
|
completion_gk = await cognee.search(
|
|
138
190
|
query_type=SearchType.GRAPH_COMPLETION,
|
|
139
191
|
query_text="Where is germany located, next to which country?",
|
|
140
192
|
save_interaction=True,
|
|
193
|
+
verbose=True,
|
|
141
194
|
)
|
|
142
195
|
completion_cot = await cognee.search(
|
|
143
196
|
query_type=SearchType.GRAPH_COMPLETION_COT,
|
|
144
197
|
query_text="What is the country next to germany??",
|
|
145
198
|
save_interaction=True,
|
|
199
|
+
verbose=True,
|
|
146
200
|
)
|
|
147
201
|
completion_ext = await cognee.search(
|
|
148
202
|
query_type=SearchType.GRAPH_COMPLETION_CONTEXT_EXTENSION,
|
|
149
203
|
query_text="What is the name of the country next to germany",
|
|
150
204
|
save_interaction=True,
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
await cognee.search(
|
|
154
|
-
query_type=SearchType.FEEDBACK, query_text="This was not the best answer", last_k=1
|
|
205
|
+
verbose=True,
|
|
155
206
|
)
|
|
156
207
|
|
|
157
208
|
completion_sum = await cognee.search(
|
|
158
209
|
query_type=SearchType.GRAPH_SUMMARY_COMPLETION,
|
|
159
210
|
query_text="Next to which country is Germany located?",
|
|
160
211
|
save_interaction=True,
|
|
212
|
+
verbose=True,
|
|
161
213
|
)
|
|
162
214
|
completion_triplet = await cognee.search(
|
|
163
215
|
query_type=SearchType.TRIPLET_COMPLETION,
|
|
164
216
|
query_text="Next to which country is Germany located?",
|
|
165
217
|
save_interaction=True,
|
|
218
|
+
verbose=True,
|
|
166
219
|
)
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
220
|
+
completion_chunks = await cognee.search(
|
|
221
|
+
query_type=SearchType.CHUNKS,
|
|
222
|
+
query_text="Germany",
|
|
223
|
+
save_interaction=False,
|
|
224
|
+
verbose=True,
|
|
225
|
+
)
|
|
226
|
+
completion_summaries = await cognee.search(
|
|
227
|
+
query_type=SearchType.SUMMARIES,
|
|
228
|
+
query_text="Germany",
|
|
229
|
+
save_interaction=False,
|
|
230
|
+
verbose=True,
|
|
231
|
+
)
|
|
232
|
+
completion_rag = await cognee.search(
|
|
233
|
+
query_type=SearchType.RAG_COMPLETION,
|
|
234
|
+
query_text="Next to which country is Germany located?",
|
|
235
|
+
save_interaction=False,
|
|
236
|
+
verbose=True,
|
|
237
|
+
)
|
|
238
|
+
completion_temporal = await cognee.search(
|
|
239
|
+
query_type=SearchType.TEMPORAL,
|
|
240
|
+
query_text="Next to which country is Germany located?",
|
|
241
|
+
save_interaction=False,
|
|
242
|
+
verbose=True,
|
|
172
243
|
)
|
|
173
244
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
245
|
+
# Snapshot after all E2E operations above (used by assertion-only tests).
|
|
246
|
+
graph_snapshot = await (await get_graph_engine()).get_graph_data()
|
|
247
|
+
|
|
248
|
+
return {
|
|
249
|
+
"graph_edges": edges,
|
|
250
|
+
"triplet_collection": collection,
|
|
251
|
+
"vector_collection_edges_count": len(collection),
|
|
252
|
+
"graph_edges_count": len(edges),
|
|
253
|
+
"contexts": contexts,
|
|
254
|
+
"triplets": triplets,
|
|
255
|
+
"search_results": {
|
|
256
|
+
"graph_completion": completion_gk,
|
|
257
|
+
"graph_completion_cot": completion_cot,
|
|
258
|
+
"graph_completion_context_extension": completion_ext,
|
|
259
|
+
"graph_summary_completion": completion_sum,
|
|
260
|
+
"triplet_completion": completion_triplet,
|
|
261
|
+
"chunks": completion_chunks,
|
|
262
|
+
"summaries": completion_summaries,
|
|
263
|
+
"rag_completion": completion_rag,
|
|
264
|
+
"temporal": completion_temporal,
|
|
265
|
+
},
|
|
266
|
+
"graph_snapshot": graph_snapshot,
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
@pytest.mark.asyncio
|
|
271
|
+
async def test_e2e_graph_vector_consistency(e2e_state):
|
|
272
|
+
"""Graph and vector stores contain the same triplet edges."""
|
|
273
|
+
assert e2e_state["graph_edges_count"] == e2e_state["vector_collection_edges_count"]
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
@pytest.mark.asyncio
|
|
277
|
+
async def test_e2e_retriever_contexts(e2e_state):
|
|
278
|
+
"""All retrievers return non-empty, well-typed contexts."""
|
|
279
|
+
contexts = e2e_state["contexts"]
|
|
280
|
+
|
|
281
|
+
for name in [
|
|
282
|
+
"graph_completion",
|
|
283
|
+
"graph_completion_cot",
|
|
284
|
+
"graph_completion_context_extension",
|
|
285
|
+
"graph_summary_completion",
|
|
180
286
|
]:
|
|
181
|
-
|
|
182
|
-
assert
|
|
183
|
-
|
|
184
|
-
)
|
|
287
|
+
ctx = contexts[name]
|
|
288
|
+
assert isinstance(ctx, str), f"{name}: Context should be a string"
|
|
289
|
+
assert ctx.strip(), f"{name}: Context should not be empty"
|
|
290
|
+
lower = ctx.lower()
|
|
291
|
+
assert "germany" in lower or "netherlands" in lower
|
|
292
|
+
|
|
293
|
+
triplet_ctx = contexts["triplet"]
|
|
294
|
+
assert isinstance(triplet_ctx, str), "triplet: Context should be a string"
|
|
295
|
+
assert triplet_ctx.strip(), "triplet: Context should not be empty"
|
|
296
|
+
|
|
297
|
+
chunks_ctx = contexts["chunks"]
|
|
298
|
+
assert isinstance(chunks_ctx, str), "chunks: Context should be a string"
|
|
299
|
+
assert chunks_ctx.strip(), "chunks: Context should not be empty"
|
|
300
|
+
chunks_text = chunks_ctx.lower()
|
|
301
|
+
assert "germany" in chunks_text or "netherlands" in chunks_text
|
|
302
|
+
|
|
303
|
+
summaries_ctx = contexts["summaries"]
|
|
304
|
+
assert isinstance(summaries_ctx, str), "summaries: Context should be a string"
|
|
305
|
+
assert summaries_ctx.strip(), "summaries: Context should not be empty"
|
|
306
|
+
|
|
307
|
+
rag_ctx = contexts["rag_completion"]
|
|
308
|
+
assert isinstance(rag_ctx, str), "rag_completion: Context should be a string"
|
|
309
|
+
assert rag_ctx.strip(), "rag_completion: Context should not be empty"
|
|
310
|
+
|
|
311
|
+
temporal_ctx = contexts["temporal"]
|
|
312
|
+
assert isinstance(temporal_ctx, str), "temporal: Context should be a string"
|
|
313
|
+
assert temporal_ctx.strip(), "temporal: Context should not be empty"
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
@pytest.mark.asyncio
|
|
317
|
+
async def test_e2e_retriever_triplets_have_vector_distances(e2e_state):
|
|
318
|
+
"""Graph retriever triplets include sane vector_distance metadata."""
|
|
319
|
+
for name, triplets in e2e_state["triplets"].items():
|
|
320
|
+
assert isinstance(triplets, list), f"{name}: Triplets should be a list"
|
|
321
|
+
assert triplets, f"{name}: Triplets list should not be empty"
|
|
322
|
+
for edge in triplets:
|
|
323
|
+
assert isinstance(edge, Edge), f"{name}: Elements should be Edge instances"
|
|
324
|
+
vector_distances = edge.attributes.get("vector_distance")
|
|
325
|
+
assert vector_distances is not None, (
|
|
326
|
+
f"{name}: vector_distance should be set when retrievers return results"
|
|
327
|
+
)
|
|
328
|
+
assert isinstance(vector_distances, list) and vector_distances, (
|
|
329
|
+
f"{name}: vector_distance should be a non-empty list"
|
|
330
|
+
)
|
|
331
|
+
distance = vector_distances[0]
|
|
332
|
+
assert isinstance(distance, float), (
|
|
333
|
+
f"{name}: vector_distance[0] should be float, got {type(distance)}"
|
|
334
|
+
)
|
|
335
|
+
assert 0 <= distance <= 1
|
|
185
336
|
|
|
186
|
-
|
|
337
|
+
node1_distances = edge.node1.attributes.get("vector_distance")
|
|
338
|
+
node2_distances = edge.node2.attributes.get("vector_distance")
|
|
339
|
+
assert node1_distances is not None, (
|
|
340
|
+
f"{name}: node1 vector_distance should be set when retrievers return results"
|
|
341
|
+
)
|
|
342
|
+
assert node2_distances is not None, (
|
|
343
|
+
f"{name}: node2 vector_distance should be set when retrievers return results"
|
|
344
|
+
)
|
|
345
|
+
assert isinstance(node1_distances, list) and node1_distances, (
|
|
346
|
+
f"{name}: node1 vector_distance should be a non-empty list"
|
|
347
|
+
)
|
|
348
|
+
assert isinstance(node2_distances, list) and node2_distances, (
|
|
349
|
+
f"{name}: node2 vector_distance should be a non-empty list"
|
|
350
|
+
)
|
|
351
|
+
node1_distance = node1_distances[0]
|
|
352
|
+
node2_distance = node2_distances[0]
|
|
353
|
+
assert isinstance(node1_distance, float), (
|
|
354
|
+
f"{name}: node1 vector_distance[0] should be float, got {type(node1_distance)}"
|
|
355
|
+
)
|
|
356
|
+
assert isinstance(node2_distance, float), (
|
|
357
|
+
f"{name}: node2 vector_distance[0] should be float, got {type(node2_distance)}"
|
|
358
|
+
)
|
|
359
|
+
assert 0 <= node1_distance <= 1
|
|
360
|
+
assert 0 <= node2_distance <= 1
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
@pytest.mark.asyncio
|
|
364
|
+
async def test_e2e_search_results_and_wrappers(e2e_state):
|
|
365
|
+
"""Search returns expected shapes across search types and access modes."""
|
|
366
|
+
from cognee.context_global_variables import backend_access_control_enabled
|
|
367
|
+
|
|
368
|
+
sr = e2e_state["search_results"]
|
|
369
|
+
|
|
370
|
+
# Completion-like search types: validate wrapper + content
|
|
371
|
+
for name in [
|
|
372
|
+
"graph_completion",
|
|
373
|
+
"graph_completion_cot",
|
|
374
|
+
"graph_completion_context_extension",
|
|
375
|
+
"graph_summary_completion",
|
|
376
|
+
"triplet_completion",
|
|
377
|
+
"rag_completion",
|
|
378
|
+
"temporal",
|
|
379
|
+
]:
|
|
380
|
+
search_results = sr[name]
|
|
381
|
+
assert isinstance(search_results, list), f"{name}: should return a list"
|
|
382
|
+
assert len(search_results) == 1, f"{name}: expected single-element list"
|
|
187
383
|
|
|
188
384
|
if backend_access_control_enabled():
|
|
189
|
-
|
|
385
|
+
wrapper = search_results[0]
|
|
386
|
+
assert isinstance(wrapper, dict), (
|
|
387
|
+
f"{name}: expected wrapper dict in access control mode"
|
|
388
|
+
)
|
|
389
|
+
assert wrapper.get("dataset_id"), f"{name}: missing dataset_id in wrapper"
|
|
390
|
+
assert wrapper.get("dataset_name") == "test_dataset"
|
|
391
|
+
result_payload = wrapper.get("text_result")
|
|
190
392
|
else:
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
assert "netherlands" in text.lower(), (
|
|
195
|
-
f"{name}: expected 'netherlands' in result, got: {text!r}"
|
|
196
|
-
)
|
|
393
|
+
entry = search_results[0]
|
|
394
|
+
assert isinstance(entry, dict), f"{name}: expected dict entries"
|
|
395
|
+
result_payload = entry.get("text_result")
|
|
197
396
|
|
|
198
|
-
|
|
199
|
-
|
|
397
|
+
text_blob = str(result_payload)
|
|
398
|
+
assert text_blob.strip()
|
|
399
|
+
assert "netherlands" in text_blob.lower()
|
|
200
400
|
|
|
201
|
-
|
|
401
|
+
# Non-LLM search types: CHUNKS / SUMMARIES validate payload list + text
|
|
402
|
+
for name in ["chunks", "summaries"]:
|
|
403
|
+
search_results = sr[name]
|
|
404
|
+
assert isinstance(search_results, list), f"{name}: should return a list"
|
|
405
|
+
assert search_results, f"{name}: should not be empty"
|
|
202
406
|
|
|
203
|
-
|
|
407
|
+
entry = search_results[0]
|
|
408
|
+
assert isinstance(entry, dict), f"{name}: expected dict entries"
|
|
204
409
|
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
f"Expected exactly four CogneeUserInteraction nodes, but found {type_counts.get('CogneeUserInteraction', 0)}"
|
|
208
|
-
)
|
|
410
|
+
context_result = entry.get("context_result")
|
|
411
|
+
text_result = entry.get("text_result")
|
|
209
412
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
)
|
|
413
|
+
assert isinstance(context_result, str) and context_result.strip()
|
|
414
|
+
lower_context = context_result.lower()
|
|
415
|
+
assert "germany" in lower_context or "netherlands" in lower_context
|
|
214
416
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
417
|
+
assert isinstance(text_result, list) and text_result
|
|
418
|
+
first_text = text_result[0]
|
|
419
|
+
assert isinstance(first_text, dict)
|
|
420
|
+
assert str(first_text.get("text", "")).strip()
|
|
219
421
|
|
|
220
|
-
# Assert that there are at least 10 'used_graph_element_to_answer' edges.
|
|
221
|
-
assert edge_type_counts.get("used_graph_element_to_answer", 0) >= 10, (
|
|
222
|
-
f"Expected at least ten 'used_graph_element_to_answer' edges, but found {edge_type_counts.get('used_graph_element_to_answer', 0)}"
|
|
223
|
-
)
|
|
224
422
|
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
423
|
+
@pytest.mark.asyncio
|
|
424
|
+
async def test_e2e_graph_side_effects_and_node_fields(e2e_state):
|
|
425
|
+
"""Search interactions create expected graph nodes/edges and required fields."""
|
|
426
|
+
graph = e2e_state["graph_snapshot"]
|
|
427
|
+
nodes, edges = graph
|
|
229
428
|
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
f"Expected at least six 'belongs_to_set' edges, but found {edge_type_counts.get('belongs_to_set', 0)}"
|
|
233
|
-
)
|
|
429
|
+
type_counts = Counter(node_data[1].get("type", {}) for node_data in nodes)
|
|
430
|
+
edge_type_counts = Counter(edge_type[2] for edge_type in edges)
|
|
234
431
|
|
|
235
|
-
|
|
432
|
+
assert type_counts.get("CogneeUserInteraction", 0) == 4
|
|
433
|
+
assert type_counts.get("NodeSet", 0) == 1
|
|
434
|
+
assert edge_type_counts.get("used_graph_element_to_answer", 0) >= 10
|
|
435
|
+
assert edge_type_counts.get("belongs_to_set", 0) >= 4
|
|
236
436
|
|
|
237
437
|
required_fields_user_interaction = {"question", "answer", "context"}
|
|
238
|
-
required_fields_feedback = {"feedback", "sentiment"}
|
|
239
438
|
|
|
240
439
|
for node_id, data in nodes:
|
|
241
440
|
if data.get("type") == "CogneeUserInteraction":
|
|
242
|
-
assert required_fields_user_interaction.issubset(data.keys())
|
|
243
|
-
f"Node {node_id} is missing fields: {required_fields_user_interaction - set(data.keys())}"
|
|
244
|
-
)
|
|
245
|
-
|
|
441
|
+
assert required_fields_user_interaction.issubset(data.keys())
|
|
246
442
|
for field in required_fields_user_interaction:
|
|
247
443
|
value = data[field]
|
|
248
|
-
assert isinstance(value, str) and value.strip()
|
|
249
|
-
f"Node {node_id} has invalid value for '{field}': {value!r}"
|
|
250
|
-
)
|
|
251
|
-
|
|
252
|
-
if data.get("type") == "CogneeUserFeedback":
|
|
253
|
-
assert required_fields_feedback.issubset(data.keys()), (
|
|
254
|
-
f"Node {node_id} is missing fields: {required_fields_feedback - set(data.keys())}"
|
|
255
|
-
)
|
|
256
|
-
|
|
257
|
-
for field in required_fields_feedback:
|
|
258
|
-
value = data[field]
|
|
259
|
-
assert isinstance(value, str) and value.strip(), (
|
|
260
|
-
f"Node {node_id} has invalid value for '{field}': {value!r}"
|
|
261
|
-
)
|
|
262
|
-
|
|
263
|
-
await cognee.prune.prune_data()
|
|
264
|
-
await cognee.prune.prune_system(metadata=True)
|
|
265
|
-
|
|
266
|
-
await cognee.add(text_1, dataset_name)
|
|
267
|
-
|
|
268
|
-
await cognee.add([text], dataset_name)
|
|
269
|
-
|
|
270
|
-
await cognee.cognify([dataset_name])
|
|
271
|
-
|
|
272
|
-
await cognee.search(
|
|
273
|
-
query_type=SearchType.GRAPH_COMPLETION,
|
|
274
|
-
query_text="Next to which country is Germany located?",
|
|
275
|
-
save_interaction=True,
|
|
276
|
-
)
|
|
277
|
-
|
|
278
|
-
await cognee.search(
|
|
279
|
-
query_type=SearchType.FEEDBACK,
|
|
280
|
-
query_text="This was the best answer I've ever seen",
|
|
281
|
-
last_k=1,
|
|
282
|
-
)
|
|
283
|
-
|
|
284
|
-
await cognee.search(
|
|
285
|
-
query_type=SearchType.FEEDBACK,
|
|
286
|
-
query_text="Wow the correctness of this answer blows my mind",
|
|
287
|
-
last_k=1,
|
|
288
|
-
)
|
|
289
|
-
|
|
290
|
-
graph = await graph_engine.get_graph_data()
|
|
291
|
-
|
|
292
|
-
edges = graph[1]
|
|
293
|
-
|
|
294
|
-
for from_node, to_node, relationship_name, properties in edges:
|
|
295
|
-
if relationship_name == "used_graph_element_to_answer":
|
|
296
|
-
assert properties["feedback_weight"] >= 6, (
|
|
297
|
-
"Feedback weight calculation is not correct, it should be more then 6."
|
|
298
|
-
)
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
if __name__ == "__main__":
|
|
302
|
-
import asyncio
|
|
303
|
-
|
|
304
|
-
asyncio.run(main())
|
|
444
|
+
assert isinstance(value, str) and value.strip()
|