cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +2 -0
- cognee/alembic/README +1 -0
- cognee/alembic/env.py +107 -0
- cognee/alembic/script.py.mako +26 -0
- cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
- cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
- cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
- cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
- cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
- cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
- cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
- cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
- cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
- cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
- cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
- cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
- cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
- cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
- cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
- cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
- cognee/alembic.ini +117 -0
- cognee/api/v1/add/routers/get_add_router.py +2 -0
- cognee/api/v1/cognify/cognify.py +11 -6
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
- cognee/api/v1/config/config.py +60 -0
- cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
- cognee/api/v1/memify/routers/get_memify_router.py +2 -0
- cognee/api/v1/search/routers/get_search_router.py +21 -6
- cognee/api/v1/search/search.py +25 -5
- cognee/api/v1/sync/routers/get_sync_router.py +3 -3
- cognee/cli/commands/add_command.py +1 -1
- cognee/cli/commands/cognify_command.py +6 -0
- cognee/cli/commands/config_command.py +1 -1
- cognee/context_global_variables.py +5 -1
- cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
- cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
- cognee/infrastructure/databases/cache/config.py +6 -0
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
- cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/config.py +6 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
- cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
- cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
- cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
- cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
- cognee/infrastructure/llm/prompts/test.txt +1 -1
- cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
- cognee/modules/chunking/models/DocumentChunk.py +0 -1
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/models/Data.py +1 -0
- cognee/modules/engine/models/Entity.py +0 -1
- cognee/modules/engine/operations/setup.py +6 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
- cognee/modules/notebooks/methods/__init__.py +1 -0
- cognee/modules/notebooks/methods/create_notebook.py +0 -34
- cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
- cognee/modules/notebooks/methods/get_notebooks.py +12 -8
- cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
- cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
- cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
- cognee/modules/retrieval/__init__.py +0 -1
- cognee/modules/retrieval/base_retriever.py +66 -10
- cognee/modules/retrieval/chunks_retriever.py +57 -49
- cognee/modules/retrieval/coding_rules_retriever.py +12 -5
- cognee/modules/retrieval/completion_retriever.py +29 -28
- cognee/modules/retrieval/cypher_search_retriever.py +25 -20
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
- cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
- cognee/modules/retrieval/graph_completion_retriever.py +78 -63
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/lexical_retriever.py +34 -12
- cognee/modules/retrieval/natural_language_retriever.py +18 -15
- cognee/modules/retrieval/summaries_retriever.py +51 -34
- cognee/modules/retrieval/temporal_retriever.py +59 -49
- cognee/modules/retrieval/triplet_retriever.py +31 -32
- cognee/modules/retrieval/utils/access_tracking.py +88 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
- cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
- cognee/modules/search/methods/__init__.py +1 -0
- cognee/modules/search/methods/get_retriever_output.py +53 -0
- cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
- cognee/modules/search/methods/search.py +90 -215
- cognee/modules/search/models/SearchResultPayload.py +67 -0
- cognee/modules/search/types/SearchResult.py +1 -8
- cognee/modules/search/types/SearchType.py +1 -2
- cognee/modules/search/types/__init__.py +1 -1
- cognee/modules/search/utils/__init__.py +1 -2
- cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
- cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
- cognee/modules/users/authentication/default/default_transport.py +11 -1
- cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
- cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
- cognee/modules/users/methods/create_user.py +0 -9
- cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
- cognee/modules/visualization/cognee_network_visualization.py +1 -1
- cognee/run_migrations.py +48 -0
- cognee/shared/exceptions/__init__.py +1 -3
- cognee/shared/exceptions/exceptions.py +11 -1
- cognee/shared/usage_logger.py +332 -0
- cognee/shared/utils.py +12 -5
- cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
- cognee/tasks/memify/extract_usage_frequency.py +613 -0
- cognee/tasks/summarization/models.py +0 -2
- cognee/tasks/temporal_graph/__init__.py +0 -1
- cognee/tasks/translation/__init__.py +96 -0
- cognee/tasks/translation/config.py +110 -0
- cognee/tasks/translation/detect_language.py +190 -0
- cognee/tasks/translation/exceptions.py +62 -0
- cognee/tasks/translation/models.py +72 -0
- cognee/tasks/translation/providers/__init__.py +44 -0
- cognee/tasks/translation/providers/azure_provider.py +192 -0
- cognee/tasks/translation/providers/base.py +85 -0
- cognee/tasks/translation/providers/google_provider.py +158 -0
- cognee/tasks/translation/providers/llm_provider.py +143 -0
- cognee/tasks/translation/translate_content.py +282 -0
- cognee/tasks/web_scraper/default_url_crawler.py +6 -2
- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
- cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
- cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
- cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
- cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
- cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
- cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
- cognee/tests/integration/retrieval/test_structured_output.py +62 -18
- cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
- cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
- cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
- cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
- cognee/tests/tasks/translation/README.md +147 -0
- cognee/tests/tasks/translation/__init__.py +1 -0
- cognee/tests/tasks/translation/config_test.py +93 -0
- cognee/tests/tasks/translation/detect_language_test.py +118 -0
- cognee/tests/tasks/translation/providers_test.py +151 -0
- cognee/tests/tasks/translation/translate_content_test.py +213 -0
- cognee/tests/test_chromadb.py +1 -1
- cognee/tests/test_cleanup_unused_data.py +165 -0
- cognee/tests/test_delete_by_id.py +6 -6
- cognee/tests/test_extract_usage_frequency.py +308 -0
- cognee/tests/test_kuzu.py +17 -7
- cognee/tests/test_lancedb.py +3 -1
- cognee/tests/test_library.py +1 -1
- cognee/tests/test_neo4j.py +17 -7
- cognee/tests/test_neptune_analytics_vector.py +3 -1
- cognee/tests/test_permissions.py +172 -187
- cognee/tests/test_pgvector.py +3 -1
- cognee/tests/test_relational_db_migration.py +15 -1
- cognee/tests/test_remote_kuzu.py +3 -1
- cognee/tests/test_s3_file_storage.py +1 -1
- cognee/tests/test_search_db.py +97 -110
- cognee/tests/test_usage_logger_e2e.py +268 -0
- cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
- cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
- cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
- cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
- cognee/tests/unit/modules/search/test_search.py +176 -0
- cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
- cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
- cognee/tests/unit/shared/test_usage_logger.py +241 -0
- cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/METADATA +17 -10
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/RECORD +232 -144
- cognee/api/.env.example +0 -5
- cognee/modules/retrieval/base_graph_retriever.py +0 -24
- cognee/modules/search/methods/get_search_type_tools.py +0 -223
- cognee/modules/search/methods/no_access_control_search.py +0 -62
- cognee/modules/search/utils/prepare_search_result.py +0 -63
- cognee/tests/test_feedback_enrichment.py +0 -174
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/WHEEL +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -1,21 +1,18 @@
|
|
|
1
|
-
import
|
|
2
|
-
import time
|
|
3
|
-
from typing import List, Optional, Type
|
|
1
|
+
from typing import List, Optional, Type, Union
|
|
4
2
|
|
|
5
3
|
from cognee.shared.logging_utils import get_logger, ERROR
|
|
6
4
|
from cognee.modules.graph.exceptions.exceptions import EntityNotFoundError
|
|
7
|
-
from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError
|
|
8
5
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
9
|
-
from cognee.infrastructure.databases.vector import
|
|
6
|
+
from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError
|
|
10
7
|
from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
|
|
11
8
|
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
|
|
12
|
-
from cognee.modules.
|
|
13
|
-
from cognee.shared.utils import send_telemetry
|
|
9
|
+
from cognee.modules.retrieval.utils.node_edge_vector_search import NodeEdgeVectorSearch
|
|
14
10
|
|
|
15
11
|
logger = get_logger(level=ERROR)
|
|
16
12
|
|
|
17
13
|
|
|
18
14
|
def format_triplets(edges):
|
|
15
|
+
"""Formats edges into human-readable triplet strings."""
|
|
19
16
|
triplets = []
|
|
20
17
|
for edge in edges:
|
|
21
18
|
node1 = edge.node1
|
|
@@ -24,12 +21,10 @@ def format_triplets(edges):
|
|
|
24
21
|
node1_attributes = node1.attributes
|
|
25
22
|
node2_attributes = node2.attributes
|
|
26
23
|
|
|
27
|
-
# Filter only non-None properties
|
|
28
24
|
node1_info = {key: value for key, value in node1_attributes.items() if value is not None}
|
|
29
25
|
node2_info = {key: value for key, value in node2_attributes.items() if value is not None}
|
|
30
26
|
edge_info = {key: value for key, value in edge_attributes.items() if value is not None}
|
|
31
27
|
|
|
32
|
-
# Create the formatted triplet
|
|
33
28
|
triplet = f"Node1: {node1_info}\nEdge: {edge_info}\nNode2: {node2_info}\n\n\n"
|
|
34
29
|
triplets.append(triplet)
|
|
35
30
|
|
|
@@ -51,7 +46,6 @@ async def get_memory_fragment(
|
|
|
51
46
|
|
|
52
47
|
try:
|
|
53
48
|
graph_engine = await get_graph_engine()
|
|
54
|
-
|
|
55
49
|
await memory_fragment.project_graph_from_db(
|
|
56
50
|
graph_engine,
|
|
57
51
|
node_properties_to_project=properties_to_project,
|
|
@@ -61,20 +55,64 @@ async def get_memory_fragment(
|
|
|
61
55
|
relevant_ids_to_filter=relevant_ids_to_filter,
|
|
62
56
|
triplet_distance_penalty=triplet_distance_penalty,
|
|
63
57
|
)
|
|
64
|
-
|
|
65
58
|
except EntityNotFoundError:
|
|
66
|
-
# This is expected behavior - continue with empty fragment
|
|
67
59
|
pass
|
|
68
60
|
except Exception as e:
|
|
69
61
|
logger.error(f"Error during memory fragment creation: {str(e)}")
|
|
70
|
-
# Still return the fragment even if projection failed
|
|
71
|
-
pass
|
|
72
62
|
|
|
73
63
|
return memory_fragment
|
|
74
64
|
|
|
75
65
|
|
|
66
|
+
async def _get_top_triplet_importances(
|
|
67
|
+
memory_fragment: Optional[CogneeGraph],
|
|
68
|
+
vector_search: NodeEdgeVectorSearch,
|
|
69
|
+
properties_to_project: Optional[List[str]],
|
|
70
|
+
node_type: Optional[Type],
|
|
71
|
+
node_name: Optional[List[str]],
|
|
72
|
+
triplet_distance_penalty: float,
|
|
73
|
+
wide_search_limit: Optional[int],
|
|
74
|
+
top_k: int,
|
|
75
|
+
query_list_length: Optional[int] = None,
|
|
76
|
+
) -> Union[List[Edge], List[List[Edge]]]:
|
|
77
|
+
"""Creates memory fragment (if needed), maps distances, and calculates top triplet importances.
|
|
78
|
+
|
|
79
|
+
Args:
|
|
80
|
+
query_list_length: Number of queries in batch mode (None for single-query mode).
|
|
81
|
+
When None, node_distances/edge_distances are flat lists; when set, they are list-of-lists.
|
|
82
|
+
|
|
83
|
+
Returns:
|
|
84
|
+
List[Edge]: For single-query mode (query_list_length is None).
|
|
85
|
+
List[List[Edge]]: For batch mode (query_list_length is set), one list per query.
|
|
86
|
+
"""
|
|
87
|
+
if memory_fragment is None:
|
|
88
|
+
if wide_search_limit is None:
|
|
89
|
+
relevant_node_ids = None
|
|
90
|
+
else:
|
|
91
|
+
relevant_node_ids = vector_search.extract_relevant_node_ids()
|
|
92
|
+
|
|
93
|
+
memory_fragment = await get_memory_fragment(
|
|
94
|
+
properties_to_project=properties_to_project,
|
|
95
|
+
node_type=node_type,
|
|
96
|
+
node_name=node_name,
|
|
97
|
+
relevant_ids_to_filter=relevant_node_ids,
|
|
98
|
+
triplet_distance_penalty=triplet_distance_penalty,
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
await memory_fragment.map_vector_distances_to_graph_nodes(
|
|
102
|
+
node_distances=vector_search.node_distances, query_list_length=query_list_length
|
|
103
|
+
)
|
|
104
|
+
await memory_fragment.map_vector_distances_to_graph_edges(
|
|
105
|
+
edge_distances=vector_search.edge_distances, query_list_length=query_list_length
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
return await memory_fragment.calculate_top_triplet_importances(
|
|
109
|
+
k=top_k, query_list_length=query_list_length
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
|
|
76
113
|
async def brute_force_triplet_search(
|
|
77
|
-
query: str,
|
|
114
|
+
query: Optional[str] = None,
|
|
115
|
+
query_batch: Optional[List[str]] = None,
|
|
78
116
|
top_k: int = 5,
|
|
79
117
|
collections: Optional[List[str]] = None,
|
|
80
118
|
properties_to_project: Optional[List[str]] = None,
|
|
@@ -83,33 +121,49 @@ async def brute_force_triplet_search(
|
|
|
83
121
|
node_name: Optional[List[str]] = None,
|
|
84
122
|
wide_search_top_k: Optional[int] = 100,
|
|
85
123
|
triplet_distance_penalty: Optional[float] = 3.5,
|
|
86
|
-
) -> List[Edge]:
|
|
124
|
+
) -> Union[List[Edge], List[List[Edge]]]:
|
|
87
125
|
"""
|
|
88
126
|
Performs a brute force search to retrieve the top triplets from the graph.
|
|
89
127
|
|
|
90
128
|
Args:
|
|
91
|
-
query (str): The search query.
|
|
129
|
+
query (Optional[str]): The search query (single query mode). Exactly one of query or query_batch must be provided.
|
|
130
|
+
query_batch (Optional[List[str]]): List of search queries (batch mode). Exactly one of query or query_batch must be provided.
|
|
92
131
|
top_k (int): The number of top results to retrieve.
|
|
93
132
|
collections (Optional[List[str]]): List of collections to query.
|
|
94
133
|
properties_to_project (Optional[List[str]]): List of properties to project.
|
|
95
134
|
memory_fragment (Optional[CogneeGraph]): Existing memory fragment to reuse.
|
|
96
135
|
node_type: node type to filter
|
|
97
136
|
node_name: node name to filter
|
|
98
|
-
wide_search_top_k (Optional[int]): Number of initial elements to retrieve from collections
|
|
137
|
+
wide_search_top_k (Optional[int]): Number of initial elements to retrieve from collections.
|
|
138
|
+
Ignored in batch mode (always None to project full graph).
|
|
99
139
|
triplet_distance_penalty (Optional[float]): Default distance penalty in graph projection
|
|
100
140
|
|
|
101
141
|
Returns:
|
|
102
|
-
|
|
142
|
+
List[Edge]: The top triplet results for single query mode (flat list).
|
|
143
|
+
List[List[Edge]]: List of top triplet results (one per query) for batch mode (list-of-lists).
|
|
144
|
+
|
|
145
|
+
Note:
|
|
146
|
+
In single-query mode, node_distances and edge_distances are stored as flat lists.
|
|
147
|
+
In batch mode, they are stored as list-of-lists (one list per query).
|
|
103
148
|
"""
|
|
104
|
-
if not
|
|
149
|
+
if query is not None and query_batch is not None:
|
|
150
|
+
raise ValueError("Cannot provide both 'query' and 'query_batch'; use exactly one.")
|
|
151
|
+
if query is None and query_batch is None:
|
|
152
|
+
raise ValueError("Must provide either 'query' or 'query_batch'.")
|
|
153
|
+
if query is not None and (not query or not isinstance(query, str)):
|
|
105
154
|
raise ValueError("The query must be a non-empty string.")
|
|
155
|
+
if query_batch is not None:
|
|
156
|
+
if not isinstance(query_batch, list) or not query_batch:
|
|
157
|
+
raise ValueError("query_batch must be a non-empty list of strings.")
|
|
158
|
+
if not all(isinstance(q, str) and q for q in query_batch):
|
|
159
|
+
raise ValueError("All items in query_batch must be non-empty strings.")
|
|
106
160
|
if top_k <= 0:
|
|
107
161
|
raise ValueError("top_k must be a positive integer.")
|
|
108
162
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
163
|
+
query_list_length = len(query_batch) if query_batch is not None else None
|
|
164
|
+
wide_search_limit = (
|
|
165
|
+
None if query_list_length else (wide_search_top_k if node_name is None else None)
|
|
166
|
+
)
|
|
113
167
|
|
|
114
168
|
if collections is None:
|
|
115
169
|
collections = [
|
|
@@ -123,77 +177,37 @@ async def brute_force_triplet_search(
|
|
|
123
177
|
collections.append("EdgeType_relationship_name")
|
|
124
178
|
|
|
125
179
|
try:
|
|
126
|
-
|
|
127
|
-
except Exception as e:
|
|
128
|
-
logger.error("Failed to initialize vector engine: %s", e)
|
|
129
|
-
raise RuntimeError("Initialization error") from e
|
|
130
|
-
|
|
131
|
-
query_vector = (await vector_engine.embedding_engine.embed_text([query]))[0]
|
|
180
|
+
vector_search = NodeEdgeVectorSearch()
|
|
132
181
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
except CollectionNotFoundError:
|
|
139
|
-
return []
|
|
140
|
-
|
|
141
|
-
try:
|
|
142
|
-
start_time = time.time()
|
|
143
|
-
|
|
144
|
-
results = await asyncio.gather(
|
|
145
|
-
*[search_in_collection(collection_name) for collection_name in collections]
|
|
182
|
+
await vector_search.embed_and_retrieve_distances(
|
|
183
|
+
query=None if query_list_length else query,
|
|
184
|
+
query_batch=query_batch if query_list_length else None,
|
|
185
|
+
collections=collections,
|
|
186
|
+
wide_search_limit=wide_search_limit,
|
|
146
187
|
)
|
|
147
188
|
|
|
148
|
-
if
|
|
149
|
-
return []
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
189
|
+
if not vector_search.has_results():
|
|
190
|
+
return [[] for _ in range(query_list_length)] if query_list_length else []
|
|
191
|
+
|
|
192
|
+
results = await _get_top_triplet_importances(
|
|
193
|
+
memory_fragment,
|
|
194
|
+
vector_search,
|
|
195
|
+
properties_to_project,
|
|
196
|
+
node_type,
|
|
197
|
+
node_name,
|
|
198
|
+
triplet_distance_penalty,
|
|
199
|
+
wide_search_limit,
|
|
200
|
+
top_k,
|
|
201
|
+
query_list_length=query_list_length,
|
|
155
202
|
)
|
|
156
203
|
|
|
157
|
-
node_distances = {collection: result for collection, result in zip(collections, results)}
|
|
158
|
-
|
|
159
|
-
edge_distances = node_distances.get("EdgeType_relationship_name", None)
|
|
160
|
-
|
|
161
|
-
if wide_search_limit is not None:
|
|
162
|
-
relevant_ids_to_filter = list(
|
|
163
|
-
{
|
|
164
|
-
str(getattr(scored_node, "id"))
|
|
165
|
-
for collection_name, score_collection in node_distances.items()
|
|
166
|
-
if collection_name != "EdgeType_relationship_name"
|
|
167
|
-
and isinstance(score_collection, (list, tuple))
|
|
168
|
-
for scored_node in score_collection
|
|
169
|
-
if getattr(scored_node, "id", None)
|
|
170
|
-
}
|
|
171
|
-
)
|
|
172
|
-
else:
|
|
173
|
-
relevant_ids_to_filter = None
|
|
174
|
-
|
|
175
|
-
if memory_fragment is None:
|
|
176
|
-
memory_fragment = await get_memory_fragment(
|
|
177
|
-
properties_to_project=properties_to_project,
|
|
178
|
-
node_type=node_type,
|
|
179
|
-
node_name=node_name,
|
|
180
|
-
relevant_ids_to_filter=relevant_ids_to_filter,
|
|
181
|
-
triplet_distance_penalty=triplet_distance_penalty,
|
|
182
|
-
)
|
|
183
|
-
|
|
184
|
-
await memory_fragment.map_vector_distances_to_graph_nodes(node_distances=node_distances)
|
|
185
|
-
await memory_fragment.map_vector_distances_to_graph_edges(edge_distances=edge_distances)
|
|
186
|
-
|
|
187
|
-
results = await memory_fragment.calculate_top_triplet_importances(k=top_k)
|
|
188
|
-
|
|
189
204
|
return results
|
|
190
|
-
|
|
191
205
|
except CollectionNotFoundError:
|
|
192
|
-
return []
|
|
206
|
+
return [[] for _ in range(query_list_length)] if query_list_length else []
|
|
193
207
|
except Exception as error:
|
|
194
208
|
logger.error(
|
|
195
209
|
"Error during brute force search for query: %s. Error: %s",
|
|
196
|
-
query,
|
|
210
|
+
query_batch if query_list_length else [query],
|
|
197
211
|
error,
|
|
198
212
|
)
|
|
199
213
|
raise error
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import time
|
|
3
|
+
from typing import Any, List, Optional
|
|
4
|
+
|
|
5
|
+
from cognee.shared.logging_utils import get_logger, ERROR
|
|
6
|
+
from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError
|
|
7
|
+
from cognee.infrastructure.databases.vector import get_vector_engine
|
|
8
|
+
|
|
9
|
+
logger = get_logger(level=ERROR)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class NodeEdgeVectorSearch:
|
|
13
|
+
"""Manages vector search and distance retrieval for graph nodes and edges."""
|
|
14
|
+
|
|
15
|
+
def __init__(self, edge_collection: str = "EdgeType_relationship_name", vector_engine=None):
|
|
16
|
+
self.edge_collection = edge_collection
|
|
17
|
+
self.vector_engine = vector_engine or self._init_vector_engine()
|
|
18
|
+
self.query_vector: Optional[Any] = None
|
|
19
|
+
self.node_distances: dict[str, list[Any]] = {}
|
|
20
|
+
self.edge_distances: list[Any] = []
|
|
21
|
+
self.query_list_length: Optional[int] = None
|
|
22
|
+
|
|
23
|
+
def _init_vector_engine(self):
|
|
24
|
+
try:
|
|
25
|
+
return get_vector_engine()
|
|
26
|
+
except Exception as e:
|
|
27
|
+
logger.error("Failed to initialize vector engine: %s", e)
|
|
28
|
+
raise RuntimeError("Initialization error") from e
|
|
29
|
+
|
|
30
|
+
async def embed_and_retrieve_distances(
|
|
31
|
+
self,
|
|
32
|
+
query: Optional[str] = None,
|
|
33
|
+
query_batch: Optional[List[str]] = None,
|
|
34
|
+
collections: List[str] = None,
|
|
35
|
+
wide_search_limit: Optional[int] = None,
|
|
36
|
+
):
|
|
37
|
+
"""Embeds query/queries and retrieves vector distances from all collections."""
|
|
38
|
+
if query is not None and query_batch is not None:
|
|
39
|
+
raise ValueError("Cannot provide both 'query' and 'query_batch'; use exactly one.")
|
|
40
|
+
if query is None and query_batch is None:
|
|
41
|
+
raise ValueError("Must provide either 'query' or 'query_batch'.")
|
|
42
|
+
if not collections:
|
|
43
|
+
raise ValueError("'collections' must be a non-empty list.")
|
|
44
|
+
|
|
45
|
+
start_time = time.time()
|
|
46
|
+
|
|
47
|
+
if query_batch is not None:
|
|
48
|
+
self.query_list_length = len(query_batch)
|
|
49
|
+
search_results = await self._run_batch_search(collections, query_batch)
|
|
50
|
+
else:
|
|
51
|
+
self.query_list_length = None
|
|
52
|
+
search_results = await self._run_single_search(collections, query, wide_search_limit)
|
|
53
|
+
|
|
54
|
+
elapsed_time = time.time() - start_time
|
|
55
|
+
collections_with_results = sum(1 for result in search_results if any(result))
|
|
56
|
+
logger.info(
|
|
57
|
+
f"Vector collection retrieval completed: Retrieved distances from "
|
|
58
|
+
f"{collections_with_results} collections in {elapsed_time:.2f}s"
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
self.set_distances_from_results(collections, search_results, self.query_list_length)
|
|
62
|
+
|
|
63
|
+
def has_results(self) -> bool:
|
|
64
|
+
"""Checks if any collections returned results."""
|
|
65
|
+
if self.query_list_length is None:
|
|
66
|
+
if self.edge_distances and any(self.edge_distances):
|
|
67
|
+
return True
|
|
68
|
+
return any(
|
|
69
|
+
bool(collection_results) for collection_results in self.node_distances.values()
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
if self.edge_distances and any(inner_list for inner_list in self.edge_distances):
|
|
73
|
+
return True
|
|
74
|
+
return any(
|
|
75
|
+
any(results_per_query for results_per_query in collection_results)
|
|
76
|
+
for collection_results in self.node_distances.values()
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
def extract_relevant_node_ids(self) -> List[str]:
    """Collect the distinct node IDs found in the single-query results.

    In batch mode (``query_list_length`` is not ``None``) nothing is
    extracted and an empty list is returned.

    Returns:
        The unique IDs, each converted with ``str``, in no particular
        order. Results lacking an ``id`` attribute, or carrying a falsy
        one, are ignored.
    """
    if self.query_list_length is not None:
        return []

    candidate_ids = (
        getattr(scored, "id", None)
        for hits in self.node_distances.values()
        for scored in hits
    )
    # A set removes duplicates that show up in more than one collection.
    return list({str(found) for found in candidate_ids if found})
|
|
90
|
+
|
|
91
|
+
def set_distances_from_results(
    self,
    collections: List[str],
    search_results: List[List[Any]],
    query_list_length: Optional[int] = None,
):
    """Store search results as ``node_distances`` and ``edge_distances``.

    ``collections`` and ``search_results`` are paired positionally. The
    result belonging to ``self.edge_collection`` becomes
    ``edge_distances``; every other result is stored in
    ``node_distances`` under its collection name.

    A falsy result is replaced by an empty placeholder so the shape stays
    predictable:

    - single mode (``query_list_length`` is ``None``): ``[]``
    - batch mode: a list of ``query_list_length`` independent empty lists

    ``edge_distances`` starts out as that placeholder, so it has the
    expected shape even when the edge collection is not in ``collections``.
    """

    def _placeholder():
        # A new object on every call, so placeholders never share inner lists.
        if query_list_length is None:
            return []
        return [[] for _ in range(query_list_length)]

    self.node_distances = {}
    self.edge_distances = _placeholder()

    for name, hits in zip(collections, search_results):
        stored = hits if hits else _placeholder()
        if name == self.edge_collection:
            self.edge_distances = stored
        else:
            self.node_distances[name] = stored
|
|
121
|
+
|
|
122
|
+
async def _run_batch_search(
    self, collections: List[str], query_batch: List[str]
) -> List[List[Any]]:
    """Search every collection with the whole query batch concurrently.

    Returns:
        One entry per collection, in the order of ``collections``; each
        entry is whatever ``_search_batch_collection`` produced for it.
    """
    pending = (
        self._search_batch_collection(name, query_batch) for name in collections
    )
    return await asyncio.gather(*pending)
|
|
130
|
+
|
|
131
|
+
async def _search_batch_collection(
    self, collection_name: str, query_batch: List[str]
) -> List[List[Any]]:
    """Run the whole query batch against one collection.

    Args:
        collection_name: Name of the vector collection to search.
        query_batch: Query texts, passed to the engine as ``query_texts``.

    Returns:
        The value returned by ``vector_engine.batch_search`` (called with
        ``limit=None``). If the collection does not exist, a list holding
        one independent empty list per query is returned instead.
    """
    try:
        return await self.vector_engine.batch_search(
            collection_name=collection_name, query_texts=query_batch, limit=None
        )
    except CollectionNotFoundError:
        # Build a separate list for each query. ``[[]] * n`` would repeat one
        # shared list n times, so appending to one query's results would
        # silently change all of them.
        return [[] for _ in query_batch]
|
|
141
|
+
|
|
142
|
+
async def _run_single_search(
    self, collections: List[str], query: str, wide_search_limit: Optional[int]
) -> List[List[Any]]:
    """Embed one query, then search every collection concurrently.

    Returns:
        One entry per collection, in the order of ``collections``; each
        entry is the result of ``_search_single_collection`` for it.
    """
    # Embedding comes first: it stores ``self.query_vector``, which the
    # per-collection searches read.
    await self._embed_query(query)

    pending = (
        self._search_single_collection(self.vector_engine, wide_search_limit, name)
        for name in collections
    )
    return await asyncio.gather(*pending)
|
|
157
|
+
|
|
158
|
+
async def _embed_query(self, query: str):
    """Embed ``query`` and keep the vector on ``self.query_vector``.

    The text is sent to the embedding engine as a one-element list and the
    first returned embedding is stored.
    """
    embedder = self.vector_engine.embedding_engine
    vectors = await embedder.embed_text([query])
    self.query_vector = vectors[0]
|
|
162
|
+
|
|
163
|
+
async def _search_single_collection(
    self, vector_engine: Any, wide_search_limit: Optional[int], collection_name: str
):
    """Search one collection with the stored query vector.

    Args:
        vector_engine: Engine whose ``search`` coroutine is awaited.
        wide_search_limit: Passed through to the engine as ``limit``.
        collection_name: Collection to query.

    Returns:
        The engine's search result, or ``[]`` when the collection does
        not exist.
    """
    try:
        hits = await vector_engine.search(
            collection_name=collection_name,
            query_vector=self.query_vector,
            limit=wide_search_limit,
        )
    except CollectionNotFoundError:
        # A missing collection counts as "no results", not as an error.
        return []
    return hits
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
2
|
+
from cognee.modules.search.models.SearchResultPayload import SearchResultPayload
|
|
3
|
+
from cognee.modules.search.methods.get_search_type_retriever_instance import (
|
|
4
|
+
get_search_type_retriever_instance,
|
|
5
|
+
)
|
|
6
|
+
from cognee.modules.search.types import SearchType
|
|
7
|
+
from cognee.shared.logging_utils import get_logger
|
|
8
|
+
|
|
9
|
+
logger = get_logger()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
async def get_retriever_output(query_type: SearchType, query_text: str, **kwargs):
    """Run the retriever for ``query_type`` and package everything it produced.

    The retrieved objects are fetched once and handed to both the context
    step and the completion step, so retrieval is not repeated.

    Args:
        query_type: Search type used to pick the retriever.
        query_text: The user query.
        **kwargs: Forwarded to ``get_search_type_retriever_instance``. Two
            keys are also read here: ``only_context`` (skip the completion
            step when truthy) and ``dataset`` (source of the name, id and
            tenant id stored in the payload).

    Returns:
        A ``SearchResultPayload`` holding the retrieved objects, the
        context, the completion (``None`` when skipped) and dataset info.
    """
    graph_engine = await get_graph_engine()
    # An empty graph is only logged; the search still runs.
    if await graph_engine.is_empty():
        logger.warning("Search attempt on an empty knowledge graph")

    only_context = kwargs.get("only_context", False)
    dataset = kwargs.get("dataset")

    retriever = await get_search_type_retriever_instance(
        query_type=query_type, query_text=query_text, **kwargs
    )

    # Fetch the raw objects once and reuse them for context and completion.
    retrieved_objects = await retriever.get_retrieved_objects(query=query_text)
    context = await retriever.get_context_from_objects(
        query=query_text, retrieved_objects=retrieved_objects
    )

    if only_context:
        # The caller only wants context, so skip the completion step.
        completion = None
    else:
        completion = await retriever.get_completion_from_context(
            query=query_text,
            retrieved_objects=retrieved_objects,
            context=context,
        )

    if dataset:
        dataset_name = dataset.name
        dataset_id = dataset.id
        dataset_tenant_id = dataset.tenant_id
    else:
        dataset_name = dataset_id = dataset_tenant_id = None

    return SearchResultPayload(
        result_object=retrieved_objects,
        context=context,
        completion=completion,
        search_type=query_type,
        only_context=only_context,
        dataset_name=dataset_name,
        dataset_id=dataset_id,
        dataset_tenant_id=dataset_tenant_id,
    )
|