cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +2 -0
- cognee/alembic/README +1 -0
- cognee/alembic/env.py +107 -0
- cognee/alembic/script.py.mako +26 -0
- cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
- cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
- cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
- cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
- cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
- cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
- cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
- cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
- cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
- cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
- cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
- cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
- cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
- cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
- cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
- cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
- cognee/alembic.ini +117 -0
- cognee/api/v1/add/routers/get_add_router.py +2 -0
- cognee/api/v1/cognify/cognify.py +11 -6
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
- cognee/api/v1/config/config.py +60 -0
- cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
- cognee/api/v1/memify/routers/get_memify_router.py +2 -0
- cognee/api/v1/search/routers/get_search_router.py +21 -6
- cognee/api/v1/search/search.py +25 -5
- cognee/api/v1/sync/routers/get_sync_router.py +3 -3
- cognee/cli/commands/add_command.py +1 -1
- cognee/cli/commands/cognify_command.py +6 -0
- cognee/cli/commands/config_command.py +1 -1
- cognee/context_global_variables.py +5 -1
- cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
- cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
- cognee/infrastructure/databases/cache/config.py +6 -0
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
- cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/config.py +6 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
- cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
- cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
- cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
- cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
- cognee/infrastructure/llm/prompts/test.txt +1 -1
- cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
- cognee/modules/chunking/models/DocumentChunk.py +0 -1
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/models/Data.py +1 -0
- cognee/modules/engine/models/Entity.py +0 -1
- cognee/modules/engine/operations/setup.py +6 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
- cognee/modules/notebooks/methods/__init__.py +1 -0
- cognee/modules/notebooks/methods/create_notebook.py +0 -34
- cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
- cognee/modules/notebooks/methods/get_notebooks.py +12 -8
- cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
- cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
- cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
- cognee/modules/retrieval/__init__.py +0 -1
- cognee/modules/retrieval/base_retriever.py +66 -10
- cognee/modules/retrieval/chunks_retriever.py +57 -49
- cognee/modules/retrieval/coding_rules_retriever.py +12 -5
- cognee/modules/retrieval/completion_retriever.py +29 -28
- cognee/modules/retrieval/cypher_search_retriever.py +25 -20
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
- cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
- cognee/modules/retrieval/graph_completion_retriever.py +78 -63
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/lexical_retriever.py +34 -12
- cognee/modules/retrieval/natural_language_retriever.py +18 -15
- cognee/modules/retrieval/summaries_retriever.py +51 -34
- cognee/modules/retrieval/temporal_retriever.py +59 -49
- cognee/modules/retrieval/triplet_retriever.py +31 -32
- cognee/modules/retrieval/utils/access_tracking.py +88 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
- cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
- cognee/modules/search/methods/__init__.py +1 -0
- cognee/modules/search/methods/get_retriever_output.py +53 -0
- cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
- cognee/modules/search/methods/search.py +90 -215
- cognee/modules/search/models/SearchResultPayload.py +67 -0
- cognee/modules/search/types/SearchResult.py +1 -8
- cognee/modules/search/types/SearchType.py +1 -2
- cognee/modules/search/types/__init__.py +1 -1
- cognee/modules/search/utils/__init__.py +1 -2
- cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
- cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
- cognee/modules/users/authentication/default/default_transport.py +11 -1
- cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
- cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
- cognee/modules/users/methods/create_user.py +0 -9
- cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
- cognee/modules/visualization/cognee_network_visualization.py +1 -1
- cognee/run_migrations.py +48 -0
- cognee/shared/exceptions/__init__.py +1 -3
- cognee/shared/exceptions/exceptions.py +11 -1
- cognee/shared/usage_logger.py +332 -0
- cognee/shared/utils.py +12 -5
- cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
- cognee/tasks/memify/extract_usage_frequency.py +613 -0
- cognee/tasks/summarization/models.py +0 -2
- cognee/tasks/temporal_graph/__init__.py +0 -1
- cognee/tasks/translation/__init__.py +96 -0
- cognee/tasks/translation/config.py +110 -0
- cognee/tasks/translation/detect_language.py +190 -0
- cognee/tasks/translation/exceptions.py +62 -0
- cognee/tasks/translation/models.py +72 -0
- cognee/tasks/translation/providers/__init__.py +44 -0
- cognee/tasks/translation/providers/azure_provider.py +192 -0
- cognee/tasks/translation/providers/base.py +85 -0
- cognee/tasks/translation/providers/google_provider.py +158 -0
- cognee/tasks/translation/providers/llm_provider.py +143 -0
- cognee/tasks/translation/translate_content.py +282 -0
- cognee/tasks/web_scraper/default_url_crawler.py +6 -2
- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
- cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
- cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
- cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
- cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
- cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
- cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
- cognee/tests/integration/retrieval/test_structured_output.py +62 -18
- cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
- cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
- cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
- cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
- cognee/tests/tasks/translation/README.md +147 -0
- cognee/tests/tasks/translation/__init__.py +1 -0
- cognee/tests/tasks/translation/config_test.py +93 -0
- cognee/tests/tasks/translation/detect_language_test.py +118 -0
- cognee/tests/tasks/translation/providers_test.py +151 -0
- cognee/tests/tasks/translation/translate_content_test.py +213 -0
- cognee/tests/test_chromadb.py +1 -1
- cognee/tests/test_cleanup_unused_data.py +165 -0
- cognee/tests/test_delete_by_id.py +6 -6
- cognee/tests/test_extract_usage_frequency.py +308 -0
- cognee/tests/test_kuzu.py +17 -7
- cognee/tests/test_lancedb.py +3 -1
- cognee/tests/test_library.py +1 -1
- cognee/tests/test_neo4j.py +17 -7
- cognee/tests/test_neptune_analytics_vector.py +3 -1
- cognee/tests/test_permissions.py +172 -187
- cognee/tests/test_pgvector.py +3 -1
- cognee/tests/test_relational_db_migration.py +15 -1
- cognee/tests/test_remote_kuzu.py +3 -1
- cognee/tests/test_s3_file_storage.py +1 -1
- cognee/tests/test_search_db.py +97 -110
- cognee/tests/test_usage_logger_e2e.py +268 -0
- cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
- cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
- cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
- cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
- cognee/tests/unit/modules/search/test_search.py +176 -0
- cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
- cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
- cognee/tests/unit/shared/test_usage_logger.py +241 -0
- cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/METADATA +17 -10
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/RECORD +232 -144
- cognee/api/.env.example +0 -5
- cognee/modules/retrieval/base_graph_retriever.py +0 -24
- cognee/modules/search/methods/get_search_type_tools.py +0 -223
- cognee/modules/search/methods/no_access_control_search.py +0 -62
- cognee/modules/search/utils/prepare_search_result.py +0 -63
- cognee/tests/test_feedback_enrichment.py +0 -174
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/WHEEL +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -7,7 +7,7 @@ from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
|
|
|
7
7
|
from cognee.tasks.storage import add_data_points
|
|
8
8
|
from cognee.modules.graph.utils import resolve_edges_to_text
|
|
9
9
|
from cognee.modules.graph.utils.convert_node_to_data_point import get_all_subclasses
|
|
10
|
-
from cognee.modules.retrieval.
|
|
10
|
+
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
|
11
11
|
from cognee.modules.retrieval.utils.brute_force_triplet_search import brute_force_triplet_search
|
|
12
12
|
from cognee.modules.retrieval.utils.completion import generate_completion, summarize_text
|
|
13
13
|
from cognee.modules.retrieval.utils.session_cache import (
|
|
@@ -16,26 +16,24 @@ from cognee.modules.retrieval.utils.session_cache import (
|
|
|
16
16
|
)
|
|
17
17
|
from cognee.shared.logging_utils import get_logger
|
|
18
18
|
from cognee.modules.retrieval.utils.extract_uuid_from_node import extract_uuid_from_node
|
|
19
|
+
from cognee.modules.retrieval.utils.access_tracking import update_node_access_timestamps
|
|
19
20
|
from cognee.modules.retrieval.utils.models import CogneeUserInteraction
|
|
20
21
|
from cognee.modules.engine.models.node_set import NodeSet
|
|
21
22
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
22
23
|
from cognee.context_global_variables import session_user
|
|
23
24
|
from cognee.infrastructure.databases.cache.config import CacheConfig
|
|
25
|
+
from cognee.modules.graph.utils import get_entity_nodes_from_triplets
|
|
24
26
|
|
|
25
27
|
logger = get_logger("GraphCompletionRetriever")
|
|
26
28
|
|
|
27
29
|
|
|
28
|
-
class GraphCompletionRetriever(
|
|
30
|
+
class GraphCompletionRetriever(BaseRetriever):
|
|
29
31
|
"""
|
|
30
32
|
Retriever for handling graph-based completion searches.
|
|
31
33
|
|
|
32
|
-
This class
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
- resolve_edges_to_text
|
|
36
|
-
- get_triplets
|
|
37
|
-
- get_context
|
|
38
|
-
- get_completion
|
|
34
|
+
This class implements the retrieval pipeline by searching for graph triplets (get_retrieved_objects function),
|
|
35
|
+
resolving those triplets into human-readable text context (get_context_from_objects function), and generating
|
|
36
|
+
LLM completions using the retrieved graph data (get_completion_from_context function).
|
|
39
37
|
"""
|
|
40
38
|
|
|
41
39
|
def __init__(
|
|
@@ -49,6 +47,8 @@ class GraphCompletionRetriever(BaseGraphRetriever):
|
|
|
49
47
|
save_interaction: bool = False,
|
|
50
48
|
wide_search_top_k: Optional[int] = 100,
|
|
51
49
|
triplet_distance_penalty: Optional[float] = 3.5,
|
|
50
|
+
session_id: Optional[str] = None,
|
|
51
|
+
response_model: Type = str,
|
|
52
52
|
):
|
|
53
53
|
"""Initialize retriever with prompt paths and search parameters."""
|
|
54
54
|
self.save_interaction = save_interaction
|
|
@@ -60,6 +60,39 @@ class GraphCompletionRetriever(BaseGraphRetriever):
|
|
|
60
60
|
self.node_type = node_type
|
|
61
61
|
self.node_name = node_name
|
|
62
62
|
self.triplet_distance_penalty = triplet_distance_penalty
|
|
63
|
+
# session_id (Optional[str]): Identifier for managing conversation history.
|
|
64
|
+
self.session_id = session_id
|
|
65
|
+
# response_model (Type): The Pydantic model or type for the expected response.
|
|
66
|
+
self.response_model = response_model
|
|
67
|
+
|
|
68
|
+
async def get_retrieved_objects(self, query: str) -> List[Edge]:
|
|
69
|
+
"""
|
|
70
|
+
Performs a brute-force triplet search on the graph and updates access timestamps.
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
query (str): The search query to find relevant graph triplets.
|
|
74
|
+
|
|
75
|
+
Returns:
|
|
76
|
+
List[Edge]: A list of retrieved Edge objects (triplets).
|
|
77
|
+
Returns an empty list if the graph is empty or no results are found.
|
|
78
|
+
"""
|
|
79
|
+
graph_engine = await get_graph_engine()
|
|
80
|
+
is_empty = await graph_engine.is_empty()
|
|
81
|
+
|
|
82
|
+
if is_empty:
|
|
83
|
+
logger.warning("Search attempt on an empty knowledge graph")
|
|
84
|
+
return []
|
|
85
|
+
|
|
86
|
+
triplets = await self.get_triplets(query)
|
|
87
|
+
|
|
88
|
+
if len(triplets) == 0:
|
|
89
|
+
logger.warning("Empty context was provided to the completion")
|
|
90
|
+
return []
|
|
91
|
+
# TODO: Remove when refactor of timestamps tracking is merged
|
|
92
|
+
entity_nodes = get_entity_nodes_from_triplets(triplets)
|
|
93
|
+
await update_node_access_timestamps(entity_nodes)
|
|
94
|
+
|
|
95
|
+
return triplets
|
|
63
96
|
|
|
64
97
|
async def resolve_edges_to_text(self, retrieved_edges: list) -> str:
|
|
65
98
|
"""
|
|
@@ -115,72 +148,54 @@ class GraphCompletionRetriever(BaseGraphRetriever):
|
|
|
115
148
|
|
|
116
149
|
return found_triplets
|
|
117
150
|
|
|
118
|
-
async def
|
|
151
|
+
async def get_context_from_objects(self, query, retrieved_objects) -> str:
|
|
119
152
|
"""
|
|
120
|
-
|
|
153
|
+
Transforms raw retrieved graph triplets into a textual context string.
|
|
121
154
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
155
|
+
Args:
|
|
156
|
+
query (str): The original search query.
|
|
157
|
+
retrieved_objects (List[Edge]): The raw triplets returned from the search.
|
|
158
|
+
Output of the get_retrieved_objects method.
|
|
126
159
|
|
|
127
160
|
Returns:
|
|
128
|
-
|
|
161
|
+
str: A string representing the resolved graph context.
|
|
162
|
+
Returns an empty list (as string) if no triplets are provided.
|
|
129
163
|
|
|
130
|
-
|
|
131
|
-
|
|
164
|
+
Note: To avoid duplicate retrievals, ensure that retrieved_objects
|
|
165
|
+
are provided from get_retrieved_objects method call.
|
|
132
166
|
"""
|
|
133
|
-
graph_engine = await get_graph_engine()
|
|
134
|
-
is_empty = await graph_engine.is_empty()
|
|
135
167
|
|
|
136
|
-
|
|
137
|
-
logger.warning("Search attempt on an empty knowledge graph")
|
|
138
|
-
return []
|
|
139
|
-
|
|
140
|
-
triplets = await self.get_triplets(query)
|
|
168
|
+
triplets = retrieved_objects
|
|
141
169
|
|
|
142
170
|
if len(triplets) == 0:
|
|
143
171
|
logger.warning("Empty context was provided to the completion")
|
|
144
|
-
return
|
|
172
|
+
return ""
|
|
145
173
|
|
|
146
|
-
|
|
174
|
+
return await self.resolve_edges_to_text(triplets)
|
|
147
175
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
async def convert_retrieved_objects_to_context(self, triplets: List[Edge]):
|
|
151
|
-
context = await self.resolve_edges_to_text(triplets)
|
|
152
|
-
return context
|
|
153
|
-
|
|
154
|
-
async def get_completion(
|
|
176
|
+
async def get_completion_from_context(
|
|
155
177
|
self,
|
|
156
178
|
query: str,
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
response_model: Type = str,
|
|
179
|
+
retrieved_objects: Optional[List[Edge]],
|
|
180
|
+
context: str,
|
|
160
181
|
) -> List[Any]:
|
|
161
182
|
"""
|
|
162
|
-
Generates
|
|
183
|
+
Generates an LLM response based on the query, context, and conversation history.
|
|
184
|
+
Optionally saves the interaction and updates the session cache.
|
|
163
185
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
- session_id (Optional[str]): Optional session identifier for caching. If None,
|
|
171
|
-
defaults to 'default_session'. (default None)
|
|
186
|
+
Args:
|
|
187
|
+
query (str): The user's question or prompt.
|
|
188
|
+
retrieved_objects (Optional[List[Edge]]): Raw triplets used for interaction mapping.
|
|
189
|
+
Output of get_retrieved_objects method.
|
|
190
|
+
context (str): The text-resolved graph context.
|
|
191
|
+
Output of the get_context_from_objects method.
|
|
172
192
|
|
|
173
193
|
Returns:
|
|
174
|
-
|
|
194
|
+
List[Any]: A list containing the generated response (completion).
|
|
175
195
|
|
|
176
|
-
|
|
196
|
+
Note: To avoid duplicate retrievals, ensure that retrieved_objects and context
|
|
197
|
+
are provided from previous method calls.
|
|
177
198
|
"""
|
|
178
|
-
triplets = context
|
|
179
|
-
|
|
180
|
-
if triplets is None:
|
|
181
|
-
triplets = await self.get_context(query)
|
|
182
|
-
|
|
183
|
-
context_text = await resolve_edges_to_text(triplets)
|
|
184
199
|
|
|
185
200
|
cache_config = CacheConfig()
|
|
186
201
|
user = session_user.get()
|
|
@@ -188,33 +203,33 @@ class GraphCompletionRetriever(BaseGraphRetriever):
|
|
|
188
203
|
session_save = user_id and cache_config.caching
|
|
189
204
|
|
|
190
205
|
if session_save:
|
|
191
|
-
conversation_history = await get_conversation_history(session_id=session_id)
|
|
206
|
+
conversation_history = await get_conversation_history(session_id=self.session_id)
|
|
192
207
|
|
|
193
208
|
context_summary, completion = await asyncio.gather(
|
|
194
|
-
summarize_text(
|
|
209
|
+
summarize_text(context),
|
|
195
210
|
generate_completion(
|
|
196
211
|
query=query,
|
|
197
|
-
context=
|
|
212
|
+
context=context,
|
|
198
213
|
user_prompt_path=self.user_prompt_path,
|
|
199
214
|
system_prompt_path=self.system_prompt_path,
|
|
200
215
|
system_prompt=self.system_prompt,
|
|
201
216
|
conversation_history=conversation_history,
|
|
202
|
-
response_model=response_model,
|
|
217
|
+
response_model=self.response_model,
|
|
203
218
|
),
|
|
204
219
|
)
|
|
205
220
|
else:
|
|
206
221
|
completion = await generate_completion(
|
|
207
222
|
query=query,
|
|
208
|
-
context=
|
|
223
|
+
context=context,
|
|
209
224
|
user_prompt_path=self.user_prompt_path,
|
|
210
225
|
system_prompt_path=self.system_prompt_path,
|
|
211
226
|
system_prompt=self.system_prompt,
|
|
212
|
-
response_model=response_model,
|
|
227
|
+
response_model=self.response_model,
|
|
213
228
|
)
|
|
214
229
|
|
|
215
|
-
if self.save_interaction and
|
|
230
|
+
if self.save_interaction and retrieved_objects and completion:
|
|
216
231
|
await self.save_qa(
|
|
217
|
-
question=query, answer=completion, context=
|
|
232
|
+
question=query, answer=completion, context=context, triplets=retrieved_objects
|
|
218
233
|
)
|
|
219
234
|
|
|
220
235
|
if session_save:
|
|
@@ -222,7 +237,7 @@ class GraphCompletionRetriever(BaseGraphRetriever):
|
|
|
222
237
|
query=query,
|
|
223
238
|
context_summary=context_summary,
|
|
224
239
|
answer=completion,
|
|
225
|
-
session_id=session_id,
|
|
240
|
+
session_id=self.session_id,
|
|
226
241
|
)
|
|
227
242
|
|
|
228
243
|
return [completion]
|
|
@@ -28,6 +28,7 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever):
|
|
|
28
28
|
save_interaction: bool = False,
|
|
29
29
|
wide_search_top_k: Optional[int] = 100,
|
|
30
30
|
triplet_distance_penalty: Optional[float] = 3.5,
|
|
31
|
+
session_id: Optional[str] = None,
|
|
31
32
|
):
|
|
32
33
|
"""Initialize retriever with default prompt paths and search parameters."""
|
|
33
34
|
super().__init__(
|
|
@@ -40,6 +41,7 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever):
|
|
|
40
41
|
system_prompt=system_prompt,
|
|
41
42
|
wide_search_top_k=wide_search_top_k,
|
|
42
43
|
triplet_distance_penalty=triplet_distance_penalty,
|
|
44
|
+
session_id=session_id,
|
|
43
45
|
)
|
|
44
46
|
self.summarize_prompt_path = summarize_prompt_path
|
|
45
47
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
from typing import Any, Callable, Optional
|
|
2
|
+
from typing import Any, Callable, Optional, List, Union
|
|
3
3
|
from heapq import nlargest
|
|
4
4
|
|
|
5
5
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
@@ -72,7 +72,7 @@ class LexicalRetriever(BaseRetriever):
|
|
|
72
72
|
self._initialized = True
|
|
73
73
|
logger.info("Initialized with %d document chunks", len(self.chunks))
|
|
74
74
|
|
|
75
|
-
async def
|
|
75
|
+
async def get_retrieved_objects(self, query: str) -> Any:
|
|
76
76
|
"""Retrieves relevant chunks for the given query."""
|
|
77
77
|
if not self._initialized:
|
|
78
78
|
await self.initialize()
|
|
@@ -116,11 +116,36 @@ class LexicalRetriever(BaseRetriever):
|
|
|
116
116
|
else:
|
|
117
117
|
return [self.payloads[chunk_id] for chunk_id, _ in top_results]
|
|
118
118
|
|
|
119
|
-
async def
|
|
120
|
-
self, query: str, context: Optional[Any] = None, session_id: Optional[str] = None
|
|
121
|
-
) -> Any:
|
|
119
|
+
async def get_context_from_objects(self, query: str, retrieved_objects: Any) -> str:
|
|
122
120
|
"""
|
|
123
|
-
|
|
121
|
+
Retrieves context from retrieved chunks, in text form.
|
|
122
|
+
|
|
123
|
+
Parameters:
|
|
124
|
+
-----------
|
|
125
|
+
|
|
126
|
+
- query (str): The query string used to search for relevant document chunk payloads.
|
|
127
|
+
- retrieved_objects (Any): The retrieved objects to be used for generating textual context.
|
|
128
|
+
|
|
129
|
+
Returns:
|
|
130
|
+
--------
|
|
131
|
+
|
|
132
|
+
- str: A string containing the combined text of the retrieved chunk payloads, or an
|
|
133
|
+
empty string if none are found.
|
|
134
|
+
"""
|
|
135
|
+
if retrieved_objects:
|
|
136
|
+
payload_texts = [payload["text"] for payload in retrieved_objects]
|
|
137
|
+
return "\n".join(payload_texts)
|
|
138
|
+
else:
|
|
139
|
+
return ""
|
|
140
|
+
|
|
141
|
+
async def get_completion_from_context(
|
|
142
|
+
self, query: str, retrieved_objects: Any, context: Any
|
|
143
|
+
) -> Union[List[str], List[dict]]:
|
|
144
|
+
"""
|
|
145
|
+
Returns a completion for the given query.
|
|
146
|
+
|
|
147
|
+
In case of the Lexical Retriever, we do not generate a completion, we just return
|
|
148
|
+
the scored chunk payloads, i.e. the retrieved objects.
|
|
124
149
|
|
|
125
150
|
Parameters:
|
|
126
151
|
-----------
|
|
@@ -128,14 +153,11 @@ class LexicalRetriever(BaseRetriever):
|
|
|
128
153
|
- query (str): The query string to retrieve context for.
|
|
129
154
|
- context (Optional[Any]): Optional pre-fetched context; if None, it retrieves
|
|
130
155
|
the context for the query. (default None)
|
|
131
|
-
- session_id (Optional[str]): Optional session identifier for caching. If None,
|
|
132
|
-
defaults to 'default_session'. (default None)
|
|
133
156
|
|
|
134
157
|
Returns:
|
|
135
158
|
--------
|
|
136
159
|
|
|
137
|
-
-
|
|
160
|
+
- List[dict]: The retrieved objects, i.e. the scored payloads.
|
|
138
161
|
"""
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
return context
|
|
162
|
+
# TODO: Do we want to generate a completion using LLM here?
|
|
163
|
+
return retrieved_objects
|
|
@@ -4,7 +4,6 @@ from cognee.infrastructure.databases.graph import get_graph_engine
|
|
|
4
4
|
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
5
5
|
from cognee.infrastructure.llm.prompts import render_prompt
|
|
6
6
|
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
|
7
|
-
from cognee.modules.retrieval.exceptions import SearchTypeNotSupported
|
|
8
7
|
from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface
|
|
9
8
|
|
|
10
9
|
logger = get_logger("NaturalLanguageRetriever")
|
|
@@ -25,10 +24,12 @@ class NaturalLanguageRetriever(BaseRetriever):
|
|
|
25
24
|
self,
|
|
26
25
|
system_prompt_path: str = "natural_language_retriever_system.txt",
|
|
27
26
|
max_attempts: int = 3,
|
|
27
|
+
session_id: Optional[str] = None,
|
|
28
28
|
):
|
|
29
29
|
"""Initialize retriever with optional custom prompt paths."""
|
|
30
30
|
self.system_prompt_path = system_prompt_path
|
|
31
31
|
self.max_attempts = max_attempts
|
|
32
|
+
self.session_id = session_id
|
|
32
33
|
|
|
33
34
|
async def _get_graph_schema(self, graph_engine) -> tuple:
|
|
34
35
|
"""Retrieve the node and edge schemas from the graph database."""
|
|
@@ -102,7 +103,17 @@ class NaturalLanguageRetriever(BaseRetriever):
|
|
|
102
103
|
)
|
|
103
104
|
return []
|
|
104
105
|
|
|
105
|
-
async def
|
|
106
|
+
async def get_retrieved_objects(self, query: str) -> Any:
|
|
107
|
+
graph_engine = await get_graph_engine()
|
|
108
|
+
is_empty = await graph_engine.is_empty()
|
|
109
|
+
|
|
110
|
+
if is_empty:
|
|
111
|
+
logger.warning("Search attempt on an empty knowledge graph")
|
|
112
|
+
return []
|
|
113
|
+
|
|
114
|
+
return await self._execute_cypher_query(query, graph_engine)
|
|
115
|
+
|
|
116
|
+
async def get_context_from_objects(self, query: str, retrieved_objects: Any) -> Optional[Any]:
|
|
106
117
|
"""
|
|
107
118
|
Retrieves relevant context using a natural language query converted to Cypher.
|
|
108
119
|
|
|
@@ -121,17 +132,11 @@ class NaturalLanguageRetriever(BaseRetriever):
|
|
|
121
132
|
- Optional[Any]: Returns the context retrieved from the graph database based on the
|
|
122
133
|
query.
|
|
123
134
|
"""
|
|
124
|
-
|
|
125
|
-
|
|
135
|
+
# TODO: Do we want to process retrieved_objects into a context string?
|
|
136
|
+
return retrieved_objects
|
|
126
137
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
return []
|
|
130
|
-
|
|
131
|
-
return await self._execute_cypher_query(query, graph_engine)
|
|
132
|
-
|
|
133
|
-
async def get_completion(
|
|
134
|
-
self, query: str, context: Optional[Any] = None, session_id: Optional[str] = None
|
|
138
|
+
async def get_completion_from_context(
|
|
139
|
+
self, query: str, retrieved_objects: Any, context: Optional[Any] = None
|
|
135
140
|
) -> Any:
|
|
136
141
|
"""
|
|
137
142
|
Returns a completion based on the query and context.
|
|
@@ -154,7 +159,5 @@ class NaturalLanguageRetriever(BaseRetriever):
|
|
|
154
159
|
|
|
155
160
|
- Any: Returns the completion derived from the given query and context.
|
|
156
161
|
"""
|
|
157
|
-
|
|
158
|
-
context = await self.get_context(query)
|
|
159
|
-
|
|
162
|
+
# TODO: Do we want to generate a completion using LLM here?
|
|
160
163
|
return context
|
|
@@ -1,9 +1,10 @@
|
|
|
1
|
-
from typing import Any, Optional
|
|
1
|
+
from typing import Any, Optional, List, Union
|
|
2
2
|
|
|
3
3
|
from cognee.shared.logging_utils import get_logger
|
|
4
4
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
|
5
5
|
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
|
6
6
|
from cognee.modules.retrieval.exceptions.exceptions import NoDataError
|
|
7
|
+
from cognee.modules.retrieval.utils.access_tracking import update_node_access_timestamps
|
|
7
8
|
from cognee.infrastructure.databases.vector.exceptions.exceptions import CollectionNotFoundError
|
|
8
9
|
|
|
9
10
|
logger = get_logger("SummariesRetriever")
|
|
@@ -22,13 +23,14 @@ class SummariesRetriever(BaseRetriever):
|
|
|
22
23
|
- top_k: int - Number of top summaries to retrieve.
|
|
23
24
|
"""
|
|
24
25
|
|
|
25
|
-
def __init__(self, top_k: int = 5):
|
|
26
|
+
def __init__(self, top_k: int = 5, session_id: Optional[str] = None):
|
|
26
27
|
"""Initialize retriever with search parameters."""
|
|
27
28
|
self.top_k = top_k
|
|
29
|
+
self.session_id = session_id
|
|
28
30
|
|
|
29
|
-
async def
|
|
31
|
+
async def get_retrieved_objects(self, query: str) -> Any:
|
|
30
32
|
"""
|
|
31
|
-
Retrieves summary
|
|
33
|
+
Retrieves text summary objects based on the query.
|
|
32
34
|
|
|
33
35
|
On encountering a missing collection, raises NoDataError with a message to add data
|
|
34
36
|
first.
|
|
@@ -41,7 +43,7 @@ class SummariesRetriever(BaseRetriever):
|
|
|
41
43
|
Returns:
|
|
42
44
|
--------
|
|
43
45
|
|
|
44
|
-
- Any: A list of
|
|
46
|
+
- Any: A list of text summaries retrieved from the search.
|
|
45
47
|
"""
|
|
46
48
|
logger.info(
|
|
47
49
|
f"Starting summary retrieval for query: '{query[:100]}{'...' if len(query) > 100 else ''}'"
|
|
@@ -51,51 +53,66 @@ class SummariesRetriever(BaseRetriever):
|
|
|
51
53
|
|
|
52
54
|
try:
|
|
53
55
|
summaries_results = await vector_engine.search(
|
|
54
|
-
"TextSummary_text", query, limit=self.top_k
|
|
56
|
+
"TextSummary_text", query, limit=self.top_k, include_payload=True
|
|
55
57
|
)
|
|
56
58
|
logger.info(f"Found {len(summaries_results)} summaries from vector search")
|
|
59
|
+
|
|
60
|
+
await update_node_access_timestamps(summaries_results)
|
|
61
|
+
|
|
62
|
+
return summaries_results
|
|
57
63
|
except CollectionNotFoundError as error:
|
|
58
64
|
logger.error("TextSummary_text collection not found in vector database")
|
|
59
65
|
raise NoDataError("No data found in the system, please add data first.") from error
|
|
60
66
|
|
|
61
|
-
|
|
62
|
-
logger.info(f"Returning {len(summary_payloads)} summary payloads")
|
|
63
|
-
return summary_payloads
|
|
64
|
-
|
|
65
|
-
async def get_completion(
|
|
66
|
-
self, query: str, context: Optional[Any] = None, session_id: Optional[str] = None, **kwargs
|
|
67
|
-
) -> Any:
|
|
67
|
+
async def get_context_from_objects(self, query: str, retrieved_objects: Any) -> str:
|
|
68
68
|
"""
|
|
69
|
-
|
|
69
|
+
Retrieves relevant summaries as context.
|
|
70
70
|
|
|
71
|
-
|
|
72
|
-
|
|
71
|
+
Fetches text summaries based on a query from a vector engine and combines their text.
|
|
72
|
+
Returns empty string if no summaries are found. Raises NoDataError if the collection is not
|
|
73
|
+
found.
|
|
73
74
|
|
|
74
75
|
Parameters:
|
|
75
76
|
-----------
|
|
76
77
|
|
|
77
|
-
- query (str): The search
|
|
78
|
-
- context (Optional[Any]): Optional context for the completion; if not provided,
|
|
79
|
-
will be retrieved based on the query. (default None)
|
|
80
|
-
- session_id (Optional[str]): Optional session identifier for caching. If None,
|
|
81
|
-
defaults to 'default_session'. (default None)
|
|
78
|
+
- query (str): The query string used to search for relevant text summaries.
|
|
82
79
|
|
|
83
80
|
Returns:
|
|
84
81
|
--------
|
|
85
82
|
|
|
86
|
-
-
|
|
83
|
+
- str: A string containing the combined text of the retrieved summaries, or an
|
|
84
|
+
empty string if none are found.
|
|
87
85
|
"""
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
if context is None:
|
|
93
|
-
logger.debug("No context provided, retrieving context from vector database")
|
|
94
|
-
context = await self.get_context(query)
|
|
86
|
+
if retrieved_objects:
|
|
87
|
+
summary_payload_texts = [summary.payload["text"] for summary in retrieved_objects]
|
|
88
|
+
return "\n".join(summary_payload_texts)
|
|
95
89
|
else:
|
|
96
|
-
|
|
90
|
+
return ""
|
|
97
91
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
92
|
+
async def get_completion_from_context(
|
|
93
|
+
self, query: str, retrieved_objects: Any, context: Any
|
|
94
|
+
) -> Union[List[str], List[dict]]:
|
|
95
|
+
"""
|
|
96
|
+
Generates a completion using text summaries.
|
|
97
|
+
In case of the Summaries Retriever, we do not generate a completion, we just return
|
|
98
|
+
the payloads of found summaries.
|
|
99
|
+
|
|
100
|
+
Parameters:
|
|
101
|
+
-----------
|
|
102
|
+
|
|
103
|
+
- query (str): The query string to be used for generating a completion.
|
|
104
|
+
- retrieved_objects (Any): The retrieved objects to be used for generating a completion.
|
|
105
|
+
- context (Any): The context to be used for generating a completion.
|
|
106
|
+
|
|
107
|
+
Returns:
|
|
108
|
+
--------
|
|
109
|
+
|
|
110
|
+
- List[dict]: A list of payloads of found summaries.
|
|
111
|
+
"""
|
|
112
|
+
# TODO: Do we want to generate a completion using LLM here?
|
|
113
|
+
if retrieved_objects:
|
|
114
|
+
summary_payloads = [summary.payload for summary in retrieved_objects]
|
|
115
|
+
logger.info(f"Returning {len(summary_payloads)} summary payloads")
|
|
116
|
+
return summary_payloads
|
|
117
|
+
else:
|
|
118
|
+
return []
|