cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +2 -0
- cognee/alembic/README +1 -0
- cognee/alembic/env.py +107 -0
- cognee/alembic/script.py.mako +26 -0
- cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
- cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
- cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
- cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
- cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
- cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
- cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
- cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
- cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
- cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
- cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
- cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
- cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
- cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
- cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
- cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
- cognee/alembic.ini +117 -0
- cognee/api/v1/add/routers/get_add_router.py +2 -0
- cognee/api/v1/cognify/cognify.py +11 -6
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
- cognee/api/v1/config/config.py +60 -0
- cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
- cognee/api/v1/memify/routers/get_memify_router.py +2 -0
- cognee/api/v1/search/routers/get_search_router.py +21 -6
- cognee/api/v1/search/search.py +25 -5
- cognee/api/v1/sync/routers/get_sync_router.py +3 -3
- cognee/cli/commands/add_command.py +1 -1
- cognee/cli/commands/cognify_command.py +6 -0
- cognee/cli/commands/config_command.py +1 -1
- cognee/context_global_variables.py +5 -1
- cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
- cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
- cognee/infrastructure/databases/cache/config.py +6 -0
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
- cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/config.py +6 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
- cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
- cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
- cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
- cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
- cognee/infrastructure/llm/prompts/test.txt +1 -1
- cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
- cognee/modules/chunking/models/DocumentChunk.py +0 -1
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/models/Data.py +1 -0
- cognee/modules/engine/models/Entity.py +0 -1
- cognee/modules/engine/operations/setup.py +6 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
- cognee/modules/notebooks/methods/__init__.py +1 -0
- cognee/modules/notebooks/methods/create_notebook.py +0 -34
- cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
- cognee/modules/notebooks/methods/get_notebooks.py +12 -8
- cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
- cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
- cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
- cognee/modules/retrieval/__init__.py +0 -1
- cognee/modules/retrieval/base_retriever.py +66 -10
- cognee/modules/retrieval/chunks_retriever.py +57 -49
- cognee/modules/retrieval/coding_rules_retriever.py +12 -5
- cognee/modules/retrieval/completion_retriever.py +29 -28
- cognee/modules/retrieval/cypher_search_retriever.py +25 -20
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
- cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
- cognee/modules/retrieval/graph_completion_retriever.py +78 -63
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/lexical_retriever.py +34 -12
- cognee/modules/retrieval/natural_language_retriever.py +18 -15
- cognee/modules/retrieval/summaries_retriever.py +51 -34
- cognee/modules/retrieval/temporal_retriever.py +59 -49
- cognee/modules/retrieval/triplet_retriever.py +31 -32
- cognee/modules/retrieval/utils/access_tracking.py +88 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
- cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
- cognee/modules/search/methods/__init__.py +1 -0
- cognee/modules/search/methods/get_retriever_output.py +53 -0
- cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
- cognee/modules/search/methods/search.py +90 -215
- cognee/modules/search/models/SearchResultPayload.py +67 -0
- cognee/modules/search/types/SearchResult.py +1 -8
- cognee/modules/search/types/SearchType.py +1 -2
- cognee/modules/search/types/__init__.py +1 -1
- cognee/modules/search/utils/__init__.py +1 -2
- cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
- cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
- cognee/modules/users/authentication/default/default_transport.py +11 -1
- cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
- cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
- cognee/modules/users/methods/create_user.py +0 -9
- cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
- cognee/modules/visualization/cognee_network_visualization.py +1 -1
- cognee/run_migrations.py +48 -0
- cognee/shared/exceptions/__init__.py +1 -3
- cognee/shared/exceptions/exceptions.py +11 -1
- cognee/shared/usage_logger.py +332 -0
- cognee/shared/utils.py +12 -5
- cognee/tasks/chunks/__init__.py +9 -0
- cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
- cognee/tasks/graph/__init__.py +7 -0
- cognee/tasks/memify/__init__.py +8 -0
- cognee/tasks/memify/extract_usage_frequency.py +613 -0
- cognee/tasks/summarization/models.py +0 -2
- cognee/tasks/temporal_graph/__init__.py +0 -1
- cognee/tasks/translation/__init__.py +96 -0
- cognee/tasks/translation/config.py +110 -0
- cognee/tasks/translation/detect_language.py +190 -0
- cognee/tasks/translation/exceptions.py +62 -0
- cognee/tasks/translation/models.py +72 -0
- cognee/tasks/translation/providers/__init__.py +44 -0
- cognee/tasks/translation/providers/azure_provider.py +192 -0
- cognee/tasks/translation/providers/base.py +85 -0
- cognee/tasks/translation/providers/google_provider.py +158 -0
- cognee/tasks/translation/providers/llm_provider.py +143 -0
- cognee/tasks/translation/translate_content.py +282 -0
- cognee/tasks/web_scraper/default_url_crawler.py +6 -2
- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
- cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
- cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
- cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
- cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
- cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
- cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
- cognee/tests/integration/retrieval/test_structured_output.py +62 -18
- cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
- cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
- cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
- cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
- cognee/tests/tasks/translation/README.md +147 -0
- cognee/tests/tasks/translation/__init__.py +1 -0
- cognee/tests/tasks/translation/config_test.py +93 -0
- cognee/tests/tasks/translation/detect_language_test.py +118 -0
- cognee/tests/tasks/translation/providers_test.py +151 -0
- cognee/tests/tasks/translation/translate_content_test.py +213 -0
- cognee/tests/test_chromadb.py +1 -1
- cognee/tests/test_cleanup_unused_data.py +165 -0
- cognee/tests/test_delete_by_id.py +6 -6
- cognee/tests/test_extract_usage_frequency.py +308 -0
- cognee/tests/test_kuzu.py +17 -7
- cognee/tests/test_lancedb.py +3 -1
- cognee/tests/test_library.py +1 -1
- cognee/tests/test_neo4j.py +17 -7
- cognee/tests/test_neptune_analytics_vector.py +3 -1
- cognee/tests/test_permissions.py +172 -187
- cognee/tests/test_pgvector.py +3 -1
- cognee/tests/test_relational_db_migration.py +15 -1
- cognee/tests/test_remote_kuzu.py +3 -1
- cognee/tests/test_s3_file_storage.py +1 -1
- cognee/tests/test_search_db.py +97 -110
- cognee/tests/test_usage_logger_e2e.py +268 -0
- cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
- cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
- cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
- cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
- cognee/tests/unit/modules/search/test_search.py +176 -0
- cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
- cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
- cognee/tests/unit/shared/test_usage_logger.py +241 -0
- cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/METADATA +22 -17
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/RECORD +235 -147
- cognee/api/.env.example +0 -5
- cognee/modules/retrieval/base_graph_retriever.py +0 -24
- cognee/modules/search/methods/get_search_type_tools.py +0 -223
- cognee/modules/search/methods/no_access_control_search.py +0 -62
- cognee/modules/search/utils/prepare_search_result.py +0 -63
- cognee/tests/test_feedback_enrichment.py +0 -174
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/WHEEL +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -5,6 +5,7 @@ from datetime import datetime
|
|
|
5
5
|
|
|
6
6
|
from operator import itemgetter
|
|
7
7
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
|
8
|
+
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
|
|
8
9
|
from cognee.modules.retrieval.utils.completion import generate_completion, summarize_text
|
|
9
10
|
from cognee.modules.retrieval.utils.session_cache import (
|
|
10
11
|
save_conversation_history,
|
|
@@ -49,6 +50,8 @@ class TemporalRetriever(GraphCompletionRetriever):
|
|
|
49
50
|
node_name: Optional[List[str]] = None,
|
|
50
51
|
wide_search_top_k: Optional[int] = 100,
|
|
51
52
|
triplet_distance_penalty: Optional[float] = 3.5,
|
|
53
|
+
session_id: Optional[str] = None,
|
|
54
|
+
response_model: Type = str,
|
|
52
55
|
):
|
|
53
56
|
super().__init__(
|
|
54
57
|
user_prompt_path=user_prompt_path,
|
|
@@ -58,6 +61,8 @@ class TemporalRetriever(GraphCompletionRetriever):
|
|
|
58
61
|
node_name=node_name,
|
|
59
62
|
wide_search_top_k=wide_search_top_k,
|
|
60
63
|
triplet_distance_penalty=triplet_distance_penalty,
|
|
64
|
+
session_id=session_id,
|
|
65
|
+
response_model=response_model,
|
|
61
66
|
)
|
|
62
67
|
self.user_prompt_path = user_prompt_path
|
|
63
68
|
self.system_prompt_path = system_prompt_path
|
|
@@ -98,7 +103,7 @@ class TemporalRetriever(GraphCompletionRetriever):
|
|
|
98
103
|
|
|
99
104
|
async def filter_top_k_events(self, relevant_events, scored_results):
|
|
100
105
|
# Build a score lookup from vector search results
|
|
101
|
-
score_lookup = {res.
|
|
106
|
+
score_lookup = {res.id: res.score for res in scored_results}
|
|
102
107
|
|
|
103
108
|
events_with_scores = []
|
|
104
109
|
for event in relevant_events[0]["events"]:
|
|
@@ -109,9 +114,7 @@ class TemporalRetriever(GraphCompletionRetriever):
|
|
|
109
114
|
|
|
110
115
|
return events_with_scores[: self.top_k]
|
|
111
116
|
|
|
112
|
-
async def
|
|
113
|
-
"""Retrieves context based on the query."""
|
|
114
|
-
|
|
117
|
+
async def get_retrieved_objects(self, query: str) -> dict:
|
|
115
118
|
time_from, time_to = await self.extract_time_from_query(query)
|
|
116
119
|
|
|
117
120
|
graph_engine = await get_graph_engine()
|
|
@@ -127,7 +130,7 @@ class TemporalRetriever(GraphCompletionRetriever):
|
|
|
127
130
|
"No timestamps identified based on the query, performing retrieval using triplet search on events and entities."
|
|
128
131
|
)
|
|
129
132
|
triplets = await self.get_triplets(query)
|
|
130
|
-
return
|
|
133
|
+
return {"triplets": triplets}
|
|
131
134
|
|
|
132
135
|
if ids:
|
|
133
136
|
relevant_events = await graph_engine.collect_events(ids=ids)
|
|
@@ -136,7 +139,7 @@ class TemporalRetriever(GraphCompletionRetriever):
|
|
|
136
139
|
"No events identified based on timestamp filtering, performing retrieval using triplet search on events and entities."
|
|
137
140
|
)
|
|
138
141
|
triplets = await self.get_triplets(query)
|
|
139
|
-
return
|
|
142
|
+
return {"triplets": triplets}
|
|
140
143
|
|
|
141
144
|
vector_engine = get_vector_engine()
|
|
142
145
|
query_vector = (await vector_engine.embedding_engine.embed_text([query]))[0]
|
|
@@ -145,16 +148,26 @@ class TemporalRetriever(GraphCompletionRetriever):
|
|
|
145
148
|
collection_name="Event_name", query_vector=query_vector, limit=None
|
|
146
149
|
)
|
|
147
150
|
|
|
148
|
-
|
|
151
|
+
return {"relevant_events": relevant_events, "vector_search_results": vector_search_results}
|
|
149
152
|
|
|
150
|
-
|
|
153
|
+
async def get_context_from_objects(self, query: str, retrieved_objects: Any) -> Any:
|
|
154
|
+
"""Retrieves context based on the query."""
|
|
155
|
+
if retrieved_objects.get("relevant_events", None) and retrieved_objects.get(
|
|
156
|
+
"vector_search_results", None
|
|
157
|
+
):
|
|
158
|
+
top_k_events = await self.filter_top_k_events(
|
|
159
|
+
retrieved_objects.get("relevant_events"),
|
|
160
|
+
retrieved_objects.get("vector_search_results", None),
|
|
161
|
+
)
|
|
162
|
+
return self.descriptions_to_string(top_k_events)
|
|
163
|
+
else:
|
|
164
|
+
# In case no events were found, fall back to triplet context
|
|
165
|
+
triplets = retrieved_objects.get("triplets", [])
|
|
166
|
+
context_text = await self.resolve_edges_to_text(triplets)
|
|
167
|
+
return context_text
|
|
151
168
|
|
|
152
|
-
async def
|
|
153
|
-
self,
|
|
154
|
-
query: str,
|
|
155
|
-
context: Optional[str] = None,
|
|
156
|
-
session_id: Optional[str] = None,
|
|
157
|
-
response_model: Type = str,
|
|
169
|
+
async def get_completion_from_context(
|
|
170
|
+
self, query: str, retrieved_objects: Any = None, context: Optional[str] = None
|
|
158
171
|
) -> List[Any]:
|
|
159
172
|
"""
|
|
160
173
|
Generates a response using the query and optional context.
|
|
@@ -174,45 +187,42 @@ class TemporalRetriever(GraphCompletionRetriever):
|
|
|
174
187
|
|
|
175
188
|
- List[str]: A list containing the generated completion.
|
|
176
189
|
"""
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
context_summary, completion = await asyncio.gather(
|
|
191
|
-
summarize_text(context),
|
|
192
|
-
generate_completion(
|
|
193
|
-
query=query,
|
|
194
|
-
context=context,
|
|
195
|
-
user_prompt_path=self.user_prompt_path,
|
|
196
|
-
system_prompt_path=self.system_prompt_path,
|
|
197
|
-
conversation_history=conversation_history,
|
|
198
|
-
response_model=response_model,
|
|
199
|
-
),
|
|
200
|
-
)
|
|
201
|
-
else:
|
|
202
|
-
completion = await generate_completion(
|
|
190
|
+
|
|
191
|
+
# Check if we need to generate context summary for caching
|
|
192
|
+
cache_config = CacheConfig()
|
|
193
|
+
user = session_user.get()
|
|
194
|
+
user_id = getattr(user, "id", None)
|
|
195
|
+
session_save = user_id and cache_config.caching
|
|
196
|
+
|
|
197
|
+
if session_save:
|
|
198
|
+
conversation_history = await get_conversation_history(session_id=self.session_id)
|
|
199
|
+
|
|
200
|
+
context_summary, completion = await asyncio.gather(
|
|
201
|
+
summarize_text(context),
|
|
202
|
+
generate_completion(
|
|
203
203
|
query=query,
|
|
204
204
|
context=context,
|
|
205
205
|
user_prompt_path=self.user_prompt_path,
|
|
206
206
|
system_prompt_path=self.system_prompt_path,
|
|
207
|
-
|
|
208
|
-
|
|
207
|
+
conversation_history=conversation_history,
|
|
208
|
+
response_model=self.response_model,
|
|
209
|
+
),
|
|
210
|
+
)
|
|
211
|
+
else:
|
|
212
|
+
completion = await generate_completion(
|
|
213
|
+
query=query,
|
|
214
|
+
context=context,
|
|
215
|
+
user_prompt_path=self.user_prompt_path,
|
|
216
|
+
system_prompt_path=self.system_prompt_path,
|
|
217
|
+
response_model=self.response_model,
|
|
218
|
+
)
|
|
209
219
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
220
|
+
if session_save:
|
|
221
|
+
await save_conversation_history(
|
|
222
|
+
query=query,
|
|
223
|
+
context_summary=context_summary,
|
|
224
|
+
answer=completion,
|
|
225
|
+
session_id=self.session_id,
|
|
226
|
+
)
|
|
217
227
|
|
|
218
228
|
return [completion]
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
from typing import Any, Optional, Type, List
|
|
2
|
+
from typing import Any, Optional, Type, List, Union
|
|
3
3
|
|
|
4
4
|
from cognee.shared.logging_utils import get_logger
|
|
5
5
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
|
@@ -32,19 +32,23 @@ class TripletRetriever(BaseRetriever):
|
|
|
32
32
|
system_prompt_path: str = "answer_simple_question.txt",
|
|
33
33
|
system_prompt: Optional[str] = None,
|
|
34
34
|
top_k: Optional[int] = 5,
|
|
35
|
+
session_id: Optional[str] = None,
|
|
36
|
+
response_model: Type = str,
|
|
35
37
|
):
|
|
36
38
|
"""Initialize retriever with optional custom prompt paths."""
|
|
37
39
|
self.user_prompt_path = user_prompt_path
|
|
38
40
|
self.system_prompt_path = system_prompt_path
|
|
39
41
|
self.top_k = top_k if top_k is not None else 5
|
|
40
42
|
self.system_prompt = system_prompt
|
|
43
|
+
self.session_id = session_id
|
|
44
|
+
self.response_model = response_model
|
|
41
45
|
|
|
42
|
-
async def
|
|
46
|
+
async def get_retrieved_objects(self, query: str) -> Any:
|
|
43
47
|
"""
|
|
44
|
-
Retrieves relevant triplets
|
|
48
|
+
Retrieves relevant triplets.
|
|
45
49
|
|
|
46
|
-
Fetches triplets based on a query from a vector engine
|
|
47
|
-
Returns empty
|
|
50
|
+
Fetches triplets based on a query from a vector engine.
|
|
51
|
+
Returns empty list if no triplets are found. Raises NoDataError if the collection is not
|
|
48
52
|
found.
|
|
49
53
|
|
|
50
54
|
Parameters:
|
|
@@ -55,8 +59,7 @@ class TripletRetriever(BaseRetriever):
|
|
|
55
59
|
Returns:
|
|
56
60
|
--------
|
|
57
61
|
|
|
58
|
-
-
|
|
59
|
-
empty string if none are found.
|
|
62
|
+
- Any: A list containing the retrieved triplets, or an empty list if none are found.
|
|
60
63
|
"""
|
|
61
64
|
vector_engine = get_vector_engine()
|
|
62
65
|
|
|
@@ -67,25 +70,30 @@ class TripletRetriever(BaseRetriever):
|
|
|
67
70
|
"In order to use TRIPLET_COMPLETION first use the create_triplet_embeddings memify pipeline. "
|
|
68
71
|
)
|
|
69
72
|
|
|
70
|
-
found_triplets = await vector_engine.search(
|
|
73
|
+
found_triplets = await vector_engine.search(
|
|
74
|
+
"Triplet_text", query, limit=self.top_k, include_payload=True
|
|
75
|
+
)
|
|
71
76
|
|
|
72
77
|
if len(found_triplets) == 0:
|
|
73
|
-
return
|
|
78
|
+
return []
|
|
74
79
|
|
|
75
|
-
|
|
76
|
-
combined_context = "\n".join(triplets_payload)
|
|
77
|
-
return combined_context
|
|
80
|
+
return found_triplets
|
|
78
81
|
except CollectionNotFoundError as error:
|
|
79
82
|
logger.error("Triplet_text collection not found")
|
|
80
83
|
raise NoDataError("No data found in the system, please add data first.") from error
|
|
81
84
|
|
|
82
|
-
async def
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
85
|
+
async def get_context_from_objects(self, query: str, retrieved_objects: Any) -> str:
|
|
86
|
+
if retrieved_objects:
|
|
87
|
+
triplets_payload = [
|
|
88
|
+
found_triplet.payload["text"] for found_triplet in retrieved_objects
|
|
89
|
+
]
|
|
90
|
+
combined_context = "\n".join(triplets_payload)
|
|
91
|
+
return combined_context
|
|
92
|
+
return ""
|
|
93
|
+
|
|
94
|
+
async def get_completion_from_context(
|
|
95
|
+
self, query: str, retrieved_objects: Any, context: Any
|
|
96
|
+
) -> Union[List[str], List[dict]]:
|
|
89
97
|
"""
|
|
90
98
|
Generates an LLM completion using the context.
|
|
91
99
|
|
|
@@ -107,9 +115,6 @@ class TripletRetriever(BaseRetriever):
|
|
|
107
115
|
|
|
108
116
|
- Any: The generated completion based on the provided query and context.
|
|
109
117
|
"""
|
|
110
|
-
if context is None:
|
|
111
|
-
context = await self.get_context(query)
|
|
112
|
-
|
|
113
118
|
cache_config = CacheConfig()
|
|
114
119
|
user = session_user.get()
|
|
115
120
|
user_id = getattr(user, "id", None)
|
|
@@ -119,14 +124,11 @@ class TripletRetriever(BaseRetriever):
|
|
|
119
124
|
completion = await self._get_completion_with_session(
|
|
120
125
|
query=query,
|
|
121
126
|
context=context,
|
|
122
|
-
session_id=session_id,
|
|
123
|
-
response_model=response_model,
|
|
124
127
|
)
|
|
125
128
|
else:
|
|
126
129
|
completion = await self._get_completion_without_session(
|
|
127
130
|
query=query,
|
|
128
131
|
context=context,
|
|
129
|
-
response_model=response_model,
|
|
130
132
|
)
|
|
131
133
|
|
|
132
134
|
return [completion]
|
|
@@ -135,11 +137,9 @@ class TripletRetriever(BaseRetriever):
|
|
|
135
137
|
self,
|
|
136
138
|
query: str,
|
|
137
139
|
context: str,
|
|
138
|
-
session_id: Optional[str],
|
|
139
|
-
response_model: Type,
|
|
140
140
|
) -> Any:
|
|
141
141
|
"""Generate completion with session history and caching."""
|
|
142
|
-
conversation_history = await get_conversation_history(session_id=session_id)
|
|
142
|
+
conversation_history = await get_conversation_history(session_id=self.session_id)
|
|
143
143
|
|
|
144
144
|
context_summary, completion = await asyncio.gather(
|
|
145
145
|
summarize_text(context),
|
|
@@ -150,7 +150,7 @@ class TripletRetriever(BaseRetriever):
|
|
|
150
150
|
system_prompt_path=self.system_prompt_path,
|
|
151
151
|
system_prompt=self.system_prompt,
|
|
152
152
|
conversation_history=conversation_history,
|
|
153
|
-
response_model=response_model,
|
|
153
|
+
response_model=self.response_model,
|
|
154
154
|
),
|
|
155
155
|
)
|
|
156
156
|
|
|
@@ -158,7 +158,7 @@ class TripletRetriever(BaseRetriever):
|
|
|
158
158
|
query=query,
|
|
159
159
|
context_summary=context_summary,
|
|
160
160
|
answer=completion,
|
|
161
|
-
session_id=session_id,
|
|
161
|
+
session_id=self.session_id,
|
|
162
162
|
)
|
|
163
163
|
|
|
164
164
|
return completion
|
|
@@ -167,7 +167,6 @@ class TripletRetriever(BaseRetriever):
|
|
|
167
167
|
self,
|
|
168
168
|
query: str,
|
|
169
169
|
context: str,
|
|
170
|
-
response_model: Type,
|
|
171
170
|
) -> Any:
|
|
172
171
|
"""Generate completion without session history."""
|
|
173
172
|
completion = await generate_completion(
|
|
@@ -176,7 +175,7 @@ class TripletRetriever(BaseRetriever):
|
|
|
176
175
|
user_prompt_path=self.user_prompt_path,
|
|
177
176
|
system_prompt_path=self.system_prompt_path,
|
|
178
177
|
system_prompt=self.system_prompt,
|
|
179
|
-
response_model=response_model,
|
|
178
|
+
response_model=self.response_model,
|
|
180
179
|
)
|
|
181
180
|
|
|
182
181
|
return completion
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""Utilities for tracking data access in retrievers."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from typing import List, Any
|
|
6
|
+
from uuid import UUID
|
|
7
|
+
import os
|
|
8
|
+
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
9
|
+
from cognee.infrastructure.databases.relational import get_relational_engine
|
|
10
|
+
from cognee.modules.data.models import Data
|
|
11
|
+
from cognee.shared.logging_utils import get_logger
|
|
12
|
+
from sqlalchemy import update
|
|
13
|
+
from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
|
|
14
|
+
|
|
15
|
+
logger = get_logger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
async def update_node_access_timestamps(items: List[Any]):
|
|
19
|
+
if os.getenv("ENABLE_LAST_ACCESSED", "false").lower() != "true":
|
|
20
|
+
return
|
|
21
|
+
|
|
22
|
+
if not items:
|
|
23
|
+
return
|
|
24
|
+
|
|
25
|
+
graph_engine = await get_graph_engine()
|
|
26
|
+
timestamp_dt = datetime.now(timezone.utc)
|
|
27
|
+
|
|
28
|
+
# Extract node IDs
|
|
29
|
+
node_ids = []
|
|
30
|
+
for item in items:
|
|
31
|
+
item_id = item.payload.get("id") if hasattr(item, "payload") else item.get("id")
|
|
32
|
+
if item_id:
|
|
33
|
+
node_ids.append(str(item_id))
|
|
34
|
+
|
|
35
|
+
if not node_ids:
|
|
36
|
+
return
|
|
37
|
+
|
|
38
|
+
# Focus on document-level tracking via projection
|
|
39
|
+
try:
|
|
40
|
+
doc_ids = await _find_origin_documents_via_projection(graph_engine, node_ids)
|
|
41
|
+
if doc_ids:
|
|
42
|
+
await _update_sql_records(doc_ids, timestamp_dt)
|
|
43
|
+
except Exception as e:
|
|
44
|
+
logger.error(f"Failed to update SQL timestamps: {e}")
|
|
45
|
+
raise
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
async def _find_origin_documents_via_projection(graph_engine, node_ids):
|
|
49
|
+
"""Find origin documents using graph projection instead of DB queries"""
|
|
50
|
+
# Project the entire graph with necessary properties
|
|
51
|
+
memory_fragment = CogneeGraph()
|
|
52
|
+
await memory_fragment.project_graph_from_db(
|
|
53
|
+
graph_engine,
|
|
54
|
+
node_properties_to_project=["id", "type"],
|
|
55
|
+
edge_properties_to_project=["relationship_name"],
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
# Find origin documents by traversing the in-memory graph
|
|
59
|
+
doc_ids = set()
|
|
60
|
+
for node_id in node_ids:
|
|
61
|
+
node = memory_fragment.get_node(node_id)
|
|
62
|
+
if node and node.get_attribute("type") == "DocumentChunk":
|
|
63
|
+
# Traverse edges to find connected documents
|
|
64
|
+
for edge in node.get_skeleton_edges():
|
|
65
|
+
# Get the neighbor node
|
|
66
|
+
neighbor = (
|
|
67
|
+
edge.get_destination_node()
|
|
68
|
+
if edge.get_source_node().id == node_id
|
|
69
|
+
else edge.get_source_node()
|
|
70
|
+
)
|
|
71
|
+
if neighbor and neighbor.get_attribute("type") in ["TextDocument", "Document"]:
|
|
72
|
+
doc_ids.add(neighbor.id)
|
|
73
|
+
|
|
74
|
+
return list(doc_ids)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
async def _update_sql_records(doc_ids, timestamp_dt):
|
|
78
|
+
"""Update SQL Data table (same for all providers)"""
|
|
79
|
+
db_engine = get_relational_engine()
|
|
80
|
+
async with db_engine.get_async_session() as session:
|
|
81
|
+
stmt = (
|
|
82
|
+
update(Data)
|
|
83
|
+
.where(Data.id.in_([UUID(doc_id) for doc_id in doc_ids]))
|
|
84
|
+
.values(last_accessed=timestamp_dt)
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
await session.execute(stmt)
|
|
88
|
+
await session.commit()
|