cognee 0.3.6__py3-none-any.whl → 0.3.7.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +1 -0
- cognee/api/health.py +2 -12
- cognee/api/v1/add/add.py +46 -6
- cognee/api/v1/add/routers/get_add_router.py +11 -2
- cognee/api/v1/cognify/cognify.py +29 -9
- cognee/api/v1/cognify/routers/get_cognify_router.py +2 -1
- cognee/api/v1/datasets/datasets.py +11 -0
- cognee/api/v1/datasets/routers/get_datasets_router.py +8 -0
- cognee/api/v1/delete/routers/get_delete_router.py +2 -0
- cognee/api/v1/memify/routers/get_memify_router.py +2 -1
- cognee/api/v1/permissions/routers/get_permissions_router.py +6 -0
- cognee/api/v1/responses/default_tools.py +0 -1
- cognee/api/v1/responses/dispatch_function.py +1 -1
- cognee/api/v1/responses/routers/default_tools.py +0 -1
- cognee/api/v1/search/routers/get_search_router.py +3 -3
- cognee/api/v1/search/search.py +11 -9
- cognee/api/v1/settings/routers/get_settings_router.py +7 -1
- cognee/api/v1/sync/routers/get_sync_router.py +3 -0
- cognee/api/v1/ui/ui.py +45 -16
- cognee/api/v1/update/routers/get_update_router.py +3 -1
- cognee/api/v1/update/update.py +3 -3
- cognee/api/v1/users/routers/get_visualize_router.py +2 -0
- cognee/cli/_cognee.py +61 -10
- cognee/cli/commands/add_command.py +3 -3
- cognee/cli/commands/cognify_command.py +3 -3
- cognee/cli/commands/config_command.py +9 -7
- cognee/cli/commands/delete_command.py +3 -3
- cognee/cli/commands/search_command.py +3 -7
- cognee/cli/config.py +0 -1
- cognee/context_global_variables.py +5 -0
- cognee/exceptions/exceptions.py +1 -1
- cognee/infrastructure/databases/cache/__init__.py +2 -0
- cognee/infrastructure/databases/cache/cache_db_interface.py +79 -0
- cognee/infrastructure/databases/cache/config.py +44 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +67 -0
- cognee/infrastructure/databases/cache/redis/RedisAdapter.py +243 -0
- cognee/infrastructure/databases/exceptions/__init__.py +1 -0
- cognee/infrastructure/databases/exceptions/exceptions.py +18 -2
- cognee/infrastructure/databases/graph/get_graph_engine.py +1 -1
- cognee/infrastructure/databases/graph/graph_db_interface.py +5 -0
- cognee/infrastructure/databases/graph/kuzu/adapter.py +76 -47
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +13 -3
- cognee/infrastructure/databases/graph/neo4j_driver/deadlock_retry.py +1 -1
- cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +1 -1
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +1 -1
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +21 -3
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +17 -10
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +17 -4
- cognee/infrastructure/databases/vector/embeddings/config.py +2 -3
- cognee/infrastructure/databases/vector/exceptions/exceptions.py +1 -1
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -1
- cognee/infrastructure/files/exceptions.py +1 -1
- cognee/infrastructure/files/storage/LocalFileStorage.py +9 -9
- cognee/infrastructure/files/storage/S3FileStorage.py +11 -11
- cognee/infrastructure/files/utils/guess_file_type.py +6 -0
- cognee/infrastructure/llm/prompts/feedback_reaction_prompt.txt +14 -0
- cognee/infrastructure/llm/prompts/feedback_report_prompt.txt +13 -0
- cognee/infrastructure/llm/prompts/feedback_user_context_prompt.txt +5 -0
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +0 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +19 -9
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +17 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +17 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +32 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/__init__.py +0 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +109 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +33 -8
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +40 -18
- cognee/infrastructure/loaders/LoaderEngine.py +27 -7
- cognee/infrastructure/loaders/external/__init__.py +7 -0
- cognee/infrastructure/loaders/external/advanced_pdf_loader.py +2 -8
- cognee/infrastructure/loaders/external/beautiful_soup_loader.py +310 -0
- cognee/infrastructure/loaders/supported_loaders.py +7 -0
- cognee/modules/data/exceptions/exceptions.py +1 -1
- cognee/modules/data/methods/__init__.py +3 -0
- cognee/modules/data/methods/get_dataset_data.py +4 -1
- cognee/modules/data/methods/has_dataset_data.py +21 -0
- cognee/modules/engine/models/TableRow.py +0 -1
- cognee/modules/ingestion/save_data_to_file.py +9 -2
- cognee/modules/pipelines/exceptions/exceptions.py +1 -1
- cognee/modules/pipelines/operations/pipeline.py +12 -1
- cognee/modules/pipelines/operations/run_tasks.py +25 -197
- cognee/modules/pipelines/operations/run_tasks_base.py +7 -0
- cognee/modules/pipelines/operations/run_tasks_data_item.py +260 -0
- cognee/modules/pipelines/operations/run_tasks_distributed.py +121 -38
- cognee/modules/pipelines/operations/run_tasks_with_telemetry.py +9 -1
- cognee/modules/retrieval/EntityCompletionRetriever.py +48 -8
- cognee/modules/retrieval/base_graph_retriever.py +3 -1
- cognee/modules/retrieval/base_retriever.py +3 -1
- cognee/modules/retrieval/chunks_retriever.py +5 -1
- cognee/modules/retrieval/code_retriever.py +20 -2
- cognee/modules/retrieval/completion_retriever.py +50 -9
- cognee/modules/retrieval/cypher_search_retriever.py +11 -1
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +47 -8
- cognee/modules/retrieval/graph_completion_cot_retriever.py +152 -22
- cognee/modules/retrieval/graph_completion_retriever.py +54 -10
- cognee/modules/retrieval/lexical_retriever.py +20 -2
- cognee/modules/retrieval/natural_language_retriever.py +10 -1
- cognee/modules/retrieval/summaries_retriever.py +5 -1
- cognee/modules/retrieval/temporal_retriever.py +62 -10
- cognee/modules/retrieval/user_qa_feedback.py +3 -2
- cognee/modules/retrieval/utils/completion.py +30 -4
- cognee/modules/retrieval/utils/description_to_codepart_search.py +1 -1
- cognee/modules/retrieval/utils/session_cache.py +156 -0
- cognee/modules/search/methods/get_search_type_tools.py +0 -5
- cognee/modules/search/methods/no_access_control_search.py +12 -1
- cognee/modules/search/methods/search.py +51 -5
- cognee/modules/search/types/SearchType.py +0 -1
- cognee/modules/settings/get_settings.py +23 -0
- cognee/modules/users/methods/get_authenticated_user.py +3 -1
- cognee/modules/users/methods/get_default_user.py +1 -6
- cognee/modules/users/roles/methods/create_role.py +2 -2
- cognee/modules/users/tenants/methods/create_tenant.py +2 -2
- cognee/shared/exceptions/exceptions.py +1 -1
- cognee/shared/logging_utils.py +18 -11
- cognee/shared/utils.py +24 -2
- cognee/tasks/codingagents/coding_rule_associations.py +1 -2
- cognee/tasks/documents/exceptions/exceptions.py +1 -1
- cognee/tasks/feedback/__init__.py +13 -0
- cognee/tasks/feedback/create_enrichments.py +84 -0
- cognee/tasks/feedback/extract_feedback_interactions.py +230 -0
- cognee/tasks/feedback/generate_improved_answers.py +130 -0
- cognee/tasks/feedback/link_enrichments_to_feedback.py +67 -0
- cognee/tasks/feedback/models.py +26 -0
- cognee/tasks/graph/extract_graph_from_data.py +2 -0
- cognee/tasks/ingestion/data_item_to_text_file.py +3 -3
- cognee/tasks/ingestion/ingest_data.py +11 -5
- cognee/tasks/ingestion/save_data_item_to_storage.py +12 -1
- cognee/tasks/storage/add_data_points.py +3 -10
- cognee/tasks/storage/index_data_points.py +19 -14
- cognee/tasks/storage/index_graph_edges.py +25 -11
- cognee/tasks/web_scraper/__init__.py +34 -0
- cognee/tasks/web_scraper/config.py +26 -0
- cognee/tasks/web_scraper/default_url_crawler.py +446 -0
- cognee/tasks/web_scraper/models.py +46 -0
- cognee/tasks/web_scraper/types.py +4 -0
- cognee/tasks/web_scraper/utils.py +142 -0
- cognee/tasks/web_scraper/web_scraper_task.py +396 -0
- cognee/tests/cli_tests/cli_unit_tests/test_cli_utils.py +0 -1
- cognee/tests/integration/web_url_crawler/test_default_url_crawler.py +13 -0
- cognee/tests/integration/web_url_crawler/test_tavily_crawler.py +19 -0
- cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py +344 -0
- cognee/tests/subprocesses/reader.py +25 -0
- cognee/tests/subprocesses/simple_cognify_1.py +31 -0
- cognee/tests/subprocesses/simple_cognify_2.py +31 -0
- cognee/tests/subprocesses/writer.py +32 -0
- cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py +0 -2
- cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py +8 -3
- cognee/tests/tasks/entity_extraction/entity_extraction_test.py +89 -0
- cognee/tests/tasks/web_scraping/web_scraping_test.py +172 -0
- cognee/tests/test_add_docling_document.py +56 -0
- cognee/tests/test_chromadb.py +7 -11
- cognee/tests/test_concurrent_subprocess_access.py +76 -0
- cognee/tests/test_conversation_history.py +240 -0
- cognee/tests/test_feedback_enrichment.py +174 -0
- cognee/tests/test_kuzu.py +27 -15
- cognee/tests/test_lancedb.py +7 -11
- cognee/tests/test_library.py +32 -2
- cognee/tests/test_neo4j.py +24 -16
- cognee/tests/test_neptune_analytics_vector.py +7 -11
- cognee/tests/test_permissions.py +9 -13
- cognee/tests/test_pgvector.py +4 -4
- cognee/tests/test_remote_kuzu.py +8 -11
- cognee/tests/test_s3_file_storage.py +1 -1
- cognee/tests/test_search_db.py +6 -8
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +89 -0
- cognee/tests/unit/modules/retrieval/conversation_history_test.py +154 -0
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +51 -0
- {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/METADATA +21 -6
- {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/RECORD +178 -139
- {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/entry_points.txt +1 -0
- distributed/Dockerfile +0 -3
- distributed/entrypoint.py +21 -9
- distributed/signal.py +5 -0
- distributed/workers/data_point_saving_worker.py +64 -34
- distributed/workers/graph_saving_worker.py +71 -47
- cognee/infrastructure/databases/graph/memgraph/memgraph_adapter.py +0 -1116
- cognee/modules/retrieval/insights_retriever.py +0 -133
- cognee/tests/test_memgraph.py +0 -109
- cognee/tests/unit/modules/retrieval/insights_retriever_test.py +0 -251
- {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/WHEEL +0 -0
- {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
from typing import Optional, List, Dict, Any
|
|
2
|
+
from cognee.context_global_variables import session_user
|
|
3
|
+
from cognee.infrastructure.databases.cache.config import CacheConfig
|
|
4
|
+
from cognee.infrastructure.databases.exceptions import CacheConnectionError
|
|
5
|
+
from cognee.shared.logging_utils import get_logger
|
|
6
|
+
|
|
7
|
+
logger = get_logger("session_cache")
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
async def save_conversation_history(
    query: str,
    context_summary: str,
    answer: str,
    session_id: Optional[str] = None,
) -> bool:
    """
    Store a single question/answer exchange in the session cache.

    The exchange is only written when the current session user has an id and
    caching is switched on in the cache configuration. Every failure is
    logged and reported through the return value; nothing is raised to the
    caller.

    Parameters:
    -----------

        - query (str): The question that was asked.
        - context_summary (str): Summary of the context the answer was built from.
        - answer (str): The completion produced for the query.
        - session_id (Optional[str]): Session to file the exchange under;
          'default_session' is used when None.

    Returns:
    --------

        - bool: True when the exchange was written to the cache, False in
          every other case (caching off, no user, no engine, or an error).
    """
    try:
        config = CacheConfig()
        current_user = session_user.get()
        user_id = getattr(current_user, "id", None)

        # Both conditions are required: an identified user and caching enabled.
        if not user_id or not config.caching:
            logger.debug("Session caching disabled or user not authenticated")
            return False

        target_session = "default_session" if session_id is None else session_id

        # Imported lazily, only once we know the cache is actually going to be used.
        from cognee.infrastructure.databases.cache.get_cache_engine import get_cache_engine

        engine = get_cache_engine()
        if engine is None:
            logger.warning("Cache engine not available, skipping session save")
            return False

        await engine.add_qa(
            str(user_id),
            session_id=target_session,
            question=query,
            context=context_summary,
            answer=answer,
        )
    except CacheConnectionError as cache_error:
        logger.warning(
            f"Cache unavailable, continuing without session save: {cache_error.message}"
        )
        return False
    except Exception as error:
        # Saving history is best-effort; report the problem and carry on.
        logger.error(
            f"Unexpected error saving to session cache: {type(error).__name__}: {error!s}. Continuing without caching."
        )
        return False
    else:
        logger.info(
            f"Successfully saved Q&A to session cache: user_id={user_id}, session_id={target_session}"
        )
        return True
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
async def get_conversation_history(
    session_id: Optional[str] = None,
) -> str:
    """
    Fetch the cached Q&A history for the current user and render it as text.

    History is only looked up when the current session user has an id and
    caching is switched on in the cache configuration. How many entries come
    back is decided by the cache engine's `get_latest_qa` call. Every failure
    is logged and results in an empty string; nothing is raised to the caller.

    Parameters:
    -----------

        - session_id (Optional[str]): Session whose history is requested;
          'default_session' is used when None.

    Returns:
    --------

        - str: The rendered history, or an empty string when there is no
          history, caching is unavailable, or an error occurred.

    Format:
    -------

        Previous conversation:

        [2024-01-15 10:30:45]
        QUESTION: What is X?
        CONTEXT: X is a concept...
        ANSWER: X is...

        [2024-01-15 10:31:20]
        QUESTION: How does Y work?
        CONTEXT: Y is related to...
        ANSWER: Y works by...
    """
    try:
        config = CacheConfig()
        current_user = session_user.get()
        user_id = getattr(current_user, "id", None)

        # Both conditions are required: an identified user and caching enabled.
        if not user_id or not config.caching:
            logger.debug("Session caching disabled or user not authenticated")
            return ""

        target_session = "default_session" if session_id is None else session_id

        # Imported lazily, only once we know the cache is actually going to be used.
        from cognee.infrastructure.databases.cache.get_cache_engine import get_cache_engine

        engine = get_cache_engine()
        if engine is None:
            logger.warning("Cache engine not available, skipping conversation history retrieval")
            return ""

        entries = await engine.get_latest_qa(str(user_id), target_session)
        if not entries:
            logger.debug("No conversation history found")
            return ""

        # One rendered section per entry; missing fields fall back to placeholders.
        sections = ["Previous conversation:\n\n"]
        for item in entries:
            sections.append(
                f"[{item.get('time', 'Unknown time')}]\n"
                f"QUESTION: {item.get('question', '')}\n"
                f"CONTEXT: {item.get('context', '')}\n"
                f"ANSWER: {item.get('answer', '')}\n\n"
            )
        rendered = "".join(sections)

        logger.debug(f"Retrieved {len(entries)} conversation history entries")
        return rendered
    except CacheConnectionError as cache_error:
        logger.warning(
            f"Cache unavailable, continuing without conversation history: {cache_error.message}"
        )
        return ""
    except Exception as error:
        # History is optional context; report the problem and carry on without it.
        logger.warning(
            f"Unexpected error retrieving conversation history: {type(error).__name__}: {error!s}"
        )
        return ""
|
|
@@ -9,7 +9,6 @@ from cognee.modules.search.exceptions import UnsupportedSearchTypeError
|
|
|
9
9
|
# Retrievers
|
|
10
10
|
from cognee.modules.retrieval.user_qa_feedback import UserQAFeedback
|
|
11
11
|
from cognee.modules.retrieval.chunks_retriever import ChunksRetriever
|
|
12
|
-
from cognee.modules.retrieval.insights_retriever import InsightsRetriever
|
|
13
12
|
from cognee.modules.retrieval.summaries_retriever import SummariesRetriever
|
|
14
13
|
from cognee.modules.retrieval.completion_retriever import CompletionRetriever
|
|
15
14
|
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
|
|
@@ -44,10 +43,6 @@ async def get_search_type_tools(
|
|
|
44
43
|
SummariesRetriever(top_k=top_k).get_completion,
|
|
45
44
|
SummariesRetriever(top_k=top_k).get_context,
|
|
46
45
|
],
|
|
47
|
-
SearchType.INSIGHTS: [
|
|
48
|
-
InsightsRetriever(top_k=top_k).get_completion,
|
|
49
|
-
InsightsRetriever(top_k=top_k).get_context,
|
|
50
|
-
],
|
|
51
46
|
SearchType.CHUNKS: [
|
|
52
47
|
ChunksRetriever(top_k=top_k).get_completion,
|
|
53
48
|
ChunksRetriever(top_k=top_k).get_context,
|
|
@@ -1,12 +1,16 @@
|
|
|
1
1
|
from typing import Any, List, Optional, Tuple, Type, Union
|
|
2
2
|
|
|
3
|
+
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
3
4
|
from cognee.modules.data.models.Dataset import Dataset
|
|
4
5
|
from cognee.modules.engine.models.node_set import NodeSet
|
|
5
6
|
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
|
|
6
7
|
from cognee.modules.search.types import SearchType
|
|
8
|
+
from cognee.shared.logging_utils import get_logger
|
|
7
9
|
|
|
8
10
|
from .get_search_type_tools import get_search_type_tools
|
|
9
11
|
|
|
12
|
+
logger = get_logger()
|
|
13
|
+
|
|
10
14
|
|
|
11
15
|
async def no_access_control_search(
|
|
12
16
|
query_type: SearchType,
|
|
@@ -19,6 +23,7 @@ async def no_access_control_search(
|
|
|
19
23
|
save_interaction: bool = False,
|
|
20
24
|
last_k: Optional[int] = None,
|
|
21
25
|
only_context: bool = False,
|
|
26
|
+
session_id: Optional[str] = None,
|
|
22
27
|
) -> Tuple[Any, Union[str, List[Edge]], List[Dataset]]:
|
|
23
28
|
search_tools = await get_search_type_tools(
|
|
24
29
|
query_type=query_type,
|
|
@@ -31,6 +36,12 @@ async def no_access_control_search(
|
|
|
31
36
|
save_interaction=save_interaction,
|
|
32
37
|
last_k=last_k,
|
|
33
38
|
)
|
|
39
|
+
graph_engine = await get_graph_engine()
|
|
40
|
+
is_empty = await graph_engine.is_empty()
|
|
41
|
+
|
|
42
|
+
if is_empty:
|
|
43
|
+
# TODO: we can log here, but not all search types use graph. Still keeping this here for reviewer input
|
|
44
|
+
logger.warning("Search attempt on an empty knowledge graph")
|
|
34
45
|
if len(search_tools) == 2:
|
|
35
46
|
[get_completion, get_context] = search_tools
|
|
36
47
|
|
|
@@ -38,7 +49,7 @@ async def no_access_control_search(
|
|
|
38
49
|
return None, await get_context(query_text), []
|
|
39
50
|
|
|
40
51
|
context = await get_context(query_text)
|
|
41
|
-
result = await get_completion(query_text, context)
|
|
52
|
+
result = await get_completion(query_text, context, session_id=session_id)
|
|
42
53
|
else:
|
|
43
54
|
unknown_tool = search_tools[0]
|
|
44
55
|
result = await unknown_tool(query_text)
|
|
@@ -5,6 +5,8 @@ from uuid import UUID
|
|
|
5
5
|
from fastapi.encoders import jsonable_encoder
|
|
6
6
|
from typing import Any, List, Optional, Tuple, Type, Union
|
|
7
7
|
|
|
8
|
+
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
9
|
+
from cognee.shared.logging_utils import get_logger
|
|
8
10
|
from cognee.shared.utils import send_telemetry
|
|
9
11
|
from cognee.context_global_variables import set_database_global_context_variables
|
|
10
12
|
|
|
@@ -22,11 +24,13 @@ from cognee.modules.data.models import Dataset
|
|
|
22
24
|
from cognee.modules.data.methods.get_authorized_existing_datasets import (
|
|
23
25
|
get_authorized_existing_datasets,
|
|
24
26
|
)
|
|
25
|
-
|
|
27
|
+
from cognee import __version__ as cognee_version
|
|
26
28
|
from .get_search_type_tools import get_search_type_tools
|
|
27
29
|
from .no_access_control_search import no_access_control_search
|
|
28
30
|
from ..utils.prepare_search_result import prepare_search_result
|
|
29
31
|
|
|
32
|
+
logger = get_logger()
|
|
33
|
+
|
|
30
34
|
|
|
31
35
|
async def search(
|
|
32
36
|
query_text: str,
|
|
@@ -42,6 +46,7 @@ async def search(
|
|
|
42
46
|
last_k: Optional[int] = None,
|
|
43
47
|
only_context: bool = False,
|
|
44
48
|
use_combined_context: bool = False,
|
|
49
|
+
session_id: Optional[str] = None,
|
|
45
50
|
) -> Union[CombinedSearchResult, List[SearchResult]]:
|
|
46
51
|
"""
|
|
47
52
|
|
|
@@ -59,7 +64,14 @@ async def search(
|
|
|
59
64
|
Searching by dataset is only available in ENABLE_BACKEND_ACCESS_CONTROL mode
|
|
60
65
|
"""
|
|
61
66
|
query = await log_query(query_text, query_type.value, user.id)
|
|
62
|
-
send_telemetry(
|
|
67
|
+
send_telemetry(
|
|
68
|
+
"cognee.search EXECUTION STARTED",
|
|
69
|
+
user.id,
|
|
70
|
+
additional_properties={
|
|
71
|
+
"cognee_version": cognee_version,
|
|
72
|
+
"tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant",
|
|
73
|
+
},
|
|
74
|
+
)
|
|
63
75
|
|
|
64
76
|
# Use search function filtered by permissions if access control is enabled
|
|
65
77
|
if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
|
|
@@ -77,6 +89,7 @@ async def search(
|
|
|
77
89
|
last_k=last_k,
|
|
78
90
|
only_context=only_context,
|
|
79
91
|
use_combined_context=use_combined_context,
|
|
92
|
+
session_id=session_id,
|
|
80
93
|
)
|
|
81
94
|
else:
|
|
82
95
|
search_results = [
|
|
@@ -91,10 +104,18 @@ async def search(
|
|
|
91
104
|
save_interaction=save_interaction,
|
|
92
105
|
last_k=last_k,
|
|
93
106
|
only_context=only_context,
|
|
107
|
+
session_id=session_id,
|
|
94
108
|
)
|
|
95
109
|
]
|
|
96
110
|
|
|
97
|
-
send_telemetry(
|
|
111
|
+
send_telemetry(
|
|
112
|
+
"cognee.search EXECUTION COMPLETED",
|
|
113
|
+
user.id,
|
|
114
|
+
additional_properties={
|
|
115
|
+
"cognee_version": cognee_version,
|
|
116
|
+
"tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant",
|
|
117
|
+
},
|
|
118
|
+
)
|
|
98
119
|
|
|
99
120
|
await log_result(
|
|
100
121
|
query.id,
|
|
@@ -195,6 +216,7 @@ async def authorized_search(
|
|
|
195
216
|
last_k: Optional[int] = None,
|
|
196
217
|
only_context: bool = False,
|
|
197
218
|
use_combined_context: bool = False,
|
|
219
|
+
session_id: Optional[str] = None,
|
|
198
220
|
) -> Union[
|
|
199
221
|
Tuple[Any, Union[List[Edge], str], List[Dataset]],
|
|
200
222
|
List[Tuple[Any, Union[List[Edge], str], List[Dataset]]],
|
|
@@ -221,6 +243,7 @@ async def authorized_search(
|
|
|
221
243
|
save_interaction=save_interaction,
|
|
222
244
|
last_k=last_k,
|
|
223
245
|
only_context=True,
|
|
246
|
+
session_id=session_id,
|
|
224
247
|
)
|
|
225
248
|
|
|
226
249
|
context = {}
|
|
@@ -263,7 +286,7 @@ async def authorized_search(
|
|
|
263
286
|
return combined_context
|
|
264
287
|
|
|
265
288
|
combined_context = prepare_combined_context(context)
|
|
266
|
-
completion = await get_completion(query_text, combined_context)
|
|
289
|
+
completion = await get_completion(query_text, combined_context, session_id=session_id)
|
|
267
290
|
|
|
268
291
|
return completion, combined_context, datasets
|
|
269
292
|
|
|
@@ -280,6 +303,7 @@ async def authorized_search(
|
|
|
280
303
|
save_interaction=save_interaction,
|
|
281
304
|
last_k=last_k,
|
|
282
305
|
only_context=only_context,
|
|
306
|
+
session_id=session_id,
|
|
283
307
|
)
|
|
284
308
|
|
|
285
309
|
return search_results
|
|
@@ -298,6 +322,7 @@ async def search_in_datasets_context(
|
|
|
298
322
|
last_k: Optional[int] = None,
|
|
299
323
|
only_context: bool = False,
|
|
300
324
|
context: Optional[Any] = None,
|
|
325
|
+
session_id: Optional[str] = None,
|
|
301
326
|
) -> List[Tuple[Any, Union[str, List[Edge]], List[Dataset]]]:
|
|
302
327
|
"""
|
|
303
328
|
Searches all provided datasets and handles setting up of appropriate database context based on permissions.
|
|
@@ -317,10 +342,30 @@ async def search_in_datasets_context(
|
|
|
317
342
|
last_k: Optional[int] = None,
|
|
318
343
|
only_context: bool = False,
|
|
319
344
|
context: Optional[Any] = None,
|
|
345
|
+
session_id: Optional[str] = None,
|
|
320
346
|
) -> Tuple[Any, Union[str, List[Edge]], List[Dataset]]:
|
|
321
347
|
# Set database configuration in async context for each dataset user has access for
|
|
322
348
|
await set_database_global_context_variables(dataset.id, dataset.owner_id)
|
|
323
349
|
|
|
350
|
+
graph_engine = await get_graph_engine()
|
|
351
|
+
is_empty = await graph_engine.is_empty()
|
|
352
|
+
|
|
353
|
+
if is_empty:
|
|
354
|
+
# TODO: we can log here, but not all search types use graph. Still keeping this here for reviewer input
|
|
355
|
+
from cognee.modules.data.methods import get_dataset_data
|
|
356
|
+
|
|
357
|
+
dataset_data = await get_dataset_data(dataset.id)
|
|
358
|
+
|
|
359
|
+
if len(dataset_data) > 0:
|
|
360
|
+
logger.warning(
|
|
361
|
+
f"Dataset '{dataset.name}' has {len(dataset_data)} data item(s) but the knowledge graph is empty. "
|
|
362
|
+
"Please run cognify to process the data before searching."
|
|
363
|
+
)
|
|
364
|
+
else:
|
|
365
|
+
logger.warning(
|
|
366
|
+
"Search attempt on an empty knowledge graph - no data has been added to this dataset"
|
|
367
|
+
)
|
|
368
|
+
|
|
324
369
|
specific_search_tools = await get_search_type_tools(
|
|
325
370
|
query_type=query_type,
|
|
326
371
|
query_text=query_text,
|
|
@@ -340,7 +385,7 @@ async def search_in_datasets_context(
|
|
|
340
385
|
return None, await get_context(query_text), [dataset]
|
|
341
386
|
|
|
342
387
|
search_context = context or await get_context(query_text)
|
|
343
|
-
search_result = await get_completion(query_text, search_context)
|
|
388
|
+
search_result = await get_completion(query_text, search_context, session_id=session_id)
|
|
344
389
|
|
|
345
390
|
return search_result, search_context, [dataset]
|
|
346
391
|
else:
|
|
@@ -365,6 +410,7 @@ async def search_in_datasets_context(
|
|
|
365
410
|
last_k=last_k,
|
|
366
411
|
only_context=only_context,
|
|
367
412
|
context=context,
|
|
413
|
+
session_id=session_id,
|
|
368
414
|
)
|
|
369
415
|
)
|
|
370
416
|
|
|
@@ -15,6 +15,7 @@ class ModelName(Enum):
|
|
|
15
15
|
ollama = "ollama"
|
|
16
16
|
anthropic = "anthropic"
|
|
17
17
|
gemini = "gemini"
|
|
18
|
+
mistral = "mistral"
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
class LLMConfig(BaseModel):
|
|
@@ -72,6 +73,10 @@ def get_settings() -> SettingsDict:
|
|
|
72
73
|
"value": "gemini",
|
|
73
74
|
"label": "Gemini",
|
|
74
75
|
},
|
|
76
|
+
{
|
|
77
|
+
"value": "mistral",
|
|
78
|
+
"label": "Mistral",
|
|
79
|
+
},
|
|
75
80
|
]
|
|
76
81
|
|
|
77
82
|
return SettingsDict.model_validate(
|
|
@@ -134,6 +139,24 @@ def get_settings() -> SettingsDict:
|
|
|
134
139
|
"label": "Gemini 2.0 Flash",
|
|
135
140
|
},
|
|
136
141
|
],
|
|
142
|
+
"mistral": [
|
|
143
|
+
{
|
|
144
|
+
"value": "mistral-medium-2508",
|
|
145
|
+
"label": "Mistral Medium 3.1",
|
|
146
|
+
},
|
|
147
|
+
{
|
|
148
|
+
"value": "magistral-medium-2509",
|
|
149
|
+
"label": "Magistral Medium 1.2",
|
|
150
|
+
},
|
|
151
|
+
{
|
|
152
|
+
"value": "magistral-medium-2507",
|
|
153
|
+
"label": "Magistral Medium 1.1",
|
|
154
|
+
},
|
|
155
|
+
{
|
|
156
|
+
"value": "mistral-large-2411",
|
|
157
|
+
"label": "Mistral Large 2.1",
|
|
158
|
+
},
|
|
159
|
+
],
|
|
137
160
|
},
|
|
138
161
|
},
|
|
139
162
|
vector_db={
|
|
@@ -37,6 +37,8 @@ async def get_authenticated_user(
|
|
|
37
37
|
except Exception as e:
|
|
38
38
|
# Convert any get_default_user failure into a proper HTTP 500 error
|
|
39
39
|
logger.error(f"Failed to create default user: {str(e)}")
|
|
40
|
-
raise HTTPException(
|
|
40
|
+
raise HTTPException(
|
|
41
|
+
status_code=500, detail=f"Failed to create default user: {str(e)}"
|
|
42
|
+
) from e
|
|
41
43
|
|
|
42
44
|
return user
|
|
@@ -27,12 +27,7 @@ async def get_default_user() -> SimpleNamespace:
|
|
|
27
27
|
if user is None:
|
|
28
28
|
return await create_default_user()
|
|
29
29
|
|
|
30
|
-
|
|
31
|
-
# SimpleNamespace is just a dictionary which can be accessed through attributes
|
|
32
|
-
auth_data = SimpleNamespace(
|
|
33
|
-
id=user.id, email=user.email, tenant_id=user.tenant_id, roles=[]
|
|
34
|
-
)
|
|
35
|
-
return auth_data
|
|
30
|
+
return user
|
|
36
31
|
except Exception as error:
|
|
37
32
|
if "principals" in str(error.args):
|
|
38
33
|
raise DatabaseNotCreatedError() from error
|
|
@@ -40,8 +40,8 @@ async def create_role(
|
|
|
40
40
|
# Add association directly to the association table
|
|
41
41
|
role = Role(name=role_name, tenant_id=tenant.id)
|
|
42
42
|
session.add(role)
|
|
43
|
-
except IntegrityError:
|
|
44
|
-
raise EntityAlreadyExistsError(message="Role already exists for tenant.")
|
|
43
|
+
except IntegrityError as e:
|
|
44
|
+
raise EntityAlreadyExistsError(message="Role already exists for tenant.") from e
|
|
45
45
|
|
|
46
46
|
await session.commit()
|
|
47
47
|
await session.refresh(role)
|
|
@@ -35,5 +35,5 @@ async def create_tenant(tenant_name: str, user_id: UUID) -> UUID:
|
|
|
35
35
|
await session.merge(user)
|
|
36
36
|
await session.commit()
|
|
37
37
|
return tenant.id
|
|
38
|
-
except IntegrityError:
|
|
39
|
-
raise EntityAlreadyExistsError(message="Tenant already exists.")
|
|
38
|
+
except IntegrityError as e:
|
|
39
|
+
raise EntityAlreadyExistsError(message="Tenant already exists.") from e
|
|
@@ -7,6 +7,6 @@ class IngestionError(CogneeValidationError):
|
|
|
7
7
|
self,
|
|
8
8
|
message: str = "Failed to load data.",
|
|
9
9
|
name: str = "IngestionError",
|
|
10
|
-
status_code=status.
|
|
10
|
+
status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
|
|
11
11
|
):
|
|
12
12
|
super().__init__(message, name, status_code)
|
cognee/shared/logging_utils.py
CHANGED
|
@@ -430,6 +430,15 @@ def setup_logging(log_level=None, name=None):
|
|
|
430
430
|
stream_handler.setFormatter(console_formatter)
|
|
431
431
|
stream_handler.setLevel(log_level)
|
|
432
432
|
|
|
433
|
+
root_logger = logging.getLogger()
|
|
434
|
+
if root_logger.hasHandlers():
|
|
435
|
+
root_logger.handlers.clear()
|
|
436
|
+
root_logger.addHandler(stream_handler)
|
|
437
|
+
|
|
438
|
+
# Note: root logger needs to be set at NOTSET to allow all messages through and specific stream and file handlers
|
|
439
|
+
# can define their own levels.
|
|
440
|
+
root_logger.setLevel(logging.NOTSET)
|
|
441
|
+
|
|
433
442
|
# Check if we already have a log file path from the environment
|
|
434
443
|
# NOTE: environment variable must be used here as it allows us to
|
|
435
444
|
# log to a single file with a name based on a timestamp in a multiprocess setting.
|
|
@@ -441,17 +450,15 @@ def setup_logging(log_level=None, name=None):
|
|
|
441
450
|
log_file_path = os.path.join(LOGS_DIR, f"{start_time}.log")
|
|
442
451
|
os.environ["LOG_FILE_NAME"] = log_file_path
|
|
443
452
|
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
root_logger.addHandler(file_handler)
|
|
454
|
-
root_logger.setLevel(log_level)
|
|
453
|
+
try:
|
|
454
|
+
# Create a file handler that uses our custom PlainFileHandler
|
|
455
|
+
file_handler = PlainFileHandler(log_file_path, encoding="utf-8")
|
|
456
|
+
file_handler.setLevel(DEBUG)
|
|
457
|
+
root_logger.addHandler(file_handler)
|
|
458
|
+
except Exception as e:
|
|
459
|
+
# Note: Exceptions happen in case of read-only file systems or a log file path pointing to a location where the process does
|
|
460
|
+
# not have write permission. Logging to file is not mandatory so we just log a warning to console.
|
|
461
|
+
root_logger.warning(f"Warning: Could not create log file handler at {log_file_path}: {e}")
|
|
455
462
|
|
|
456
463
|
if log_level > logging.DEBUG:
|
|
457
464
|
import warnings
|
cognee/shared/utils.py
CHANGED
|
@@ -8,7 +8,7 @@ import http.server
|
|
|
8
8
|
import socketserver
|
|
9
9
|
from threading import Thread
|
|
10
10
|
import pathlib
|
|
11
|
-
from uuid import uuid4
|
|
11
|
+
from uuid import uuid4, uuid5, NAMESPACE_OID
|
|
12
12
|
|
|
13
13
|
from cognee.base_config import get_base_config
|
|
14
14
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
@@ -51,6 +51,26 @@ def get_anonymous_id():
|
|
|
51
51
|
return anonymous_id
|
|
52
52
|
|
|
53
53
|
|
|
54
|
+
def _sanitize_nested_properties(obj, property_names: list[str]):
|
|
55
|
+
"""
|
|
56
|
+
Recursively replaces any property whose key matches one of `property_names`
|
|
57
|
+
(e.g., ['url', 'path']) in a nested dict or list with a uuid5 hash
|
|
58
|
+
of its string value. Returns a new sanitized copy.
|
|
59
|
+
"""
|
|
60
|
+
if isinstance(obj, dict):
|
|
61
|
+
new_obj = {}
|
|
62
|
+
for k, v in obj.items():
|
|
63
|
+
if k in property_names and isinstance(v, str):
|
|
64
|
+
new_obj[k] = str(uuid5(NAMESPACE_OID, v))
|
|
65
|
+
else:
|
|
66
|
+
new_obj[k] = _sanitize_nested_properties(v, property_names)
|
|
67
|
+
return new_obj
|
|
68
|
+
elif isinstance(obj, list):
|
|
69
|
+
return [_sanitize_nested_properties(item, property_names) for item in obj]
|
|
70
|
+
else:
|
|
71
|
+
return obj
|
|
72
|
+
|
|
73
|
+
|
|
54
74
|
def send_telemetry(event_name: str, user_id, additional_properties: dict = {}):
|
|
55
75
|
if os.getenv("TELEMETRY_DISABLED"):
|
|
56
76
|
return
|
|
@@ -58,7 +78,9 @@ def send_telemetry(event_name: str, user_id, additional_properties: dict = {}):
|
|
|
58
78
|
env = os.getenv("ENV")
|
|
59
79
|
if env in ["test", "dev"]:
|
|
60
80
|
return
|
|
61
|
-
|
|
81
|
+
additional_properties = _sanitize_nested_properties(
|
|
82
|
+
obj=additional_properties, property_names=["url"]
|
|
83
|
+
)
|
|
62
84
|
current_time = datetime.now(timezone.utc)
|
|
63
85
|
payload = {
|
|
64
86
|
"anonymous_id": str(get_anonymous_id()),
|
|
@@ -12,7 +12,7 @@ class WrongDataDocumentInputError(CogneeValidationError):
|
|
|
12
12
|
self,
|
|
13
13
|
field: str,
|
|
14
14
|
name: str = "WrongDataDocumentInputError",
|
|
15
|
-
status_code: int = status.
|
|
15
|
+
status_code: int = status.HTTP_422_UNPROCESSABLE_CONTENT,
|
|
16
16
|
):
|
|
17
17
|
message = f"Missing of invalid parameter: '{field}'."
|
|
18
18
|
super().__init__(message, name, status_code)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from .extract_feedback_interactions import extract_feedback_interactions
|
|
2
|
+
from .generate_improved_answers import generate_improved_answers
|
|
3
|
+
from .create_enrichments import create_enrichments
|
|
4
|
+
from .link_enrichments_to_feedback import link_enrichments_to_feedback
|
|
5
|
+
from .models import FeedbackEnrichment
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"extract_feedback_interactions",
|
|
9
|
+
"generate_improved_answers",
|
|
10
|
+
"create_enrichments",
|
|
11
|
+
"link_enrichments_to_feedback",
|
|
12
|
+
"FeedbackEnrichment",
|
|
13
|
+
]
|