cognee 0.2.3.dev1__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__main__.py +4 -0
- cognee/api/v1/add/add.py +18 -6
- cognee/api/v1/cognify/code_graph_pipeline.py +7 -1
- cognee/api/v1/cognify/cognify.py +22 -107
- cognee/api/v1/cognify/routers/get_cognify_router.py +11 -3
- cognee/api/v1/datasets/routers/get_datasets_router.py +1 -1
- cognee/api/v1/responses/default_tools.py +4 -0
- cognee/api/v1/responses/dispatch_function.py +6 -1
- cognee/api/v1/responses/models.py +1 -1
- cognee/api/v1/search/search.py +6 -0
- cognee/cli/__init__.py +10 -0
- cognee/cli/_cognee.py +180 -0
- cognee/cli/commands/__init__.py +1 -0
- cognee/cli/commands/add_command.py +80 -0
- cognee/cli/commands/cognify_command.py +128 -0
- cognee/cli/commands/config_command.py +225 -0
- cognee/cli/commands/delete_command.py +80 -0
- cognee/cli/commands/search_command.py +149 -0
- cognee/cli/config.py +33 -0
- cognee/cli/debug.py +21 -0
- cognee/cli/echo.py +45 -0
- cognee/cli/exceptions.py +23 -0
- cognee/cli/minimal_cli.py +97 -0
- cognee/cli/reference.py +26 -0
- cognee/cli/suppress_logging.py +12 -0
- cognee/eval_framework/corpus_builder/corpus_builder_executor.py +2 -2
- cognee/eval_framework/eval_config.py +1 -1
- cognee/infrastructure/databases/graph/get_graph_engine.py +4 -9
- cognee/infrastructure/databases/graph/kuzu/adapter.py +64 -2
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +49 -0
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +5 -3
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +16 -7
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +5 -5
- cognee/infrastructure/databases/vector/embeddings/config.py +2 -2
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +6 -6
- cognee/infrastructure/files/utils/get_data_file_path.py +14 -9
- cognee/infrastructure/files/utils/get_file_metadata.py +2 -1
- cognee/infrastructure/llm/LLMGateway.py +14 -5
- cognee/infrastructure/llm/config.py +5 -5
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +16 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_content_graph.py +19 -15
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +3 -3
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +3 -3
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +14 -8
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +6 -4
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +2 -2
- cognee/infrastructure/llm/tokenizer/HuggingFace/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/Mistral/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +6 -6
- cognee/infrastructure/llm/utils.py +7 -7
- cognee/modules/data/methods/__init__.py +2 -0
- cognee/modules/data/methods/create_authorized_dataset.py +19 -0
- cognee/modules/data/methods/get_authorized_dataset.py +11 -5
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +16 -0
- cognee/modules/data/methods/load_or_create_datasets.py +2 -20
- cognee/modules/graph/methods/get_formatted_graph_data.py +3 -2
- cognee/modules/pipelines/__init__.py +1 -1
- cognee/modules/pipelines/exceptions/tasks.py +18 -0
- cognee/modules/pipelines/layers/__init__.py +1 -0
- cognee/modules/pipelines/layers/check_pipeline_run_qualification.py +59 -0
- cognee/modules/pipelines/layers/pipeline_execution_mode.py +127 -0
- cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +12 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +34 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +55 -0
- cognee/modules/pipelines/layers/setup_and_check_environment.py +41 -0
- cognee/modules/pipelines/layers/validate_pipeline_tasks.py +20 -0
- cognee/modules/pipelines/methods/__init__.py +2 -0
- cognee/modules/pipelines/methods/get_pipeline_runs_by_dataset.py +34 -0
- cognee/modules/pipelines/methods/reset_pipeline_run_status.py +16 -0
- cognee/modules/pipelines/operations/__init__.py +0 -1
- cognee/modules/pipelines/operations/log_pipeline_run_initiated.py +1 -1
- cognee/modules/pipelines/operations/pipeline.py +23 -138
- cognee/modules/retrieval/base_feedback.py +11 -0
- cognee/modules/retrieval/cypher_search_retriever.py +1 -9
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +9 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +13 -6
- cognee/modules/retrieval/graph_completion_retriever.py +89 -5
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/natural_language_retriever.py +0 -4
- cognee/modules/retrieval/user_qa_feedback.py +83 -0
- cognee/modules/retrieval/utils/extract_uuid_from_node.py +18 -0
- cognee/modules/retrieval/utils/models.py +40 -0
- cognee/modules/search/methods/search.py +46 -5
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/settings/get_settings.py +2 -2
- cognee/shared/CodeGraphEntities.py +1 -0
- cognee/shared/logging_utils.py +142 -31
- cognee/shared/utils.py +0 -1
- cognee/tasks/graph/extract_graph_from_data.py +6 -2
- cognee/tasks/repo_processor/get_local_dependencies.py +2 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +120 -48
- cognee/tasks/storage/add_data_points.py +33 -3
- cognee/tests/integration/cli/__init__.py +3 -0
- cognee/tests/integration/cli/test_cli_integration.py +331 -0
- cognee/tests/integration/documents/PdfDocument_test.py +2 -2
- cognee/tests/integration/documents/TextDocument_test.py +2 -4
- cognee/tests/integration/documents/UnstructuredDocument_test.py +5 -8
- cognee/tests/{test_deletion.py → test_delete_hard.py} +0 -37
- cognee/tests/test_delete_soft.py +85 -0
- cognee/tests/test_kuzu.py +2 -2
- cognee/tests/test_neo4j.py +2 -2
- cognee/tests/test_search_db.py +126 -7
- cognee/tests/unit/cli/__init__.py +3 -0
- cognee/tests/unit/cli/test_cli_commands.py +483 -0
- cognee/tests/unit/cli/test_cli_edge_cases.py +625 -0
- cognee/tests/unit/cli/test_cli_main.py +173 -0
- cognee/tests/unit/cli/test_cli_runner.py +62 -0
- cognee/tests/unit/cli/test_cli_utils.py +127 -0
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +3 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +3 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +3 -3
- cognee/tests/unit/modules/search/search_methods_test.py +2 -0
- {cognee-0.2.3.dev1.dist-info → cognee-0.2.4.dist-info}/METADATA +7 -5
- {cognee-0.2.3.dev1.dist-info → cognee-0.2.4.dist-info}/RECORD +120 -83
- cognee-0.2.4.dist-info/entry_points.txt +2 -0
- cognee/infrastructure/databases/graph/networkx/__init__.py +0 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +0 -1017
- cognee/infrastructure/pipeline/models/Operation.py +0 -60
- cognee/infrastructure/pipeline/models/__init__.py +0 -0
- cognee/notebooks/github_analysis_step_by_step.ipynb +0 -37
- cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py +0 -7
- {cognee-0.2.3.dev1.dist-info → cognee-0.2.4.dist-info}/WHEEL +0 -0
- {cognee-0.2.3.dev1.dist-info → cognee-0.2.4.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.3.dev1.dist-info → cognee-0.2.4.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -35,6 +35,7 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
|
|
|
35
35
|
top_k: Optional[int] = 5,
|
|
36
36
|
node_type: Optional[Type] = None,
|
|
37
37
|
node_name: Optional[List[str]] = None,
|
|
38
|
+
save_interaction: bool = False,
|
|
38
39
|
):
|
|
39
40
|
super().__init__(
|
|
40
41
|
user_prompt_path=user_prompt_path,
|
|
@@ -42,6 +43,7 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
|
|
|
42
43
|
top_k=top_k,
|
|
43
44
|
node_type=node_type,
|
|
44
45
|
node_name=node_name,
|
|
46
|
+
save_interaction=save_interaction,
|
|
45
47
|
)
|
|
46
48
|
self.validation_system_prompt_path = validation_system_prompt_path
|
|
47
49
|
self.validation_user_prompt_path = validation_user_prompt_path
|
|
@@ -75,7 +77,7 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
|
|
|
75
77
|
"""
|
|
76
78
|
followup_question = ""
|
|
77
79
|
triplets = []
|
|
78
|
-
|
|
80
|
+
completion = [""]
|
|
79
81
|
|
|
80
82
|
for round_idx in range(max_iter + 1):
|
|
81
83
|
if round_idx == 0:
|
|
@@ -85,15 +87,15 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
|
|
|
85
87
|
triplets += await self.get_triplets(followup_question)
|
|
86
88
|
context = await self.resolve_edges_to_text(list(set(triplets)))
|
|
87
89
|
|
|
88
|
-
|
|
90
|
+
completion = await generate_completion(
|
|
89
91
|
query=query,
|
|
90
92
|
context=context,
|
|
91
93
|
user_prompt_path=self.user_prompt_path,
|
|
92
94
|
system_prompt_path=self.system_prompt_path,
|
|
93
95
|
)
|
|
94
|
-
logger.info(f"Chain-of-thought: round {round_idx} - answer: {
|
|
96
|
+
logger.info(f"Chain-of-thought: round {round_idx} - answer: {completion}")
|
|
95
97
|
if round_idx < max_iter:
|
|
96
|
-
valid_args = {"query": query, "answer":
|
|
98
|
+
valid_args = {"query": query, "answer": completion, "context": context}
|
|
97
99
|
valid_user_prompt = LLMGateway.render_prompt(
|
|
98
100
|
filename=self.validation_user_prompt_path, context=valid_args
|
|
99
101
|
)
|
|
@@ -106,7 +108,7 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
|
|
|
106
108
|
system_prompt=valid_system_prompt,
|
|
107
109
|
response_model=str,
|
|
108
110
|
)
|
|
109
|
-
followup_args = {"query": query, "answer":
|
|
111
|
+
followup_args = {"query": query, "answer": completion, "reasoning": reasoning}
|
|
110
112
|
followup_prompt = LLMGateway.render_prompt(
|
|
111
113
|
filename=self.followup_user_prompt_path, context=followup_args
|
|
112
114
|
)
|
|
@@ -121,4 +123,9 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
|
|
|
121
123
|
f"Chain-of-thought: round {round_idx} - follow-up question: {followup_question}"
|
|
122
124
|
)
|
|
123
125
|
|
|
124
|
-
|
|
126
|
+
if self.save_interaction and context and triplets and completion:
|
|
127
|
+
await self.save_qa(
|
|
128
|
+
question=query, answer=completion, context=context, triplets=triplets
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
return [completion]
|
|
@@ -1,14 +1,20 @@
|
|
|
1
|
-
from typing import Any, Optional, Type, List
|
|
1
|
+
from typing import Any, Optional, Type, List, Coroutine
|
|
2
2
|
from collections import Counter
|
|
3
|
+
from uuid import NAMESPACE_OID, uuid5
|
|
3
4
|
import string
|
|
4
5
|
|
|
5
6
|
from cognee.infrastructure.engine import DataPoint
|
|
7
|
+
from cognee.tasks.storage import add_data_points
|
|
6
8
|
from cognee.modules.graph.utils.convert_node_to_data_point import get_all_subclasses
|
|
7
9
|
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
|
8
10
|
from cognee.modules.retrieval.utils.brute_force_triplet_search import brute_force_triplet_search
|
|
9
11
|
from cognee.modules.retrieval.utils.completion import generate_completion
|
|
10
12
|
from cognee.modules.retrieval.utils.stop_words import DEFAULT_STOP_WORDS
|
|
11
13
|
from cognee.shared.logging_utils import get_logger
|
|
14
|
+
from cognee.modules.retrieval.utils.extract_uuid_from_node import extract_uuid_from_node
|
|
15
|
+
from cognee.modules.retrieval.utils.models import CogneeUserInteraction
|
|
16
|
+
from cognee.modules.engine.models.node_set import NodeSet
|
|
17
|
+
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
12
18
|
|
|
13
19
|
logger = get_logger("GraphCompletionRetriever")
|
|
14
20
|
|
|
@@ -33,8 +39,10 @@ class GraphCompletionRetriever(BaseRetriever):
|
|
|
33
39
|
top_k: Optional[int] = 5,
|
|
34
40
|
node_type: Optional[Type] = None,
|
|
35
41
|
node_name: Optional[List[str]] = None,
|
|
42
|
+
save_interaction: bool = False,
|
|
36
43
|
):
|
|
37
44
|
"""Initialize retriever with prompt paths and search parameters."""
|
|
45
|
+
self.save_interaction = save_interaction
|
|
38
46
|
self.user_prompt_path = user_prompt_path
|
|
39
47
|
self.system_prompt_path = system_prompt_path
|
|
40
48
|
self.top_k = top_k if top_k is not None else 5
|
|
@@ -118,7 +126,7 @@ class GraphCompletionRetriever(BaseRetriever):
|
|
|
118
126
|
|
|
119
127
|
return found_triplets
|
|
120
128
|
|
|
121
|
-
async def get_context(self, query: str) -> str:
|
|
129
|
+
async def get_context(self, query: str) -> str | tuple[str, list]:
|
|
122
130
|
"""
|
|
123
131
|
Retrieves and resolves graph triplets into context based on a query.
|
|
124
132
|
|
|
@@ -137,9 +145,11 @@ class GraphCompletionRetriever(BaseRetriever):
|
|
|
137
145
|
|
|
138
146
|
if len(triplets) == 0:
|
|
139
147
|
logger.warning("Empty context was provided to the completion")
|
|
140
|
-
return ""
|
|
148
|
+
return "", triplets
|
|
141
149
|
|
|
142
|
-
|
|
150
|
+
context = await self.resolve_edges_to_text(triplets)
|
|
151
|
+
|
|
152
|
+
return context, triplets
|
|
143
153
|
|
|
144
154
|
async def get_completion(self, query: str, context: Optional[Any] = None) -> Any:
|
|
145
155
|
"""
|
|
@@ -157,8 +167,10 @@ class GraphCompletionRetriever(BaseRetriever):
|
|
|
157
167
|
|
|
158
168
|
- Any: A generated completion based on the query and context provided.
|
|
159
169
|
"""
|
|
170
|
+
triplets = None
|
|
171
|
+
|
|
160
172
|
if context is None:
|
|
161
|
-
context = await self.get_context(query)
|
|
173
|
+
context, triplets = await self.get_context(query)
|
|
162
174
|
|
|
163
175
|
completion = await generate_completion(
|
|
164
176
|
query=query,
|
|
@@ -166,6 +178,12 @@ class GraphCompletionRetriever(BaseRetriever):
|
|
|
166
178
|
user_prompt_path=self.user_prompt_path,
|
|
167
179
|
system_prompt_path=self.system_prompt_path,
|
|
168
180
|
)
|
|
181
|
+
|
|
182
|
+
if self.save_interaction and context and triplets and completion:
|
|
183
|
+
await self.save_qa(
|
|
184
|
+
question=query, answer=completion, context=context, triplets=triplets
|
|
185
|
+
)
|
|
186
|
+
|
|
169
187
|
return [completion]
|
|
170
188
|
|
|
171
189
|
def _top_n_words(self, text, stop_words=None, top_n=3, separator=", "):
|
|
@@ -187,3 +205,69 @@ class GraphCompletionRetriever(BaseRetriever):
|
|
|
187
205
|
first_n_words = text.split()[:first_n_words]
|
|
188
206
|
top_n_words = self._top_n_words(text, top_n=top_n_words)
|
|
189
207
|
return f"{' '.join(first_n_words)}... [{top_n_words}]"
|
|
208
|
+
|
|
209
|
+
async def save_qa(self, question: str, answer: str, context: str, triplets: List) -> None:
|
|
210
|
+
"""
|
|
211
|
+
Saves a question and answer pair for later analysis or storage.
|
|
212
|
+
Parameters:
|
|
213
|
+
-----------
|
|
214
|
+
- question (str): The question text.
|
|
215
|
+
- answer (str): The answer text.
|
|
216
|
+
- context (str): The context text.
|
|
217
|
+
- triplets (List): A list of triples retrieved from the graph.
|
|
218
|
+
"""
|
|
219
|
+
nodeset_name = "Interactions"
|
|
220
|
+
interactions_node_set = NodeSet(
|
|
221
|
+
id=uuid5(NAMESPACE_OID, name=nodeset_name), name=nodeset_name
|
|
222
|
+
)
|
|
223
|
+
source_id = uuid5(NAMESPACE_OID, name=(question + answer + context))
|
|
224
|
+
|
|
225
|
+
cognee_user_interaction = CogneeUserInteraction(
|
|
226
|
+
id=source_id,
|
|
227
|
+
question=question,
|
|
228
|
+
answer=answer,
|
|
229
|
+
context=context,
|
|
230
|
+
belongs_to_set=interactions_node_set,
|
|
231
|
+
)
|
|
232
|
+
|
|
233
|
+
await add_data_points(data_points=[cognee_user_interaction], update_edge_collection=False)
|
|
234
|
+
|
|
235
|
+
relationships = []
|
|
236
|
+
relationship_name = "used_graph_element_to_answer"
|
|
237
|
+
for triplet in triplets:
|
|
238
|
+
target_id_1 = extract_uuid_from_node(triplet.node1)
|
|
239
|
+
target_id_2 = extract_uuid_from_node(triplet.node2)
|
|
240
|
+
if target_id_1 and target_id_2:
|
|
241
|
+
relationships.append(
|
|
242
|
+
(
|
|
243
|
+
source_id,
|
|
244
|
+
target_id_1,
|
|
245
|
+
relationship_name,
|
|
246
|
+
{
|
|
247
|
+
"relationship_name": relationship_name,
|
|
248
|
+
"source_node_id": source_id,
|
|
249
|
+
"target_node_id": target_id_1,
|
|
250
|
+
"ontology_valid": False,
|
|
251
|
+
"feedback_weight": 0,
|
|
252
|
+
},
|
|
253
|
+
)
|
|
254
|
+
)
|
|
255
|
+
|
|
256
|
+
relationships.append(
|
|
257
|
+
(
|
|
258
|
+
source_id,
|
|
259
|
+
target_id_2,
|
|
260
|
+
relationship_name,
|
|
261
|
+
{
|
|
262
|
+
"relationship_name": relationship_name,
|
|
263
|
+
"source_node_id": source_id,
|
|
264
|
+
"target_node_id": target_id_2,
|
|
265
|
+
"ontology_valid": False,
|
|
266
|
+
"feedback_weight": 0,
|
|
267
|
+
},
|
|
268
|
+
)
|
|
269
|
+
)
|
|
270
|
+
|
|
271
|
+
if len(relationships) > 0:
|
|
272
|
+
graph_engine = await get_graph_engine()
|
|
273
|
+
await graph_engine.add_edges(relationships)
|
|
@@ -24,6 +24,7 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever):
|
|
|
24
24
|
top_k: Optional[int] = 5,
|
|
25
25
|
node_type: Optional[Type] = None,
|
|
26
26
|
node_name: Optional[List[str]] = None,
|
|
27
|
+
save_interaction: bool = False,
|
|
27
28
|
):
|
|
28
29
|
"""Initialize retriever with default prompt paths and search parameters."""
|
|
29
30
|
super().__init__(
|
|
@@ -32,6 +33,7 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever):
|
|
|
32
33
|
top_k=top_k,
|
|
33
34
|
node_type=node_type,
|
|
34
35
|
node_name=node_name,
|
|
36
|
+
save_interaction=save_interaction,
|
|
35
37
|
)
|
|
36
38
|
self.summarize_prompt_path = summarize_prompt_path
|
|
37
39
|
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from typing import Any, Optional
|
|
2
2
|
from cognee.shared.logging_utils import get_logger
|
|
3
3
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
4
|
-
from cognee.infrastructure.databases.graph.networkx.adapter import NetworkXAdapter
|
|
5
4
|
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
6
5
|
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
|
7
6
|
from cognee.modules.retrieval.exceptions import SearchTypeNotSupported
|
|
@@ -123,9 +122,6 @@ class NaturalLanguageRetriever(BaseRetriever):
|
|
|
123
122
|
"""
|
|
124
123
|
graph_engine = await get_graph_engine()
|
|
125
124
|
|
|
126
|
-
if isinstance(graph_engine, (NetworkXAdapter)):
|
|
127
|
-
raise SearchTypeNotSupported("Natural language search type not supported.")
|
|
128
|
-
|
|
129
125
|
return await self._execute_cypher_query(query, graph_engine)
|
|
130
126
|
|
|
131
127
|
async def get_completion(self, query: str, context: Optional[Any] = None) -> Any:
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
from typing import Any, Optional, List
|
|
2
|
+
|
|
3
|
+
from uuid import NAMESPACE_OID, uuid5, UUID
|
|
4
|
+
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
5
|
+
from cognee.infrastructure.llm import LLMGateway
|
|
6
|
+
from cognee.modules.engine.models import NodeSet
|
|
7
|
+
from cognee.shared.logging_utils import get_logger
|
|
8
|
+
from cognee.modules.retrieval.base_feedback import BaseFeedback
|
|
9
|
+
from cognee.modules.retrieval.utils.models import CogneeUserFeedback
|
|
10
|
+
from cognee.modules.retrieval.utils.models import UserFeedbackEvaluation
|
|
11
|
+
from cognee.tasks.storage import add_data_points
|
|
12
|
+
|
|
13
|
+
logger = get_logger("CompletionRetriever")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class UserQAFeedback(BaseFeedback):
|
|
17
|
+
"""
|
|
18
|
+
Interface for handling user feedback queries.
|
|
19
|
+
Public methods:
|
|
20
|
+
- get_context(query: str) -> str
|
|
21
|
+
- get_completion(query: str, context: Optional[Any] = None) -> Any
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
def __init__(self, last_k: Optional[int] = 1) -> None:
|
|
25
|
+
"""Initialize retriever with optional custom prompt paths."""
|
|
26
|
+
self.last_k = last_k
|
|
27
|
+
|
|
28
|
+
async def add_feedback(self, feedback_text: str) -> List[str]:
|
|
29
|
+
feedback_sentiment = await LLMGateway.acreate_structured_output(
|
|
30
|
+
text_input=feedback_text,
|
|
31
|
+
system_prompt="You are a sentiment analysis assistant. For each piece of user feedback you receive, return exactly one of: Positive, Negative, or Neutral classification and a corresponding score from -5 (worst negative) to 5 (best positive)",
|
|
32
|
+
response_model=UserFeedbackEvaluation,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
graph_engine = await get_graph_engine()
|
|
36
|
+
last_interaction_ids = await graph_engine.get_last_user_interaction_ids(limit=self.last_k)
|
|
37
|
+
|
|
38
|
+
nodeset_name = "UserQAFeedbacks"
|
|
39
|
+
feedbacks_node_set = NodeSet(id=uuid5(NAMESPACE_OID, name=nodeset_name), name=nodeset_name)
|
|
40
|
+
feedback_id = uuid5(NAMESPACE_OID, name=feedback_text)
|
|
41
|
+
|
|
42
|
+
cognee_user_feedback = CogneeUserFeedback(
|
|
43
|
+
id=feedback_id,
|
|
44
|
+
feedback=feedback_text,
|
|
45
|
+
sentiment=feedback_sentiment.evaluation.value,
|
|
46
|
+
score=feedback_sentiment.score,
|
|
47
|
+
belongs_to_set=feedbacks_node_set,
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
await add_data_points(data_points=[cognee_user_feedback], update_edge_collection=False)
|
|
51
|
+
|
|
52
|
+
relationships = []
|
|
53
|
+
relationship_name = "gives_feedback_to"
|
|
54
|
+
to_node_ids = []
|
|
55
|
+
|
|
56
|
+
for interaction_id in last_interaction_ids:
|
|
57
|
+
target_id_1 = feedback_id
|
|
58
|
+
target_id_2 = UUID(interaction_id)
|
|
59
|
+
|
|
60
|
+
if target_id_1 and target_id_2:
|
|
61
|
+
relationships.append(
|
|
62
|
+
(
|
|
63
|
+
target_id_1,
|
|
64
|
+
target_id_2,
|
|
65
|
+
relationship_name,
|
|
66
|
+
{
|
|
67
|
+
"relationship_name": relationship_name,
|
|
68
|
+
"source_node_id": target_id_1,
|
|
69
|
+
"target_node_id": target_id_2,
|
|
70
|
+
"ontology_valid": False,
|
|
71
|
+
},
|
|
72
|
+
)
|
|
73
|
+
)
|
|
74
|
+
to_node_ids.append(str(target_id_2))
|
|
75
|
+
|
|
76
|
+
if len(relationships) > 0:
|
|
77
|
+
graph_engine = await get_graph_engine()
|
|
78
|
+
await graph_engine.add_edges(relationships)
|
|
79
|
+
await graph_engine.apply_feedback_weight(
|
|
80
|
+
node_ids=to_node_ids, weight=feedback_sentiment.score
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
return [feedback_text]
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from typing import Any, Optional
|
|
2
|
+
from uuid import UUID
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def extract_uuid_from_node(node: Any) -> Optional[UUID]:
|
|
6
|
+
"""
|
|
7
|
+
Try to pull a UUID string out of node.id or node.properties['id'],
|
|
8
|
+
then return a UUID instance (or None if neither exists).
|
|
9
|
+
"""
|
|
10
|
+
id_str = None
|
|
11
|
+
if not id_str:
|
|
12
|
+
id_str = getattr(node, "id", None)
|
|
13
|
+
|
|
14
|
+
if hasattr(node, "attributes") and not id_str:
|
|
15
|
+
id_str = node.attributes.get("id", None)
|
|
16
|
+
|
|
17
|
+
id = UUID(id_str) if isinstance(id_str, str) else None
|
|
18
|
+
return id
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
from cognee.infrastructure.engine.models.DataPoint import DataPoint
|
|
3
|
+
from cognee.modules.engine.models.node_set import NodeSet
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from pydantic import BaseModel, Field, confloat
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class CogneeUserInteraction(DataPoint):
|
|
9
|
+
"""User - Cognee interaction"""
|
|
10
|
+
|
|
11
|
+
question: str
|
|
12
|
+
answer: str
|
|
13
|
+
context: str
|
|
14
|
+
belongs_to_set: Optional[NodeSet] = None
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class CogneeUserFeedback(DataPoint):
|
|
18
|
+
"""User - Cognee Feedback"""
|
|
19
|
+
|
|
20
|
+
feedback: str
|
|
21
|
+
sentiment: str
|
|
22
|
+
score: float
|
|
23
|
+
belongs_to_set: Optional[NodeSet] = None
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class UserFeedbackSentiment(str, Enum):
|
|
27
|
+
"""User - User feedback sentiment"""
|
|
28
|
+
|
|
29
|
+
positive = "positive"
|
|
30
|
+
negative = "negative"
|
|
31
|
+
neutral = "neutral"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class UserFeedbackEvaluation(BaseModel):
|
|
35
|
+
"""User - User feedback evaluation"""
|
|
36
|
+
|
|
37
|
+
score: confloat(ge=-5, le=5) = Field(
|
|
38
|
+
..., description="Sentiment score from -5 (negative) to +5 (positive)"
|
|
39
|
+
)
|
|
40
|
+
evaluation: UserFeedbackSentiment
|
|
@@ -3,6 +3,8 @@ import json
|
|
|
3
3
|
import asyncio
|
|
4
4
|
from uuid import UUID
|
|
5
5
|
from typing import Callable, List, Optional, Type, Union
|
|
6
|
+
|
|
7
|
+
from cognee.modules.retrieval.user_qa_feedback import UserQAFeedback
|
|
6
8
|
from cognee.modules.search.exceptions import UnsupportedSearchTypeError
|
|
7
9
|
from cognee.context_global_variables import set_database_global_context_variables
|
|
8
10
|
from cognee.modules.retrieval.chunks_retriever import ChunksRetriever
|
|
@@ -38,6 +40,8 @@ async def search(
|
|
|
38
40
|
top_k: int = 10,
|
|
39
41
|
node_type: Optional[Type] = None,
|
|
40
42
|
node_name: Optional[List[str]] = None,
|
|
43
|
+
save_interaction: Optional[bool] = False,
|
|
44
|
+
last_k: Optional[int] = None,
|
|
41
45
|
):
|
|
42
46
|
"""
|
|
43
47
|
|
|
@@ -57,7 +61,14 @@ async def search(
|
|
|
57
61
|
# Use search function filtered by permissions if access control is enabled
|
|
58
62
|
if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
|
|
59
63
|
return await authorized_search(
|
|
60
|
-
query_text,
|
|
64
|
+
query_text=query_text,
|
|
65
|
+
query_type=query_type,
|
|
66
|
+
user=user,
|
|
67
|
+
dataset_ids=dataset_ids,
|
|
68
|
+
system_prompt_path=system_prompt_path,
|
|
69
|
+
top_k=top_k,
|
|
70
|
+
save_interaction=save_interaction,
|
|
71
|
+
last_k=last_k,
|
|
61
72
|
)
|
|
62
73
|
|
|
63
74
|
query = await log_query(query_text, query_type.value, user.id)
|
|
@@ -70,6 +81,8 @@ async def search(
|
|
|
70
81
|
top_k=top_k,
|
|
71
82
|
node_type=node_type,
|
|
72
83
|
node_name=node_name,
|
|
84
|
+
save_interaction=save_interaction,
|
|
85
|
+
last_k=last_k,
|
|
73
86
|
)
|
|
74
87
|
|
|
75
88
|
await log_result(
|
|
@@ -91,6 +104,8 @@ async def specific_search(
|
|
|
91
104
|
top_k: int = 10,
|
|
92
105
|
node_type: Optional[Type] = None,
|
|
93
106
|
node_name: Optional[List[str]] = None,
|
|
107
|
+
save_interaction: Optional[bool] = False,
|
|
108
|
+
last_k: Optional[int] = None,
|
|
94
109
|
) -> list:
|
|
95
110
|
search_tasks: dict[SearchType, Callable] = {
|
|
96
111
|
SearchType.SUMMARIES: SummariesRetriever(top_k=top_k).get_completion,
|
|
@@ -104,28 +119,33 @@ async def specific_search(
|
|
|
104
119
|
top_k=top_k,
|
|
105
120
|
node_type=node_type,
|
|
106
121
|
node_name=node_name,
|
|
122
|
+
save_interaction=save_interaction,
|
|
107
123
|
).get_completion,
|
|
108
124
|
SearchType.GRAPH_COMPLETION_COT: GraphCompletionCotRetriever(
|
|
109
125
|
system_prompt_path=system_prompt_path,
|
|
110
126
|
top_k=top_k,
|
|
111
127
|
node_type=node_type,
|
|
112
128
|
node_name=node_name,
|
|
129
|
+
save_interaction=save_interaction,
|
|
113
130
|
).get_completion,
|
|
114
131
|
SearchType.GRAPH_COMPLETION_CONTEXT_EXTENSION: GraphCompletionContextExtensionRetriever(
|
|
115
132
|
system_prompt_path=system_prompt_path,
|
|
116
133
|
top_k=top_k,
|
|
117
134
|
node_type=node_type,
|
|
118
135
|
node_name=node_name,
|
|
136
|
+
save_interaction=save_interaction,
|
|
119
137
|
).get_completion,
|
|
120
138
|
SearchType.GRAPH_SUMMARY_COMPLETION: GraphSummaryCompletionRetriever(
|
|
121
139
|
system_prompt_path=system_prompt_path,
|
|
122
140
|
top_k=top_k,
|
|
123
141
|
node_type=node_type,
|
|
124
142
|
node_name=node_name,
|
|
143
|
+
save_interaction=save_interaction,
|
|
125
144
|
).get_completion,
|
|
126
145
|
SearchType.CODE: CodeRetriever(top_k=top_k).get_completion,
|
|
127
146
|
SearchType.CYPHER: CypherSearchRetriever().get_completion,
|
|
128
147
|
SearchType.NATURAL_LANGUAGE: NaturalLanguageRetriever().get_completion,
|
|
148
|
+
SearchType.FEEDBACK: UserQAFeedback(last_k=last_k).add_feedback,
|
|
129
149
|
}
|
|
130
150
|
|
|
131
151
|
# If the query type is FEELING_LUCKY, select the search type intelligently
|
|
@@ -153,6 +173,8 @@ async def authorized_search(
|
|
|
153
173
|
dataset_ids: Optional[list[UUID]] = None,
|
|
154
174
|
system_prompt_path: str = "answer_simple_question.txt",
|
|
155
175
|
top_k: int = 10,
|
|
176
|
+
save_interaction: bool = False,
|
|
177
|
+
last_k: Optional[int] = None,
|
|
156
178
|
) -> list:
|
|
157
179
|
"""
|
|
158
180
|
Verifies access for provided datasets or uses all datasets user has read access for and performs search per dataset.
|
|
@@ -166,7 +188,14 @@ async def authorized_search(
|
|
|
166
188
|
|
|
167
189
|
# Searches all provided datasets and handles setting up of appropriate database context based on permissions
|
|
168
190
|
search_results = await specific_search_by_context(
|
|
169
|
-
search_datasets,
|
|
191
|
+
search_datasets,
|
|
192
|
+
query_text,
|
|
193
|
+
query_type,
|
|
194
|
+
user,
|
|
195
|
+
system_prompt_path,
|
|
196
|
+
top_k,
|
|
197
|
+
save_interaction,
|
|
198
|
+
last_k=last_k,
|
|
170
199
|
)
|
|
171
200
|
|
|
172
201
|
await log_result(query.id, json.dumps(search_results, cls=JSONEncoder), user.id)
|
|
@@ -181,17 +210,27 @@ async def specific_search_by_context(
|
|
|
181
210
|
user: User,
|
|
182
211
|
system_prompt_path: str,
|
|
183
212
|
top_k: int,
|
|
213
|
+
save_interaction: bool = False,
|
|
214
|
+
last_k: Optional[int] = None,
|
|
184
215
|
):
|
|
185
216
|
"""
|
|
186
217
|
Searches all provided datasets and handles setting up of appropriate database context based on permissions.
|
|
187
218
|
Not to be used outside of active access control mode.
|
|
188
219
|
"""
|
|
189
220
|
|
|
190
|
-
async def _search_by_context(
|
|
221
|
+
async def _search_by_context(
|
|
222
|
+
dataset, user, query_type, query_text, system_prompt_path, top_k, last_k
|
|
223
|
+
):
|
|
191
224
|
# Set database configuration in async context for each dataset user has access for
|
|
192
225
|
await set_database_global_context_variables(dataset.id, dataset.owner_id)
|
|
193
226
|
search_results = await specific_search(
|
|
194
|
-
query_type,
|
|
227
|
+
query_type,
|
|
228
|
+
query_text,
|
|
229
|
+
user,
|
|
230
|
+
system_prompt_path=system_prompt_path,
|
|
231
|
+
top_k=top_k,
|
|
232
|
+
save_interaction=save_interaction,
|
|
233
|
+
last_k=last_k,
|
|
195
234
|
)
|
|
196
235
|
return {
|
|
197
236
|
"search_result": search_results,
|
|
@@ -203,7 +242,9 @@ async def specific_search_by_context(
|
|
|
203
242
|
tasks = []
|
|
204
243
|
for dataset in search_datasets:
|
|
205
244
|
tasks.append(
|
|
206
|
-
_search_by_context(
|
|
245
|
+
_search_by_context(
|
|
246
|
+
dataset, user, query_type, query_text, system_prompt_path, top_k, last_k
|
|
247
|
+
)
|
|
207
248
|
)
|
|
208
249
|
|
|
209
250
|
return await asyncio.gather(*tasks)
|
|
@@ -36,6 +36,7 @@ class ClassDefinition(DataPoint):
|
|
|
36
36
|
class CodeFile(DataPoint):
|
|
37
37
|
name: str
|
|
38
38
|
file_path: str
|
|
39
|
+
language: Optional[str] = None # e.g., 'python', 'javascript', 'java', etc.
|
|
39
40
|
source_code: Optional[str] = None
|
|
40
41
|
part_of: Optional[Repository] = None
|
|
41
42
|
depends_on: Optional[List["ImportStatement"]] = []
|