cognee 0.3.7__py3-none-any.whl → 0.3.7.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/v1/add/routers/get_add_router.py +6 -1
- cognee/api/v1/cognify/routers/get_cognify_router.py +2 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +8 -0
- cognee/api/v1/delete/routers/get_delete_router.py +2 -0
- cognee/api/v1/memify/routers/get_memify_router.py +2 -1
- cognee/api/v1/permissions/routers/get_permissions_router.py +6 -0
- cognee/api/v1/search/routers/get_search_router.py +3 -3
- cognee/api/v1/sync/routers/get_sync_router.py +3 -0
- cognee/api/v1/ui/ui.py +2 -4
- cognee/api/v1/update/routers/get_update_router.py +2 -0
- cognee/api/v1/users/routers/get_visualize_router.py +2 -0
- cognee/infrastructure/databases/graph/kuzu/adapter.py +9 -3
- cognee/infrastructure/llm/prompts/feedback_reaction_prompt.txt +14 -0
- cognee/infrastructure/llm/prompts/feedback_report_prompt.txt +13 -0
- cognee/infrastructure/llm/prompts/feedback_user_context_prompt.txt +5 -0
- cognee/modules/pipelines/operations/run_tasks_base.py +7 -0
- cognee/modules/pipelines/operations/run_tasks_with_telemetry.py +9 -1
- cognee/modules/retrieval/graph_completion_cot_retriever.py +137 -38
- cognee/modules/retrieval/utils/completion.py +25 -4
- cognee/modules/search/methods/search.py +17 -3
- cognee/shared/logging_utils.py +18 -11
- cognee/shared/utils.py +24 -2
- cognee/tasks/feedback/__init__.py +13 -0
- cognee/tasks/feedback/create_enrichments.py +84 -0
- cognee/tasks/feedback/extract_feedback_interactions.py +230 -0
- cognee/tasks/feedback/generate_improved_answers.py +130 -0
- cognee/tasks/feedback/link_enrichments_to_feedback.py +67 -0
- cognee/tasks/feedback/models.py +26 -0
- cognee/tests/test_feedback_enrichment.py +174 -0
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +51 -0
- {cognee-0.3.7.dist-info → cognee-0.3.7.dev1.dist-info}/METADATA +1 -1
- {cognee-0.3.7.dist-info → cognee-0.3.7.dev1.dist-info}/RECORD +36 -26
- {cognee-0.3.7.dist-info → cognee-0.3.7.dev1.dist-info}/WHEEL +0 -0
- {cognee-0.3.7.dist-info → cognee-0.3.7.dev1.dist-info}/entry_points.txt +0 -0
- {cognee-0.3.7.dist-info → cognee-0.3.7.dev1.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.7.dist-info → cognee-0.3.7.dev1.dist-info}/licenses/NOTICE.md +0 -0

cognee/modules/retrieval/graph_completion_cot_retriever.py
CHANGED

@@ -1,10 +1,15 @@
 import asyncio
+import json
 from typing import Optional, List, Type, Any
+from pydantic import BaseModel
 from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
 from cognee.shared.logging_utils import get_logger
 
 from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
-from cognee.modules.retrieval.utils.completion import generate_completion, summarize_text
+from cognee.modules.retrieval.utils.completion import (
+    generate_structured_completion,
+    summarize_text,
+)
 from cognee.modules.retrieval.utils.session_cache import (
     save_conversation_history,
     get_conversation_history,
@@ -17,6 +22,20 @@ from cognee.infrastructure.databases.cache.config import CacheConfig
 logger = get_logger()
 
 
+def _as_answer_text(completion: Any) -> str:
+    """Convert completion to human-readable text for validation and follow-up prompts."""
+    if isinstance(completion, str):
+        return completion
+    if isinstance(completion, BaseModel):
+        # Add notice that this is a structured response
+        json_str = completion.model_dump_json(indent=2)
+        return f"[Structured Response]\n{json_str}"
+    try:
+        return json.dumps(completion, indent=2)
+    except TypeError:
+        return str(completion)
+
+
 class GraphCompletionCotRetriever(GraphCompletionRetriever):
     """
     Handles graph completion by generating responses based on a series of interactions with
@@ -25,6 +44,7 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
     questions based on reasoning. The public methods are:
 
     - get_completion
+    - get_structured_completion
 
     Instance variables include:
     - validation_system_prompt_path
@@ -61,51 +81,35 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
         self.followup_system_prompt_path = followup_system_prompt_path
         self.followup_user_prompt_path = followup_user_prompt_path
 
-    async def get_completion(
+    async def _run_cot_completion(
         self,
         query: str,
         context: Optional[List[Edge]] = None,
-        session_id: Optional[str] = None,
-        max_iter=4,
-    ) -> List[str]:
+        conversation_history: str = "",
+        max_iter: int = 4,
+        response_model: Type = str,
+    ) -> tuple[Any, str, List[Edge]]:
         """
-        Generate completion responses based on a user query and contextual information.
-
-        This method interacts with a language model client to retrieve a structured response,
-        using a series of iterations to refine the answers and generate follow-up questions
-        based on reasoning derived from previous outputs. It raises exceptions if the context
-        retrieval fails or if the model encounters issues in generating outputs.
+        Run chain-of-thought completion with optional structured output.
 
         Parameters:
         -----------
-
-        - query (str): The user's query to be processed and answered.
-        - context (Optional[Any]): Optional context that may assist in answering the query.
-          If not provided, it will be fetched based on the query. (default None)
-        - session_id (Optional[str]): Optional session identifier for caching. If None,
-          defaults to 'default_session'. (default None)
-        - max_iter: The maximum number of iterations to refine the answer and generate
-          follow-up questions. (default 4)
+        - query: User query
+        - context: Optional pre-fetched context edges
+        - conversation_history: Optional conversation history string
+        - max_iter: Maximum CoT iterations
+        - response_model: Type for structured output (str for plain text)
 
         Returns:
         --------
-
-        - List[str]: A list containing the generated answer to the user's query.
+        - completion_result: The generated completion (string or structured model)
+        - context_text: The resolved context text
+        - triplets: The list of triplets used
         """
         followup_question = ""
         triplets = []
         completion = ""
 
-        # Retrieve conversation history if session saving is enabled
-        cache_config = CacheConfig()
-        user = session_user.get()
-        user_id = getattr(user, "id", None)
-        session_save = user_id and cache_config.caching
-
-        conversation_history = ""
-        if session_save:
-            conversation_history = await get_conversation_history(session_id=session_id)
-
         for round_idx in range(max_iter + 1):
             if round_idx == 0:
                 if context is None:
@@ -117,17 +121,21 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
                 triplets += await self.get_context(followup_question)
                 context_text = await self.resolve_edges_to_text(list(set(triplets)))
 
-            completion = await generate_completion(
+            completion = await generate_structured_completion(
                 query=query,
                 context=context_text,
                 user_prompt_path=self.user_prompt_path,
                 system_prompt_path=self.system_prompt_path,
                 system_prompt=self.system_prompt,
-                conversation_history=conversation_history if
+                conversation_history=conversation_history if conversation_history else None,
+                response_model=response_model,
             )
+
             logger.info(f"Chain-of-thought: round {round_idx} - answer: {completion}")
+
             if round_idx < max_iter:
-
+                answer_text = _as_answer_text(completion)
+                valid_args = {"query": query, "answer": answer_text, "context": context_text}
                 valid_user_prompt = render_prompt(
                     filename=self.validation_user_prompt_path, context=valid_args
                 )
@@ -140,7 +148,7 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
                 system_prompt=valid_system_prompt,
                 response_model=str,
             )
-            followup_args = {"query": query, "answer": completion, "reasoning": reasoning}
+            followup_args = {"query": query, "answer": answer_text, "reasoning": reasoning}
             followup_prompt = render_prompt(
                 filename=self.followup_user_prompt_path, context=followup_args
             )
@@ -155,19 +163,110 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
                 f"Chain-of-thought: round {round_idx} - follow-up question: {followup_question}"
             )
 
+        return completion, context_text, triplets
+
+    async def get_structured_completion(
+        self,
+        query: str,
+        context: Optional[List[Edge]] = None,
+        session_id: Optional[str] = None,
+        max_iter: int = 4,
+        response_model: Type = str,
+    ) -> Any:
+        """
+        Generate structured completion responses based on a user query and contextual information.
+
+        This method applies the same chain-of-thought logic as get_completion but returns
+        structured output using the provided response model.
+
+        Parameters:
+        -----------
+        - query (str): The user's query to be processed and answered.
+        - context (Optional[List[Edge]]): Optional context that may assist in answering the query.
+          If not provided, it will be fetched based on the query. (default None)
+        - session_id (Optional[str]): Optional session identifier for caching. If None,
+          defaults to 'default_session'. (default None)
+        - max_iter: The maximum number of iterations to refine the answer and generate
+          follow-up questions. (default 4)
+        - response_model (Type): The Pydantic model type for structured output. (default str)
+
+        Returns:
+        --------
+        - Any: The generated structured completion based on the response model.
+        """
+        # Check if session saving is enabled
+        cache_config = CacheConfig()
+        user = session_user.get()
+        user_id = getattr(user, "id", None)
+        session_save = user_id and cache_config.caching
+
+        # Load conversation history if enabled
+        conversation_history = ""
+        if session_save:
+            conversation_history = await get_conversation_history(session_id=session_id)
+
+        completion, context_text, triplets = await self._run_cot_completion(
+            query=query,
+            context=context,
+            conversation_history=conversation_history,
+            max_iter=max_iter,
+            response_model=response_model,
+        )
+
         if self.save_interaction and context and triplets and completion:
             await self.save_qa(
-                question=query, answer=completion, context=context_text, triplets=triplets
+                question=query, answer=str(completion), context=context_text, triplets=triplets
             )
 
-        # Save to session cache
+        # Save to session cache if enabled
         if session_save:
             context_summary = await summarize_text(context_text)
             await save_conversation_history(
                 query=query,
                 context_summary=context_summary,
-                answer=completion,
+                answer=str(completion),
                 session_id=session_id,
             )
 
+        return completion
+
+    async def get_completion(
+        self,
+        query: str,
+        context: Optional[List[Edge]] = None,
+        session_id: Optional[str] = None,
+        max_iter=4,
+    ) -> List[str]:
+        """
+        Generate completion responses based on a user query and contextual information.
+
+        This method interacts with a language model client to retrieve a structured response,
+        using a series of iterations to refine the answers and generate follow-up questions
+        based on reasoning derived from previous outputs. It raises exceptions if the context
+        retrieval fails or if the model encounters issues in generating outputs.
+
+        Parameters:
+        -----------
+
+        - query (str): The user's query to be processed and answered.
+        - context (Optional[Any]): Optional context that may assist in answering the query.
+          If not provided, it will be fetched based on the query. (default None)
+        - session_id (Optional[str]): Optional session identifier for caching. If None,
+          defaults to 'default_session'. (default None)
+        - max_iter: The maximum number of iterations to refine the answer and generate
+          follow-up questions. (default 4)
+
+        Returns:
+        --------
+
+        - List[str]: A list containing the generated answer to the user's query.
+        """
+        completion = await self.get_structured_completion(
+            query=query,
+            context=context,
+            session_id=session_id,
+            max_iter=max_iter,
+            response_model=str,
+        )
+
         return [completion]
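
The change above splits the old get_completion loop into a shared _run_cot_completion core with two public entry points. A minimal usage sketch of the new structured entry point, assuming a default-constructed retriever and a hypothetical QAResult model (neither appears in this diff):

import asyncio
from pydantic import BaseModel
from cognee.modules.retrieval.graph_completion_cot_retriever import GraphCompletionCotRetriever

class QAResult(BaseModel):
    # Hypothetical response model for illustration; any Pydantic model can serve as response_model.
    answer: str
    confidence: float

async def main():
    retriever = GraphCompletionCotRetriever()  # assumes the defaults fit your setup
    # The chain-of-thought loop runs exactly as for plain text, but the final
    # completion is parsed into QAResult instead of a str.
    result = await retriever.get_structured_completion(
        query="Who maintains cognee?",
        max_iter=2,
        response_model=QAResult,
    )
    print(result)

asyncio.run(main())

Because get_completion now delegates with response_model=str, existing callers keep receiving a one-element list of plain text.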
cognee/modules/retrieval/utils/completion.py
CHANGED

@@ -1,17 +1,18 @@
-from typing import Optional
+from typing import Optional, Type, Any
 from cognee.infrastructure.llm.LLMGateway import LLMGateway
 from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt
 
 
-async def generate_completion(
+async def generate_structured_completion(
     query: str,
     context: str,
     user_prompt_path: str,
     system_prompt_path: str,
     system_prompt: Optional[str] = None,
     conversation_history: Optional[str] = None,
-) -> str:
-    """Generates a completion using LLM with given context and prompts."""
+    response_model: Type = str,
+) -> Any:
+    """Generates a structured completion using LLM with given context and prompts."""
     args = {"question": query, "context": context}
     user_prompt = render_prompt(user_prompt_path, args)
     system_prompt = system_prompt if system_prompt else read_query_prompt(system_prompt_path)
@@ -23,6 +24,26 @@ async def generate_completion(
     return await LLMGateway.acreate_structured_output(
         text_input=user_prompt,
         system_prompt=system_prompt,
+        response_model=response_model,
+    )
+
+
+async def generate_completion(
+    query: str,
+    context: str,
+    user_prompt_path: str,
+    system_prompt_path: str,
+    system_prompt: Optional[str] = None,
+    conversation_history: Optional[str] = None,
+) -> str:
+    """Generates a completion using LLM with given context and prompts."""
+    return await generate_structured_completion(
+        query=query,
+        context=context,
+        user_prompt_path=user_prompt_path,
+        system_prompt_path=system_prompt_path,
+        system_prompt=system_prompt,
+        conversation_history=conversation_history,
         response_model=str,
     )
 
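
With this refactor, generate_completion becomes a thin wrapper, so plain-text and structured calls share one code path. A sketch of the two call styles, with placeholder prompt paths and a hypothetical Summary model:

from pydantic import BaseModel
from cognee.modules.retrieval.utils.completion import (
    generate_completion,
    generate_structured_completion,
)

class Summary(BaseModel):
    # Hypothetical model, for illustration only.
    summary: str

async def demo(context: str) -> None:
    # Plain text: forwarded internally with response_model=str.
    text = await generate_completion(
        query="Summarize the context.",
        context=context,
        user_prompt_path="user_prompt.txt",      # placeholder path
        system_prompt_path="system_prompt.txt",  # placeholder path
    )
    # Structured: same prompts, but the LLM output is parsed into Summary.
    structured = await generate_structured_completion(
        query="Summarize the context.",
        context=context,
        user_prompt_path="user_prompt.txt",
        system_prompt_path="system_prompt.txt",
        response_model=Summary,
    )
    print(text, structured.summary)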
cognee/modules/search/methods/search.py
CHANGED

@@ -24,7 +24,7 @@ from cognee.modules.data.models import Dataset
 from cognee.modules.data.methods.get_authorized_existing_datasets import (
     get_authorized_existing_datasets,
 )
-
+from cognee import __version__ as cognee_version
 from .get_search_type_tools import get_search_type_tools
 from .no_access_control_search import no_access_control_search
 from ..utils.prepare_search_result import prepare_search_result
@@ -64,7 +64,14 @@ async def search(
     Searching by dataset is only available in ENABLE_BACKEND_ACCESS_CONTROL mode
     """
     query = await log_query(query_text, query_type.value, user.id)
-    send_telemetry("cognee.search EXECUTION STARTED", user.id)
+    send_telemetry(
+        "cognee.search EXECUTION STARTED",
+        user.id,
+        additional_properties={
+            "cognee_version": cognee_version,
+            "tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant",
+        },
+    )
 
     # Use search function filtered by permissions if access control is enabled
     if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
@@ -101,7 +108,14 @@ async def search(
        )
    ]
 
-    send_telemetry("cognee.search EXECUTION COMPLETED", user.id)
+    send_telemetry(
+        "cognee.search EXECUTION COMPLETED",
+        user.id,
+        additional_properties={
+            "cognee_version": cognee_version,
+            "tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant",
+        },
+    )
 
     await log_result(
         query.id,
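
Both search telemetry events now attach the package version and a tenant identifier, with a literal fallback string for single-user deployments. A small self-contained sketch of the property construction (the helper and the SimpleNamespace user are illustrative, not package API):

from types import SimpleNamespace

def telemetry_properties(user, cognee_version: str) -> dict:
    # Mirrors the additional_properties dict built inline in search().
    return {
        "cognee_version": cognee_version,
        "tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant",
    }

user = SimpleNamespace(id="u1", tenant_id=None)
assert telemetry_properties(user, "0.3.7.dev1")["tenant_id"] == "Single User Tenant"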
cognee/shared/logging_utils.py
CHANGED

@@ -430,6 +430,15 @@ def setup_logging(log_level=None, name=None):
     stream_handler.setFormatter(console_formatter)
     stream_handler.setLevel(log_level)
 
+    root_logger = logging.getLogger()
+    if root_logger.hasHandlers():
+        root_logger.handlers.clear()
+    root_logger.addHandler(stream_handler)
+
+    # Note: the root logger needs to be set to NOTSET to allow all messages through, so that
+    # specific stream and file handlers can define their own levels.
+    root_logger.setLevel(logging.NOTSET)
+
     # Check if we already have a log file path from the environment
     # NOTE: environment variable must be used here as it allows us to
     # log to a single file with a name based on a timestamp in a multiprocess setting.
@@ -441,17 +450,15 @@ def setup_logging(log_level=None, name=None):
     log_file_path = os.path.join(LOGS_DIR, f"{start_time}.log")
     os.environ["LOG_FILE_NAME"] = log_file_path
 
-
-
-
-
-
-
-
-
-
-    root_logger.addHandler(file_handler)
-    root_logger.setLevel(log_level)
+    try:
+        # Create a file handler that uses our custom PlainFileHandler
+        file_handler = PlainFileHandler(log_file_path, encoding="utf-8")
+        file_handler.setLevel(DEBUG)
+        root_logger.addHandler(file_handler)
+    except Exception as e:
+        # Note: exceptions happen in case of read-only file systems or a log file path pointing to a location
+        # without write permission. Logging to file is not mandatory, so we just log a warning to the console.
+        root_logger.warning(f"Warning: Could not create log file handler at {log_file_path}: {e}")
 
     if log_level > logging.DEBUG:
         import warnings
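
The logging change moves level filtering from the root logger to the individual handlers: the root logger is opened up to NOTSET, the console handler filters at the configured level, and the file handler captures everything from DEBUG up, with file logging treated as optional. A standalone stdlib sketch of the same pattern:

import logging

root = logging.getLogger()
root.handlers.clear()
root.setLevel(logging.NOTSET)  # pass every record through to the handlers

console = logging.StreamHandler()
console.setLevel(logging.INFO)  # console stays quiet below INFO
root.addHandler(console)

try:
    file_handler = logging.FileHandler("example.log", encoding="utf-8")
    file_handler.setLevel(logging.DEBUG)  # file keeps full detail
    root.addHandler(file_handler)
except OSError as error:
    # e.g. a read-only file system; file logging is optional here, as in the patch
    root.warning(f"Could not create log file handler: {error}")

logging.getLogger("demo").debug("file only")
logging.getLogger("demo").info("console and file")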
cognee/shared/utils.py
CHANGED

@@ -8,7 +8,7 @@ import http.server
 import socketserver
 from threading import Thread
 import pathlib
-from uuid import uuid4
+from uuid import uuid4, uuid5, NAMESPACE_OID
 
 from cognee.base_config import get_base_config
 from cognee.infrastructure.databases.graph import get_graph_engine
@@ -51,6 +51,26 @@ def get_anonymous_id():
     return anonymous_id
 
 
+def _sanitize_nested_properties(obj, property_names: list[str]):
+    """
+    Recursively replaces any property whose key matches one of `property_names`
+    (e.g., ['url', 'path']) in a nested dict or list with a uuid5 hash
+    of its string value. Returns a new sanitized copy.
+    """
+    if isinstance(obj, dict):
+        new_obj = {}
+        for k, v in obj.items():
+            if k in property_names and isinstance(v, str):
+                new_obj[k] = str(uuid5(NAMESPACE_OID, v))
+            else:
+                new_obj[k] = _sanitize_nested_properties(v, property_names)
+        return new_obj
+    elif isinstance(obj, list):
+        return [_sanitize_nested_properties(item, property_names) for item in obj]
+    else:
+        return obj
+
+
 def send_telemetry(event_name: str, user_id, additional_properties: dict = {}):
     if os.getenv("TELEMETRY_DISABLED"):
         return
@@ -58,7 +78,9 @@ def send_telemetry(event_name: str, user_id, additional_properties: dict = {}):
     env = os.getenv("ENV")
     if env in ["test", "dev"]:
         return
-
+    additional_properties = _sanitize_nested_properties(
+        obj=additional_properties, property_names=["url"]
+    )
     current_time = datetime.now(timezone.utc)
     payload = {
         "anonymous_id": str(get_anonymous_id()),
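
Since uuid5 is deterministic, a given URL always sanitizes to the same UUID string, so telemetry events stay correlatable without exposing the raw value. A quick standalone illustration of the recursion (reimplemented here for clarity rather than importing the private helper):

from uuid import NAMESPACE_OID, uuid5

def sanitize(obj, property_names):
    # Same shape as _sanitize_nested_properties: hash matching string values,
    # recurse into dicts and lists, leave everything else unchanged.
    if isinstance(obj, dict):
        return {
            k: str(uuid5(NAMESPACE_OID, v))
            if k in property_names and isinstance(v, str)
            else sanitize(v, property_names)
            for k, v in obj.items()
        }
    if isinstance(obj, list):
        return [sanitize(item, property_names) for item in obj]
    return obj

event = {"url": "https://example.com/doc", "nested": [{"url": "https://example.com/doc"}]}
out = sanitize(event, ["url"])
assert out["url"] == out["nested"][0]["url"]  # deterministic: same input, same hash
assert out["url"] != event["url"]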
cognee/tasks/feedback/__init__.py
ADDED

@@ -0,0 +1,13 @@
+from .extract_feedback_interactions import extract_feedback_interactions
+from .generate_improved_answers import generate_improved_answers
+from .create_enrichments import create_enrichments
+from .link_enrichments_to_feedback import link_enrichments_to_feedback
+from .models import FeedbackEnrichment
+
+__all__ = [
+    "extract_feedback_interactions",
+    "generate_improved_answers",
+    "create_enrichments",
+    "link_enrichments_to_feedback",
+    "FeedbackEnrichment",
+]
cognee/tasks/feedback/create_enrichments.py
ADDED

@@ -0,0 +1,84 @@
+from __future__ import annotations
+
+from typing import List
+from uuid import NAMESPACE_OID, uuid5
+
+from cognee.infrastructure.llm import LLMGateway
+from cognee.infrastructure.llm.prompts.read_query_prompt import read_query_prompt
+from cognee.shared.logging_utils import get_logger
+from cognee.modules.engine.models import NodeSet
+
+from .models import FeedbackEnrichment
+
+
+logger = get_logger("create_enrichments")
+
+
+def _validate_enrichments(enrichments: List[FeedbackEnrichment]) -> bool:
+    """Validate that all enrichments contain required fields for completion."""
+    return all(
+        enrichment.question is not None
+        and enrichment.original_answer is not None
+        and enrichment.improved_answer is not None
+        and enrichment.new_context is not None
+        and enrichment.feedback_id is not None
+        and enrichment.interaction_id is not None
+        for enrichment in enrichments
+    )
+
+
+async def _generate_enrichment_report(
+    question: str, improved_answer: str, new_context: str, report_prompt_location: str
+) -> str:
+    """Generate educational report using feedback report prompt."""
+    try:
+        prompt_template = read_query_prompt(report_prompt_location)
+        rendered_prompt = prompt_template.format(
+            question=question,
+            improved_answer=improved_answer,
+            new_context=new_context,
+        )
+        return await LLMGateway.acreate_structured_output(
+            text_input=rendered_prompt,
+            system_prompt="You are a helpful assistant that creates educational content.",
+            response_model=str,
+        )
+    except Exception as exc:
+        logger.warning("Failed to generate enrichment report", error=str(exc), question=question)
+        return f"Educational content for: {question} - {improved_answer}"
+
+
+async def create_enrichments(
+    enrichments: List[FeedbackEnrichment],
+    report_prompt_location: str = "feedback_report_prompt.txt",
+) -> List[FeedbackEnrichment]:
+    """Fill text and belongs_to_set fields of existing FeedbackEnrichment DataPoints."""
+    if not enrichments:
+        logger.info("No enrichments provided; returning empty list")
+        return []
+
+    if not _validate_enrichments(enrichments):
+        logger.error("Input validation failed; missing required fields")
+        return []
+
+    logger.info("Completing enrichments", count=len(enrichments))
+
+    nodeset = NodeSet(id=uuid5(NAMESPACE_OID, name="FeedbackEnrichment"), name="FeedbackEnrichment")
+
+    completed_enrichments: List[FeedbackEnrichment] = []
+
+    for enrichment in enrichments:
+        report_text = await _generate_enrichment_report(
+            enrichment.question,
+            enrichment.improved_answer,
+            enrichment.new_context,
+            report_prompt_location,
+        )
+
+        enrichment.text = report_text
+        enrichment.belongs_to_set = [nodeset]
+
+        completed_enrichments.append(enrichment)
+
+    logger.info("Completed enrichments", successful=len(completed_enrichments))
+    return completed_enrichments
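
create_enrichments mutates the DataPoints it is given: each enrichment's text is filled with an LLM-generated report (with a plain-text fallback on failure) and belongs_to_set is pointed at a NodeSet whose id is a uuid5 of the fixed name, so repeated runs attach to the same set. A hedged usage sketch; the FeedbackEnrichment constructor fields are inferred from _validate_enrichments, since models.py is not shown in this diff:

import asyncio
from cognee.tasks.feedback import FeedbackEnrichment, create_enrichments

async def main():
    enrichment = FeedbackEnrichment(
        # Field names taken from _validate_enrichments; exact types live in
        # cognee/tasks/feedback/models.py, which this diff does not display.
        question="What is cognee?",
        original_answer="A memory layer.",
        improved_answer="A memory layer for AI agents, built on knowledge graphs.",
        new_context="...",
        feedback_id="...",
        interaction_id="...",
    )
    completed = await create_enrichments([enrichment])
    if completed:
        print(completed[0].text)  # the generated educational report

asyncio.run(main())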