cognee 0.3.6__py3-none-any.whl → 0.3.7.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. cognee/__init__.py +1 -0
  2. cognee/api/health.py +2 -12
  3. cognee/api/v1/add/add.py +46 -6
  4. cognee/api/v1/add/routers/get_add_router.py +11 -2
  5. cognee/api/v1/cognify/cognify.py +29 -9
  6. cognee/api/v1/cognify/routers/get_cognify_router.py +2 -1
  7. cognee/api/v1/datasets/datasets.py +11 -0
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +8 -0
  9. cognee/api/v1/delete/routers/get_delete_router.py +2 -0
  10. cognee/api/v1/memify/routers/get_memify_router.py +2 -1
  11. cognee/api/v1/permissions/routers/get_permissions_router.py +6 -0
  12. cognee/api/v1/responses/default_tools.py +0 -1
  13. cognee/api/v1/responses/dispatch_function.py +1 -1
  14. cognee/api/v1/responses/routers/default_tools.py +0 -1
  15. cognee/api/v1/search/routers/get_search_router.py +3 -3
  16. cognee/api/v1/search/search.py +11 -9
  17. cognee/api/v1/settings/routers/get_settings_router.py +7 -1
  18. cognee/api/v1/sync/routers/get_sync_router.py +3 -0
  19. cognee/api/v1/ui/ui.py +45 -16
  20. cognee/api/v1/update/routers/get_update_router.py +3 -1
  21. cognee/api/v1/update/update.py +3 -3
  22. cognee/api/v1/users/routers/get_visualize_router.py +2 -0
  23. cognee/cli/_cognee.py +61 -10
  24. cognee/cli/commands/add_command.py +3 -3
  25. cognee/cli/commands/cognify_command.py +3 -3
  26. cognee/cli/commands/config_command.py +9 -7
  27. cognee/cli/commands/delete_command.py +3 -3
  28. cognee/cli/commands/search_command.py +3 -7
  29. cognee/cli/config.py +0 -1
  30. cognee/context_global_variables.py +5 -0
  31. cognee/exceptions/exceptions.py +1 -1
  32. cognee/infrastructure/databases/cache/__init__.py +2 -0
  33. cognee/infrastructure/databases/cache/cache_db_interface.py +79 -0
  34. cognee/infrastructure/databases/cache/config.py +44 -0
  35. cognee/infrastructure/databases/cache/get_cache_engine.py +67 -0
  36. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +243 -0
  37. cognee/infrastructure/databases/exceptions/__init__.py +1 -0
  38. cognee/infrastructure/databases/exceptions/exceptions.py +18 -2
  39. cognee/infrastructure/databases/graph/get_graph_engine.py +1 -1
  40. cognee/infrastructure/databases/graph/graph_db_interface.py +5 -0
  41. cognee/infrastructure/databases/graph/kuzu/adapter.py +76 -47
  42. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +13 -3
  43. cognee/infrastructure/databases/graph/neo4j_driver/deadlock_retry.py +1 -1
  44. cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +1 -1
  45. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +1 -1
  46. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +21 -3
  47. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +17 -10
  48. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +17 -4
  49. cognee/infrastructure/databases/vector/embeddings/config.py +2 -3
  50. cognee/infrastructure/databases/vector/exceptions/exceptions.py +1 -1
  51. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -1
  52. cognee/infrastructure/files/exceptions.py +1 -1
  53. cognee/infrastructure/files/storage/LocalFileStorage.py +9 -9
  54. cognee/infrastructure/files/storage/S3FileStorage.py +11 -11
  55. cognee/infrastructure/files/utils/guess_file_type.py +6 -0
  56. cognee/infrastructure/llm/prompts/feedback_reaction_prompt.txt +14 -0
  57. cognee/infrastructure/llm/prompts/feedback_report_prompt.txt +13 -0
  58. cognee/infrastructure/llm/prompts/feedback_user_context_prompt.txt +5 -0
  59. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +0 -5
  60. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +19 -9
  61. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +17 -5
  62. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +17 -5
  63. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +32 -0
  64. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/__init__.py +0 -0
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +109 -0
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +33 -8
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +40 -18
  68. cognee/infrastructure/loaders/LoaderEngine.py +27 -7
  69. cognee/infrastructure/loaders/external/__init__.py +7 -0
  70. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +2 -8
  71. cognee/infrastructure/loaders/external/beautiful_soup_loader.py +310 -0
  72. cognee/infrastructure/loaders/supported_loaders.py +7 -0
  73. cognee/modules/data/exceptions/exceptions.py +1 -1
  74. cognee/modules/data/methods/__init__.py +3 -0
  75. cognee/modules/data/methods/get_dataset_data.py +4 -1
  76. cognee/modules/data/methods/has_dataset_data.py +21 -0
  77. cognee/modules/engine/models/TableRow.py +0 -1
  78. cognee/modules/ingestion/save_data_to_file.py +9 -2
  79. cognee/modules/pipelines/exceptions/exceptions.py +1 -1
  80. cognee/modules/pipelines/operations/pipeline.py +12 -1
  81. cognee/modules/pipelines/operations/run_tasks.py +25 -197
  82. cognee/modules/pipelines/operations/run_tasks_base.py +7 -0
  83. cognee/modules/pipelines/operations/run_tasks_data_item.py +260 -0
  84. cognee/modules/pipelines/operations/run_tasks_distributed.py +121 -38
  85. cognee/modules/pipelines/operations/run_tasks_with_telemetry.py +9 -1
  86. cognee/modules/retrieval/EntityCompletionRetriever.py +48 -8
  87. cognee/modules/retrieval/base_graph_retriever.py +3 -1
  88. cognee/modules/retrieval/base_retriever.py +3 -1
  89. cognee/modules/retrieval/chunks_retriever.py +5 -1
  90. cognee/modules/retrieval/code_retriever.py +20 -2
  91. cognee/modules/retrieval/completion_retriever.py +50 -9
  92. cognee/modules/retrieval/cypher_search_retriever.py +11 -1
  93. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +47 -8
  94. cognee/modules/retrieval/graph_completion_cot_retriever.py +152 -22
  95. cognee/modules/retrieval/graph_completion_retriever.py +54 -10
  96. cognee/modules/retrieval/lexical_retriever.py +20 -2
  97. cognee/modules/retrieval/natural_language_retriever.py +10 -1
  98. cognee/modules/retrieval/summaries_retriever.py +5 -1
  99. cognee/modules/retrieval/temporal_retriever.py +62 -10
  100. cognee/modules/retrieval/user_qa_feedback.py +3 -2
  101. cognee/modules/retrieval/utils/completion.py +30 -4
  102. cognee/modules/retrieval/utils/description_to_codepart_search.py +1 -1
  103. cognee/modules/retrieval/utils/session_cache.py +156 -0
  104. cognee/modules/search/methods/get_search_type_tools.py +0 -5
  105. cognee/modules/search/methods/no_access_control_search.py +12 -1
  106. cognee/modules/search/methods/search.py +51 -5
  107. cognee/modules/search/types/SearchType.py +0 -1
  108. cognee/modules/settings/get_settings.py +23 -0
  109. cognee/modules/users/methods/get_authenticated_user.py +3 -1
  110. cognee/modules/users/methods/get_default_user.py +1 -6
  111. cognee/modules/users/roles/methods/create_role.py +2 -2
  112. cognee/modules/users/tenants/methods/create_tenant.py +2 -2
  113. cognee/shared/exceptions/exceptions.py +1 -1
  114. cognee/shared/logging_utils.py +18 -11
  115. cognee/shared/utils.py +24 -2
  116. cognee/tasks/codingagents/coding_rule_associations.py +1 -2
  117. cognee/tasks/documents/exceptions/exceptions.py +1 -1
  118. cognee/tasks/feedback/__init__.py +13 -0
  119. cognee/tasks/feedback/create_enrichments.py +84 -0
  120. cognee/tasks/feedback/extract_feedback_interactions.py +230 -0
  121. cognee/tasks/feedback/generate_improved_answers.py +130 -0
  122. cognee/tasks/feedback/link_enrichments_to_feedback.py +67 -0
  123. cognee/tasks/feedback/models.py +26 -0
  124. cognee/tasks/graph/extract_graph_from_data.py +2 -0
  125. cognee/tasks/ingestion/data_item_to_text_file.py +3 -3
  126. cognee/tasks/ingestion/ingest_data.py +11 -5
  127. cognee/tasks/ingestion/save_data_item_to_storage.py +12 -1
  128. cognee/tasks/storage/add_data_points.py +3 -10
  129. cognee/tasks/storage/index_data_points.py +19 -14
  130. cognee/tasks/storage/index_graph_edges.py +25 -11
  131. cognee/tasks/web_scraper/__init__.py +34 -0
  132. cognee/tasks/web_scraper/config.py +26 -0
  133. cognee/tasks/web_scraper/default_url_crawler.py +446 -0
  134. cognee/tasks/web_scraper/models.py +46 -0
  135. cognee/tasks/web_scraper/types.py +4 -0
  136. cognee/tasks/web_scraper/utils.py +142 -0
  137. cognee/tasks/web_scraper/web_scraper_task.py +396 -0
  138. cognee/tests/cli_tests/cli_unit_tests/test_cli_utils.py +0 -1
  139. cognee/tests/integration/web_url_crawler/test_default_url_crawler.py +13 -0
  140. cognee/tests/integration/web_url_crawler/test_tavily_crawler.py +19 -0
  141. cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py +344 -0
  142. cognee/tests/subprocesses/reader.py +25 -0
  143. cognee/tests/subprocesses/simple_cognify_1.py +31 -0
  144. cognee/tests/subprocesses/simple_cognify_2.py +31 -0
  145. cognee/tests/subprocesses/writer.py +32 -0
  146. cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py +0 -2
  147. cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py +8 -3
  148. cognee/tests/tasks/entity_extraction/entity_extraction_test.py +89 -0
  149. cognee/tests/tasks/web_scraping/web_scraping_test.py +172 -0
  150. cognee/tests/test_add_docling_document.py +56 -0
  151. cognee/tests/test_chromadb.py +7 -11
  152. cognee/tests/test_concurrent_subprocess_access.py +76 -0
  153. cognee/tests/test_conversation_history.py +240 -0
  154. cognee/tests/test_feedback_enrichment.py +174 -0
  155. cognee/tests/test_kuzu.py +27 -15
  156. cognee/tests/test_lancedb.py +7 -11
  157. cognee/tests/test_library.py +32 -2
  158. cognee/tests/test_neo4j.py +24 -16
  159. cognee/tests/test_neptune_analytics_vector.py +7 -11
  160. cognee/tests/test_permissions.py +9 -13
  161. cognee/tests/test_pgvector.py +4 -4
  162. cognee/tests/test_remote_kuzu.py +8 -11
  163. cognee/tests/test_s3_file_storage.py +1 -1
  164. cognee/tests/test_search_db.py +6 -8
  165. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +89 -0
  166. cognee/tests/unit/modules/retrieval/conversation_history_test.py +154 -0
  167. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +51 -0
  168. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/METADATA +21 -6
  169. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/RECORD +178 -139
  170. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/entry_points.txt +1 -0
  171. distributed/Dockerfile +0 -3
  172. distributed/entrypoint.py +21 -9
  173. distributed/signal.py +5 -0
  174. distributed/workers/data_point_saving_worker.py +64 -34
  175. distributed/workers/graph_saving_worker.py +71 -47
  176. cognee/infrastructure/databases/graph/memgraph/memgraph_adapter.py +0 -1116
  177. cognee/modules/retrieval/insights_retriever.py +0 -133
  178. cognee/tests/test_memgraph.py +0 -109
  179. cognee/tests/unit/modules/retrieval/insights_retriever_test.py +0 -251
  180. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/WHEEL +0 -0
  181. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/licenses/LICENSE +0 -0
  182. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,84 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import List
4
+ from uuid import NAMESPACE_OID, uuid5
5
+
6
+ from cognee.infrastructure.llm import LLMGateway
7
+ from cognee.infrastructure.llm.prompts.read_query_prompt import read_query_prompt
8
+ from cognee.shared.logging_utils import get_logger
9
+ from cognee.modules.engine.models import NodeSet
10
+
11
+ from .models import FeedbackEnrichment
12
+
13
+
14
+ logger = get_logger("create_enrichments")
15
+
16
+
17
+ def _validate_enrichments(enrichments: List[FeedbackEnrichment]) -> bool:
18
+ """Validate that all enrichments contain required fields for completion."""
19
+ return all(
20
+ enrichment.question is not None
21
+ and enrichment.original_answer is not None
22
+ and enrichment.improved_answer is not None
23
+ and enrichment.new_context is not None
24
+ and enrichment.feedback_id is not None
25
+ and enrichment.interaction_id is not None
26
+ for enrichment in enrichments
27
+ )
28
+
29
+
30
async def _generate_enrichment_report(
    question: str, improved_answer: str, new_context: str, report_prompt_location: str
) -> str:
    """Generate an educational report for one enrichment via the LLM.

    Falls back to a plain template string when rendering or the LLM call fails.
    """
    try:
        rendered_prompt = read_query_prompt(report_prompt_location).format(
            question=question,
            improved_answer=improved_answer,
            new_context=new_context,
        )
        return await LLMGateway.acreate_structured_output(
            text_input=rendered_prompt,
            system_prompt="You are a helpful assistant that creates educational content.",
            response_model=str,
        )
    except Exception as exc:
        # Best-effort: never let a prompt/LLM failure break the pipeline.
        logger.warning("Failed to generate enrichment report", error=str(exc), question=question)
        return f"Educational content for: {question} - {improved_answer}"
49
+
50
+
51
async def create_enrichments(
    enrichments: List[FeedbackEnrichment],
    report_prompt_location: str = "feedback_report_prompt.txt",
) -> List[FeedbackEnrichment]:
    """Fill text and belongs_to_set fields of existing FeedbackEnrichment DataPoints."""
    if not enrichments:
        logger.info("No enrichments provided; returning empty list")
        return []

    if not _validate_enrichments(enrichments):
        logger.error("Input validation failed; missing required fields")
        return []

    logger.info("Completing enrichments", count=len(enrichments))

    # One shared NodeSet (deterministic id) groups every feedback enrichment.
    shared_nodeset = NodeSet(
        id=uuid5(NAMESPACE_OID, name="FeedbackEnrichment"), name="FeedbackEnrichment"
    )

    completed: List[FeedbackEnrichment] = []
    for enrichment in enrichments:
        enrichment.text = await _generate_enrichment_report(
            enrichment.question,
            enrichment.improved_answer,
            enrichment.new_context,
            report_prompt_location,
        )
        enrichment.belongs_to_set = [shared_nodeset]
        completed.append(enrichment)

    logger.info("Completed enrichments", successful=len(completed))
    return completed
@@ -0,0 +1,230 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Dict, List, Optional, Tuple
4
+ from uuid import UUID, uuid5, NAMESPACE_OID
5
+
6
+ from cognee.infrastructure.llm import LLMGateway
7
+ from cognee.infrastructure.llm.prompts.read_query_prompt import read_query_prompt
8
+ from cognee.shared.logging_utils import get_logger
9
+ from cognee.infrastructure.databases.graph import get_graph_engine
10
+
11
+ from .models import FeedbackEnrichment
12
+
13
+
14
+ logger = get_logger("extract_feedback_interactions")
15
+
16
+
17
+ def _filter_negative_feedback(feedback_nodes):
18
+ """Filter for negative sentiment feedback using precise sentiment classification."""
19
+ return [
20
+ (node_id, props)
21
+ for node_id, props in feedback_nodes
22
+ if (props.get("sentiment", "").casefold() == "negative" or props.get("score", 0) < 0)
23
+ ]
24
+
25
+
26
+ def _get_normalized_id(node_id, props) -> str:
27
+ """Return Cognee node id preference: props.id → props.node_id → raw node_id."""
28
+ return str(props.get("id") or props.get("node_id") or node_id)
29
+
30
+
31
async def _fetch_feedback_and_interaction_graph_data() -> Tuple[List, List]:
    """Fetch feedback and interaction nodes with edges from the graph engine.

    Returns ([], []) on any failure so callers can degrade gracefully.
    """
    type_filters = [{"type": ["CogneeUserFeedback", "CogneeUserInteraction"]}]
    try:
        engine = await get_graph_engine()
        return await engine.get_filtered_graph_data(type_filters)
    except Exception as exc:  # noqa: BLE001
        logger.error("Failed to fetch filtered graph data", error=str(exc))
        return [], []
40
+
41
+
42
def _separate_feedback_and_interaction_nodes(graph_nodes: List) -> Tuple[List, List]:
    """Split nodes into feedback and interaction groups by their type field."""
    feedback_nodes: List = []
    interaction_nodes: List = []
    for raw_id, props in graph_nodes:
        normalized = (_get_normalized_id(raw_id, props), props)
        node_type = props.get("type")
        if node_type == "CogneeUserFeedback":
            feedback_nodes.append(normalized)
        elif node_type == "CogneeUserInteraction":
            interaction_nodes.append(normalized)
    return feedback_nodes, interaction_nodes
55
+
56
+
57
+ def _match_feedback_nodes_to_interactions_by_edges(
58
+ feedback_nodes: List, interaction_nodes: List, graph_edges: List
59
+ ) -> List[Tuple[Tuple, Tuple]]:
60
+ """Match feedback to interactions using gives_feedback_to edges."""
61
+ interaction_by_id = {node_id: (node_id, props) for node_id, props in interaction_nodes}
62
+ feedback_by_id = {node_id: (node_id, props) for node_id, props in feedback_nodes}
63
+ feedback_edges = [
64
+ (source_id, target_id)
65
+ for source_id, target_id, rel, _ in graph_edges
66
+ if rel == "gives_feedback_to"
67
+ ]
68
+
69
+ feedback_interaction_pairs: List[Tuple[Tuple, Tuple]] = []
70
+ for source_id, target_id in feedback_edges:
71
+ source_id_str, target_id_str = str(source_id), str(target_id)
72
+
73
+ feedback_node = feedback_by_id.get(source_id_str)
74
+ interaction_node = interaction_by_id.get(target_id_str)
75
+
76
+ if feedback_node and interaction_node:
77
+ feedback_interaction_pairs.append((feedback_node, interaction_node))
78
+
79
+ return feedback_interaction_pairs
80
+
81
+
82
+ def _sort_pairs_by_recency_and_limit(
83
+ feedback_interaction_pairs: List[Tuple[Tuple, Tuple]], last_n_limit: Optional[int]
84
+ ) -> List[Tuple[Tuple, Tuple]]:
85
+ """Sort by interaction created_at desc with updated_at fallback, then limit."""
86
+
87
+ def _recency_key(pair):
88
+ _, (_, interaction_props) = pair
89
+ created_at = interaction_props.get("created_at") or ""
90
+ updated_at = interaction_props.get("updated_at") or ""
91
+ return (created_at, updated_at)
92
+
93
+ sorted_pairs = sorted(feedback_interaction_pairs, key=_recency_key, reverse=True)
94
+ return sorted_pairs[: last_n_limit or len(sorted_pairs)]
95
+
96
+
97
async def _generate_human_readable_context_summary(
    question_text: str, raw_context_text: str
) -> str:
    """Generate a concise human-readable summary for the given context.

    Falls back to the raw context (or "") when the LLM call fails.
    """
    try:
        template = read_query_prompt("feedback_user_context_prompt.txt")
        rendered_prompt = template.format(question=question_text, context=raw_context_text)
        return await LLMGateway.acreate_structured_output(
            text_input=rendered_prompt, system_prompt="", response_model=str
        )
    except Exception as exc:  # noqa: BLE001
        logger.warning("Failed to summarize context", error=str(exc))
        return raw_context_text or ""
110
+
111
+
112
+ def _has_required_feedback_fields(enrichment: FeedbackEnrichment) -> bool:
113
+ """Validate required fields exist in the FeedbackEnrichment DataPoint."""
114
+ return (
115
+ enrichment.question is not None
116
+ and enrichment.original_answer is not None
117
+ and enrichment.context is not None
118
+ and enrichment.feedback_text is not None
119
+ and enrichment.feedback_id is not None
120
+ and enrichment.interaction_id is not None
121
+ )
122
+
123
+
124
async def _build_feedback_interaction_record(
    feedback_node_id: str, feedback_props: Dict, interaction_node_id: str, interaction_props: Dict
) -> Optional[FeedbackEnrichment]:
    """Build a single FeedbackEnrichment DataPoint with a summarized context.

    Returns None when the built record misses required fields or on any error.
    """
    try:
        question = interaction_props.get("question")
        answer = interaction_props.get("answer")
        raw_context = interaction_props.get("context", "")
        # Feedback text may live under either key depending on the producer.
        feedback = feedback_props.get("feedback") or feedback_props.get("text") or ""

        summarized_context = await _generate_human_readable_context_summary(
            question or "", raw_context
        )

        record = FeedbackEnrichment(
            # Deterministic id so re-runs of the same interaction dedupe.
            id=str(uuid5(NAMESPACE_OID, f"{question}_{interaction_node_id}")),
            text="",
            question=question,
            original_answer=answer,
            improved_answer="",
            feedback_id=UUID(str(feedback_node_id)),
            interaction_id=UUID(str(interaction_node_id)),
            belongs_to_set=None,
            context=summarized_context,
            feedback_text=feedback,
            new_context="",
            explanation="",
        )

        if not _has_required_feedback_fields(record):
            logger.warning("Skipping invalid feedback item", interaction=str(interaction_node_id))
            return None
        return record
    except Exception as exc:  # noqa: BLE001
        logger.error("Failed to process feedback pair", error=str(exc))
        return None
161
+
162
+
163
async def _build_feedback_interaction_records(
    matched_feedback_interaction_pairs: List[Tuple[Tuple, Tuple]],
) -> List[FeedbackEnrichment]:
    """Build all FeedbackEnrichment DataPoints from matched pairs, dropping failures."""
    records: List[FeedbackEnrichment] = []
    for feedback_node, interaction_node in matched_feedback_interaction_pairs:
        feedback_node_id, feedback_props = feedback_node
        interaction_node_id, interaction_props = interaction_node
        built = await _build_feedback_interaction_record(
            feedback_node_id, feedback_props, interaction_node_id, interaction_props
        )
        if built:
            records.append(built)
    return records
178
+
179
+
180
async def extract_feedback_interactions(
    data: Any, last_n: Optional[int] = None
) -> List[FeedbackEnrichment]:
    """Extract negative feedback-interaction pairs and create FeedbackEnrichment DataPoints.

    The pipeline may pass placeholder data; this task always reads from the graph.
    """
    if not data or data == [{}]:
        logger.info(
            "No data passed to the extraction task (extraction task fetches data from graph directly)",
            data=data,
        )

    nodes, edges = await _fetch_feedback_and_interaction_graph_data()
    if not nodes:
        logger.warning("No graph nodes retrieved from database")
        return []

    feedback_nodes, interaction_nodes = _separate_feedback_and_interaction_nodes(nodes)
    logger.info(
        "Retrieved nodes from graph",
        total_nodes=len(nodes),
        feedback_nodes=len(feedback_nodes),
        interaction_nodes=len(interaction_nodes),
    )

    negative_feedback = _filter_negative_feedback(feedback_nodes)
    logger.info(
        "Filtered feedback nodes",
        total_feedback=len(feedback_nodes),
        negative_feedback=len(negative_feedback),
    )
    if not negative_feedback:
        logger.info("No negative feedback found; returning empty list")
        return []

    matched_pairs = _match_feedback_nodes_to_interactions_by_edges(
        negative_feedback, interaction_nodes, edges
    )
    if not matched_pairs:
        logger.info("No feedback-to-interaction matches found; returning empty list")
        return []

    limited_pairs = _sort_pairs_by_recency_and_limit(matched_pairs, last_n)

    records = await _build_feedback_interaction_records(limited_pairs)
    logger.info("Extracted feedback pairs", count=len(records))
    return records
@@ -0,0 +1,130 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import List, Optional
4
+ from pydantic import BaseModel
5
+
6
+ from cognee.infrastructure.llm import LLMGateway
7
+ from cognee.infrastructure.llm.prompts.read_query_prompt import read_query_prompt
8
+ from cognee.modules.graph.utils import resolve_edges_to_text
9
+ from cognee.shared.logging_utils import get_logger
10
+
11
+ from cognee.modules.retrieval.graph_completion_cot_retriever import GraphCompletionCotRetriever
12
+ from .models import FeedbackEnrichment
13
+
14
+
15
class ImprovedAnswerResponse(BaseModel):
    """Response model for improved answer generation containing answer and explanation."""

    # The improved answer text produced by the structured LLM completion.
    answer: str
    # Explanation accompanying the improved answer.
    explanation: str
20
+
21
+
22
+ logger = get_logger("generate_improved_answers")
23
+
24
+
25
+ def _validate_input_data(enrichments: List[FeedbackEnrichment]) -> bool:
26
+ """Validate that input contains required fields for all enrichments."""
27
+ return all(
28
+ enrichment.question is not None
29
+ and enrichment.original_answer is not None
30
+ and enrichment.context is not None
31
+ and enrichment.feedback_text is not None
32
+ and enrichment.feedback_id is not None
33
+ and enrichment.interaction_id is not None
34
+ for enrichment in enrichments
35
+ )
36
+
37
+
38
def _render_reaction_prompt(
    question: str,
    context: str,
    wrong_answer: str,
    negative_feedback: str,
    prompt_location: str = "feedback_reaction_prompt.txt",
) -> str:
    """Render the feedback reaction prompt with provided variables.

    Args:
        question: The original user question.
        context: The context summary stored on the enrichment.
        wrong_answer: The original (negatively rated) answer.
        negative_feedback: The feedback text describing what was wrong.
        prompt_location: Prompt template file to render; defaults to the
            standard feedback reaction prompt for backward compatibility.
    """
    prompt_template = read_query_prompt(prompt_location)
    return prompt_template.format(
        question=question,
        context=context,
        wrong_answer=wrong_answer,
        negative_feedback=negative_feedback,
    )


async def _generate_improved_answer_for_single_interaction(
    enrichment: FeedbackEnrichment, retriever, reaction_prompt_location: str
) -> Optional[FeedbackEnrichment]:
    """Generate an improved answer for a single enrichment via the retriever.

    Fills improved_answer, new_context and explanation on the enrichment.
    Returns the enrichment on success, or None when retrieval/completion fails.
    """
    try:
        # Bug fix: reaction_prompt_location was accepted but never used — the
        # prompt path used to be hard-coded inside _render_reaction_prompt.
        query_text = _render_reaction_prompt(
            enrichment.question,
            enrichment.context,
            enrichment.original_answer,
            enrichment.feedback_text,
            reaction_prompt_location,
        )

        retrieved_context = await retriever.get_context(query_text)
        completion = await retriever.get_structured_completion(
            query=query_text,
            context=retrieved_context,
            response_model=ImprovedAnswerResponse,
            max_iter=4,
        )
        # The retriever converts the retrieved edge context into readable text.
        new_context_text = await retriever.resolve_edges_to_text(retrieved_context)

        if completion:
            enrichment.improved_answer = completion.answer
            enrichment.new_context = new_context_text
            enrichment.explanation = completion.explanation
            return enrichment

        logger.warning(
            "Failed to get structured completion from retriever", question=enrichment.question
        )
        return None
    except Exception as exc:  # noqa: BLE001
        logger.error(
            "Failed to generate improved answer",
            error=str(exc),
            question=enrichment.question,
        )
        return None
90
+
91
+
92
async def generate_improved_answers(
    enrichments: List[FeedbackEnrichment],
    top_k: int = 20,
    reaction_prompt_location: str = "feedback_reaction_prompt.txt",
) -> List[FeedbackEnrichment]:
    """Generate improved answers using the CoT retriever and LLM."""
    if not enrichments:
        logger.info("No enrichments provided; returning empty list")
        return []

    if not _validate_input_data(enrichments):
        logger.error("Input data validation failed; missing required fields")
        return []

    cot_retriever = GraphCompletionCotRetriever(
        top_k=top_k,
        save_interaction=False,
        user_prompt_path="graph_context_for_question.txt",
        system_prompt_path="answer_simple_question.txt",
    )

    results: List[FeedbackEnrichment] = []
    for enrichment in enrichments:
        updated = await _generate_improved_answer_for_single_interaction(
            enrichment, cot_retriever, reaction_prompt_location
        )
        if updated:
            results.append(updated)
        else:
            logger.warning(
                "Failed to generate improved answer",
                question=enrichment.question,
                interaction_id=enrichment.interaction_id,
            )

    logger.info("Generated improved answers", count=len(results))
    return results
@@ -0,0 +1,67 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import List, Tuple
4
+ from uuid import UUID
5
+
6
+ from cognee.infrastructure.databases.graph import get_graph_engine
7
+ from cognee.tasks.storage import index_graph_edges
8
+ from cognee.shared.logging_utils import get_logger
9
+
10
+ from .models import FeedbackEnrichment
11
+
12
+
13
+ logger = get_logger("link_enrichments_to_feedback")
14
+
15
+
16
+ def _create_edge_tuple(
17
+ source_id: UUID, target_id: UUID, relationship_name: str
18
+ ) -> Tuple[UUID, UUID, str, dict]:
19
+ """Create an edge tuple with proper properties structure."""
20
+ return (
21
+ source_id,
22
+ target_id,
23
+ relationship_name,
24
+ {
25
+ "relationship_name": relationship_name,
26
+ "source_node_id": source_id,
27
+ "target_node_id": target_id,
28
+ "ontology_valid": False,
29
+ },
30
+ )
31
+
32
+
33
async def link_enrichments_to_feedback(
    enrichments: List[FeedbackEnrichment],
) -> List[FeedbackEnrichment]:
    """Manually create edges from enrichments to original feedback/interaction nodes."""
    if not enrichments:
        logger.info("No enrichments provided; returning empty list")
        return []

    edges = []
    for enrichment in enrichments:
        if enrichment.id and enrichment.feedback_id:
            edges.append(
                _create_edge_tuple(enrichment.id, enrichment.feedback_id, "enriches_feedback")
            )
        if enrichment.id and enrichment.interaction_id:
            edges.append(
                _create_edge_tuple(enrichment.id, enrichment.interaction_id, "improves_interaction")
            )

    if edges:
        graph_engine = await get_graph_engine()
        await graph_engine.add_edges(edges)
        # Keep the vector index in sync with the newly added edges.
        await index_graph_edges(edges)
        logger.info("Linking enrichments to feedback", edge_count=len(edges))

    logger.info("Linked enrichments", enrichment_count=len(enrichments))
    return enrichments
@@ -0,0 +1,26 @@
1
+ from typing import List, Optional, Union
2
+ from uuid import UUID
3
+
4
+ from cognee.infrastructure.engine import DataPoint
5
+ from cognee.modules.engine.models import Entity, NodeSet
6
+ from cognee.tasks.temporal_graph.models import Event
7
+
8
+
9
class FeedbackEnrichment(DataPoint):
    """Minimal DataPoint for feedback enrichment that works with extract_graph_from_data."""

    # Educational report text generated for this enrichment; the only embedded
    # field per `metadata["index_fields"]`.
    text: str
    # Entities/events attached to this enrichment — presumably filled by the
    # graph-extraction step; verify against extract_graph_from_data.
    contains: Optional[List[Union[Entity, Event]]] = None
    # Declares which fields the vector indexer should embed.
    metadata: dict = {"index_fields": ["text"]}

    # Data copied from the originating interaction.
    question: str
    original_answer: str
    improved_answer: str
    # Graph node ids of the originating feedback and interaction nodes.
    feedback_id: UUID
    interaction_id: UUID
    # NodeSet grouping; populated later in the pipeline.
    belongs_to_set: Optional[List[NodeSet]] = None

    # Supporting details filled in by the feedback pipeline tasks.
    context: str = ""
    feedback_text: str = ""
    new_context: str = ""
    explanation: str = ""
@@ -4,6 +4,7 @@ from pydantic import BaseModel
4
4
 
5
5
  from cognee.infrastructure.databases.graph import get_graph_engine
6
6
  from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
7
+ from cognee.tasks.storage import index_graph_edges
7
8
  from cognee.tasks.storage.add_data_points import add_data_points
8
9
  from cognee.modules.ontology.ontology_config import Config
9
10
  from cognee.modules.ontology.get_default_ontology_resolver import (
@@ -88,6 +89,7 @@ async def integrate_chunk_graphs(
88
89
 
89
90
  if len(graph_edges) > 0:
90
91
  await graph_engine.add_edges(graph_edges)
92
+ await index_graph_edges(graph_edges)
91
93
 
92
94
  return data_chunks
93
95
 
@@ -1,6 +1,6 @@
1
1
  import os
2
2
  from urllib.parse import urlparse
3
- from typing import List, Tuple
3
+ from typing import Any, List, Tuple
4
4
  from pathlib import Path
5
5
  import tempfile
6
6
 
@@ -34,7 +34,8 @@ async def pull_from_s3(file_path, destination_file) -> None:
34
34
 
35
35
 
36
36
  async def data_item_to_text_file(
37
- data_item_path: str, preferred_loaders: List[str]
37
+ data_item_path: str,
38
+ preferred_loaders: dict[str, dict[str, Any]] = None,
38
39
  ) -> Tuple[str, LoaderInterface]:
39
40
  if isinstance(data_item_path, str):
40
41
  parsed_url = urlparse(data_item_path)
@@ -74,6 +75,5 @@ async def data_item_to_text_file(
74
75
  )
75
76
  else:
76
77
  raise IngestionError(message="Local files are not accepted.")
77
-
78
78
  # data is not a supported type
79
79
  raise IngestionError(message=f"Data type not supported: {type(data_item_path)}")
@@ -6,6 +6,7 @@ from typing import Union, BinaryIO, Any, List, Optional
6
6
  import cognee.modules.ingestion as ingestion
7
7
  from cognee.infrastructure.databases.relational import get_relational_engine
8
8
  from cognee.modules.data.models import Data
9
+ from cognee.modules.ingestion.exceptions import IngestionError
9
10
  from cognee.modules.users.models import User
10
11
  from cognee.modules.users.methods import get_default_user
11
12
  from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
@@ -27,7 +28,7 @@ async def ingest_data(
27
28
  user: User,
28
29
  node_set: Optional[List[str]] = None,
29
30
  dataset_id: UUID = None,
30
- preferred_loaders: List[str] = None,
31
+ preferred_loaders: dict[str, dict[str, Any]] = None,
31
32
  ):
32
33
  if not user:
33
34
  user = await get_default_user()
@@ -44,7 +45,7 @@ async def ingest_data(
44
45
  user: User,
45
46
  node_set: Optional[List[str]] = None,
46
47
  dataset_id: UUID = None,
47
- preferred_loaders: List[str] = None,
48
+ preferred_loaders: dict[str, dict[str, Any]] = None,
48
49
  ):
49
50
  new_datapoints = []
50
51
  existing_data_points = []
@@ -77,22 +78,27 @@ async def ingest_data(
77
78
  dataset_data_map = {str(data.id): True for data in dataset_data}
78
79
 
79
80
  for data_item in data:
80
- # Get file path of data item or create a file it doesn't exist
81
+ # Get file path of data item or create a file if it doesn't exist
81
82
  original_file_path = await save_data_item_to_storage(data_item)
82
-
83
83
  # Transform file path to be OS usable
84
84
  actual_file_path = get_data_file_path(original_file_path)
85
85
 
86
86
  # Store all input data as text files in Cognee data storage
87
87
  cognee_storage_file_path, loader_engine = await data_item_to_text_file(
88
- actual_file_path, preferred_loaders
88
+ actual_file_path,
89
+ preferred_loaders,
89
90
  )
90
91
 
92
+ if loader_engine is None:
93
+ raise IngestionError("Loader cannot be None")
94
+
91
95
  # Find metadata from original file
96
+ # Standard flow: extract metadata from both original and stored files
92
97
  async with open_data_file(original_file_path) as file:
93
98
  classified_data = ingestion.classify(file)
94
99
 
95
100
  # data_id is the hash of original file contents + owner id to avoid duplicate data
101
+
96
102
  data_id = ingestion.identify(classified_data, user)
97
103
  original_file_metadata = classified_data.get_metadata()
98
104
 
@@ -8,6 +8,9 @@ from cognee.modules.ingestion import save_data_to_file
8
8
  from cognee.shared.logging_utils import get_logger
9
9
  from pydantic_settings import BaseSettings, SettingsConfigDict
10
10
 
11
+ from cognee.tasks.web_scraper.utils import fetch_page_content
12
+
13
+
11
14
  logger = get_logger()
12
15
 
13
16
 
@@ -27,6 +30,12 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any]) -> str
27
30
 
28
31
  return await get_data_from_llama_index(data_item)
29
32
 
33
+ if "docling" in str(type(data_item)):
34
+ from docling_core.types import DoclingDocument
35
+
36
+ if isinstance(data_item, DoclingDocument):
37
+ data_item = data_item.export_to_text()
38
+
30
39
  # data is a file object coming from upload.
31
40
  if hasattr(data_item, "file"):
32
41
  return await save_data_to_file(data_item.file, filename=data_item.filename)
@@ -48,7 +57,9 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any]) -> str
48
57
  # data is s3 file path
49
58
  if parsed_url.scheme == "s3":
50
59
  return data_item
51
-
60
+ elif parsed_url.scheme == "http" or parsed_url.scheme == "https":
61
+ urls_to_page_contents = await fetch_page_content(data_item)
62
+ return await save_data_to_file(urls_to_page_contents[data_item], file_extension="html")
52
63
  # data is local file path
53
64
  elif parsed_url.scheme == "file":
54
65
  if settings.accept_local_file_path: