cognee 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. cognee/__init__.py +2 -0
  2. cognee/api/client.py +28 -3
  3. cognee/api/health.py +10 -13
  4. cognee/api/v1/add/add.py +3 -1
  5. cognee/api/v1/add/routers/get_add_router.py +12 -37
  6. cognee/api/v1/cloud/routers/__init__.py +1 -0
  7. cognee/api/v1/cloud/routers/get_checks_router.py +23 -0
  8. cognee/api/v1/cognify/code_graph_pipeline.py +9 -4
  9. cognee/api/v1/cognify/cognify.py +50 -3
  10. cognee/api/v1/cognify/routers/get_cognify_router.py +1 -1
  11. cognee/api/v1/datasets/routers/get_datasets_router.py +15 -4
  12. cognee/api/v1/memify/__init__.py +0 -0
  13. cognee/api/v1/memify/routers/__init__.py +1 -0
  14. cognee/api/v1/memify/routers/get_memify_router.py +100 -0
  15. cognee/api/v1/notebooks/routers/__init__.py +1 -0
  16. cognee/api/v1/notebooks/routers/get_notebooks_router.py +96 -0
  17. cognee/api/v1/search/routers/get_search_router.py +20 -1
  18. cognee/api/v1/search/search.py +11 -4
  19. cognee/api/v1/sync/__init__.py +17 -0
  20. cognee/api/v1/sync/routers/__init__.py +3 -0
  21. cognee/api/v1/sync/routers/get_sync_router.py +241 -0
  22. cognee/api/v1/sync/sync.py +877 -0
  23. cognee/api/v1/ui/__init__.py +1 -0
  24. cognee/api/v1/ui/ui.py +529 -0
  25. cognee/api/v1/users/routers/get_auth_router.py +13 -1
  26. cognee/base_config.py +10 -1
  27. cognee/cli/_cognee.py +93 -0
  28. cognee/infrastructure/databases/graph/config.py +10 -4
  29. cognee/infrastructure/databases/graph/kuzu/adapter.py +135 -0
  30. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +89 -0
  31. cognee/infrastructure/databases/relational/__init__.py +2 -0
  32. cognee/infrastructure/databases/relational/get_async_session.py +15 -0
  33. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +6 -1
  34. cognee/infrastructure/databases/relational/with_async_session.py +25 -0
  35. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +1 -1
  36. cognee/infrastructure/databases/vector/config.py +13 -6
  37. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +1 -1
  38. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +2 -6
  39. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +4 -1
  40. cognee/infrastructure/files/storage/LocalFileStorage.py +9 -0
  41. cognee/infrastructure/files/storage/S3FileStorage.py +5 -0
  42. cognee/infrastructure/files/storage/StorageManager.py +7 -1
  43. cognee/infrastructure/files/storage/storage.py +16 -0
  44. cognee/infrastructure/llm/LLMGateway.py +18 -0
  45. cognee/infrastructure/llm/config.py +4 -2
  46. cognee/infrastructure/llm/prompts/extract_query_time.txt +15 -0
  47. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +25 -0
  48. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +30 -0
  49. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +2 -0
  50. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py +44 -0
  51. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py +1 -0
  52. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py +46 -0
  53. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +25 -1
  54. cognee/infrastructure/utils/run_sync.py +8 -1
  55. cognee/modules/chunking/models/DocumentChunk.py +4 -3
  56. cognee/modules/cloud/exceptions/CloudApiKeyMissingError.py +15 -0
  57. cognee/modules/cloud/exceptions/CloudConnectionError.py +15 -0
  58. cognee/modules/cloud/exceptions/__init__.py +2 -0
  59. cognee/modules/cloud/operations/__init__.py +1 -0
  60. cognee/modules/cloud/operations/check_api_key.py +25 -0
  61. cognee/modules/data/deletion/prune_system.py +1 -1
  62. cognee/modules/data/methods/check_dataset_name.py +1 -1
  63. cognee/modules/data/methods/get_dataset_data.py +1 -1
  64. cognee/modules/data/methods/load_or_create_datasets.py +1 -1
  65. cognee/modules/engine/models/Event.py +16 -0
  66. cognee/modules/engine/models/Interval.py +8 -0
  67. cognee/modules/engine/models/Timestamp.py +13 -0
  68. cognee/modules/engine/models/__init__.py +3 -0
  69. cognee/modules/engine/utils/__init__.py +2 -0
  70. cognee/modules/engine/utils/generate_event_datapoint.py +46 -0
  71. cognee/modules/engine/utils/generate_timestamp_datapoint.py +51 -0
  72. cognee/modules/graph/cognee_graph/CogneeGraph.py +2 -2
  73. cognee/modules/graph/utils/__init__.py +1 -0
  74. cognee/modules/graph/utils/resolve_edges_to_text.py +71 -0
  75. cognee/modules/memify/__init__.py +1 -0
  76. cognee/modules/memify/memify.py +118 -0
  77. cognee/modules/notebooks/methods/__init__.py +5 -0
  78. cognee/modules/notebooks/methods/create_notebook.py +26 -0
  79. cognee/modules/notebooks/methods/delete_notebook.py +13 -0
  80. cognee/modules/notebooks/methods/get_notebook.py +21 -0
  81. cognee/modules/notebooks/methods/get_notebooks.py +18 -0
  82. cognee/modules/notebooks/methods/update_notebook.py +17 -0
  83. cognee/modules/notebooks/models/Notebook.py +53 -0
  84. cognee/modules/notebooks/models/__init__.py +1 -0
  85. cognee/modules/notebooks/operations/__init__.py +1 -0
  86. cognee/modules/notebooks/operations/run_in_local_sandbox.py +55 -0
  87. cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +19 -3
  88. cognee/modules/pipelines/operations/pipeline.py +1 -0
  89. cognee/modules/pipelines/operations/run_tasks.py +17 -41
  90. cognee/modules/retrieval/base_graph_retriever.py +18 -0
  91. cognee/modules/retrieval/base_retriever.py +1 -1
  92. cognee/modules/retrieval/code_retriever.py +8 -0
  93. cognee/modules/retrieval/coding_rules_retriever.py +31 -0
  94. cognee/modules/retrieval/completion_retriever.py +9 -3
  95. cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -0
  96. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +23 -14
  97. cognee/modules/retrieval/graph_completion_cot_retriever.py +21 -11
  98. cognee/modules/retrieval/graph_completion_retriever.py +32 -65
  99. cognee/modules/retrieval/graph_summary_completion_retriever.py +3 -1
  100. cognee/modules/retrieval/insights_retriever.py +14 -3
  101. cognee/modules/retrieval/summaries_retriever.py +1 -1
  102. cognee/modules/retrieval/temporal_retriever.py +152 -0
  103. cognee/modules/retrieval/utils/brute_force_triplet_search.py +7 -32
  104. cognee/modules/retrieval/utils/completion.py +10 -3
  105. cognee/modules/search/methods/get_search_type_tools.py +168 -0
  106. cognee/modules/search/methods/no_access_control_search.py +47 -0
  107. cognee/modules/search/methods/search.py +219 -139
  108. cognee/modules/search/types/SearchResult.py +21 -0
  109. cognee/modules/search/types/SearchType.py +2 -0
  110. cognee/modules/search/types/__init__.py +1 -0
  111. cognee/modules/search/utils/__init__.py +2 -0
  112. cognee/modules/search/utils/prepare_search_result.py +41 -0
  113. cognee/modules/search/utils/transform_context_to_graph.py +38 -0
  114. cognee/modules/sync/__init__.py +1 -0
  115. cognee/modules/sync/methods/__init__.py +23 -0
  116. cognee/modules/sync/methods/create_sync_operation.py +53 -0
  117. cognee/modules/sync/methods/get_sync_operation.py +107 -0
  118. cognee/modules/sync/methods/update_sync_operation.py +248 -0
  119. cognee/modules/sync/models/SyncOperation.py +142 -0
  120. cognee/modules/sync/models/__init__.py +3 -0
  121. cognee/modules/users/__init__.py +0 -1
  122. cognee/modules/users/methods/__init__.py +4 -1
  123. cognee/modules/users/methods/create_user.py +26 -1
  124. cognee/modules/users/methods/get_authenticated_user.py +36 -42
  125. cognee/modules/users/methods/get_default_user.py +3 -1
  126. cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +2 -1
  127. cognee/root_dir.py +19 -0
  128. cognee/shared/logging_utils.py +1 -1
  129. cognee/tasks/codingagents/__init__.py +0 -0
  130. cognee/tasks/codingagents/coding_rule_associations.py +127 -0
  131. cognee/tasks/ingestion/save_data_item_to_storage.py +23 -0
  132. cognee/tasks/memify/__init__.py +2 -0
  133. cognee/tasks/memify/extract_subgraph.py +7 -0
  134. cognee/tasks/memify/extract_subgraph_chunks.py +11 -0
  135. cognee/tasks/repo_processor/get_repo_file_dependencies.py +52 -27
  136. cognee/tasks/temporal_graph/__init__.py +1 -0
  137. cognee/tasks/temporal_graph/add_entities_to_event.py +85 -0
  138. cognee/tasks/temporal_graph/enrich_events.py +34 -0
  139. cognee/tasks/temporal_graph/extract_events_and_entities.py +32 -0
  140. cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py +41 -0
  141. cognee/tasks/temporal_graph/models.py +49 -0
  142. cognee/tests/test_kuzu.py +4 -4
  143. cognee/tests/test_neo4j.py +4 -4
  144. cognee/tests/test_permissions.py +3 -3
  145. cognee/tests/test_relational_db_migration.py +7 -5
  146. cognee/tests/test_search_db.py +18 -24
  147. cognee/tests/test_temporal_graph.py +167 -0
  148. cognee/tests/unit/api/__init__.py +1 -0
  149. cognee/tests/unit/api/test_conditional_authentication_endpoints.py +246 -0
  150. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +18 -2
  151. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +13 -16
  152. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +11 -16
  153. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +5 -4
  154. cognee/tests/unit/modules/retrieval/insights_retriever_test.py +4 -2
  155. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +18 -2
  156. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +225 -0
  157. cognee/tests/unit/modules/users/__init__.py +1 -0
  158. cognee/tests/unit/modules/users/test_conditional_authentication.py +277 -0
  159. cognee/tests/unit/processing/utils/utils_test.py +20 -1
  160. {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/METADATA +8 -6
  161. {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/RECORD +165 -90
  162. cognee/tests/unit/modules/search/search_methods_test.py +0 -225
  163. {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/WHEEL +0 -0
  164. {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/entry_points.txt +0 -0
  165. {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/licenses/LICENSE +0 -0
  166. {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,118 @@
1
+ from typing import Union, Optional, List, Type, Any
2
+ from uuid import UUID
3
+
4
+ from cognee.shared.logging_utils import get_logger
5
+
6
+ from cognee.modules.retrieval.utils.brute_force_triplet_search import get_memory_fragment
7
+ from cognee.context_global_variables import set_database_global_context_variables
8
+ from cognee.modules.engine.models.node_set import NodeSet
9
+ from cognee.modules.pipelines import run_pipeline
10
+ from cognee.modules.pipelines.tasks.task import Task
11
+ from cognee.modules.users.models import User
12
+ from cognee.modules.pipelines.layers.resolve_authorized_user_datasets import (
13
+ resolve_authorized_user_datasets,
14
+ )
15
+ from cognee.modules.pipelines.layers.reset_dataset_pipeline_run_status import (
16
+ reset_dataset_pipeline_run_status,
17
+ )
18
+ from cognee.modules.engine.operations.setup import setup
19
+ from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor
20
+ from cognee.tasks.memify.extract_subgraph_chunks import extract_subgraph_chunks
21
+ from cognee.tasks.codingagents.coding_rule_associations import (
22
+ add_rule_associations,
23
+ )
24
+
25
+ logger = get_logger("memify")
26
+
27
+
28
async def memify(
    extraction_tasks: Optional[Union[List[Task], List[str]]] = None,
    enrichment_tasks: Optional[Union[List[Task], List[str]]] = None,
    data: Optional[Any] = None,
    dataset: Union[str, UUID] = "main_dataset",
    user: Optional[User] = None,
    node_type: Optional[Type] = NodeSet,
    node_name: Optional[List[str]] = None,
    vector_db_config: Optional[dict] = None,
    graph_db_config: Optional[dict] = None,
    run_in_background: bool = False,
):
    """
    Enrichment pipeline in Cognee, can work with already built graphs. If no data is provided existing knowledge graph will be used as data,
    custom data can also be provided instead which can be processed with provided extraction and enrichment tasks.

    Provided tasks and data will be arranged to run the Cognee pipeline and execute graph enrichment/creation.

    This is the core processing step in Cognee that converts raw text and documents
    into an intelligent knowledge graph. It analyzes content, extracts entities and
    relationships, and creates semantic connections for enhanced search and reasoning.

    Args:
        extraction_tasks: List of Cognee Tasks to execute for graph/data extraction.
            Defaults to a single ``extract_subgraph_chunks`` task when omitted.
        enrichment_tasks: List of Cognee Tasks to handle enrichment of provided graph/data from extraction tasks.
            Defaults to a coding-rule association task when omitted.
        data: The data to ingest. Can be anything when custom extraction and enrichment tasks are used.
            Data provided here will be forwarded to the first extraction task in the pipeline as input.
            If no data is provided the whole graph (or subgraph if node_name/node_type is specified) will be forwarded
        dataset: Dataset name or dataset uuid to process.
        user: User context for authentication and data access. Uses default if None.
        node_type: Filter graph to specific entity types (for advanced filtering). Used when no data is provided.
        node_name: Filter graph to specific named entities (for targeted search). Used when no data is provided.
        vector_db_config: Custom vector database configuration for embeddings storage.
        graph_db_config: Custom graph database configuration for relationship storage.
        run_in_background: If True, starts processing asynchronously and returns immediately.
            If False, waits for completion before returning.
            Background mode recommended for large datasets (>100MB).
            Use pipeline_run_id from return value to monitor progress.

    Returns:
        Whatever the chosen pipeline executor returns (a pipeline-run handle
        when run_in_background is True; the finished run otherwise).
    """

    # Use default coding rules tasks if no tasks were provided
    if not extraction_tasks:
        extraction_tasks = [Task(extract_subgraph_chunks)]
    if not enrichment_tasks:
        enrichment_tasks = [
            Task(
                add_rule_associations,
                rules_nodeset_name="coding_agent_rules",
                task_config={"batch_size": 1},
            )
        ]

    await setup()

    # Resolve the effective user and the datasets they may touch; memify
    # operates on a single dataset, so only the first authorized one is used.
    user, authorized_dataset_list = await resolve_authorized_user_datasets(dataset, user)
    authorized_dataset = authorized_dataset_list[0]

    if not data:
        # Will only be used if ENABLE_BACKEND_ACCESS_CONTROL is set to True
        await set_database_global_context_variables(
            authorized_dataset.id, authorized_dataset.owner_id
        )

        # No explicit data: feed the (optionally filtered) existing graph in.
        memory_fragment = await get_memory_fragment(node_type=node_type, node_name=node_name)
        # Subgraphs should be a single element in the list to represent one data item
        data = [memory_fragment]

    memify_tasks = [
        *extraction_tasks,  # Unpack tasks provided to memify pipeline
        *enrichment_tasks,
    ]

    # Clear previous memify run state so this run starts fresh.
    await reset_dataset_pipeline_run_status(
        authorized_dataset.id, user, pipeline_names=["memify_pipeline"]
    )

    # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
    pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background)

    # Run the run_pipeline in the background or blocking based on executor
    return await pipeline_executor_func(
        pipeline=run_pipeline,
        tasks=memify_tasks,
        user=user,
        data=data,
        datasets=authorized_dataset.id,
        vector_db_config=vector_db_config,
        graph_db_config=graph_db_config,
        incremental_loading=False,
        pipeline_name="memify_pipeline",
    )
@@ -0,0 +1,5 @@
1
+ from .get_notebook import get_notebook
2
+ from .get_notebooks import get_notebooks
3
+ from .create_notebook import create_notebook
4
+ from .update_notebook import update_notebook
5
+ from .delete_notebook import delete_notebook
@@ -0,0 +1,26 @@
1
+ from uuid import UUID
2
+ from typing import List, Optional
3
+ from sqlalchemy.ext.asyncio import AsyncSession
4
+
5
+ from cognee.infrastructure.databases.relational import with_async_session
6
+
7
+ from ..models.Notebook import Notebook, NotebookCell
8
+
9
+
10
@with_async_session
async def create_notebook(
    user_id: UUID,
    notebook_name: str,
    cells: Optional[List[NotebookCell]],
    deletable: Optional[bool],
    session: AsyncSession,
) -> Notebook:
    """Create and persist a notebook owned by *user_id*.

    Args:
        user_id: Owner of the new notebook.
        notebook_name: Display name of the notebook.
        cells: Initial cells, or None for an empty notebook.
        deletable: Whether the notebook may later be deleted; defaults to
            True when None is passed.
        session: Database session (injected by ``with_async_session``).

    Returns:
        The committed Notebook instance.
    """
    notebook = Notebook(
        name=notebook_name,
        owner_id=user_id,
        cells=cells,
        # Bug fix: ``deletable or True`` always evaluated to True, silently
        # discarding an explicit ``deletable=False``. Only fall back to the
        # default when the caller passed None.
        deletable=True if deletable is None else deletable,
    )

    session.add(notebook)

    await session.commit()

    return notebook
@@ -0,0 +1,13 @@
1
+ from sqlalchemy.ext.asyncio import AsyncSession
2
+
3
+ from cognee.infrastructure.databases.relational import with_async_session
4
+
5
+ from ..models.Notebook import Notebook
6
+
7
+
8
@with_async_session
async def delete_notebook(
    notebook: Notebook,
    session: AsyncSession,
) -> None:
    """Delete *notebook* and commit the change.

    NOTE(review): the sibling ``create_notebook`` commits explicitly, which
    suggests ``with_async_session`` does not commit on exit; without the
    commit below the delete would be rolled back when the session closes —
    confirm against the decorator's implementation.
    """
    await session.delete(notebook)
    await session.commit()
@@ -0,0 +1,21 @@
1
+ from uuid import UUID
2
+ from typing import Optional
3
+ from sqlalchemy import select
4
+ from sqlalchemy.ext.asyncio import AsyncSession
5
+
6
+ from cognee.infrastructure.databases.relational import with_async_session
7
+
8
+ from ..models.Notebook import Notebook
9
+
10
+
11
@with_async_session
async def get_notebook(
    notebook_id: UUID,
    user_id: UUID,
    session: AsyncSession,
) -> Optional[Notebook]:
    """Fetch a single notebook by id, scoped to its owner.

    Returns None when no notebook matches both the id and the owner —
    a notebook belonging to another user is treated as not found.
    """
    result = await session.execute(
        # Bug fix: the original combined the two column expressions with the
        # Python ``and`` operator, which does not build a SQL AND — it
        # short-circuits on the truthiness of the first ClauseElement, so
        # effectively only one condition was applied. Passing both criteria
        # to .where() joins them with SQL AND.
        select(Notebook).where(Notebook.owner_id == user_id, Notebook.id == notebook_id)
    )

    return result.scalar()
@@ -0,0 +1,18 @@
1
+ from uuid import UUID
2
+ from typing import List
3
+ from sqlalchemy import select
4
+ from sqlalchemy.ext.asyncio import AsyncSession
5
+
6
+ from cognee.infrastructure.databases.relational import with_async_session
7
+
8
+ from ..models.Notebook import Notebook
9
+
10
+
11
@with_async_session
async def get_notebooks(
    user_id: UUID,
    session: AsyncSession,
) -> List[Notebook]:
    """Return every notebook owned by *user_id* (empty list when none)."""
    query = select(Notebook).where(Notebook.owner_id == user_id)
    result = await session.execute(query)
    notebooks = result.scalars().all()
    return list(notebooks)
@@ -0,0 +1,17 @@
1
+ from typing import Callable, AsyncContextManager
2
+ from sqlalchemy.ext.asyncio import AsyncSession
3
+
4
+ from cognee.infrastructure.databases.relational import with_async_session
5
+
6
+ from ..models.Notebook import Notebook
7
+
8
+
9
@with_async_session
async def update_notebook(
    notebook: Notebook,
    session: AsyncSession,
) -> Notebook:
    """Persist pending changes to *notebook*.

    Attaches the instance to the session when it is not already tracked,
    then commits.

    NOTE(review): the sibling ``create_notebook`` commits explicitly, which
    suggests ``with_async_session`` does not commit on exit; without the
    commit below the update would be rolled back when the session closes —
    confirm against the decorator's implementation.
    """
    if notebook not in session:
        session.add(notebook)

    await session.commit()

    return notebook
@@ -0,0 +1,53 @@
1
+ import json
2
+ from typing import List, Literal
3
+ from uuid import uuid4, UUID as UUID_t
4
+ from pydantic import BaseModel, ConfigDict
5
+ from datetime import datetime, timezone
6
+ from fastapi.encoders import jsonable_encoder
7
+ from sqlalchemy import Boolean, Column, DateTime, JSON, UUID, String, TypeDecorator
8
+ from sqlalchemy.orm import mapped_column, Mapped
9
+
10
+ from cognee.infrastructure.databases.relational import Base
11
+
12
+
13
class NotebookCell(BaseModel):
    """A single notebook cell: either markdown text or executable code."""

    # Cell identifier (UUID); supplied by the caller, not auto-generated here.
    id: UUID_t
    # Determines how the cell is treated: rendered ("markdown") or run ("code").
    type: Literal["markdown", "code"]
    name: str
    content: str

    model_config = ConfigDict(arbitrary_types_allowed=True)
20
+
21
+
22
class NotebookCellList(TypeDecorator):
    """SQLAlchemy column type storing a list of NotebookCell as JSON.

    Each cell is serialized to its own JSON string inside a JSON array
    (i.e. double-encoded), and decoded back into NotebookCell instances
    when rows are loaded.
    """

    impl = JSON
    cache_ok = True  # stateless type: safe for SQLAlchemy's statement cache

    def process_bind_param(self, notebook_cells, dialect):
        # Normalize None to an empty list so the column never stores NULL.
        if notebook_cells is None:
            return []
        # Cells may arrive as NotebookCell instances or as already-encoded
        # JSON strings; only the former need encoding.
        return [
            json.dumps(jsonable_encoder(cell)) if isinstance(cell, NotebookCell) else cell
            for cell in notebook_cells
        ]

    def process_result_value(self, cells_json_list, dialect):
        # Mirror of process_bind_param: decode each stored JSON string back
        # into a NotebookCell model.
        if cells_json_list is None:
            return []
        return [NotebookCell(**json.loads(json_string)) for json_string in cells_json_list]
38
+
39
+
40
class Notebook(Base):
    """Relational model for a user-owned notebook (an ordered list of cells)."""

    __tablename__ = "notebooks"

    id: Mapped[UUID_t] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid4)

    # Owning user; indexed because notebooks are always queried per owner.
    owner_id: Mapped[UUID_t] = mapped_column(UUID(as_uuid=True), index=True)

    name: Mapped[str] = mapped_column(String, nullable=False)

    # Cells are persisted as JSON via the NotebookCellList type decorator.
    cells: Mapped[List[NotebookCell]] = mapped_column(NotebookCellList, nullable=False)

    # When False, callers are expected to refuse deletion of this notebook.
    deletable: Mapped[bool] = mapped_column(Boolean, default=True)

    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
@@ -0,0 +1 @@
1
+ from .Notebook import Notebook, NotebookCell
@@ -0,0 +1 @@
1
+ from .run_in_local_sandbox import run_in_local_sandbox
@@ -0,0 +1,55 @@
1
+ import io
2
+ import sys
3
+ import traceback
4
+
5
+
6
def wrap_in_async_handler(user_code: str) -> str:
    """Wrap *user_code* in an async function so top-level ``await`` is legal.

    The snippet is indented into ``__user_main__``; its locals are copied to
    globals so later cells can see the bindings, and the coroutine is driven
    synchronously via ``run_sync``.

    Args:
        user_code: Raw (possibly multi-line) snippet to wrap.

    Returns:
        Source text of the wrapped program, ready for ``exec``.
    """
    body_lines = user_code.strip().split("\n")
    # Bug fix: an empty/whitespace-only snippet produced a function with no
    # body, which raises IndentationError at exec time. Substitute a no-op.
    if body_lines == [""]:
        body_lines = ["pass"]
    return (
        "from cognee.infrastructure.utils.run_sync import run_sync\n\n"
        "async def __user_main__():\n"
        + "\n".join("    " + line for line in body_lines)
        + "\n"
        "    globals().update(locals())\n\n"
        "run_sync(__user_main__())\n"
    )
15
+
16
+
17
def run_in_local_sandbox(code, environment=None):
    """Execute a (possibly async) code snippet in-process, capturing output.

    The snippet is wrapped by ``wrap_in_async_handler`` so top-level
    ``await`` works, then run with ``exec``. ``print`` inside the snippet is
    replaced so its arguments are collected instead of written to stdout.

    NOTE(review): this is NOT a security sandbox — ``exec`` runs the snippet
    with full interpreter privileges; only feed it trusted (local user) code.

    Args:
        code: Snippet to run. Non-breaking spaces (\\xa0, common in
            rich-text pastes) are converted to newlines first.
        environment: Optional globals dict for the exec call; a ``print``
            replacement is installed into it.

    Returns:
        Tuple ``(print_outputs, error)`` where ``error`` is a formatted
        traceback string, or None on success.
    """
    environment = environment or {}
    code = wrap_in_async_handler(code.replace("\xa0", "\n"))

    buffer = io.StringIO()
    saved_stdout = sys.stdout
    saved_stderr = sys.stderr
    sys.stdout = buffer
    sys.stderr = buffer

    error = None
    print_output = []

    def capture_print(output):
        print_output.append(output)

    environment["print"] = capture_print

    try:
        exec(code, environment)
    except Exception:
        error = traceback.format_exc()
    finally:
        # Bug fix: the original never saved the real stderr and restored it
        # to the original *stdout* (``sys.stderr = sys_stdout``), permanently
        # cross-wiring the process's error stream after the first call.
        sys.stdout = saved_stdout
        sys.stderr = saved_stderr

    return print_output, error
44
+
45
+
46
if __name__ == "__main__":
    # Ad-hoc manual smoke test (not run under pytest): exercises top-level
    # ``await`` plus plain assignments inside a snippet. Requires a working
    # cognee installation, so it is only meaningful when run directly.
    run_in_local_sandbox("""
import cognee

await cognee.add("Test file with some random content 3.")

a = "asd"

b = {"c": "dfgh"}
""")
@@ -1,12 +1,28 @@
1
1
  from uuid import UUID
2
+ from typing import Optional, List
3
+
2
4
  from cognee.modules.pipelines.methods import get_pipeline_runs_by_dataset, reset_pipeline_run_status
3
5
  from cognee.modules.pipelines.models.PipelineRun import PipelineRunStatus
4
6
  from cognee.modules.users.models import User
5
7
 
6
8
 
7
- async def reset_dataset_pipeline_run_status(dataset_id: UUID, user: User):
9
async def reset_dataset_pipeline_run_status(
    dataset_id: UUID, user: User, pipeline_names: Optional[list[str]] = None
):
    """Reset the dataset's pipeline runs back to a clean state.

    Runs whose status is DATASET_PROCESSING_INITIATED are left untouched.
    When *pipeline_names* is supplied, only runs whose ``pipeline_name``
    appears in that list are reset; otherwise every eligible run is.
    """
    pipeline_runs = await get_pipeline_runs_by_dataset(dataset_id)

    for run in pipeline_runs:
        # Leave freshly initiated runs alone.
        if run.status is PipelineRunStatus.DATASET_PROCESSING_INITIATED:
            continue

        # Reset when unfiltered, or when the run matches the name filter.
        if pipeline_names is None or run.pipeline_name in pipeline_names:
            await reset_pipeline_run_status(user.id, dataset_id, run.pipeline_name)
@@ -5,6 +5,7 @@ from typing import Union
5
5
  from cognee.modules.pipelines.layers.setup_and_check_environment import (
6
6
  setup_and_check_environment,
7
7
  )
8
+
8
9
  from cognee.shared.logging_utils import get_logger
9
10
  from cognee.modules.data.methods.get_dataset_data import get_dataset_data
10
11
  from cognee.modules.data.models import Data, Dataset
@@ -266,48 +266,24 @@ async def run_tasks(
266
266
  if incremental_loading:
267
267
  data = await resolve_data_directories(data)
268
268
 
269
- # TODO: Return to using async.gather for data items after Cognee release
270
- # # Create async tasks per data item that will run the pipeline for the data item
271
- # data_item_tasks = [
272
- # asyncio.create_task(
273
- # _run_tasks_data_item(
274
- # data_item,
275
- # dataset,
276
- # tasks,
277
- # pipeline_name,
278
- # pipeline_id,
279
- # pipeline_run_id,
280
- # context,
281
- # user,
282
- # incremental_loading,
283
- # )
284
- # )
285
- # for data_item in data
286
- # ]
287
- # results = await asyncio.gather(*data_item_tasks)
288
- # # Remove skipped data items from results
289
- # results = [result for result in results if result]
290
-
291
- ### TEMP sync data item handling
292
- results = []
293
- # Run the pipeline for each data_item sequentially, one after the other
294
- for data_item in data:
295
- result = await _run_tasks_data_item(
296
- data_item,
297
- dataset,
298
- tasks,
299
- pipeline_name,
300
- pipeline_id,
301
- pipeline_run_id,
302
- context,
303
- user,
304
- incremental_loading,
269
+ # Create async tasks per data item that will run the pipeline for the data item
270
+ data_item_tasks = [
271
+ asyncio.create_task(
272
+ _run_tasks_data_item(
273
+ data_item,
274
+ dataset,
275
+ tasks,
276
+ pipeline_name,
277
+ pipeline_id,
278
+ pipeline_run_id,
279
+ context,
280
+ user,
281
+ incremental_loading,
282
+ )
305
283
  )
306
-
307
- # Skip items that returned a false-y value
308
- if result:
309
- results.append(result)
310
- ### END
284
+ for data_item in data
285
+ ]
286
+ results = await asyncio.gather(*data_item_tasks)
311
287
 
312
288
  # Remove skipped data items from results
313
289
  results = [result for result in results if result]
@@ -0,0 +1,18 @@
1
+ from typing import List, Optional
2
+ from abc import ABC, abstractmethod
3
+
4
+ from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
5
+
6
+
7
class BaseGraphRetriever(ABC):
    """Base class for all graph based retrievers.

    Defines the two-phase contract: fetch graph triplets (edges) for a
    query, then generate a completion from them.
    """

    @abstractmethod
    async def get_context(self, query: str) -> List[Edge]:
        """Retrieves triplets based on the query."""
        pass

    @abstractmethod
    async def get_completion(self, query: str, context: Optional[List[Edge]] = None) -> str:
        """Generates a response using the query and optional context (triplets).

        When *context* is None, implementations fetch it themselves via
        ``get_context`` before generating.
        """
        pass
@@ -1,5 +1,5 @@
1
1
  from abc import ABC, abstractmethod
2
- from typing import Any, Optional, Callable
2
+ from typing import Any, Optional
3
3
 
4
4
 
5
5
  class BaseRetriever(ABC):
@@ -94,7 +94,15 @@ class CodeRetriever(BaseRetriever):
94
94
  {"id": res.id, "score": res.score, "payload": res.payload}
95
95
  )
96
96
 
97
+ existing_collection = []
97
98
  for collection in self.classes_and_functions_collections:
99
+ if await vector_engine.has_collection(collection):
100
+ existing_collection.append(collection)
101
+
102
+ if not existing_collection:
103
+ raise RuntimeError("No collection found for code retriever")
104
+
105
+ for collection in existing_collection:
98
106
  logger.debug(f"Searching {collection} collection with general query")
99
107
  search_results_code = await vector_engine.search(
100
108
  collection, query, limit=self.top_k
@@ -0,0 +1,31 @@
1
+ import asyncio
2
+ from functools import reduce
3
+ from typing import List, Optional
4
+ from cognee.shared.logging_utils import get_logger
5
+ from cognee.tasks.codingagents.coding_rule_associations import get_existing_rules
6
+
7
+ logger = get_logger("CodingRulesRetriever")
8
+
9
+
10
class CodingRulesRetriever:
    """Retriever for handling coding-rule based searches."""

    def __init__(self, rules_nodeset_name: Optional[List[str]] = None):
        """Initialize the retriever.

        Args:
            rules_nodeset_name: Node-set names to pull rules from. An empty
                list falls back to the default ``coding_agent_rules`` set;
                None disables rule retrieval entirely.
        """
        if isinstance(rules_nodeset_name, list) and not rules_nodeset_name:
            # If there is no provided nodeset set to coding_agent_rules
            rules_nodeset_name = ["coding_agent_rules"]

        self.rules_nodeset_name = rules_nodeset_name

    async def get_existing_rules(self, query_text):
        """Return all rules from the configured node sets, flattened.

        Bug fix: the original implicitly returned None when no node sets
        were configured; callers now always receive a list.
        """
        if not self.rules_nodeset_name:
            return []

        rules_list = await asyncio.gather(
            *[
                get_existing_rules(rules_nodeset_name=nodeset)
                for nodeset in self.rules_nodeset_name
            ]
        )

        # Flatten the per-nodeset result lists into one.
        return reduce(lambda x, y: x + y, rules_list, [])
@@ -23,12 +23,14 @@ class CompletionRetriever(BaseRetriever):
23
23
  self,
24
24
  user_prompt_path: str = "context_for_question.txt",
25
25
  system_prompt_path: str = "answer_simple_question.txt",
26
+ system_prompt: Optional[str] = None,
26
27
  top_k: Optional[int] = 1,
27
28
  ):
28
29
  """Initialize retriever with optional custom prompt paths."""
29
30
  self.user_prompt_path = user_prompt_path
30
31
  self.system_prompt_path = system_prompt_path
31
32
  self.top_k = top_k if top_k is not None else 1
33
+ self.system_prompt = system_prompt
32
34
 
33
35
  async def get_context(self, query: str) -> str:
34
36
  """
@@ -65,7 +67,7 @@ class CompletionRetriever(BaseRetriever):
65
67
  logger.error("DocumentChunk_text collection not found")
66
68
  raise NoDataError("No data found in the system, please add data first.") from error
67
69
 
68
- async def get_completion(self, query: str, context: Optional[Any] = None) -> Any:
70
+ async def get_completion(self, query: str, context: Optional[Any] = None) -> str:
69
71
  """
70
72
  Generates an LLM completion using the context.
71
73
 
@@ -88,6 +90,10 @@ class CompletionRetriever(BaseRetriever):
88
90
  context = await self.get_context(query)
89
91
 
90
92
  completion = await generate_completion(
91
- query, context, self.user_prompt_path, self.system_prompt_path
93
+ query=query,
94
+ context=context,
95
+ user_prompt_path=self.user_prompt_path,
96
+ system_prompt_path=self.system_prompt_path,
97
+ system_prompt=self.system_prompt,
92
98
  )
93
- return [completion]
99
+ return completion
@@ -49,6 +49,7 @@ class TripletSearchContextProvider(BaseContextProvider):
49
49
  tasks = [
50
50
  brute_force_triplet_search(
51
51
  query=f"{entity_text} {query}",
52
+ user=user,
52
53
  top_k=self.top_k,
53
54
  collections=self.collections,
54
55
  properties_to_project=self.properties_to_project,
@@ -1,4 +1,5 @@
1
- from typing import Any, Optional, List, Type
1
+ from typing import Optional, List, Type
2
+ from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
2
3
  from cognee.shared.logging_utils import get_logger
3
4
  from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
4
5
  from cognee.modules.retrieval.utils.completion import generate_completion
@@ -26,6 +27,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
26
27
  self,
27
28
  user_prompt_path: str = "graph_context_for_question.txt",
28
29
  system_prompt_path: str = "answer_simple_question.txt",
30
+ system_prompt: Optional[str] = None,
29
31
  top_k: Optional[int] = 5,
30
32
  node_type: Optional[Type] = None,
31
33
  node_name: Optional[List[str]] = None,
@@ -38,11 +40,15 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
38
40
  node_type=node_type,
39
41
  node_name=node_name,
40
42
  save_interaction=save_interaction,
43
+ system_prompt=system_prompt,
41
44
  )
42
45
 
43
46
  async def get_completion(
44
- self, query: str, context: Optional[Any] = None, context_extension_rounds=4
45
- ) -> List[str]:
47
+ self,
48
+ query: str,
49
+ context: Optional[List[Edge]] = None,
50
+ context_extension_rounds=4,
51
+ ) -> str:
46
52
  """
47
53
  Extends the context for a given query by retrieving related triplets and generating new
48
54
  completions based on them.
@@ -67,11 +73,12 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
67
73
  - List[str]: A list containing the generated answer based on the query and the
68
74
  extended context.
69
75
  """
70
- triplets = []
76
+ triplets = context
77
+
78
+ if triplets is None:
79
+ triplets = await self.get_context(query)
71
80
 
72
- if context is None:
73
- triplets += await self.get_triplets(query)
74
- context = await self.resolve_edges_to_text(triplets)
81
+ context_text = await self.resolve_edges_to_text(triplets)
75
82
 
76
83
  round_idx = 1
77
84
 
@@ -83,14 +90,15 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
83
90
  )
84
91
  completion = await generate_completion(
85
92
  query=query,
86
- context=context,
93
+ context=context_text,
87
94
  user_prompt_path=self.user_prompt_path,
88
95
  system_prompt_path=self.system_prompt_path,
96
+ system_prompt=self.system_prompt,
89
97
  )
90
98
 
91
- triplets += await self.get_triplets(completion)
99
+ triplets += await self.get_context(completion)
92
100
  triplets = list(set(triplets))
93
- context = await self.resolve_edges_to_text(triplets)
101
+ context_text = await self.resolve_edges_to_text(triplets)
94
102
 
95
103
  num_triplets = len(triplets)
96
104
 
@@ -109,14 +117,15 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
109
117
 
110
118
  completion = await generate_completion(
111
119
  query=query,
112
- context=context,
120
+ context=context_text,
113
121
  user_prompt_path=self.user_prompt_path,
114
122
  system_prompt_path=self.system_prompt_path,
123
+ system_prompt=self.system_prompt,
115
124
  )
116
125
 
117
- if self.save_interaction and context and triplets and completion:
126
+ if self.save_interaction and context_text and triplets and completion:
118
127
  await self.save_qa(
119
- question=query, answer=completion, context=context, triplets=triplets
128
+ question=query, answer=completion, context=context_text, triplets=triplets
120
129
  )
121
130
 
122
- return [completion]
131
+ return completion