cognee 0.2.4__py3-none-any.whl → 0.3.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +1 -0
- cognee/api/client.py +28 -3
- cognee/api/health.py +10 -13
- cognee/api/v1/add/add.py +3 -1
- cognee/api/v1/add/routers/get_add_router.py +12 -37
- cognee/api/v1/cloud/routers/__init__.py +1 -0
- cognee/api/v1/cloud/routers/get_checks_router.py +23 -0
- cognee/api/v1/cognify/code_graph_pipeline.py +9 -4
- cognee/api/v1/cognify/cognify.py +50 -3
- cognee/api/v1/cognify/routers/get_cognify_router.py +1 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +15 -4
- cognee/api/v1/memify/__init__.py +0 -0
- cognee/api/v1/memify/routers/__init__.py +1 -0
- cognee/api/v1/memify/routers/get_memify_router.py +100 -0
- cognee/api/v1/notebooks/routers/__init__.py +1 -0
- cognee/api/v1/notebooks/routers/get_notebooks_router.py +96 -0
- cognee/api/v1/search/routers/get_search_router.py +20 -1
- cognee/api/v1/search/search.py +11 -4
- cognee/api/v1/sync/__init__.py +17 -0
- cognee/api/v1/sync/routers/__init__.py +3 -0
- cognee/api/v1/sync/routers/get_sync_router.py +241 -0
- cognee/api/v1/sync/sync.py +877 -0
- cognee/api/v1/users/routers/get_auth_router.py +13 -1
- cognee/base_config.py +10 -1
- cognee/infrastructure/databases/graph/config.py +10 -4
- cognee/infrastructure/databases/graph/kuzu/adapter.py +135 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +89 -0
- cognee/infrastructure/databases/relational/__init__.py +2 -0
- cognee/infrastructure/databases/relational/get_async_session.py +15 -0
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +6 -1
- cognee/infrastructure/databases/relational/with_async_session.py +25 -0
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +1 -1
- cognee/infrastructure/databases/vector/config.py +13 -6
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +1 -1
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +2 -6
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +4 -1
- cognee/infrastructure/files/storage/LocalFileStorage.py +9 -0
- cognee/infrastructure/files/storage/S3FileStorage.py +5 -0
- cognee/infrastructure/files/storage/StorageManager.py +7 -1
- cognee/infrastructure/files/storage/storage.py +16 -0
- cognee/infrastructure/llm/LLMGateway.py +18 -0
- cognee/infrastructure/llm/config.py +4 -2
- cognee/infrastructure/llm/prompts/extract_query_time.txt +15 -0
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +25 -0
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +30 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +2 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py +44 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py +1 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py +46 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +25 -1
- cognee/infrastructure/utils/run_sync.py +8 -1
- cognee/modules/chunking/models/DocumentChunk.py +4 -3
- cognee/modules/cloud/exceptions/CloudApiKeyMissingError.py +15 -0
- cognee/modules/cloud/exceptions/CloudConnectionError.py +15 -0
- cognee/modules/cloud/exceptions/__init__.py +2 -0
- cognee/modules/cloud/operations/__init__.py +1 -0
- cognee/modules/cloud/operations/check_api_key.py +25 -0
- cognee/modules/data/deletion/prune_system.py +1 -1
- cognee/modules/data/methods/check_dataset_name.py +1 -1
- cognee/modules/data/methods/get_dataset_data.py +1 -1
- cognee/modules/data/methods/load_or_create_datasets.py +1 -1
- cognee/modules/engine/models/Event.py +16 -0
- cognee/modules/engine/models/Interval.py +8 -0
- cognee/modules/engine/models/Timestamp.py +13 -0
- cognee/modules/engine/models/__init__.py +3 -0
- cognee/modules/engine/utils/__init__.py +2 -0
- cognee/modules/engine/utils/generate_event_datapoint.py +46 -0
- cognee/modules/engine/utils/generate_timestamp_datapoint.py +51 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +2 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/resolve_edges_to_text.py +71 -0
- cognee/modules/memify/__init__.py +1 -0
- cognee/modules/memify/memify.py +118 -0
- cognee/modules/notebooks/methods/__init__.py +5 -0
- cognee/modules/notebooks/methods/create_notebook.py +26 -0
- cognee/modules/notebooks/methods/delete_notebook.py +13 -0
- cognee/modules/notebooks/methods/get_notebook.py +21 -0
- cognee/modules/notebooks/methods/get_notebooks.py +18 -0
- cognee/modules/notebooks/methods/update_notebook.py +17 -0
- cognee/modules/notebooks/models/Notebook.py +53 -0
- cognee/modules/notebooks/models/__init__.py +1 -0
- cognee/modules/notebooks/operations/__init__.py +1 -0
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +55 -0
- cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +19 -3
- cognee/modules/pipelines/operations/pipeline.py +1 -0
- cognee/modules/pipelines/operations/run_tasks.py +17 -41
- cognee/modules/retrieval/base_graph_retriever.py +18 -0
- cognee/modules/retrieval/base_retriever.py +1 -1
- cognee/modules/retrieval/code_retriever.py +8 -0
- cognee/modules/retrieval/coding_rules_retriever.py +31 -0
- cognee/modules/retrieval/completion_retriever.py +9 -3
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -0
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +23 -14
- cognee/modules/retrieval/graph_completion_cot_retriever.py +21 -11
- cognee/modules/retrieval/graph_completion_retriever.py +32 -65
- cognee/modules/retrieval/graph_summary_completion_retriever.py +3 -1
- cognee/modules/retrieval/insights_retriever.py +14 -3
- cognee/modules/retrieval/summaries_retriever.py +1 -1
- cognee/modules/retrieval/temporal_retriever.py +152 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +7 -32
- cognee/modules/retrieval/utils/completion.py +10 -3
- cognee/modules/search/methods/get_search_type_tools.py +168 -0
- cognee/modules/search/methods/no_access_control_search.py +47 -0
- cognee/modules/search/methods/search.py +219 -139
- cognee/modules/search/types/SearchResult.py +21 -0
- cognee/modules/search/types/SearchType.py +2 -0
- cognee/modules/search/types/__init__.py +1 -0
- cognee/modules/search/utils/__init__.py +2 -0
- cognee/modules/search/utils/prepare_search_result.py +41 -0
- cognee/modules/search/utils/transform_context_to_graph.py +38 -0
- cognee/modules/sync/__init__.py +1 -0
- cognee/modules/sync/methods/__init__.py +23 -0
- cognee/modules/sync/methods/create_sync_operation.py +53 -0
- cognee/modules/sync/methods/get_sync_operation.py +107 -0
- cognee/modules/sync/methods/update_sync_operation.py +248 -0
- cognee/modules/sync/models/SyncOperation.py +142 -0
- cognee/modules/sync/models/__init__.py +3 -0
- cognee/modules/users/__init__.py +0 -1
- cognee/modules/users/methods/__init__.py +4 -1
- cognee/modules/users/methods/create_user.py +26 -1
- cognee/modules/users/methods/get_authenticated_user.py +36 -42
- cognee/modules/users/methods/get_default_user.py +3 -1
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +2 -1
- cognee/root_dir.py +19 -0
- cognee/shared/logging_utils.py +1 -1
- cognee/tasks/codingagents/__init__.py +0 -0
- cognee/tasks/codingagents/coding_rule_associations.py +127 -0
- cognee/tasks/ingestion/save_data_item_to_storage.py +23 -0
- cognee/tasks/memify/__init__.py +2 -0
- cognee/tasks/memify/extract_subgraph.py +7 -0
- cognee/tasks/memify/extract_subgraph_chunks.py +11 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +52 -27
- cognee/tasks/temporal_graph/__init__.py +1 -0
- cognee/tasks/temporal_graph/add_entities_to_event.py +85 -0
- cognee/tasks/temporal_graph/enrich_events.py +34 -0
- cognee/tasks/temporal_graph/extract_events_and_entities.py +32 -0
- cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py +41 -0
- cognee/tasks/temporal_graph/models.py +49 -0
- cognee/tests/test_kuzu.py +4 -4
- cognee/tests/test_neo4j.py +4 -4
- cognee/tests/test_permissions.py +3 -3
- cognee/tests/test_relational_db_migration.py +7 -5
- cognee/tests/test_search_db.py +18 -24
- cognee/tests/test_temporal_graph.py +167 -0
- cognee/tests/unit/api/__init__.py +1 -0
- cognee/tests/unit/api/test_conditional_authentication_endpoints.py +246 -0
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +18 -2
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +13 -16
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +11 -16
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +5 -4
- cognee/tests/unit/modules/retrieval/insights_retriever_test.py +4 -2
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +18 -2
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +225 -0
- cognee/tests/unit/modules/users/__init__.py +1 -0
- cognee/tests/unit/modules/users/test_conditional_authentication.py +277 -0
- cognee/tests/unit/processing/utils/utils_test.py +20 -1
- {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/METADATA +8 -6
- {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/RECORD +162 -89
- cognee/tests/unit/modules/search/search_methods_test.py +0 -225
- {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/WHEEL +0 -0
- {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/entry_points.txt +0 -0
- {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.4.dist-info → cognee-0.3.0.dev0.dist-info}/licenses/NOTICE.md +0 -0

cognee/modules/memify/memify.py
@@ -0,0 +1,118 @@
+from typing import Union, Optional, List, Type, Any
+from uuid import UUID
+
+from cognee.shared.logging_utils import get_logger
+
+from cognee.modules.retrieval.utils.brute_force_triplet_search import get_memory_fragment
+from cognee.context_global_variables import set_database_global_context_variables
+from cognee.modules.engine.models.node_set import NodeSet
+from cognee.modules.pipelines import run_pipeline
+from cognee.modules.pipelines.tasks.task import Task
+from cognee.modules.users.models import User
+from cognee.modules.pipelines.layers.resolve_authorized_user_datasets import (
+    resolve_authorized_user_datasets,
+)
+from cognee.modules.pipelines.layers.reset_dataset_pipeline_run_status import (
+    reset_dataset_pipeline_run_status,
+)
+from cognee.modules.engine.operations.setup import setup
+from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor
+from cognee.tasks.memify.extract_subgraph_chunks import extract_subgraph_chunks
+from cognee.tasks.codingagents.coding_rule_associations import (
+    add_rule_associations,
+)
+
+logger = get_logger("memify")
+
+
+async def memify(
+    extraction_tasks: Union[List[Task], List[str]] = None,
+    enrichment_tasks: Union[List[Task], List[str]] = None,
+    data: Optional[Any] = None,
+    dataset: Union[str, UUID] = "main_dataset",
+    user: User = None,
+    node_type: Optional[Type] = NodeSet,
+    node_name: Optional[List[str]] = None,
+    vector_db_config: Optional[dict] = None,
+    graph_db_config: Optional[dict] = None,
+    run_in_background: bool = False,
+):
+    """
+    Enrichment pipeline in Cognee, can work with already built graphs. If no data is provided existing knowledge graph will be used as data,
+    custom data can also be provided instead which can be processed with provided extraction and enrichment tasks.
+
+    Provided tasks and data will be arranged to run the Cognee pipeline and execute graph enrichment/creation.
+
+    This is the core processing step in Cognee that converts raw text and documents
+    into an intelligent knowledge graph. It analyzes content, extracts entities and
+    relationships, and creates semantic connections for enhanced search and reasoning.
+
+    Args:
+        extraction_tasks: List of Cognee Tasks to execute for graph/data extraction.
+        enrichment_tasks: List of Cognee Tasks to handle enrichment of provided graph/data from extraction tasks.
+        data: The data to ingest. Can be anything when custom extraction and enrichment tasks are used.
+            Data provided here will be forwarded to the first extraction task in the pipeline as input.
+            If no data is provided the whole graph (or subgraph if node_name/node_type is specified) will be forwarded
+        dataset: Dataset name or dataset uuid to process.
+        user: User context for authentication and data access. Uses default if None.
+        node_type: Filter graph to specific entity types (for advanced filtering). Used when no data is provided.
+        node_name: Filter graph to specific named entities (for targeted search). Used when no data is provided.
+        vector_db_config: Custom vector database configuration for embeddings storage.
+        graph_db_config: Custom graph database configuration for relationship storage.
+        run_in_background: If True, starts processing asynchronously and returns immediately.
+            If False, waits for completion before returning.
+            Background mode recommended for large datasets (>100MB).
+            Use pipeline_run_id from return value to monitor progress.
+    """
+
+    # Use default coding rules tasks if no tasks were provided
+    if not extraction_tasks:
+        extraction_tasks = [Task(extract_subgraph_chunks)]
+    if not enrichment_tasks:
+        enrichment_tasks = [
+            Task(
+                add_rule_associations,
+                rules_nodeset_name="coding_agent_rules",
+                task_config={"batch_size": 1},
+            )
+        ]
+
+    await setup()
+
+    user, authorized_dataset_list = await resolve_authorized_user_datasets(dataset, user)
+    authorized_dataset = authorized_dataset_list[0]
+
+    if not data:
+        # Will only be used if ENABLE_BACKEND_ACCESS_CONTROL is set to True
+        await set_database_global_context_variables(
+            authorized_dataset.id, authorized_dataset.owner_id
+        )
+
+        memory_fragment = await get_memory_fragment(node_type=node_type, node_name=node_name)
+        # Subgraphs should be a single element in the list to represent one data item
+        data = [memory_fragment]
+
+    memify_tasks = [
+        *extraction_tasks,  # Unpack tasks provided to memify pipeline
+        *enrichment_tasks,
+    ]
+
+    await reset_dataset_pipeline_run_status(
+        authorized_dataset.id, user, pipeline_names=["memify_pipeline"]
+    )
+
+    # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
+    pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background)
+
+    # Run the run_pipeline in the background or blocking based on executor
+    return await pipeline_executor_func(
+        pipeline=run_pipeline,
+        tasks=memify_tasks,
+        user=user,
+        data=data,
+        datasets=authorized_dataset.id,
+        vector_db_config=vector_db_config,
+        graph_db_config=graph_db_config,
+        incremental_loading=False,
+        pipeline_name="memify_pipeline",
+    )
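
The new `memify` entry point above is the centerpiece of this release's module additions. A minimal usage sketch, assuming the `+1` line in `cognee/__init__.py` re-exports `memify` at package level (not confirmed by this diff):

import asyncio

import cognee  # assumes cognee/__init__.py (+1 above) re-exports memify


async def main():
    # No tasks and no data: falls back to Task(extract_subgraph_chunks) for
    # extraction, add_rule_associations for enrichment, and feeds the
    # existing graph for "main_dataset" through the memify_pipeline.
    await cognee.memify(dataset="main_dataset")

    # Background mode returns immediately; the docstring above says to use
    # the returned pipeline_run_id to monitor progress.
    pipeline_run = await cognee.memify(dataset="main_dataset", run_in_background=True)
    print(pipeline_run)


asyncio.run(main())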

cognee/modules/notebooks/methods/create_notebook.py
@@ -0,0 +1,26 @@
+from uuid import UUID
+from typing import List, Optional
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from cognee.infrastructure.databases.relational import with_async_session
+
+from ..models.Notebook import Notebook, NotebookCell
+
+
+@with_async_session
+async def create_notebook(
+    user_id: UUID,
+    notebook_name: str,
+    cells: Optional[List[NotebookCell]],
+    deletable: Optional[bool],
+    session: AsyncSession,
+) -> Notebook:
+    notebook = Notebook(
+        name=notebook_name, owner_id=user_id, cells=cells, deletable=deletable or True
+    )
+
+    session.add(notebook)
+
+    await session.commit()
+
+    return notebook
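
All five notebook CRUD helpers added here receive their `AsyncSession` through the new `with_async_session` decorator (`cognee/infrastructure/databases/relational/with_async_session.py`, +25 lines in the file list above); its body is not shown in these hunks. A plausible sketch of the usual shape of such a decorator, with a placeholder engine (an assumption, not the shipped implementation):

from functools import wraps

from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine

engine = create_async_engine("sqlite+aiosqlite:///:memory:")  # placeholder, not cognee's engine
async_session_maker = async_sessionmaker(engine, expire_on_commit=False)


def with_async_session(func):
    """Inject a fresh AsyncSession unless the caller supplies one."""

    @wraps(func)
    async def wrapper(*args, session: AsyncSession = None, **kwargs):
        if session is not None:
            return await func(*args, session=session, **kwargs)
        async with async_session_maker() as new_session:
            return await func(*args, session=new_session, **kwargs)

    return wrapper

Under that reading, callers can invoke `create_notebook(user_id, "My notebook", cells=None, deletable=None)` without passing a session. Note also that `deletable=deletable or True` in the body evaluates to `True` for every input (including `False`), so the parameter as written cannot mark a notebook non-deletable.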

cognee/modules/notebooks/methods/delete_notebook.py
@@ -0,0 +1,13 @@
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from cognee.infrastructure.databases.relational import with_async_session
+
+from ..models.Notebook import Notebook
+
+
+@with_async_session
+async def delete_notebook(
+    notebook: Notebook,
+    session: AsyncSession,
+) -> None:
+    await session.delete(notebook)

cognee/modules/notebooks/methods/get_notebook.py
@@ -0,0 +1,21 @@
+from uuid import UUID
+from typing import Optional
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from cognee.infrastructure.databases.relational import with_async_session
+
+from ..models.Notebook import Notebook
+
+
+@with_async_session
+async def get_notebook(
+    notebook_id: UUID,
+    user_id: UUID,
+    session: AsyncSession,
+) -> Optional[Notebook]:
+    result = await session.execute(
+        select(Notebook).where(Notebook.owner_id == user_id and Notebook.id == notebook_id)
+    )
+
+    return result.scalar()
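
One detail worth flagging in `get_notebook`: `Notebook.owner_id == user_id and Notebook.id == notebook_id` combines the two clauses with Python's `and`, which does not translate into a SQL `AND`. The conventional SQLAlchemy spellings (a corrective sketch reusing the names from the file above, not what ships in the wheel) are:

from sqlalchemy import and_, select

# where() with multiple arguments ANDs them together...
statement = select(Notebook).where(
    Notebook.owner_id == user_id,
    Notebook.id == notebook_id,
)

# ...or combine the clauses explicitly with and_().
statement = select(Notebook).where(
    and_(Notebook.owner_id == user_id, Notebook.id == notebook_id)
)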

cognee/modules/notebooks/methods/get_notebooks.py
@@ -0,0 +1,18 @@
+from uuid import UUID
+from typing import List
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from cognee.infrastructure.databases.relational import with_async_session
+
+from ..models.Notebook import Notebook
+
+
+@with_async_session
+async def get_notebooks(
+    user_id: UUID,
+    session: AsyncSession,
+) -> List[Notebook]:
+    result = await session.execute(select(Notebook).where(Notebook.owner_id == user_id))
+
+    return list(result.scalars().all())

cognee/modules/notebooks/methods/update_notebook.py
@@ -0,0 +1,17 @@
+from typing import Callable, AsyncContextManager
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from cognee.infrastructure.databases.relational import with_async_session
+
+from ..models.Notebook import Notebook
+
+
+@with_async_session
+async def update_notebook(
+    notebook: Notebook,
+    session: AsyncSession,
+) -> Notebook:
+    if notebook not in session:
+        session.add(notebook)
+
+    return notebook

cognee/modules/notebooks/models/Notebook.py
@@ -0,0 +1,53 @@
+import json
+from typing import List, Literal
+from uuid import uuid4, UUID as UUID_t
+from pydantic import BaseModel, ConfigDict
+from datetime import datetime, timezone
+from fastapi.encoders import jsonable_encoder
+from sqlalchemy import Boolean, Column, DateTime, JSON, UUID, String, TypeDecorator
+from sqlalchemy.orm import mapped_column, Mapped
+
+from cognee.infrastructure.databases.relational import Base
+
+
+class NotebookCell(BaseModel):
+    id: UUID_t
+    type: Literal["markdown", "code"]
+    name: str
+    content: str
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+
+class NotebookCellList(TypeDecorator):
+    impl = JSON
+    cache_ok = True
+
+    def process_bind_param(self, notebook_cells, dialect):
+        if notebook_cells is None:
+            return []
+        return [
+            json.dumps(jsonable_encoder(cell)) if isinstance(cell, NotebookCell) else cell
+            for cell in notebook_cells
+        ]
+
+    def process_result_value(self, cells_json_list, dialect):
+        if cells_json_list is None:
+            return []
+        return [NotebookCell(**json.loads(json_string)) for json_string in cells_json_list]
+
+
+class Notebook(Base):
+    __tablename__ = "notebooks"
+
+    id: Mapped[UUID_t] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid4)
+
+    owner_id: Mapped[UUID_t] = mapped_column(UUID(as_uuid=True), index=True)
+
+    name: Mapped[str] = mapped_column(String, nullable=False)
+
+    cells: Mapped[List[NotebookCell]] = mapped_column(NotebookCellList, nullable=False)
+
+    deletable: Mapped[bool] = mapped_column(Boolean, default=True)
+
+    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
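
The `NotebookCellList` type decorator above stores each cell as a JSON string inside a JSON array and rebuilds `NotebookCell` models on load. A small round-trip sketch of that encode/decode path, reusing `json`, `jsonable_encoder`, and `NotebookCell` from the file (no database involved):

from uuid import uuid4

cell = NotebookCell(id=uuid4(), type="code", name="setup", content="import cognee")

encoded = json.dumps(jsonable_encoder(cell))   # what process_bind_param writes
decoded = NotebookCell(**json.loads(encoded))  # what process_result_value rebuilds

assert decoded == cell  # pydantic coerces the id string back into a UUID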

cognee/modules/notebooks/models/__init__.py
@@ -0,0 +1 @@
+from .Notebook import Notebook, NotebookCell

cognee/modules/notebooks/operations/__init__.py
@@ -0,0 +1 @@
+from .run_in_local_sandbox import run_in_local_sandbox

cognee/modules/notebooks/operations/run_in_local_sandbox.py
@@ -0,0 +1,55 @@
+import io
+import sys
+import traceback
+
+
+def wrap_in_async_handler(user_code: str) -> str:
+    return (
+        "from cognee.infrastructure.utils.run_sync import run_sync\n\n"
+        "async def __user_main__():\n"
+        + "\n".join("    " + line for line in user_code.strip().split("\n"))
+        + "\n"
+        "    globals().update(locals())\n\n"
+        "run_sync(__user_main__())\n"
+    )
+
+
+def run_in_local_sandbox(code, environment=None):
+    environment = environment or {}
+    code = wrap_in_async_handler(code.replace("\xa0", "\n"))
+
+    buffer = io.StringIO()
+    sys_stdout = sys.stdout
+    sys.stdout = buffer
+    sys.stderr = buffer
+
+    error = None
+
+    printOutput = []
+
+    def customPrintFunction(output):
+        printOutput.append(output)
+
+    environment["print"] = customPrintFunction
+
+    try:
+        exec(code, environment)
+    except Exception:
+        error = traceback.format_exc()
+    finally:
+        sys.stdout = sys_stdout
+        sys.stderr = sys_stdout
+
+    return printOutput, error
+
+
+if __name__ == "__main__":
+    run_in_local_sandbox("""
+import cognee
+
+await cognee.add("Test file with some random content 3.")
+
+a = "asd"
+
+b = {"c": "dfgh"}
+""")
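
`run_in_local_sandbox` wraps the supplied source in an async `__user_main__` function (which is why top-level `await` works in the `__main__` demo above), executes it via `run_sync`, and swaps `print` in the exec environment for a collector that appends each value it receives. A usage sketch under that reading; note the injected print accepts a single argument per call:

output, error = run_in_local_sandbox('print("hello")\nprint(1 + 1)')

print(output)  # ["hello", 2] -- the values captured by the injected print
print(error)   # None on success, a formatted traceback string otherwise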

cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py
@@ -1,12 +1,28 @@
 from uuid import UUID
+from typing import Optional, List
+
 from cognee.modules.pipelines.methods import get_pipeline_runs_by_dataset, reset_pipeline_run_status
 from cognee.modules.pipelines.models.PipelineRun import PipelineRunStatus
 from cognee.modules.users.models import User
 
 
-async def reset_dataset_pipeline_run_status(dataset_id: UUID, user: User):
+async def reset_dataset_pipeline_run_status(
+    dataset_id: UUID, user: User, pipeline_names: Optional[list[str]] = None
+):
+    """Reset the status of all (or selected) pipeline runs for a dataset.
+
+    If *pipeline_names* is given, only runs whose *pipeline_name* is in
+    that list are touched.
+    """
     related_pipeline_runs = await get_pipeline_runs_by_dataset(dataset_id)
 
     for pipeline_run in related_pipeline_runs:
-        if pipeline_run.status is not PipelineRunStatus.DATASET_PROCESSING_INITIATED:
-            await reset_pipeline_run_status(user.id, dataset_id, pipeline_run.pipeline_name)
+        # Skip runs that are initiated
+        if pipeline_run.status is PipelineRunStatus.DATASET_PROCESSING_INITIATED:
+            continue
+
+        # If a name filter is provided, skip non-matching runs
+        if pipeline_names is not None and pipeline_run.pipeline_name not in pipeline_names:
+            continue
+
+        await reset_pipeline_run_status(user.id, dataset_id, pipeline_run.pipeline_name)

cognee/modules/pipelines/operations/pipeline.py
@@ -5,6 +5,7 @@ from typing import Union
 from cognee.modules.pipelines.layers.setup_and_check_environment import (
     setup_and_check_environment,
 )
+
 from cognee.shared.logging_utils import get_logger
 from cognee.modules.data.methods.get_dataset_data import get_dataset_data
 from cognee.modules.data.models import Data, Dataset

cognee/modules/pipelines/operations/run_tasks.py
@@ -266,48 +266,24 @@ async def run_tasks(
     if incremental_loading:
         data = await resolve_data_directories(data)
 
-        # # Create async tasks per data item that will run the pipeline for the data item
-        # data_item_tasks = [
-        #     asyncio.create_task(
-        #         _run_tasks_data_item(
-        #             data_item,
-        #             dataset,
-        #             tasks,
-        #             pipeline_name,
-        #             pipeline_id,
-        #             pipeline_run_id,
-        #             context,
-        #             user,
-        #             incremental_loading,
-        #         )
-        #     )
-        #     for data_item in data
-        # ]
-        # results = await asyncio.gather(*data_item_tasks)
-        # # Remove skipped data items from results
-        # results = [result for result in results if result]
-
-        ### TEMP sync data item handling
-        results = []
-        # Run the pipeline for each data_item sequentially, one after the other
-        for data_item in data:
-            result = await _run_tasks_data_item(
-                data_item,
-                dataset,
-                tasks,
-                pipeline_name,
-                pipeline_id,
-                pipeline_run_id,
-                context,
-                user,
-                incremental_loading,
+        # Create async tasks per data item that will run the pipeline for the data item
+        data_item_tasks = [
+            asyncio.create_task(
+                _run_tasks_data_item(
+                    data_item,
+                    dataset,
+                    tasks,
+                    pipeline_name,
+                    pipeline_id,
+                    pipeline_run_id,
+                    context,
+                    user,
+                    incremental_loading,
+                )
             )
-
-
-
-            results.append(result)
-        ### END
+            for data_item in data
+        ]
+        results = await asyncio.gather(*data_item_tasks)
 
         # Remove skipped data items from results
         results = [result for result in results if result]
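
The `run_tasks` hunk above retires the sequential loop marked `### TEMP sync data item handling` and reinstates per-item fan-out. Stripped of pipeline details, the pattern is create_task plus gather, with falsy (skipped) results filtered out afterwards; a self-contained sketch:

import asyncio
from typing import Optional


async def process(item: int) -> Optional[int]:
    # Stand-in for _run_tasks_data_item; None models a skipped data item.
    await asyncio.sleep(0)
    return item * 2 if item else None


async def main():
    items = [0, 1, 2, 3]
    # Schedule every item concurrently; gather preserves input order.
    tasks = [asyncio.create_task(process(item)) for item in items]
    results = await asyncio.gather(*tasks)
    # Remove skipped data items from results, as the pipeline does.
    results = [result for result in results if result]
    print(results)  # [2, 4, 6]


asyncio.run(main())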

cognee/modules/retrieval/base_graph_retriever.py
@@ -0,0 +1,18 @@
+from typing import List, Optional
+from abc import ABC, abstractmethod
+
+from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
+
+
+class BaseGraphRetriever(ABC):
+    """Base class for all graph based retrievers."""
+
+    @abstractmethod
+    async def get_context(self, query: str) -> List[Edge]:
+        """Retrieves triplets based on the query."""
+        pass
+
+    @abstractmethod
+    async def get_completion(self, query: str, context: Optional[List[Edge]] = None) -> str:
+        """Generates a response using the query and optional context (triplets)."""
+        pass
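
The abstract base above fixes the contract for the graph retrievers reworked in this release: `List[Edge]` context in, string completion out. A toy (hypothetical) subclass, reusing the imports from the file, just to show what implementers must provide:

class EchoGraphRetriever(BaseGraphRetriever):
    """Illustrative only: returns no edges and echoes the query."""

    async def get_context(self, query: str) -> List[Edge]:
        return []  # a real retriever would run a graph/vector search here

    async def get_completion(self, query: str, context: Optional[List[Edge]] = None) -> str:
        edges = context if context is not None else await self.get_context(query)
        return f"{len(edges)} edges retrieved for: {query}"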

cognee/modules/retrieval/code_retriever.py
@@ -94,7 +94,15 @@ class CodeRetriever(BaseRetriever):
                    {"id": res.id, "score": res.score, "payload": res.payload}
                )
 
+        existing_collection = []
         for collection in self.classes_and_functions_collections:
+            if await vector_engine.has_collection(collection):
+                existing_collection.append(collection)
+
+        if not existing_collection:
+            raise RuntimeError("No collection found for code retriever")
+
+        for collection in existing_collection:
             logger.debug(f"Searching {collection} collection with general query")
             search_results_code = await vector_engine.search(
                 collection, query, limit=self.top_k

cognee/modules/retrieval/coding_rules_retriever.py
@@ -0,0 +1,31 @@
+import asyncio
+from functools import reduce
+from typing import List, Optional
+from cognee.shared.logging_utils import get_logger
+from cognee.tasks.codingagents.coding_rule_associations import get_existing_rules
+
+logger = get_logger("CodingRulesRetriever")
+
+
+class CodingRulesRetriever:
+    """Retriever for handling codeing rule based searches."""
+
+    def __init__(self, rules_nodeset_name: Optional[List[str]] = None):
+        if isinstance(rules_nodeset_name, list):
+            if not rules_nodeset_name:
+                # If there is no provided nodeset set to coding_agent_rules
+                rules_nodeset_name = ["coding_agent_rules"]
+
+        self.rules_nodeset_name = rules_nodeset_name
+        """Initialize retriever with search parameters."""
+
+    async def get_existing_rules(self, query_text):
+        if self.rules_nodeset_name:
+            rules_list = await asyncio.gather(
+                *[
+                    get_existing_rules(rules_nodeset_name=nodeset)
+                    for nodeset in self.rules_nodeset_name
+                ]
+            )
+
+            return reduce(lambda x, y: x + y, rules_list, [])

cognee/modules/retrieval/completion_retriever.py
@@ -23,12 +23,14 @@ class CompletionRetriever(BaseRetriever):
         self,
         user_prompt_path: str = "context_for_question.txt",
         system_prompt_path: str = "answer_simple_question.txt",
+        system_prompt: Optional[str] = None,
         top_k: Optional[int] = 1,
     ):
         """Initialize retriever with optional custom prompt paths."""
         self.user_prompt_path = user_prompt_path
         self.system_prompt_path = system_prompt_path
         self.top_k = top_k if top_k is not None else 1
+        self.system_prompt = system_prompt
 
     async def get_context(self, query: str) -> str:
         """

@@ -65,7 +67,7 @@ class CompletionRetriever(BaseRetriever):
             logger.error("DocumentChunk_text collection not found")
             raise NoDataError("No data found in the system, please add data first.") from error
 
-    async def get_completion(self, query: str, context: Optional[Any] = None) ->
+    async def get_completion(self, query: str, context: Optional[Any] = None) -> str:
         """
         Generates an LLM completion using the context.
 

@@ -88,6 +90,10 @@ class CompletionRetriever(BaseRetriever):
             context = await self.get_context(query)
 
         completion = await generate_completion(
-            query,
+            query=query,
+            context=context,
+            user_prompt_path=self.user_prompt_path,
+            system_prompt_path=self.system_prompt_path,
+            system_prompt=self.system_prompt,
         )
-        return
+        return completion
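
The three `CompletionRetriever` hunks above thread a new literal `system_prompt` through to `generate_completion` alongside the existing `system_prompt_path`. Presumably the literal prompt, when given, takes precedence over the path, though this diff does not show `generate_completion`'s resolution logic. An illustrative call:

import asyncio

from cognee.modules.retrieval.completion_retriever import CompletionRetriever


async def main():
    # Assumes data has already been added and cognified.
    retriever = CompletionRetriever(system_prompt="Answer in one short sentence.")
    print(await retriever.get_completion("What is in my data?"))


asyncio.run(main())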

cognee/modules/retrieval/graph_completion_context_extension_retriever.py
@@ -1,4 +1,5 @@
-from typing import
+from typing import Optional, List, Type
+from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
 from cognee.shared.logging_utils import get_logger
 from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
 from cognee.modules.retrieval.utils.completion import generate_completion

@@ -26,6 +27,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
         self,
         user_prompt_path: str = "graph_context_for_question.txt",
         system_prompt_path: str = "answer_simple_question.txt",
+        system_prompt: Optional[str] = None,
         top_k: Optional[int] = 5,
         node_type: Optional[Type] = None,
         node_name: Optional[List[str]] = None,

@@ -38,11 +40,15 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
             node_type=node_type,
             node_name=node_name,
             save_interaction=save_interaction,
+            system_prompt=system_prompt,
         )
 
     async def get_completion(
-        self,
-
+        self,
+        query: str,
+        context: Optional[List[Edge]] = None,
+        context_extension_rounds=4,
+    ) -> str:
         """
         Extends the context for a given query by retrieving related triplets and generating new
         completions based on them.

@@ -67,11 +73,12 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
             - List[str]: A list containing the generated answer based on the query and the
               extended context.
         """
-        triplets =
+        triplets = context
+
+        if triplets is None:
+            triplets = await self.get_context(query)
 
-
-        triplets += await self.get_triplets(query)
-        context = await self.resolve_edges_to_text(triplets)
+        context_text = await self.resolve_edges_to_text(triplets)
 
         round_idx = 1
 

@@ -83,14 +90,15 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
             )
             completion = await generate_completion(
                 query=query,
-                context=
+                context=context_text,
                 user_prompt_path=self.user_prompt_path,
                 system_prompt_path=self.system_prompt_path,
+                system_prompt=self.system_prompt,
             )
 
-            triplets += await self.
+            triplets += await self.get_context(completion)
             triplets = list(set(triplets))
-
+            context_text = await self.resolve_edges_to_text(triplets)
 
             num_triplets = len(triplets)
 

@@ -109,14 +117,15 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
 
         completion = await generate_completion(
             query=query,
-            context=
+            context=context_text,
             user_prompt_path=self.user_prompt_path,
             system_prompt_path=self.system_prompt_path,
+            system_prompt=self.system_prompt,
         )
 
-        if self.save_interaction and
+        if self.save_interaction and context_text and triplets and completion:
             await self.save_qa(
-                question=query, answer=completion, context=
+                question=query, answer=completion, context=context_text, triplets=triplets
             )
 
-        return
+        return completion
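
Pieced together from the hunks above, the extension loop now: seeds `triplets` from the passed-in `context` (or `get_context` when none is given), renders them to `context_text`, generates a completion, feeds that completion back into `get_context` to pull in further triplets, dedupes, re-renders, and repeats for up to `context_extension_rounds`. A condensed paraphrase with the retrieval and LLM calls stubbed out as parameters (a simplified sketch, not the shipped code):

async def extend_and_answer(query, fetch_edges, render, complete, rounds=4):
    """Sketch of the context-extension strategy, simplified."""
    triplets = await fetch_edges(query)          # seed context
    context_text = await render(triplets)

    for _ in range(rounds):
        completion = await complete(query, context_text)
        before = len(triplets)
        # Use the draft answer as a new retrieval query, then dedupe.
        triplets = list(set(triplets + await fetch_edges(completion)))
        context_text = await render(triplets)
        if len(triplets) == before:              # nothing new: stop early
            break

    return await complete(query, context_text)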