cognee 0.2.3.dev1__py3-none-any.whl → 0.3.0__py3-none-any.whl
- cognee/__init__.py +2 -0
- cognee/__main__.py +4 -0
- cognee/api/client.py +28 -3
- cognee/api/health.py +10 -13
- cognee/api/v1/add/add.py +20 -6
- cognee/api/v1/add/routers/get_add_router.py +12 -37
- cognee/api/v1/cloud/routers/__init__.py +1 -0
- cognee/api/v1/cloud/routers/get_checks_router.py +23 -0
- cognee/api/v1/cognify/code_graph_pipeline.py +14 -3
- cognee/api/v1/cognify/cognify.py +67 -105
- cognee/api/v1/cognify/routers/get_cognify_router.py +11 -3
- cognee/api/v1/datasets/routers/get_datasets_router.py +16 -5
- cognee/api/v1/memify/routers/__init__.py +1 -0
- cognee/api/v1/memify/routers/get_memify_router.py +100 -0
- cognee/api/v1/notebooks/routers/__init__.py +1 -0
- cognee/api/v1/notebooks/routers/get_notebooks_router.py +96 -0
- cognee/api/v1/responses/default_tools.py +4 -0
- cognee/api/v1/responses/dispatch_function.py +6 -1
- cognee/api/v1/responses/models.py +1 -1
- cognee/api/v1/search/routers/get_search_router.py +20 -1
- cognee/api/v1/search/search.py +17 -4
- cognee/api/v1/sync/__init__.py +17 -0
- cognee/api/v1/sync/routers/__init__.py +3 -0
- cognee/api/v1/sync/routers/get_sync_router.py +241 -0
- cognee/api/v1/sync/sync.py +877 -0
- cognee/api/v1/ui/__init__.py +1 -0
- cognee/api/v1/ui/ui.py +529 -0
- cognee/api/v1/users/routers/get_auth_router.py +13 -1
- cognee/base_config.py +10 -1
- cognee/cli/__init__.py +10 -0
- cognee/cli/_cognee.py +273 -0
- cognee/cli/commands/__init__.py +1 -0
- cognee/cli/commands/add_command.py +80 -0
- cognee/cli/commands/cognify_command.py +128 -0
- cognee/cli/commands/config_command.py +225 -0
- cognee/cli/commands/delete_command.py +80 -0
- cognee/cli/commands/search_command.py +149 -0
- cognee/cli/config.py +33 -0
- cognee/cli/debug.py +21 -0
- cognee/cli/echo.py +45 -0
- cognee/cli/exceptions.py +23 -0
- cognee/cli/minimal_cli.py +97 -0
- cognee/cli/reference.py +26 -0
- cognee/cli/suppress_logging.py +12 -0
- cognee/eval_framework/corpus_builder/corpus_builder_executor.py +2 -2
- cognee/eval_framework/eval_config.py +1 -1
- cognee/infrastructure/databases/graph/config.py +10 -4
- cognee/infrastructure/databases/graph/get_graph_engine.py +4 -9
- cognee/infrastructure/databases/graph/kuzu/adapter.py +199 -2
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +138 -0
- cognee/infrastructure/databases/relational/__init__.py +2 -0
- cognee/infrastructure/databases/relational/get_async_session.py +15 -0
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +6 -1
- cognee/infrastructure/databases/relational/with_async_session.py +25 -0
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +1 -1
- cognee/infrastructure/databases/vector/config.py +13 -6
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +6 -4
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +16 -7
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +5 -5
- cognee/infrastructure/databases/vector/embeddings/config.py +2 -2
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +2 -6
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +10 -7
- cognee/infrastructure/files/storage/LocalFileStorage.py +9 -0
- cognee/infrastructure/files/storage/S3FileStorage.py +5 -0
- cognee/infrastructure/files/storage/StorageManager.py +7 -1
- cognee/infrastructure/files/storage/storage.py +16 -0
- cognee/infrastructure/files/utils/get_data_file_path.py +14 -9
- cognee/infrastructure/files/utils/get_file_metadata.py +2 -1
- cognee/infrastructure/llm/LLMGateway.py +32 -5
- cognee/infrastructure/llm/config.py +6 -4
- cognee/infrastructure/llm/prompts/extract_query_time.txt +15 -0
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +25 -0
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +30 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +16 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +2 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py +44 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py +1 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_content_graph.py +19 -15
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py +46 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +3 -3
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +3 -3
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +14 -8
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +6 -4
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +28 -4
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +2 -2
- cognee/infrastructure/llm/tokenizer/HuggingFace/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/Mistral/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +6 -6
- cognee/infrastructure/llm/utils.py +7 -7
- cognee/infrastructure/utils/run_sync.py +8 -1
- cognee/modules/chunking/models/DocumentChunk.py +4 -3
- cognee/modules/cloud/exceptions/CloudApiKeyMissingError.py +15 -0
- cognee/modules/cloud/exceptions/CloudConnectionError.py +15 -0
- cognee/modules/cloud/exceptions/__init__.py +2 -0
- cognee/modules/cloud/operations/__init__.py +1 -0
- cognee/modules/cloud/operations/check_api_key.py +25 -0
- cognee/modules/data/deletion/prune_system.py +1 -1
- cognee/modules/data/methods/__init__.py +2 -0
- cognee/modules/data/methods/check_dataset_name.py +1 -1
- cognee/modules/data/methods/create_authorized_dataset.py +19 -0
- cognee/modules/data/methods/get_authorized_dataset.py +11 -5
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +16 -0
- cognee/modules/data/methods/get_dataset_data.py +1 -1
- cognee/modules/data/methods/load_or_create_datasets.py +2 -20
- cognee/modules/engine/models/Event.py +16 -0
- cognee/modules/engine/models/Interval.py +8 -0
- cognee/modules/engine/models/Timestamp.py +13 -0
- cognee/modules/engine/models/__init__.py +3 -0
- cognee/modules/engine/utils/__init__.py +2 -0
- cognee/modules/engine/utils/generate_event_datapoint.py +46 -0
- cognee/modules/engine/utils/generate_timestamp_datapoint.py +51 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +2 -2
- cognee/modules/graph/methods/get_formatted_graph_data.py +3 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/resolve_edges_to_text.py +71 -0
- cognee/modules/memify/__init__.py +1 -0
- cognee/modules/memify/memify.py +118 -0
- cognee/modules/notebooks/methods/__init__.py +5 -0
- cognee/modules/notebooks/methods/create_notebook.py +26 -0
- cognee/modules/notebooks/methods/delete_notebook.py +13 -0
- cognee/modules/notebooks/methods/get_notebook.py +21 -0
- cognee/modules/notebooks/methods/get_notebooks.py +18 -0
- cognee/modules/notebooks/methods/update_notebook.py +17 -0
- cognee/modules/notebooks/models/Notebook.py +53 -0
- cognee/modules/notebooks/models/__init__.py +1 -0
- cognee/modules/notebooks/operations/__init__.py +1 -0
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +55 -0
- cognee/modules/pipelines/__init__.py +1 -1
- cognee/modules/pipelines/exceptions/tasks.py +18 -0
- cognee/modules/pipelines/layers/__init__.py +1 -0
- cognee/modules/pipelines/layers/check_pipeline_run_qualification.py +59 -0
- cognee/modules/pipelines/layers/pipeline_execution_mode.py +127 -0
- cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +28 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +34 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +55 -0
- cognee/modules/pipelines/layers/setup_and_check_environment.py +41 -0
- cognee/modules/pipelines/layers/validate_pipeline_tasks.py +20 -0
- cognee/modules/pipelines/methods/__init__.py +2 -0
- cognee/modules/pipelines/methods/get_pipeline_runs_by_dataset.py +34 -0
- cognee/modules/pipelines/methods/reset_pipeline_run_status.py +16 -0
- cognee/modules/pipelines/operations/__init__.py +0 -1
- cognee/modules/pipelines/operations/log_pipeline_run_initiated.py +1 -1
- cognee/modules/pipelines/operations/pipeline.py +24 -138
- cognee/modules/pipelines/operations/run_tasks.py +17 -41
- cognee/modules/retrieval/base_feedback.py +11 -0
- cognee/modules/retrieval/base_graph_retriever.py +18 -0
- cognee/modules/retrieval/base_retriever.py +1 -1
- cognee/modules/retrieval/code_retriever.py +8 -0
- cognee/modules/retrieval/coding_rules_retriever.py +31 -0
- cognee/modules/retrieval/completion_retriever.py +9 -3
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -0
- cognee/modules/retrieval/cypher_search_retriever.py +1 -9
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +29 -13
- cognee/modules/retrieval/graph_completion_cot_retriever.py +30 -13
- cognee/modules/retrieval/graph_completion_retriever.py +107 -56
- cognee/modules/retrieval/graph_summary_completion_retriever.py +5 -1
- cognee/modules/retrieval/insights_retriever.py +14 -3
- cognee/modules/retrieval/natural_language_retriever.py +0 -4
- cognee/modules/retrieval/summaries_retriever.py +1 -1
- cognee/modules/retrieval/temporal_retriever.py +152 -0
- cognee/modules/retrieval/user_qa_feedback.py +83 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +7 -32
- cognee/modules/retrieval/utils/completion.py +10 -3
- cognee/modules/retrieval/utils/extract_uuid_from_node.py +18 -0
- cognee/modules/retrieval/utils/models.py +40 -0
- cognee/modules/search/methods/get_search_type_tools.py +168 -0
- cognee/modules/search/methods/no_access_control_search.py +47 -0
- cognee/modules/search/methods/search.py +239 -118
- cognee/modules/search/types/SearchResult.py +21 -0
- cognee/modules/search/types/SearchType.py +3 -0
- cognee/modules/search/types/__init__.py +1 -0
- cognee/modules/search/utils/__init__.py +2 -0
- cognee/modules/search/utils/prepare_search_result.py +41 -0
- cognee/modules/search/utils/transform_context_to_graph.py +38 -0
- cognee/modules/settings/get_settings.py +2 -2
- cognee/modules/sync/__init__.py +1 -0
- cognee/modules/sync/methods/__init__.py +23 -0
- cognee/modules/sync/methods/create_sync_operation.py +53 -0
- cognee/modules/sync/methods/get_sync_operation.py +107 -0
- cognee/modules/sync/methods/update_sync_operation.py +248 -0
- cognee/modules/sync/models/SyncOperation.py +142 -0
- cognee/modules/sync/models/__init__.py +3 -0
- cognee/modules/users/__init__.py +0 -1
- cognee/modules/users/methods/__init__.py +4 -1
- cognee/modules/users/methods/create_user.py +26 -1
- cognee/modules/users/methods/get_authenticated_user.py +36 -42
- cognee/modules/users/methods/get_default_user.py +3 -1
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +2 -1
- cognee/root_dir.py +19 -0
- cognee/shared/CodeGraphEntities.py +1 -0
- cognee/shared/logging_utils.py +143 -32
- cognee/shared/utils.py +0 -1
- cognee/tasks/codingagents/coding_rule_associations.py +127 -0
- cognee/tasks/graph/extract_graph_from_data.py +6 -2
- cognee/tasks/ingestion/save_data_item_to_storage.py +23 -0
- cognee/tasks/memify/__init__.py +2 -0
- cognee/tasks/memify/extract_subgraph.py +7 -0
- cognee/tasks/memify/extract_subgraph_chunks.py +11 -0
- cognee/tasks/repo_processor/get_local_dependencies.py +2 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +144 -47
- cognee/tasks/storage/add_data_points.py +33 -3
- cognee/tasks/temporal_graph/__init__.py +1 -0
- cognee/tasks/temporal_graph/add_entities_to_event.py +85 -0
- cognee/tasks/temporal_graph/enrich_events.py +34 -0
- cognee/tasks/temporal_graph/extract_events_and_entities.py +32 -0
- cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py +41 -0
- cognee/tasks/temporal_graph/models.py +49 -0
- cognee/tests/integration/cli/__init__.py +3 -0
- cognee/tests/integration/cli/test_cli_integration.py +331 -0
- cognee/tests/integration/documents/PdfDocument_test.py +2 -2
- cognee/tests/integration/documents/TextDocument_test.py +2 -4
- cognee/tests/integration/documents/UnstructuredDocument_test.py +5 -8
- cognee/tests/{test_deletion.py → test_delete_hard.py} +0 -37
- cognee/tests/test_delete_soft.py +85 -0
- cognee/tests/test_kuzu.py +2 -2
- cognee/tests/test_neo4j.py +2 -2
- cognee/tests/test_permissions.py +3 -3
- cognee/tests/test_relational_db_migration.py +7 -5
- cognee/tests/test_search_db.py +136 -23
- cognee/tests/test_temporal_graph.py +167 -0
- cognee/tests/unit/api/__init__.py +1 -0
- cognee/tests/unit/api/test_conditional_authentication_endpoints.py +246 -0
- cognee/tests/unit/cli/__init__.py +3 -0
- cognee/tests/unit/cli/test_cli_commands.py +483 -0
- cognee/tests/unit/cli/test_cli_edge_cases.py +625 -0
- cognee/tests/unit/cli/test_cli_main.py +173 -0
- cognee/tests/unit/cli/test_cli_runner.py +62 -0
- cognee/tests/unit/cli/test_cli_utils.py +127 -0
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +18 -2
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +12 -15
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +10 -15
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +4 -3
- cognee/tests/unit/modules/retrieval/insights_retriever_test.py +4 -2
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +18 -2
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +225 -0
- cognee/tests/unit/modules/users/__init__.py +1 -0
- cognee/tests/unit/modules/users/test_conditional_authentication.py +277 -0
- cognee/tests/unit/processing/utils/utils_test.py +20 -1
- {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/METADATA +13 -9
- {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/RECORD +247 -135
- cognee-0.3.0.dist-info/entry_points.txt +2 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +0 -1017
- cognee/infrastructure/pipeline/models/Operation.py +0 -60
- cognee/notebooks/github_analysis_step_by_step.ipynb +0 -37
- cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py +0 -7
- cognee/tests/unit/modules/search/search_methods_test.py +0 -223
- /cognee/{infrastructure/databases/graph/networkx → api/v1/memify}/__init__.py +0 -0
- /cognee/{infrastructure/pipeline/models → tasks/codingagents}/__init__.py +0 -0
- {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/WHEEL +0 -0
- {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/licenses/NOTICE.md +0 -0
cognee/__init__.py
CHANGED
@@ -18,6 +18,7 @@ logger = setup_logging()
 from .api.v1.add import add
 from .api.v1.delete import delete
 from .api.v1.cognify import cognify
+from .modules.memify import memify
 from .api.v1.config.config import config
 from .api.v1.datasets.datasets import datasets
 from .api.v1.prune import prune
@@ -26,6 +27,7 @@ from .api.v1.visualize import visualize_graph, start_visualization_server
 from cognee.modules.visualization.cognee_network_visualization import (
     cognee_network_visualization,
 )
+from .api.v1.ui import start_ui

 # Pipelines
 from .modules import pipelines
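The package now exports memify and start_ui at the top level. A minimal sketch of driving the new surface; the zero-argument calls to memify() and start_ui() are assumptions, since their signatures are not shown in this diff:

import asyncio

import cognee


async def main():
    await cognee.add("Cognee turns documents into a knowledge graph.")
    await cognee.cognify()
    await cognee.memify()  # newly exported; argument-free call is an assumption


asyncio.run(main())

# start_ui is also newly exported and presumably serves the bundled web UI:
# cognee.start_ui()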
cognee/__main__.py
ADDED
cognee/api/client.py
CHANGED
@@ -9,7 +9,7 @@ from contextlib import asynccontextmanager
 from fastapi import Request
 from fastapi import FastAPI, status
 from fastapi.encoders import jsonable_encoder
-from fastapi.responses import JSONResponse
+from fastapi.responses import JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.exceptions import RequestValidationError
 from fastapi.openapi.utils import get_openapi
@@ -17,14 +17,18 @@ from fastapi.openapi.utils import get_openapi
 from cognee.exceptions import CogneeApiError
 from cognee.shared.logging_utils import get_logger, setup_logging
 from cognee.api.health import health_checker, HealthStatus
+from cognee.api.v1.cloud.routers import get_checks_router
+from cognee.api.v1.notebooks.routers import get_notebooks_router
 from cognee.api.v1.permissions.routers import get_permissions_router
 from cognee.api.v1.settings.routers import get_settings_router
 from cognee.api.v1.datasets.routers import get_datasets_router
 from cognee.api.v1.cognify.routers import get_code_pipeline_router, get_cognify_router
 from cognee.api.v1.search.routers import get_search_router
+from cognee.api.v1.memify.routers import get_memify_router
 from cognee.api.v1.add.routers import get_add_router
 from cognee.api.v1.delete.routers import get_delete_router
 from cognee.api.v1.responses.routers import get_responses_router
+from cognee.api.v1.sync.routers import get_sync_router
 from cognee.api.v1.users.routers import (
     get_auth_router,
     get_register_router,
@@ -33,6 +37,7 @@ from cognee.api.v1.users.routers import (
     get_users_router,
     get_visualize_router,
 )
+from cognee.modules.users.methods.get_authenticated_user import REQUIRE_AUTHENTICATION

 logger = get_logger()

@@ -83,7 +88,7 @@ app.add_middleware(
     CORSMiddleware,
     allow_origins=allowed_origins,  # Now controlled by env var
     allow_credentials=True,
-    allow_methods=["OPTIONS", "GET", "POST", "DELETE"],
+    allow_methods=["OPTIONS", "GET", "PUT", "POST", "DELETE"],
     allow_headers=["*"],
 )
 # To allow origins, set CORS_ALLOWED_ORIGINS env variable to a comma-separated list, e.g.:
@@ -110,7 +115,11 @@ def custom_openapi():
         },
     }

-
+    if REQUIRE_AUTHENTICATION:
+        openapi_schema["security"] = [{"BearerAuth": []}, {"CookieAuth": []}]
+
+    # Remove global security requirement - let individual endpoints specify their own security
+    # openapi_schema["security"] = [{"BearerAuth": []}, {"CookieAuth": []}]

     app.openapi_schema = openapi_schema

@@ -230,6 +239,8 @@ app.include_router(get_add_router(), prefix="/api/v1/add", tags=["add"])

 app.include_router(get_cognify_router(), prefix="/api/v1/cognify", tags=["cognify"])

+app.include_router(get_memify_router(), prefix="/api/v1/memify", tags=["memify"])
+
 app.include_router(get_search_router(), prefix="/api/v1/search", tags=["search"])

 app.include_router(
@@ -248,6 +259,8 @@ app.include_router(get_delete_router(), prefix="/api/v1/delete", tags=["delete"]

 app.include_router(get_responses_router(), prefix="/api/v1/responses", tags=["responses"])

+app.include_router(get_sync_router(), prefix="/api/v1/sync", tags=["sync"])
+
 codegraph_routes = get_code_pipeline_router()
 if codegraph_routes:
     app.include_router(codegraph_routes, prefix="/api/v1/code-pipeline", tags=["code-pipeline"])
@@ -258,6 +271,18 @@ app.include_router(
     tags=["users"],
 )

+app.include_router(
+    get_notebooks_router(),
+    prefix="/api/v1/notebooks",
+    tags=["notebooks"],
+)
+
+app.include_router(
+    get_checks_router(),
+    prefix="/api/v1/checks",
+    tags=["checks"],
+)
+

 def start_api_server(host: str = "0.0.0.0", port: int = 8000):
     """
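Four routers are newly mounted: /api/v1/memify, /api/v1/sync, /api/v1/notebooks, and /api/v1/checks. A minimal sketch of serving them with the start_api_server helper defined at the bottom of this file (defaults taken from its signature):

from cognee.api.client import start_api_server

# Newly mounted prefixes in 0.3.0: /api/v1/memify, /api/v1/sync,
# /api/v1/notebooks, /api/v1/checks (see the include_router calls above).
# This call serves the FastAPI app defined in this module.
start_api_server(host="0.0.0.0", port=8000)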
cognee/api/health.py
CHANGED
@@ -1,9 +1,10 @@
 """Health check system for cognee API."""

+from io import BytesIO
 import time
 import asyncio
 from datetime import datetime, timezone
-from typing import Dict
+from typing import Dict
 from enum import Enum
 from pydantic import BaseModel

@@ -53,7 +54,7 @@ class HealthChecker:
             # Test connection by creating a session
             session = engine.get_session()
             if session:
-
+                session.close()

             response_time = int((time.time() - start_time) * 1000)
             return ComponentHealth(
@@ -117,12 +118,9 @@ class HealthChecker:
             engine = await get_graph_engine()

             # Test basic operation with actual graph query
-            if hasattr(engine, "execute"):
-                # For SQL-like graph DBs (Neo4j, Memgraph)
-                await engine.execute("MATCH () RETURN count(*) LIMIT 1")
-            elif hasattr(engine, "query"):
+            if hasattr(engine, "query"):
                 # For other graph engines
-                engine.query("MATCH () RETURN count(*) LIMIT 1", {})
+                await engine.query("MATCH () RETURN count(*) LIMIT 1", {})
             # If engine exists but no test method, consider it healthy

             response_time = int((time.time() - start_time) * 1000)
@@ -167,8 +165,8 @@ class HealthChecker:
             else:
                 # For S3, test basic operations
                 test_path = "health_check_test"
-                await storage.store(test_path, b"test")
-                await storage.
+                await storage.store(test_path, BytesIO(b"test"))
+                await storage.remove(test_path)

             response_time = int((time.time() - start_time) * 1000)
             return ComponentHealth(
@@ -190,14 +188,13 @@ class HealthChecker:
         """Check LLM provider health (non-critical)."""
         start_time = time.time()
         try:
-            from cognee.infrastructure.llm.get_llm_client import get_llm_client
             from cognee.infrastructure.llm.config import get_llm_config
+            from cognee.infrastructure.llm import LLMGateway

             config = get_llm_config()

             # Test actual API connection with minimal request
-
-            await client.show_prompt("test", "test")
+            LLMGateway.show_prompt("test", "test")

             response_time = int((time.time() - start_time) * 1000)
             return ComponentHealth(
@@ -226,7 +223,7 @@ class HealthChecker:

             # Test actual embedding generation with minimal text
             engine = get_embedding_engine()
-            await engine.embed_text("test")
+            await engine.embed_text(["test"])

             response_time = int((time.time() - start_time) * 1000)
             return ComponentHealth(
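The probe fixes above swap in calls that actually exist on the adapters: the graph query() is awaited with a params dict, storage store() receives a file-like object and the test artifact is removed afterwards, and embed_text() takes a list. A condensed sketch of the corrected calls, with already-configured handles passed in:

from io import BytesIO


async def probe(graph_engine, storage, embedding_engine):
    # Graph engines expose an async query(query, params) method.
    await graph_engine.query("MATCH () RETURN count(*) LIMIT 1", {})

    # store() takes a file-like object; the test artifact is cleaned up
    # afterwards with remove().
    await storage.store("health_check_test", BytesIO(b"test"))
    await storage.remove("health_check_test")

    # embed_text() expects a list of strings, not a bare string.
    await embedding_engine.embed_text(["test"])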
cognee/api/v1/add/add.py
CHANGED
@@ -1,9 +1,15 @@
 from uuid import UUID
 from typing import Union, BinaryIO, List, Optional

-from cognee.modules.pipelines import Task
 from cognee.modules.users.models import User
-from cognee.modules.pipelines import cognee_pipeline
+from cognee.modules.pipelines import Task, run_pipeline
+from cognee.modules.pipelines.layers.resolve_authorized_user_dataset import (
+    resolve_authorized_user_dataset,
+)
+from cognee.modules.pipelines.layers.reset_dataset_pipeline_run_status import (
+    reset_dataset_pipeline_run_status,
+)
+from cognee.modules.engine.operations.setup import setup
 from cognee.tasks.ingestion import ingest_data, resolve_data_directories


@@ -128,11 +134,11 @@ async def add(

     Optional:
     - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama"
-    - LLM_MODEL: Model name (default: "gpt-
+    - LLM_MODEL: Model name (default: "gpt-5-mini")
     - DEFAULT_USER_EMAIL: Custom default user email
     - DEFAULT_USER_PASSWORD: Custom default user password
     - VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "pgvector"
-    - GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j"
+    - GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j"

     """
     tasks = [
@@ -140,11 +146,19 @@ async def add(
         Task(ingest_data, dataset_name, user, node_set, dataset_id, preferred_loaders),
     ]

+    await setup()
+
+    user, authorized_dataset = await resolve_authorized_user_dataset(dataset_id, dataset_name, user)
+
+    await reset_dataset_pipeline_run_status(
+        authorized_dataset.id, user, pipeline_names=["add_pipeline", "cognify_pipeline"]
+    )
+
     pipeline_run_info = None

-    async for run_info in cognee_pipeline(
+    async for run_info in run_pipeline(
         tasks=tasks,
-        datasets=
+        datasets=[authorized_dataset.id],
         data=data,
         user=user,
         pipeline_name="add_pipeline",
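add() now runs setup, dataset authorization, and a pipeline-run status reset itself before invoking run_pipeline. A hedged usage sketch; the payload, dataset name, and node_set values are illustrative:

import asyncio

import cognee


async def main():
    # node_set is threaded through to ingest_data for graph grouping
    # and access control; values here are illustrative.
    await cognee.add(
        "Example document text.",
        "my_dataset",
        node_set=["team-a"],
    )


asyncio.run(main())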
cognee/api/v1/add/routers/get_add_router.py
CHANGED
@@ -1,6 +1,3 @@
-import os
-import requests
-import subprocess
 from uuid import UUID

 from fastapi import APIRouter
@@ -24,7 +21,9 @@ def get_add_router() -> APIRouter:
     async def add(
         data: List[UploadFile] = File(default=None),
         datasetName: Optional[str] = Form(default=None),
+        # Note: Literal is needed for Swagger use
         datasetId: Union[UUID, Literal[""], None] = Form(default=None, examples=[""]),
+        node_set: Optional[List[str]] = Form(default=[""], example=[""]),
         user: User = Depends(get_authenticated_user),
     ):
         """
@@ -41,6 +40,8 @@ def get_add_router() -> APIRouter:
         - Regular file uploads
         - **datasetName** (Optional[str]): Name of the dataset to add data to
         - **datasetId** (Optional[UUID]): UUID of an already existing dataset
+        - **node_set** Optional[list[str]]: List of node identifiers for graph organization and access control.
+          Used for grouping related data points in the knowledge graph.

         Either datasetName or datasetId must be provided.

@@ -57,17 +58,12 @@ def get_add_router() -> APIRouter:

         ## Notes
         - To add data to datasets not owned by the user, use dataset_id (when ENABLE_BACKEND_ACCESS_CONTROL is set to True)
-        - GitHub repositories are cloned and all files are processed
-        - HTTP URLs are fetched and their content is processed
-        - The ALLOW_HTTP_REQUESTS environment variable controls URL processing
         - datasetId value can only be the UUID of an already existing dataset
         """
         send_telemetry(
             "Add API Endpoint Invoked",
             user.id,
-            additional_properties={
-                "endpoint": "POST /v1/add",
-            },
+            additional_properties={"endpoint": "POST /v1/add", "node_set": node_set},
         )

         from cognee.api.v1.add import add as cognee_add
@@ -76,34 +72,13 @@ def get_add_router() -> APIRouter:
             raise ValueError("Either datasetId or datasetName must be provided.")

         try:
-
-
-
-
-            ):
-
-
-                    repo_name = data.split("/")[-1].replace(".git", "")
-                    subprocess.run(["git", "clone", data, f".data/{repo_name}"], check=True)
-                    # TODO: Update add call with dataset info
-                    await cognee_add(
-                        "data://.data/",
-                        f"{repo_name}",
-                    )
-                else:
-                    # Fetch and store the data from other types of URL using curl
-                    response = requests.get(data)
-                    response.raise_for_status()
-
-                    file_data = await response.content()
-                    # TODO: Update add call with dataset info
-                    return await cognee_add(file_data)
-            else:
-                add_run = await cognee_add(data, datasetName, user=user, dataset_id=datasetId)
-
-                if isinstance(add_run, PipelineRunErrored):
-                    return JSONResponse(status_code=420, content=add_run.model_dump(mode="json"))
-                return add_run.model_dump()
+            add_run = await cognee_add(
+                data, datasetName, user=user, dataset_id=datasetId, node_set=node_set
+            )
+
+            if isinstance(add_run, PipelineRunErrored):
+                return JSONResponse(status_code=420, content=add_run.model_dump(mode="json"))
+            return add_run.model_dump()
         except Exception as error:
             return JSONResponse(status_code=409, content={"error": str(error)})
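With the GitHub-clone and URL-fetch branches removed, the endpoint is a thin wrapper over cognee.add that additionally forwards the new node_set form field. An illustrative multipart request against a local server (authentication headers omitted; a real deployment may require them):

import requests

with open("notes.txt", "rb") as upload:
    response = requests.post(
        "http://localhost:8000/api/v1/add",
        files={"data": upload},
        data={"datasetName": "my_dataset", "node_set": ["team-a"]},
    )
print(response.status_code, response.json())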
cognee/api/v1/cloud/routers/__init__.py
ADDED
@@ -0,0 +1 @@
+from .get_checks_router import get_checks_router
cognee/api/v1/cloud/routers/get_checks_router.py
ADDED
@@ -0,0 +1,23 @@
+from fastapi import APIRouter, Depends, Request
+
+from cognee.modules.users.models import User
+from cognee.modules.users.methods import get_authenticated_user
+from cognee.modules.cloud.operations import check_api_key
+from cognee.modules.cloud.exceptions import CloudApiKeyMissingError
+
+
+def get_checks_router():
+    router = APIRouter()
+
+    @router.post("/connection")
+    async def get_connection_check_endpoint(
+        request: Request, user: User = Depends(get_authenticated_user)
+    ):
+        api_token = request.headers.get("X-Api-Key")
+
+        if api_token is None:
+            return CloudApiKeyMissingError()
+
+        return await check_api_key(api_token)
+
+    return router
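The new checks router validates a Cognee Cloud API key supplied in the X-Api-Key header; client.py mounts it under /api/v1/checks. An illustrative call against a local server (the key value is a placeholder):

import requests

response = requests.post(
    "http://localhost:8000/api/v1/checks/connection",
    headers={"X-Api-Key": "<cognee-cloud-api-key>"},
)
print(response.status_code, response.text)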
cognee/api/v1/cognify/code_graph_pipeline.py
CHANGED
@@ -1,6 +1,7 @@
 import os
 import pathlib
 import asyncio
+from typing import Optional
 from cognee.shared.logging_utils import get_logger, setup_logging
 from cognee.modules.observability.get_observe import get_observe

@@ -28,7 +29,12 @@ logger = get_logger("code_graph_pipeline")


 @observe
-async def run_code_graph_pipeline(repo_path, include_docs=False):
+async def run_code_graph_pipeline(
+    repo_path,
+    include_docs=False,
+    excluded_paths: Optional[list[str]] = None,
+    supported_languages: Optional[list[str]] = None,
+):
     import cognee
     from cognee.low_level import setup

@@ -41,7 +47,12 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
     detailed_extraction = True

     tasks = [
-        Task(
+        Task(
+            get_repo_file_dependencies,
+            detailed_extraction=detailed_extraction,
+            supported_languages=supported_languages,
+            excluded_paths=excluded_paths,
+        ),
         # Task(summarize_code, task_config={"batch_size": 500}), # This task takes a long time to complete
         Task(add_data_points, task_config={"batch_size": 30}),
     ]
@@ -89,7 +100,7 @@ if __name__ == "__main__":

     async def main():
         async for run_status in run_code_graph_pipeline("REPO_PATH"):
-            print(f"{run_status.
+            print(f"{run_status.pipeline_run_id}: {run_status.status}")

         file_path = os.path.join(
             pathlib.Path(__file__).parent, ".artifacts", "graph_visualization.html"
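run_code_graph_pipeline() gains excluded_paths and supported_languages filters that are forwarded to get_repo_file_dependencies. A usage sketch modeled on the __main__ block above; the filter values are illustrative, since accepted formats are not shown in this diff:

import asyncio

from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline


async def main():
    async for run_status in run_code_graph_pipeline(
        "path/to/repo",
        include_docs=False,
        excluded_paths=["tests/"],  # assumed path-pattern strings
        supported_languages=["python"],  # assumed language identifiers
    ):
        print(f"{run_status.pipeline_run_id}: {run_status.status}")


asyncio.run(main())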
cognee/api/v1/cognify/cognify.py
CHANGED
@@ -7,12 +7,10 @@ from cognee.shared.logging_utils import get_logger
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.infrastructure.llm import get_max_chunk_tokens

-from cognee.modules.pipelines import cognee_pipeline
+from cognee.modules.pipelines import run_pipeline
 from cognee.modules.pipelines.tasks.task import Task
 from cognee.modules.chunking.TextChunker import TextChunker
 from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
-from cognee.modules.pipelines.models.PipelineRunInfo import PipelineRunCompleted, PipelineRunErrored
-from cognee.modules.pipelines.queues.pipeline_run_info_queues import push_to_queue
 from cognee.modules.users.models import User

 from cognee.tasks.documents import (
@@ -23,6 +21,12 @@ from cognee.tasks.documents import (
 from cognee.tasks.graph import extract_graph_from_data
 from cognee.tasks.storage import add_data_points
 from cognee.tasks.summarization import summarize_text
+from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor
+from cognee.tasks.temporal_graph.extract_events_and_entities import extract_events_and_timestamps
+from cognee.tasks.temporal_graph.extract_knowledge_graph_from_events import (
+    extract_knowledge_graph_from_events,
+)
+

 logger = get_logger("cognify")

@@ -40,6 +44,8 @@ async def cognify(
     graph_db_config: dict = None,
     run_in_background: bool = False,
     incremental_loading: bool = True,
+    custom_prompt: Optional[str] = None,
+    temporal_cognify: bool = False,
 ):
     """
     Transform ingested data into a structured knowledge graph.
@@ -91,7 +97,7 @@ async def cognify(
         - LangchainChunker: Recursive character splitting with overlap
         Determines how documents are segmented for processing.
     chunk_size: Maximum tokens per chunk. Auto-calculated based on LLM if None.
-        Formula: min(
+        Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2)
         Default limits: ~512-8192 tokens depending on models.
         Smaller chunks = more granular but potentially fragmented knowledge.
     ontology_file_path: Path to RDF/OWL ontology file for domain-specific entity types.
@@ -102,6 +108,10 @@ async def cognify(
         If False, waits for completion before returning.
         Background mode recommended for large datasets (>100MB).
         Use pipeline_run_id from return value to monitor progress.
+    custom_prompt: Optional custom prompt string to use for entity extraction and graph generation.
+        If provided, this prompt will be used instead of the default prompts for
+        knowledge graph extraction. The prompt should guide the LLM on how to
+        extract entities and relationships from the text content.

     Returns:
         Union[dict, list[PipelineRunInfo]]:
@@ -178,115 +188,27 @@
     - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
     - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
     """
-
-
-    if run_in_background:
-        return await run_cognify_as_background_process(
-            tasks=tasks,
-            user=user,
-            datasets=datasets,
-            vector_db_config=vector_db_config,
-            graph_db_config=graph_db_config,
-            incremental_loading=incremental_loading,
-        )
+    if temporal_cognify:
+        tasks = await get_temporal_tasks(user, chunker, chunk_size)
     else:
-
-
-            user=user,
-            datasets=datasets,
-            vector_db_config=vector_db_config,
-            graph_db_config=graph_db_config,
-            incremental_loading=incremental_loading,
+        tasks = await get_default_tasks(
+            user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt
         )

+    # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
+    pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background)

-
-
-
-        datasets,
-        graph_db_config: dict = None,
-        vector_db_config: dict = False,
-        incremental_loading: bool = True,
-    ):
-    total_run_info = {}
-
-    async for run_info in cognee_pipeline(
+    # Run the run_pipeline in the background or blocking based on executor
+    return await pipeline_executor_func(
+        pipeline=run_pipeline,
         tasks=tasks,
-        datasets=datasets,
         user=user,
-
-        graph_db_config=graph_db_config,
+        datasets=datasets,
         vector_db_config=vector_db_config,
+        graph_db_config=graph_db_config,
         incremental_loading=incremental_loading,
-
-
-            total_run_info[run_info.dataset_id] = run_info
-        else:
-            total_run_info = run_info
-
-    return total_run_info
-
-
-async def run_cognify_as_background_process(
-    tasks,
-    user,
-    datasets,
-    graph_db_config: dict = None,
-    vector_db_config: dict = False,
-    incremental_loading: bool = True,
-):
-    # Convert dataset to list if it's a string
-    if isinstance(datasets, str):
-        datasets = [datasets]
-
-    # Store pipeline status for all pipelines
-    pipeline_run_started_info = {}
-
-    async def handle_rest_of_the_run(pipeline_list):
-        # Execute all provided pipelines one by one to avoid database write conflicts
-        # TODO: Convert to async gather task instead of for loop when Queue mechanism for database is created
-        for pipeline in pipeline_list:
-            while True:
-                try:
-                    pipeline_run_info = await anext(pipeline)
-
-                    push_to_queue(pipeline_run_info.pipeline_run_id, pipeline_run_info)
-
-                    if isinstance(pipeline_run_info, PipelineRunCompleted) or isinstance(
-                        pipeline_run_info, PipelineRunErrored
-                    ):
-                        break
-                except StopAsyncIteration:
-                    break
-
-    # Start all pipelines to get started status
-    pipeline_list = []
-    for dataset in datasets:
-        pipeline_run = cognee_pipeline(
-            tasks=tasks,
-            user=user,
-            datasets=dataset,
-            pipeline_name="cognify_pipeline",
-            graph_db_config=graph_db_config,
-            vector_db_config=vector_db_config,
-            incremental_loading=incremental_loading,
-        )
-
-        # Save dataset Pipeline run started info
-        run_info = await anext(pipeline_run)
-        pipeline_run_started_info[run_info.dataset_id] = run_info
-
-        if pipeline_run_started_info[run_info.dataset_id].payload:
-            # Remove payload info to avoid serialization
-            # TODO: Handle payload serialization
-            pipeline_run_started_info[run_info.dataset_id].payload = []
-
-        pipeline_list.append(pipeline_run)
-
-    # Send all started pipelines to execute one by one in background
-    asyncio.create_task(handle_rest_of_the_run(pipeline_list=pipeline_list))
-
-    return pipeline_run_started_info
+        pipeline_name="cognify_pipeline",
+    )


@@ -295,6 +217,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
     chunker=TextChunker,
     chunk_size: int = None,
     ontology_file_path: Optional[str] = None,
+    custom_prompt: Optional[str] = None,
 ) -> list[Task]:
     default_tasks = [
         Task(classify_documents),
@@ -308,6 +231,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
             extract_graph_from_data,
             graph_model=graph_model,
             ontology_adapter=OntologyResolver(ontology_file=ontology_file_path),
+            custom_prompt=custom_prompt,
             task_config={"batch_size": 10},
         ),  # Generate knowledge graphs from the document chunks.
         Task(
@@ -318,3 +242,41 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
     ]

     return default_tasks
+
+
+async def get_temporal_tasks(
+    user: User = None, chunker=TextChunker, chunk_size: int = None
+) -> list[Task]:
+    """
+    Builds and returns a list of temporal processing tasks to be executed in sequence.
+
+    The pipeline includes:
+    1. Document classification.
+    2. Dataset permission checks (requires "write" access).
+    3. Document chunking with a specified or default chunk size.
+    4. Event and timestamp extraction from chunks.
+    5. Knowledge graph extraction from events.
+    6. Batched insertion of data points.
+
+    Args:
+        user (User, optional): The user requesting task execution, used for permission checks.
+        chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
+        chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
+
+    Returns:
+        list[Task]: A list of Task objects representing the temporal processing pipeline.
+    """
+    temporal_tasks = [
+        Task(classify_documents),
+        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
+        Task(
+            extract_chunks_from_documents,
+            max_chunk_size=chunk_size or get_max_chunk_tokens(),
+            chunker=chunker,
+        ),
+        Task(extract_events_and_timestamps, task_config={"chunk_size": 10}),
+        Task(extract_knowledge_graph_from_events),
+        Task(add_data_points, task_config={"batch_size": 10}),
+    ]
+
+    return temporal_tasks
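cognify() now picks its task list up front (get_temporal_tasks vs get_default_tasks) and hands background-vs-blocking execution to get_pipeline_executor. A hedged sketch of the two new parameters; dataset selection is omitted and the prompt text is illustrative:

import asyncio

import cognee


async def main():
    # temporal_cognify=True swaps in the event/timestamp extraction tasks.
    await cognee.cognify(temporal_cognify=True)

    # custom_prompt overrides the default graph-extraction prompts.
    await cognee.cognify(
        custom_prompt="Extract people, places, and events as graph nodes."
    )


asyncio.run(main())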
|