cognee 0.2.3.dev1__py3-none-any.whl → 0.3.0.dev0__py3-none-any.whl
- cognee/__init__.py +1 -0
- cognee/__main__.py +4 -0
- cognee/api/client.py +28 -3
- cognee/api/health.py +10 -13
- cognee/api/v1/add/add.py +20 -6
- cognee/api/v1/add/routers/get_add_router.py +12 -37
- cognee/api/v1/cloud/routers/__init__.py +1 -0
- cognee/api/v1/cloud/routers/get_checks_router.py +23 -0
- cognee/api/v1/cognify/code_graph_pipeline.py +14 -3
- cognee/api/v1/cognify/cognify.py +67 -105
- cognee/api/v1/cognify/routers/get_cognify_router.py +11 -3
- cognee/api/v1/datasets/routers/get_datasets_router.py +16 -5
- cognee/api/v1/memify/routers/__init__.py +1 -0
- cognee/api/v1/memify/routers/get_memify_router.py +100 -0
- cognee/api/v1/notebooks/routers/__init__.py +1 -0
- cognee/api/v1/notebooks/routers/get_notebooks_router.py +96 -0
- cognee/api/v1/responses/default_tools.py +4 -0
- cognee/api/v1/responses/dispatch_function.py +6 -1
- cognee/api/v1/responses/models.py +1 -1
- cognee/api/v1/search/routers/get_search_router.py +20 -1
- cognee/api/v1/search/search.py +17 -4
- cognee/api/v1/sync/__init__.py +17 -0
- cognee/api/v1/sync/routers/__init__.py +3 -0
- cognee/api/v1/sync/routers/get_sync_router.py +241 -0
- cognee/api/v1/sync/sync.py +877 -0
- cognee/api/v1/users/routers/get_auth_router.py +13 -1
- cognee/base_config.py +10 -1
- cognee/cli/__init__.py +10 -0
- cognee/cli/_cognee.py +180 -0
- cognee/cli/commands/__init__.py +1 -0
- cognee/cli/commands/add_command.py +80 -0
- cognee/cli/commands/cognify_command.py +128 -0
- cognee/cli/commands/config_command.py +225 -0
- cognee/cli/commands/delete_command.py +80 -0
- cognee/cli/commands/search_command.py +149 -0
- cognee/cli/config.py +33 -0
- cognee/cli/debug.py +21 -0
- cognee/cli/echo.py +45 -0
- cognee/cli/exceptions.py +23 -0
- cognee/cli/minimal_cli.py +97 -0
- cognee/cli/reference.py +26 -0
- cognee/cli/suppress_logging.py +12 -0
- cognee/eval_framework/corpus_builder/corpus_builder_executor.py +2 -2
- cognee/eval_framework/eval_config.py +1 -1
- cognee/infrastructure/databases/graph/config.py +10 -4
- cognee/infrastructure/databases/graph/get_graph_engine.py +4 -9
- cognee/infrastructure/databases/graph/kuzu/adapter.py +199 -2
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +138 -0
- cognee/infrastructure/databases/relational/__init__.py +2 -0
- cognee/infrastructure/databases/relational/get_async_session.py +15 -0
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +6 -1
- cognee/infrastructure/databases/relational/with_async_session.py +25 -0
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +1 -1
- cognee/infrastructure/databases/vector/config.py +13 -6
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +6 -4
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +16 -7
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +5 -5
- cognee/infrastructure/databases/vector/embeddings/config.py +2 -2
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +2 -6
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +10 -7
- cognee/infrastructure/files/storage/LocalFileStorage.py +9 -0
- cognee/infrastructure/files/storage/S3FileStorage.py +5 -0
- cognee/infrastructure/files/storage/StorageManager.py +7 -1
- cognee/infrastructure/files/storage/storage.py +16 -0
- cognee/infrastructure/files/utils/get_data_file_path.py +14 -9
- cognee/infrastructure/files/utils/get_file_metadata.py +2 -1
- cognee/infrastructure/llm/LLMGateway.py +32 -5
- cognee/infrastructure/llm/config.py +6 -4
- cognee/infrastructure/llm/prompts/extract_query_time.txt +15 -0
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +25 -0
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +30 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +16 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +2 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py +44 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py +1 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_content_graph.py +19 -15
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py +46 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +3 -3
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +3 -3
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +14 -8
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +6 -4
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +28 -4
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +2 -2
- cognee/infrastructure/llm/tokenizer/HuggingFace/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/Mistral/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +6 -6
- cognee/infrastructure/llm/utils.py +7 -7
- cognee/infrastructure/utils/run_sync.py +8 -1
- cognee/modules/chunking/models/DocumentChunk.py +4 -3
- cognee/modules/cloud/exceptions/CloudApiKeyMissingError.py +15 -0
- cognee/modules/cloud/exceptions/CloudConnectionError.py +15 -0
- cognee/modules/cloud/exceptions/__init__.py +2 -0
- cognee/modules/cloud/operations/__init__.py +1 -0
- cognee/modules/cloud/operations/check_api_key.py +25 -0
- cognee/modules/data/deletion/prune_system.py +1 -1
- cognee/modules/data/methods/__init__.py +2 -0
- cognee/modules/data/methods/check_dataset_name.py +1 -1
- cognee/modules/data/methods/create_authorized_dataset.py +19 -0
- cognee/modules/data/methods/get_authorized_dataset.py +11 -5
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +16 -0
- cognee/modules/data/methods/get_dataset_data.py +1 -1
- cognee/modules/data/methods/load_or_create_datasets.py +2 -20
- cognee/modules/engine/models/Event.py +16 -0
- cognee/modules/engine/models/Interval.py +8 -0
- cognee/modules/engine/models/Timestamp.py +13 -0
- cognee/modules/engine/models/__init__.py +3 -0
- cognee/modules/engine/utils/__init__.py +2 -0
- cognee/modules/engine/utils/generate_event_datapoint.py +46 -0
- cognee/modules/engine/utils/generate_timestamp_datapoint.py +51 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +2 -2
- cognee/modules/graph/methods/get_formatted_graph_data.py +3 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/resolve_edges_to_text.py +71 -0
- cognee/modules/memify/__init__.py +1 -0
- cognee/modules/memify/memify.py +118 -0
- cognee/modules/notebooks/methods/__init__.py +5 -0
- cognee/modules/notebooks/methods/create_notebook.py +26 -0
- cognee/modules/notebooks/methods/delete_notebook.py +13 -0
- cognee/modules/notebooks/methods/get_notebook.py +21 -0
- cognee/modules/notebooks/methods/get_notebooks.py +18 -0
- cognee/modules/notebooks/methods/update_notebook.py +17 -0
- cognee/modules/notebooks/models/Notebook.py +53 -0
- cognee/modules/notebooks/models/__init__.py +1 -0
- cognee/modules/notebooks/operations/__init__.py +1 -0
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +55 -0
- cognee/modules/pipelines/__init__.py +1 -1
- cognee/modules/pipelines/exceptions/tasks.py +18 -0
- cognee/modules/pipelines/layers/__init__.py +1 -0
- cognee/modules/pipelines/layers/check_pipeline_run_qualification.py +59 -0
- cognee/modules/pipelines/layers/pipeline_execution_mode.py +127 -0
- cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +28 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +34 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +55 -0
- cognee/modules/pipelines/layers/setup_and_check_environment.py +41 -0
- cognee/modules/pipelines/layers/validate_pipeline_tasks.py +20 -0
- cognee/modules/pipelines/methods/__init__.py +2 -0
- cognee/modules/pipelines/methods/get_pipeline_runs_by_dataset.py +34 -0
- cognee/modules/pipelines/methods/reset_pipeline_run_status.py +16 -0
- cognee/modules/pipelines/operations/__init__.py +0 -1
- cognee/modules/pipelines/operations/log_pipeline_run_initiated.py +1 -1
- cognee/modules/pipelines/operations/pipeline.py +24 -138
- cognee/modules/pipelines/operations/run_tasks.py +17 -41
- cognee/modules/retrieval/base_feedback.py +11 -0
- cognee/modules/retrieval/base_graph_retriever.py +18 -0
- cognee/modules/retrieval/base_retriever.py +1 -1
- cognee/modules/retrieval/code_retriever.py +8 -0
- cognee/modules/retrieval/coding_rules_retriever.py +31 -0
- cognee/modules/retrieval/completion_retriever.py +9 -3
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -0
- cognee/modules/retrieval/cypher_search_retriever.py +1 -9
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +29 -13
- cognee/modules/retrieval/graph_completion_cot_retriever.py +30 -13
- cognee/modules/retrieval/graph_completion_retriever.py +107 -56
- cognee/modules/retrieval/graph_summary_completion_retriever.py +5 -1
- cognee/modules/retrieval/insights_retriever.py +14 -3
- cognee/modules/retrieval/natural_language_retriever.py +0 -4
- cognee/modules/retrieval/summaries_retriever.py +1 -1
- cognee/modules/retrieval/temporal_retriever.py +152 -0
- cognee/modules/retrieval/user_qa_feedback.py +83 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +7 -32
- cognee/modules/retrieval/utils/completion.py +10 -3
- cognee/modules/retrieval/utils/extract_uuid_from_node.py +18 -0
- cognee/modules/retrieval/utils/models.py +40 -0
- cognee/modules/search/methods/get_search_type_tools.py +168 -0
- cognee/modules/search/methods/no_access_control_search.py +47 -0
- cognee/modules/search/methods/search.py +239 -118
- cognee/modules/search/types/SearchResult.py +21 -0
- cognee/modules/search/types/SearchType.py +3 -0
- cognee/modules/search/types/__init__.py +1 -0
- cognee/modules/search/utils/__init__.py +2 -0
- cognee/modules/search/utils/prepare_search_result.py +41 -0
- cognee/modules/search/utils/transform_context_to_graph.py +38 -0
- cognee/modules/settings/get_settings.py +2 -2
- cognee/modules/sync/__init__.py +1 -0
- cognee/modules/sync/methods/__init__.py +23 -0
- cognee/modules/sync/methods/create_sync_operation.py +53 -0
- cognee/modules/sync/methods/get_sync_operation.py +107 -0
- cognee/modules/sync/methods/update_sync_operation.py +248 -0
- cognee/modules/sync/models/SyncOperation.py +142 -0
- cognee/modules/sync/models/__init__.py +3 -0
- cognee/modules/users/__init__.py +0 -1
- cognee/modules/users/methods/__init__.py +4 -1
- cognee/modules/users/methods/create_user.py +26 -1
- cognee/modules/users/methods/get_authenticated_user.py +36 -42
- cognee/modules/users/methods/get_default_user.py +3 -1
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +2 -1
- cognee/root_dir.py +19 -0
- cognee/shared/CodeGraphEntities.py +1 -0
- cognee/shared/logging_utils.py +143 -32
- cognee/shared/utils.py +0 -1
- cognee/tasks/codingagents/coding_rule_associations.py +127 -0
- cognee/tasks/graph/extract_graph_from_data.py +6 -2
- cognee/tasks/ingestion/save_data_item_to_storage.py +23 -0
- cognee/tasks/memify/__init__.py +2 -0
- cognee/tasks/memify/extract_subgraph.py +7 -0
- cognee/tasks/memify/extract_subgraph_chunks.py +11 -0
- cognee/tasks/repo_processor/get_local_dependencies.py +2 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +144 -47
- cognee/tasks/storage/add_data_points.py +33 -3
- cognee/tasks/temporal_graph/__init__.py +1 -0
- cognee/tasks/temporal_graph/add_entities_to_event.py +85 -0
- cognee/tasks/temporal_graph/enrich_events.py +34 -0
- cognee/tasks/temporal_graph/extract_events_and_entities.py +32 -0
- cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py +41 -0
- cognee/tasks/temporal_graph/models.py +49 -0
- cognee/tests/integration/cli/__init__.py +3 -0
- cognee/tests/integration/cli/test_cli_integration.py +331 -0
- cognee/tests/integration/documents/PdfDocument_test.py +2 -2
- cognee/tests/integration/documents/TextDocument_test.py +2 -4
- cognee/tests/integration/documents/UnstructuredDocument_test.py +5 -8
- cognee/tests/{test_deletion.py → test_delete_hard.py} +0 -37
- cognee/tests/test_delete_soft.py +85 -0
- cognee/tests/test_kuzu.py +2 -2
- cognee/tests/test_neo4j.py +2 -2
- cognee/tests/test_permissions.py +3 -3
- cognee/tests/test_relational_db_migration.py +7 -5
- cognee/tests/test_search_db.py +136 -23
- cognee/tests/test_temporal_graph.py +167 -0
- cognee/tests/unit/api/__init__.py +1 -0
- cognee/tests/unit/api/test_conditional_authentication_endpoints.py +246 -0
- cognee/tests/unit/cli/__init__.py +3 -0
- cognee/tests/unit/cli/test_cli_commands.py +483 -0
- cognee/tests/unit/cli/test_cli_edge_cases.py +625 -0
- cognee/tests/unit/cli/test_cli_main.py +173 -0
- cognee/tests/unit/cli/test_cli_runner.py +62 -0
- cognee/tests/unit/cli/test_cli_utils.py +127 -0
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +18 -2
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +12 -15
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +10 -15
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +4 -3
- cognee/tests/unit/modules/retrieval/insights_retriever_test.py +4 -2
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +18 -2
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +225 -0
- cognee/tests/unit/modules/users/__init__.py +1 -0
- cognee/tests/unit/modules/users/test_conditional_authentication.py +277 -0
- cognee/tests/unit/processing/utils/utils_test.py +20 -1
- {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dev0.dist-info}/METADATA +13 -9
- {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dev0.dist-info}/RECORD +245 -135
- cognee-0.3.0.dev0.dist-info/entry_points.txt +2 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +0 -1017
- cognee/infrastructure/pipeline/models/Operation.py +0 -60
- cognee/notebooks/github_analysis_step_by_step.ipynb +0 -37
- cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py +0 -7
- cognee/tests/unit/modules/search/search_methods_test.py +0 -223
- /cognee/{infrastructure/databases/graph/networkx → api/v1/memify}/__init__.py +0 -0
- /cognee/{infrastructure/pipeline/models → tasks/codingagents}/__init__.py +0 -0
- {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dev0.dist-info}/WHEEL +0 -0
- {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dev0.dist-info}/licenses/NOTICE.md +0 -0

cognee/modules/engine/utils/generate_event_datapoint.py
@@ -0,0 +1,46 @@
+from cognee.modules.engine.models import Interval, Event
+from cognee.modules.engine.utils.generate_timestamp_datapoint import generate_timestamp_datapoint
+
+
+def generate_event_datapoint(event) -> Event:
+    """
+    Generates an Event datapoint from a given event model, including temporal metadata if available.
+
+    The function maps the basic attributes (name, description, location) from the input event
+    and enriches them with temporal information. If start and end times are provided, an
+    Interval is created. If only one timestamp is available, it is added directly. Temporal
+    information is also appended to the event description for context.
+
+    Args:
+        event: An event model instance containing attributes such as name, description,
+            location, time_from, and time_to.
+
+    Returns:
+        Event: A structured Event object with name, description, location, and enriched
+            temporal details.
+    """
+    # Base event data
+    event_data = {
+        "name": event.name,
+        "description": event.description,
+        "location": event.location,
+    }
+
+    # Create timestamps if they exist
+    time_from = generate_timestamp_datapoint(event.time_from) if event.time_from else None
+    time_to = generate_timestamp_datapoint(event.time_to) if event.time_to else None
+
+    # Add temporal information
+    if time_from and time_to:
+        event_data["during"] = Interval(time_from=time_from, time_to=time_to)
+        # Enrich description with temporal info
+        temporal_info = f"\n---\nTime data: {time_from.timestamp_str} to {time_to.timestamp_str}"
+        event_data["description"] = (event_data["description"] or "Event") + temporal_info
+    elif time_from or time_to:
+        timestamp = time_from or time_to
+        event_data["at"] = timestamp
+        # Enrich description with temporal info
+        temporal_info = f"\n---\nTime data: {timestamp.timestamp_str}"
+        event_data["description"] = (event_data["description"] or "Event") + temporal_info
+
+    return Event(**event_data)
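
Note: the two branches above differ only in whether the event gets a `during` Interval (both endpoints) or an `at` timestamp (single endpoint); either way the time data is appended to the description. A rough single-endpoint sketch, using a SimpleNamespace as a hypothetical stand-in for the temporal-graph event model (the real models live in cognee/tasks/temporal_graph/models.py, added in this release):

    from types import SimpleNamespace

    # Hypothetical stand-in for an extracted event model.
    event = SimpleNamespace(
        name="Moon landing",
        description="First crewed lunar landing.",
        location="Sea of Tranquility",
        time_from=SimpleNamespace(year=1969, month=7, day=20, hour=20, minute=17, second=40),
        time_to=None,
    )

    datapoint = generate_event_datapoint(event)
    # datapoint.at holds the normalized Timestamp, and datapoint.description
    # now ends with: "\n---\nTime data: 1969-07-20 20:17:40"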

cognee/modules/engine/utils/generate_timestamp_datapoint.py
@@ -0,0 +1,51 @@
+from datetime import datetime, timezone
+from cognee.modules.engine.models import Interval, Timestamp, Event
+from cognee.modules.engine.utils import generate_node_id
+
+
+def generate_timestamp_datapoint(ts: Timestamp) -> Timestamp:
+    """
+    Generates a normalized Timestamp datapoint from a given Timestamp model.
+
+    The function converts the provided timestamp into an integer representation,
+    constructs a human-readable string format, and creates a new Timestamp object
+    with a unique identifier.
+
+    Args:
+        ts (Timestamp): The input Timestamp model containing date and time components.
+
+    Returns:
+        Timestamp: A new Timestamp object with a generated ID, integer representation,
+            original components, and formatted string.
+    """
+
+    time_at = date_to_int(ts)
+    timestamp_str = (
+        f"{ts.year:04d}-{ts.month:02d}-{ts.day:02d} {ts.hour:02d}:{ts.minute:02d}:{ts.second:02d}"
+    )
+    return Timestamp(
+        id=generate_node_id(str(time_at)),
+        time_at=time_at,
+        year=ts.year,
+        month=ts.month,
+        day=ts.day,
+        hour=ts.hour,
+        minute=ts.minute,
+        second=ts.second,
+        timestamp_str=timestamp_str,
+    )
+
+
+def date_to_int(ts: Timestamp) -> int:
+    """
+    Converts a Timestamp model into an integer representation in milliseconds since the Unix epoch (UTC).
+
+    Args:
+        ts (Timestamp): The input Timestamp model containing year, month, day, hour, minute, and second.
+
+    Returns:
+        int: The UTC timestamp in milliseconds since January 1, 1970.
+    """
+    dt = datetime(ts.year, ts.month, ts.day, ts.hour, ts.minute, ts.second, tzinfo=timezone.utc)
+    time = int(dt.timestamp() * 1000)
+    return time
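
Note: a quick sanity check on the millisecond arithmetic in date_to_int (illustrative, not from the diff): one day after the epoch is 24 * 3600 * 1000 = 86,400,000 ms.

    from datetime import datetime, timezone

    dt = datetime(1970, 1, 2, 0, 0, 0, tzinfo=timezone.utc)
    assert int(dt.timestamp() * 1000) == 86_400_000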

cognee/modules/graph/cognee_graph/CogneeGraph.py
@@ -76,7 +76,7 @@ class CogneeGraph(CogneeAbstractGraph):
         start_time = time.time()
 
         # Determine projection strategy
-        if node_type is not None and node_name
+        if node_type is not None and node_name not in [None, [], ""]:
             nodes_data, edges_data = await adapter.get_nodeset_subgraph(
                 node_type=node_type, node_name=node_name
             )

@@ -180,7 +180,7 @@ class CogneeGraph(CogneeAbstractGraph):
             logger.error(f"Error mapping vector distances to edges: {str(ex)}")
             raise ex
 
-    async def calculate_top_triplet_importances(self, k: int) -> List:
+    async def calculate_top_triplet_importances(self, k: int) -> List[Edge]:
         def score(edge):
             n1 = edge.node1.attributes.get("vector_distance", 1)
             n2 = edge.node2.attributes.get("vector_distance", 1)

cognee/modules/graph/methods/get_formatted_graph_data.py
@@ -3,10 +3,11 @@ from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.context_global_variables import set_database_global_context_variables
 from cognee.modules.data.exceptions.exceptions import DatasetNotFoundError
 from cognee.modules.data.methods import get_authorized_dataset
+from cognee.modules.users.models import User
 
 
-async def get_formatted_graph_data(dataset_id: UUID,
-    dataset = await get_authorized_dataset(
+async def get_formatted_graph_data(dataset_id: UUID, user: User):
+    dataset = await get_authorized_dataset(user, dataset_id)
     if not dataset:
         raise DatasetNotFoundError(message="Dataset not found.")
 

cognee/modules/graph/utils/__init__.py
@@ -4,3 +4,4 @@ from .get_model_instance_from_graph import get_model_instance_from_graph
 from .retrieve_existing_edges import retrieve_existing_edges
 from .convert_node_to_data_point import convert_node_to_data_point
 from .deduplicate_nodes_and_edges import deduplicate_nodes_and_edges
+from .resolve_edges_to_text import resolve_edges_to_text

cognee/modules/graph/utils/resolve_edges_to_text.py
@@ -0,0 +1,71 @@
+from typing import List
+from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
+
+
+async def resolve_edges_to_text(retrieved_edges: List[Edge]) -> str:
+    """
+    Converts retrieved graph edges into a human-readable string format.
+
+    Parameters:
+    -----------
+
+    - retrieved_edges (list): A list of edges retrieved from the graph.
+
+    Returns:
+    --------
+
+    - str: A formatted string representation of the nodes and their connections.
+    """
+
+    def _get_nodes(retrieved_edges: List[Edge]) -> dict:
+        def _get_title(text: str, first_n_words: int = 7, top_n_words: int = 3) -> str:
+            def _top_n_words(text, stop_words=None, top_n=3, separator=", "):
+                """Concatenates the top N frequent words in text."""
+                if stop_words is None:
+                    from cognee.modules.retrieval.utils.stop_words import DEFAULT_STOP_WORDS
+
+                    stop_words = DEFAULT_STOP_WORDS
+
+                import string
+
+                words = [word.lower().strip(string.punctuation) for word in text.split()]
+
+                if stop_words:
+                    words = [word for word in words if word and word not in stop_words]
+
+                from collections import Counter
+
+                top_words = [word for word, freq in Counter(words).most_common(top_n)]
+
+                return separator.join(top_words)
+
+            """Creates a title, by combining first words with most frequent words from the text."""
+            first_words = text.split()[:first_n_words]
+            top_words = _top_n_words(text, top_n=first_n_words)
+            return f"{' '.join(first_words)}... [{top_words}]"
+
+        """Creates a dictionary of nodes with their names and content."""
+        nodes = {}
+        for edge in retrieved_edges:
+            for node in (edge.node1, edge.node2):
+                if node.id not in nodes:
+                    text = node.attributes.get("text")
+                    if text:
+                        name = _get_title(text)
+                        content = text
+                    else:
+                        name = node.attributes.get("name", "Unnamed Node")
+                        content = node.attributes.get("description", name)
+                    nodes[node.id] = {"node": node, "name": name, "content": content}
+        return nodes
+
+    nodes = _get_nodes(retrieved_edges)
+    node_section = "\n".join(
+        f"Node: {info['name']}\n__node_content_start__\n{info['content']}\n__node_content_end__\n"
+        for info in nodes.values()
+    )
+    connection_section = "\n".join(
+        f"{nodes[edge.node1.id]['name']} --[{edge.attributes['relationship_type']}]--> {nodes[edge.node2.id]['name']}"
+        for edge in retrieved_edges
+    )
+    return f"Nodes:\n{node_section}\n\nConnections:\n{connection_section}"
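
Note: _get_title passes top_n=first_n_words to _top_n_words, so the bracketed keyword list uses the first-words count (7) rather than top_n_words (3). For orientation, the output for a single chunk-to-entity edge looks roughly like this (illustrative values):

    Nodes:
    Node: Quantum computers encode information in qubits that... [qubits, quantum, information]
    __node_content_start__
    Quantum computers encode information in qubits that exploit superposition.
    __node_content_end__

    Node: Qubit
    __node_content_start__
    Basic unit of quantum information.
    __node_content_end__

    Connections:
    Quantum computers encode information in qubits that... [qubits, quantum, information] --[mentions]--> Qubit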

cognee/modules/memify/__init__.py
@@ -0,0 +1 @@
+from .memify import memify

cognee/modules/memify/memify.py
@@ -0,0 +1,118 @@
+from typing import Union, Optional, List, Type, Any
+from uuid import UUID
+
+from cognee.shared.logging_utils import get_logger
+
+from cognee.modules.retrieval.utils.brute_force_triplet_search import get_memory_fragment
+from cognee.context_global_variables import set_database_global_context_variables
+from cognee.modules.engine.models.node_set import NodeSet
+from cognee.modules.pipelines import run_pipeline
+from cognee.modules.pipelines.tasks.task import Task
+from cognee.modules.users.models import User
+from cognee.modules.pipelines.layers.resolve_authorized_user_datasets import (
+    resolve_authorized_user_datasets,
+)
+from cognee.modules.pipelines.layers.reset_dataset_pipeline_run_status import (
+    reset_dataset_pipeline_run_status,
+)
+from cognee.modules.engine.operations.setup import setup
+from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor
+from cognee.tasks.memify.extract_subgraph_chunks import extract_subgraph_chunks
+from cognee.tasks.codingagents.coding_rule_associations import (
+    add_rule_associations,
+)
+
+logger = get_logger("memify")
+
+
+async def memify(
+    extraction_tasks: Union[List[Task], List[str]] = None,
+    enrichment_tasks: Union[List[Task], List[str]] = None,
+    data: Optional[Any] = None,
+    dataset: Union[str, UUID] = "main_dataset",
+    user: User = None,
+    node_type: Optional[Type] = NodeSet,
+    node_name: Optional[List[str]] = None,
+    vector_db_config: Optional[dict] = None,
+    graph_db_config: Optional[dict] = None,
+    run_in_background: bool = False,
+):
+    """
+    Enrichment pipeline in Cognee, can work with already built graphs. If no data is provided existing knowledge graph will be used as data,
+    custom data can also be provided instead which can be processed with provided extraction and enrichment tasks.
+
+    Provided tasks and data will be arranged to run the Cognee pipeline and execute graph enrichment/creation.
+
+    This is the core processing step in Cognee that converts raw text and documents
+    into an intelligent knowledge graph. It analyzes content, extracts entities and
+    relationships, and creates semantic connections for enhanced search and reasoning.
+
+    Args:
+        extraction_tasks: List of Cognee Tasks to execute for graph/data extraction.
+        enrichment_tasks: List of Cognee Tasks to handle enrichment of provided graph/data from extraction tasks.
+        data: The data to ingest. Can be anything when custom extraction and enrichment tasks are used.
+            Data provided here will be forwarded to the first extraction task in the pipeline as input.
+            If no data is provided the whole graph (or subgraph if node_name/node_type is specified) will be forwarded
+        dataset: Dataset name or dataset uuid to process.
+        user: User context for authentication and data access. Uses default if None.
+        node_type: Filter graph to specific entity types (for advanced filtering). Used when no data is provided.
+        node_name: Filter graph to specific named entities (for targeted search). Used when no data is provided.
+        vector_db_config: Custom vector database configuration for embeddings storage.
+        graph_db_config: Custom graph database configuration for relationship storage.
+        run_in_background: If True, starts processing asynchronously and returns immediately.
+            If False, waits for completion before returning.
+            Background mode recommended for large datasets (>100MB).
+            Use pipeline_run_id from return value to monitor progress.
+    """
+
+    # Use default coding rules tasks if no tasks were provided
+    if not extraction_tasks:
+        extraction_tasks = [Task(extract_subgraph_chunks)]
+    if not enrichment_tasks:
+        enrichment_tasks = [
+            Task(
+                add_rule_associations,
+                rules_nodeset_name="coding_agent_rules",
+                task_config={"batch_size": 1},
+            )
+        ]
+
+    await setup()
+
+    user, authorized_dataset_list = await resolve_authorized_user_datasets(dataset, user)
+    authorized_dataset = authorized_dataset_list[0]
+
+    if not data:
+        # Will only be used if ENABLE_BACKEND_ACCESS_CONTROL is set to True
+        await set_database_global_context_variables(
+            authorized_dataset.id, authorized_dataset.owner_id
+        )
+
+        memory_fragment = await get_memory_fragment(node_type=node_type, node_name=node_name)
+        # Subgraphs should be a single element in the list to represent one data item
+        data = [memory_fragment]
+
+    memify_tasks = [
+        *extraction_tasks,  # Unpack tasks provided to memify pipeline
+        *enrichment_tasks,
+    ]
+
+    await reset_dataset_pipeline_run_status(
+        authorized_dataset.id, user, pipeline_names=["memify_pipeline"]
+    )
+
+    # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
+    pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background)
+
+    # Run the run_pipeline in the background or blocking based on executor
+    return await pipeline_executor_func(
+        pipeline=run_pipeline,
+        tasks=memify_tasks,
+        user=user,
+        data=data,
+        datasets=authorized_dataset.id,
+        vector_db_config=vector_db_config,
+        graph_db_config=graph_db_config,
+        incremental_loading=False,
+        pipeline_name="memify_pipeline",
+    )
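
Note: a minimal invocation sketch, assuming a knowledge graph has already been built for "main_dataset"; with no arguments, memify falls back to the extract_subgraph_chunks / add_rule_associations defaults shown above.

    import asyncio

    from cognee.modules.memify import memify

    async def main():
        # Blocks until the pipeline finishes (run_in_background defaults to False).
        run_info = await memify(dataset="main_dataset")
        print(run_info)

    asyncio.run(main())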

cognee/modules/notebooks/methods/create_notebook.py
@@ -0,0 +1,26 @@
+from uuid import UUID
+from typing import List, Optional
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from cognee.infrastructure.databases.relational import with_async_session
+
+from ..models.Notebook import Notebook, NotebookCell
+
+
+@with_async_session
+async def create_notebook(
+    user_id: UUID,
+    notebook_name: str,
+    cells: Optional[List[NotebookCell]],
+    deletable: Optional[bool],
+    session: AsyncSession,
+) -> Notebook:
+    notebook = Notebook(
+        name=notebook_name, owner_id=user_id, cells=cells, deletable=deletable or True
+    )
+
+    session.add(notebook)
+
+    await session.commit()
+
+    return notebook
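
Note: `deletable=deletable or True` always evaluates to True, even when the caller passes deletable=False (False or True == True), so the flag is effectively forced on. A None-aware default would be:

    deletable = True if deletable is None else deletable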

cognee/modules/notebooks/methods/delete_notebook.py
@@ -0,0 +1,13 @@
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from cognee.infrastructure.databases.relational import with_async_session
+
+from ..models.Notebook import Notebook
+
+
+@with_async_session
+async def delete_notebook(
+    notebook: Notebook,
+    session: AsyncSession,
+) -> None:
+    await session.delete(notebook)
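
Note: the @with_async_session decorator comes from the new cognee/infrastructure/databases/relational/with_async_session.py (+25 lines in the file list above); its body is not part of this diff view. A plausible sketch of the pattern, assuming it injects a session from the new get_async_session helper only when the caller does not supply one:

    from functools import wraps

    def with_async_session(func):  # sketch only; see the actual module for the real code
        @wraps(func)
        async def wrapper(*args, session=None, **kwargs):
            if session is not None:
                return await func(*args, session=session, **kwargs)
            # get_async_session is assumed to be an async context manager
            # (added alongside this decorator in the same release).
            async with get_async_session() as new_session:
                return await func(*args, session=new_session, **kwargs)

        return wrapper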

cognee/modules/notebooks/methods/get_notebook.py
@@ -0,0 +1,21 @@
+from uuid import UUID
+from typing import Optional
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from cognee.infrastructure.databases.relational import with_async_session
+
+from ..models.Notebook import Notebook
+
+
+@with_async_session
+async def get_notebook(
+    notebook_id: UUID,
+    user_id: UUID,
+    session: AsyncSession,
+) -> Optional[Notebook]:
+    result = await session.execute(
+        select(Notebook).where(Notebook.owner_id == user_id and Notebook.id == notebook_id)
+    )
+
+    return result.scalar()
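
Note: chaining the two filters with Python `and` does not build a SQL AND; the expression short-circuits on the truthiness of the left comparison, so the Notebook.id filter is effectively dropped. The usual spelling passes both criteria to .where(), which joins them with AND:

    select(Notebook).where(Notebook.id == notebook_id, Notebook.owner_id == user_id)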

cognee/modules/notebooks/methods/get_notebooks.py
@@ -0,0 +1,18 @@
+from uuid import UUID
+from typing import List
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from cognee.infrastructure.databases.relational import with_async_session
+
+from ..models.Notebook import Notebook
+
+
+@with_async_session
+async def get_notebooks(
+    user_id: UUID,
+    session: AsyncSession,
+) -> List[Notebook]:
+    result = await session.execute(select(Notebook).where(Notebook.owner_id == user_id))
+
+    return list(result.scalars().all())

cognee/modules/notebooks/methods/update_notebook.py
@@ -0,0 +1,17 @@
+from typing import Callable, AsyncContextManager
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from cognee.infrastructure.databases.relational import with_async_session
+
+from ..models.Notebook import Notebook
+
+
+@with_async_session
+async def update_notebook(
+    notebook: Notebook,
+    session: AsyncSession,
+) -> Notebook:
+    if notebook not in session:
+        session.add(notebook)
+
+    return notebook

cognee/modules/notebooks/models/Notebook.py
@@ -0,0 +1,53 @@
+import json
+from typing import List, Literal
+from uuid import uuid4, UUID as UUID_t
+from pydantic import BaseModel, ConfigDict
+from datetime import datetime, timezone
+from fastapi.encoders import jsonable_encoder
+from sqlalchemy import Boolean, Column, DateTime, JSON, UUID, String, TypeDecorator
+from sqlalchemy.orm import mapped_column, Mapped
+
+from cognee.infrastructure.databases.relational import Base
+
+
+class NotebookCell(BaseModel):
+    id: UUID_t
+    type: Literal["markdown", "code"]
+    name: str
+    content: str
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+
+class NotebookCellList(TypeDecorator):
+    impl = JSON
+    cache_ok = True
+
+    def process_bind_param(self, notebook_cells, dialect):
+        if notebook_cells is None:
+            return []
+        return [
+            json.dumps(jsonable_encoder(cell)) if isinstance(cell, NotebookCell) else cell
+            for cell in notebook_cells
+        ]
+
+    def process_result_value(self, cells_json_list, dialect):
+        if cells_json_list is None:
+            return []
+        return [NotebookCell(**json.loads(json_string)) for json_string in cells_json_list]
+
+
+class Notebook(Base):
+    __tablename__ = "notebooks"
+
+    id: Mapped[UUID_t] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid4)
+
+    owner_id: Mapped[UUID_t] = mapped_column(UUID(as_uuid=True), index=True)
+
+    name: Mapped[str] = mapped_column(String, nullable=False)
+
+    cells: Mapped[List[NotebookCell]] = mapped_column(NotebookCellList, nullable=False)
+
+    deletable: Mapped[bool] = mapped_column(Boolean, default=True)
+
+    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
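
Note: NotebookCellList stores each cell as a JSON string inside a JSON array column. A quick round-trip of one cell through the same encode/decode calls (illustrative only; assumes pydantic v2, which ConfigDict implies):

    import json
    from uuid import uuid4

    from fastapi.encoders import jsonable_encoder

    cell = NotebookCell(id=uuid4(), type="code", name="setup", content="import cognee")

    encoded = json.dumps(jsonable_encoder(cell))   # what process_bind_param writes
    decoded = NotebookCell(**json.loads(encoded))  # what process_result_value returns
    assert decoded.name == "setup" and decoded.type == "code"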

cognee/modules/notebooks/models/__init__.py
@@ -0,0 +1 @@
+from .Notebook import Notebook, NotebookCell

cognee/modules/notebooks/operations/__init__.py
@@ -0,0 +1 @@
+from .run_in_local_sandbox import run_in_local_sandbox

cognee/modules/notebooks/operations/run_in_local_sandbox.py
@@ -0,0 +1,55 @@
+import io
+import sys
+import traceback
+
+
+def wrap_in_async_handler(user_code: str) -> str:
+    return (
+        "from cognee.infrastructure.utils.run_sync import run_sync\n\n"
+        "async def __user_main__():\n"
+        + "\n".join("    " + line for line in user_code.strip().split("\n"))
+        + "\n"
+        "    globals().update(locals())\n\n"
+        "run_sync(__user_main__())\n"
+    )
+
+
+def run_in_local_sandbox(code, environment=None):
+    environment = environment or {}
+    code = wrap_in_async_handler(code.replace("\xa0", "\n"))
+
+    buffer = io.StringIO()
+    sys_stdout = sys.stdout
+    sys.stdout = buffer
+    sys.stderr = buffer
+
+    error = None
+
+    printOutput = []
+
+    def customPrintFunction(output):
+        printOutput.append(output)
+
+    environment["print"] = customPrintFunction
+
+    try:
+        exec(code, environment)
+    except Exception:
+        error = traceback.format_exc()
+    finally:
+        sys.stdout = sys_stdout
+        sys.stderr = sys_stdout
+
+    return printOutput, error
+
+
+if __name__ == "__main__":
+    run_in_local_sandbox("""
+import cognee
+
+await cognee.add("Test file with some random content 3.")
+
+a = "asd"
+
+b = {"c": "dfgh"}
+""")
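
Note: to see what wrap_in_async_handler produces, feed it a two-line snippet (illustrative):

    print(wrap_in_async_handler('x = 1\nawait cognee.add("doc")'))
    # from cognee.infrastructure.utils.run_sync import run_sync
    #
    # async def __user_main__():
    #     x = 1
    #     await cognee.add("doc")
    #     globals().update(locals())
    #
    # run_sync(__user_main__())

Note also that the finally block restores sys.stderr to the saved stdout handle (sys_stdout) rather than to the original sys.stderr.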

cognee/modules/pipelines/exceptions/tasks.py
@@ -0,0 +1,18 @@
+from fastapi import status
+from cognee.exceptions import CogneeValidationError
+
+
+class WrongTaskTypeError(CogneeValidationError):
+    """
+    Raised when the tasks argument is not a list of Task class instances.
+    """
+
+    def __init__(
+        self,
+        message: str = "tasks argument must be a list, containing Task class instances.",
+        name: str = "WrongTaskTypeError",
+        status_code=status.HTTP_400_BAD_REQUEST,
+    ):
+        self.message = message
+        self.name = name
+        self.status_code = status_code

cognee/modules/pipelines/layers/__init__.py
@@ -0,0 +1 @@
+from .validate_pipeline_tasks import validate_pipeline_tasks

cognee/modules/pipelines/layers/check_pipeline_run_qualification.py
@@ -0,0 +1,59 @@
+from typing import Union, Optional
+from cognee.modules.data.models import Dataset
+from cognee.modules.data.models import Data
+from cognee.modules.pipelines.models import PipelineRunStatus
+from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status
+from cognee.modules.pipelines.methods import get_pipeline_run_by_dataset
+from cognee.shared.logging_utils import get_logger
+
+from cognee.modules.pipelines.models.PipelineRunInfo import (
+    PipelineRunCompleted,
+    PipelineRunStarted,
+)
+
+logger = get_logger(__name__)
+
+
+async def check_pipeline_run_qualification(
+    dataset: Dataset, data: list[Data], pipeline_name: str
+) -> Optional[Union[PipelineRunStarted, PipelineRunCompleted]]:
+    """
+    Function used to determine if pipeline is currently being processed or was already processed.
+    In case pipeline was or is being processed return value is returned and current pipline execution should be stopped.
+    In case pipeline is not or was not processed there will be no return value and pipeline processing can start.
+
+    Args:
+        dataset: Dataset object
+        data: List of Data
+        pipeline_name: pipeline name
+
+    Returns: Pipeline state if it is being processed or was already processed
+
+    """
+
+    # async with update_status_lock: TODO: Add UI lock to prevent multiple backend requests
+    if isinstance(dataset, Dataset):
+        task_status = await get_pipeline_status([dataset.id], pipeline_name)
+    else:
+        task_status = {}
+
+    if str(dataset.id) in task_status:
+        if task_status[str(dataset.id)] == PipelineRunStatus.DATASET_PROCESSING_STARTED:
+            logger.info("Dataset %s is already being processed.", dataset.id)
+            pipeline_run = await get_pipeline_run_by_dataset(dataset.id, pipeline_name)
+            return PipelineRunStarted(
+                pipeline_run_id=pipeline_run.pipeline_run_id,
+                dataset_id=dataset.id,
+                dataset_name=dataset.name,
+                payload=data,
+            )
+        elif task_status[str(dataset.id)] == PipelineRunStatus.DATASET_PROCESSING_COMPLETED:
+            logger.info("Dataset %s is already processed.", dataset.id)
+            pipeline_run = await get_pipeline_run_by_dataset(dataset.id, pipeline_name)
+            return PipelineRunCompleted(
+                pipeline_run_id=pipeline_run.pipeline_run_id,
+                dataset_id=dataset.id,
+                dataset_name=dataset.name,
+            )
+
+    return
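
Note: the helper returns None when the dataset has no started or completed run, so a call site can gate on the result (hypothetical sketch):

    existing_run = await check_pipeline_run_qualification(dataset, data, "cognify_pipeline")
    if existing_run:
        return existing_run  # PipelineRunStarted or PipelineRunCompleted
    # ...otherwise start a new pipeline run...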