cognee 0.2.3.dev0__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__main__.py +4 -0
- cognee/api/v1/add/add.py +18 -11
- cognee/api/v1/cognify/code_graph_pipeline.py +7 -1
- cognee/api/v1/cognify/cognify.py +22 -115
- cognee/api/v1/cognify/routers/get_cognify_router.py +11 -3
- cognee/api/v1/config/config.py +5 -13
- cognee/api/v1/datasets/routers/get_datasets_router.py +2 -2
- cognee/api/v1/delete/delete.py +1 -1
- cognee/api/v1/exceptions/__init__.py +13 -0
- cognee/api/v1/{delete → exceptions}/exceptions.py +15 -12
- cognee/api/v1/responses/default_tools.py +4 -0
- cognee/api/v1/responses/dispatch_function.py +6 -1
- cognee/api/v1/responses/models.py +1 -1
- cognee/api/v1/search/search.py +6 -7
- cognee/cli/__init__.py +10 -0
- cognee/cli/_cognee.py +180 -0
- cognee/cli/commands/__init__.py +1 -0
- cognee/cli/commands/add_command.py +80 -0
- cognee/cli/commands/cognify_command.py +128 -0
- cognee/cli/commands/config_command.py +225 -0
- cognee/cli/commands/delete_command.py +80 -0
- cognee/cli/commands/search_command.py +149 -0
- cognee/cli/config.py +33 -0
- cognee/cli/debug.py +21 -0
- cognee/cli/echo.py +45 -0
- cognee/cli/exceptions.py +23 -0
- cognee/cli/minimal_cli.py +97 -0
- cognee/cli/reference.py +26 -0
- cognee/cli/suppress_logging.py +12 -0
- cognee/eval_framework/corpus_builder/corpus_builder_executor.py +2 -2
- cognee/eval_framework/eval_config.py +1 -1
- cognee/exceptions/__init__.py +5 -5
- cognee/exceptions/exceptions.py +37 -17
- cognee/infrastructure/data/exceptions/__init__.py +7 -0
- cognee/infrastructure/data/exceptions/exceptions.py +22 -0
- cognee/infrastructure/data/utils/extract_keywords.py +3 -3
- cognee/infrastructure/databases/exceptions/__init__.py +3 -0
- cognee/infrastructure/databases/exceptions/exceptions.py +57 -9
- cognee/infrastructure/databases/graph/get_graph_engine.py +4 -9
- cognee/infrastructure/databases/graph/kuzu/adapter.py +64 -2
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +49 -0
- cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +15 -10
- cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +2 -2
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +4 -5
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -2
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +5 -3
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +17 -8
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +5 -5
- cognee/infrastructure/databases/vector/embeddings/config.py +2 -2
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +6 -6
- cognee/infrastructure/databases/vector/exceptions/exceptions.py +3 -3
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -2
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +4 -3
- cognee/infrastructure/files/utils/get_data_file_path.py +14 -9
- cognee/infrastructure/files/utils/get_file_metadata.py +2 -1
- cognee/infrastructure/llm/LLMGateway.py +14 -5
- cognee/infrastructure/llm/config.py +5 -5
- cognee/infrastructure/llm/exceptions.py +30 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +16 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_content_graph.py +19 -15
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +5 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +6 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -15
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +6 -4
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +9 -7
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +2 -2
- cognee/infrastructure/llm/tokenizer/HuggingFace/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/Mistral/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +6 -6
- cognee/infrastructure/llm/utils.py +7 -7
- cognee/modules/data/exceptions/exceptions.py +18 -5
- cognee/modules/data/methods/__init__.py +2 -0
- cognee/modules/data/methods/create_authorized_dataset.py +19 -0
- cognee/modules/data/methods/delete_data.py +2 -4
- cognee/modules/data/methods/get_authorized_dataset.py +11 -5
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +16 -0
- cognee/modules/data/methods/load_or_create_datasets.py +2 -20
- cognee/modules/data/processing/document_types/exceptions/exceptions.py +2 -2
- cognee/modules/graph/cognee_graph/CogneeGraph.py +6 -4
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +5 -10
- cognee/modules/graph/exceptions/__init__.py +2 -0
- cognee/modules/graph/exceptions/exceptions.py +25 -3
- cognee/modules/graph/methods/get_formatted_graph_data.py +3 -2
- cognee/modules/ingestion/exceptions/exceptions.py +2 -2
- cognee/modules/ontology/exceptions/exceptions.py +4 -4
- cognee/modules/pipelines/__init__.py +1 -1
- cognee/modules/pipelines/exceptions/exceptions.py +2 -2
- cognee/modules/pipelines/exceptions/tasks.py +18 -0
- cognee/modules/pipelines/layers/__init__.py +1 -0
- cognee/modules/pipelines/layers/check_pipeline_run_qualification.py +59 -0
- cognee/modules/pipelines/layers/pipeline_execution_mode.py +127 -0
- cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +12 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +34 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +55 -0
- cognee/modules/pipelines/layers/setup_and_check_environment.py +41 -0
- cognee/modules/pipelines/layers/validate_pipeline_tasks.py +20 -0
- cognee/modules/pipelines/methods/__init__.py +2 -0
- cognee/modules/pipelines/methods/get_pipeline_runs_by_dataset.py +34 -0
- cognee/modules/pipelines/methods/reset_pipeline_run_status.py +16 -0
- cognee/modules/pipelines/operations/__init__.py +0 -1
- cognee/modules/pipelines/operations/log_pipeline_run_initiated.py +1 -1
- cognee/modules/pipelines/operations/pipeline.py +23 -138
- cognee/modules/retrieval/base_feedback.py +11 -0
- cognee/modules/retrieval/cypher_search_retriever.py +1 -9
- cognee/modules/retrieval/exceptions/exceptions.py +12 -6
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +9 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +13 -6
- cognee/modules/retrieval/graph_completion_retriever.py +89 -5
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/natural_language_retriever.py +0 -4
- cognee/modules/retrieval/user_qa_feedback.py +83 -0
- cognee/modules/retrieval/utils/extract_uuid_from_node.py +18 -0
- cognee/modules/retrieval/utils/models.py +40 -0
- cognee/modules/search/exceptions/__init__.py +7 -0
- cognee/modules/search/exceptions/exceptions.py +15 -0
- cognee/modules/search/methods/search.py +47 -7
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/settings/get_settings.py +2 -2
- cognee/modules/users/exceptions/exceptions.py +6 -6
- cognee/shared/CodeGraphEntities.py +1 -0
- cognee/shared/exceptions/exceptions.py +2 -2
- cognee/shared/logging_utils.py +142 -31
- cognee/shared/utils.py +0 -1
- cognee/tasks/completion/exceptions/exceptions.py +3 -3
- cognee/tasks/documents/classify_documents.py +4 -0
- cognee/tasks/documents/exceptions/__init__.py +11 -0
- cognee/tasks/documents/exceptions/exceptions.py +36 -0
- cognee/tasks/documents/extract_chunks_from_documents.py +8 -2
- cognee/tasks/graph/exceptions/__init__.py +12 -0
- cognee/tasks/graph/exceptions/exceptions.py +41 -0
- cognee/tasks/graph/extract_graph_from_data.py +34 -2
- cognee/tasks/ingestion/exceptions/__init__.py +8 -0
- cognee/tasks/ingestion/exceptions/exceptions.py +12 -0
- cognee/tasks/ingestion/resolve_data_directories.py +5 -0
- cognee/tasks/repo_processor/get_local_dependencies.py +2 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +120 -48
- cognee/tasks/storage/add_data_points.py +41 -3
- cognee/tasks/storage/exceptions/__init__.py +9 -0
- cognee/tasks/storage/exceptions/exceptions.py +13 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/summarization/exceptions/__init__.py +9 -0
- cognee/tasks/summarization/exceptions/exceptions.py +14 -0
- cognee/tasks/summarization/summarize_text.py +8 -1
- cognee/tests/integration/cli/__init__.py +3 -0
- cognee/tests/integration/cli/test_cli_integration.py +331 -0
- cognee/tests/integration/documents/PdfDocument_test.py +2 -2
- cognee/tests/integration/documents/TextDocument_test.py +2 -4
- cognee/tests/integration/documents/UnstructuredDocument_test.py +5 -8
- cognee/tests/test_delete_by_id.py +1 -1
- cognee/tests/{test_deletion.py → test_delete_hard.py} +0 -37
- cognee/tests/test_delete_soft.py +85 -0
- cognee/tests/test_kuzu.py +2 -2
- cognee/tests/test_neo4j.py +2 -2
- cognee/tests/test_search_db.py +126 -7
- cognee/tests/unit/cli/__init__.py +3 -0
- cognee/tests/unit/cli/test_cli_commands.py +483 -0
- cognee/tests/unit/cli/test_cli_edge_cases.py +625 -0
- cognee/tests/unit/cli/test_cli_main.py +173 -0
- cognee/tests/unit/cli/test_cli_runner.py +62 -0
- cognee/tests/unit/cli/test_cli_utils.py +127 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +5 -5
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +3 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +3 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +3 -3
- cognee/tests/unit/modules/search/search_methods_test.py +4 -2
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/METADATA +7 -5
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/RECORD +172 -121
- cognee-0.2.4.dist-info/entry_points.txt +2 -0
- cognee/infrastructure/databases/exceptions/EmbeddingException.py +0 -20
- cognee/infrastructure/databases/graph/networkx/__init__.py +0 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +0 -1017
- cognee/infrastructure/pipeline/models/Operation.py +0 -60
- cognee/infrastructure/pipeline/models/__init__.py +0 -0
- cognee/notebooks/github_analysis_step_by_step.ipynb +0 -37
- cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py +0 -7
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/WHEEL +0 -0
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/licenses/NOTICE.md +0 -0
cognee/infrastructure/llm/tokenizer/Mistral/adapter.py

```diff
@@ -16,17 +16,17 @@ class MistralTokenizer(TokenizerInterface):
 
     Instance variables include:
     - model: str
-    - max_tokens: int
+    - max_completion_tokens: int
     - tokenizer: MistralTokenizer
     """
 
     def __init__(
         self,
         model: str,
-        max_tokens: int = 3072,
+        max_completion_tokens: int = 3072,
     ):
         self.model = model
-        self.max_tokens = max_tokens
+        self.max_completion_tokens = max_completion_tokens
 
         # Import here to make it an optional dependency
         from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
```
cognee/infrastructure/llm/tokenizer/TikToken/adapter.py

```diff
@@ -13,10 +13,10 @@ class TikTokenTokenizer(TokenizerInterface):
     def __init__(
         self,
         model: Optional[str] = None,
-        max_tokens: int = 8191,
+        max_completion_tokens: int = 8191,
     ):
         self.model = model
-        self.max_tokens = max_tokens
+        self.max_completion_tokens = max_completion_tokens
         # Initialize TikToken for GPT based on model
         if model:
             self.tokenizer = tiktoken.encoding_for_model(self.model)
@@ -93,9 +93,9 @@ class TikTokenTokenizer(TokenizerInterface):
         num_tokens = len(self.tokenizer.encode(text))
         return num_tokens
 
-    def trim_text_to_max_tokens(self, text: str) -> str:
+    def trim_text_to_max_completion_tokens(self, text: str) -> str:
         """
-        Trim the text so that the number of tokens does not exceed max_tokens.
+        Trim the text so that the number of tokens does not exceed max_completion_tokens.
 
         Parameters:
         -----------
@@ -111,13 +111,13 @@ class TikTokenTokenizer(TokenizerInterface):
         num_tokens = self.count_tokens(text)
 
         # If the number of tokens is within the limit, return the text as is
-        if num_tokens <= self.max_tokens:
+        if num_tokens <= self.max_completion_tokens:
             return text
 
         # If the number exceeds the limit, trim the text
         # This is a simple trim, it may cut words in half; consider using word boundaries for a cleaner cut
         encoded_text = self.tokenizer.encode(text)
-        trimmed_encoded_text = encoded_text[: self.max_tokens]
+        trimmed_encoded_text = encoded_text[: self.max_completion_tokens]
         # Decoding the trimmed text
         trimmed_text = self.tokenizer.decode(trimmed_encoded_text)
         return trimmed_text
```
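For orientation, a minimal usage sketch of the renamed trim helper, built only from the constructor and methods shown in the hunks above; the model name is illustrative, and the import path follows the file list at the top.

```python
from cognee.infrastructure.llm.tokenizer.TikToken.adapter import TikTokenTokenizer

tokenizer = TikTokenTokenizer(model="gpt-4o-mini", max_completion_tokens=8191)

long_text = "word " * 20_000  # well past the token budget
trimmed = tokenizer.trim_text_to_max_completion_tokens(long_text)

# count_tokens is the same helper the trim method uses internally.
assert tokenizer.count_tokens(trimmed) <= 8191
```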
cognee/infrastructure/llm/utils.py

```diff
@@ -32,13 +32,13 @@ def get_max_chunk_tokens():
 
     # We need to make sure chunk size won't take more than half of LLM max context token size
     # but it also can't be bigger than the embedding engine max token size
-    llm_cutoff_point = llm_client.max_tokens // 2  # Round down the division
-    max_chunk_tokens = min(embedding_engine.max_tokens, llm_cutoff_point)
+    llm_cutoff_point = llm_client.max_completion_tokens // 2  # Round down the division
+    max_chunk_tokens = min(embedding_engine.max_completion_tokens, llm_cutoff_point)
 
     return max_chunk_tokens
 
 
-def get_model_max_tokens(model_name: str):
+def get_model_max_completion_tokens(model_name: str):
     """
     Retrieve the maximum token limit for a specified model name if it exists.
 
@@ -56,15 +56,15 @@ def get_model_max_tokens(model_name: str):
 
     Number of max tokens of model, or None if model is unknown
     """
-    max_tokens = None
+    max_completion_tokens = None
 
     if model_name in litellm.model_cost:
-        max_tokens = litellm.model_cost[model_name]["max_tokens"]
-        logger.debug(f"Max input tokens for {model_name}: {max_tokens}")
+        max_completion_tokens = litellm.model_cost[model_name]["max_tokens"]
+        logger.debug(f"Max input tokens for {model_name}: {max_completion_tokens}")
     else:
         logger.info("Model not found in LiteLLM's model_cost.")
 
-    return max_tokens
+    return max_completion_tokens
 
 
 async def test_llm_connection():
```
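A hedged sketch of the lookup the second hunk implements, using LiteLLM's public `model_cost` table; the model name is an example and the printed value depends on the installed LiteLLM version.

```python
import litellm

def get_model_max_completion_tokens(model_name: str):
    # Mirrors the hunk above: model_cost maps model names to metadata
    # dicts whose "max_tokens" entry is the model's token limit.
    if model_name in litellm.model_cost:
        return litellm.model_cost[model_name]["max_tokens"]
    return None

print(get_model_max_completion_tokens("gpt-4o-mini"))  # a positive int for known models
```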
cognee/modules/data/exceptions/exceptions.py

```diff
@@ -1,8 +1,11 @@
-from cognee.exceptions import CogneeApiError
+from cognee.exceptions import (
+    CogneeValidationError,
+    CogneeConfigurationError,
+)
 from fastapi import status
 
 
-class UnstructuredLibraryImportError(CogneeApiError):
+class UnstructuredLibraryImportError(CogneeConfigurationError):
     def __init__(
         self,
         message: str = "Import error. Unstructured library is not installed.",
@@ -12,7 +15,7 @@ class UnstructuredLibraryImportError(CogneeApiError):
         super().__init__(message, name, status_code)
 
 
-class UnauthorizedDataAccessError(CogneeApiError):
+class UnauthorizedDataAccessError(CogneeValidationError):
     def __init__(
         self,
         message: str = "User does not have permission to access this data.",
@@ -22,7 +25,7 @@ class UnauthorizedDataAccessError(CogneeApiError):
         super().__init__(message, name, status_code)
 
 
-class DatasetNotFoundError(CogneeApiError):
+class DatasetNotFoundError(CogneeValidationError):
     def __init__(
         self,
         message: str = "Dataset not found.",
@@ -32,7 +35,7 @@ class DatasetNotFoundError(CogneeApiError):
         super().__init__(message, name, status_code)
 
 
-class DatasetTypeError(CogneeApiError):
+class DatasetTypeError(CogneeValidationError):
     def __init__(
         self,
         message: str = "Dataset type not supported.",
@@ -40,3 +43,13 @@ class DatasetTypeError(CogneeApiError):
             status_code=status.HTTP_400_BAD_REQUEST,
         ):
         super().__init__(message, name, status_code)
+
+
+class InvalidTableAttributeError(CogneeValidationError):
+    def __init__(
+        self,
+        message: str = "The provided data object is missing the required '__tablename__' attribute.",
+        name: str = "InvalidTableAttributeError",
+        status_code: int = status.HTTP_400_BAD_REQUEST,
+    ):
+        super().__init__(message, name, status_code)
```
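These hunks move concrete exceptions from the flat `CogneeApiError` base onto narrower bases. A minimal sketch of what that enables for callers, assuming (as the constructors' `super().__init__(message, name, status_code)` calls suggest) that `CogneeValidationError` exposes `message` and `status_code`; `load_dataset` is hypothetical.

```python
from cognee.exceptions import CogneeValidationError
from cognee.modules.data.exceptions import DatasetNotFoundError, DatasetTypeError

async def describe_dataset(dataset_id, user):
    try:
        return await load_dataset(dataset_id, user)  # hypothetical caller
    except CogneeValidationError as error:
        # DatasetNotFoundError, DatasetTypeError, UnauthorizedDataAccessError and
        # InvalidTableAttributeError can now all be caught through one base class.
        return {"error": error.message, "status": error.status_code}
```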
cognee/modules/data/methods/__init__.py

```diff
@@ -7,6 +7,7 @@ from .get_datasets import get_datasets
 from .get_datasets_by_name import get_datasets_by_name
 from .get_dataset_data import get_dataset_data
 from .get_authorized_dataset import get_authorized_dataset
+from .get_authorized_dataset_by_name import get_authorized_dataset_by_name
 from .get_data import get_data
 from .get_unique_dataset_id import get_unique_dataset_id
 from .get_authorized_existing_datasets import get_authorized_existing_datasets
@@ -18,6 +19,7 @@ from .delete_data import delete_data
 
 # Create
 from .load_or_create_datasets import load_or_create_datasets
+from .create_authorized_dataset import create_authorized_dataset
 
 # Check
 from .check_dataset_name import check_dataset_name
```
cognee/modules/data/methods/create_authorized_dataset.py (new file)

```diff
@@ -0,0 +1,19 @@
+from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.modules.users.models import User
+from cognee.modules.data.models import Dataset
+from cognee.modules.users.permissions.methods import give_permission_on_dataset
+from .create_dataset import create_dataset
+
+
+async def create_authorized_dataset(dataset_name: str, user: User) -> Dataset:
+    db_engine = get_relational_engine()
+
+    async with db_engine.get_async_session() as session:
+        new_dataset = await create_dataset(dataset_name, user, session)
+
+    await give_permission_on_dataset(user, new_dataset.id, "read")
+    await give_permission_on_dataset(user, new_dataset.id, "write")
+    await give_permission_on_dataset(user, new_dataset.id, "delete")
+    await give_permission_on_dataset(user, new_dataset.id, "share")
+
+    return new_dataset
```
cognee/modules/data/methods/delete_data.py

```diff
@@ -1,4 +1,4 @@
-from cognee.exceptions import InvalidValueError
+from cognee.modules.data.exceptions.exceptions import InvalidTableAttributeError
 from cognee.modules.data.models import Data
 from cognee.infrastructure.databases.relational import get_relational_engine
 
@@ -13,9 +13,7 @@ async def delete_data(data: Data):
         ValueError: If the data object is invalid.
     """
     if not hasattr(data, "__tablename__"):
-        raise InvalidValueError(
-            message="The provided data object is missing the required '__tablename__' attribute."
-        )
+        raise InvalidTableAttributeError()
 
     db_engine = get_relational_engine()
 
```
cognee/modules/data/methods/get_authorized_dataset.py

```diff
@@ -1,11 +1,15 @@
-from typing import Optional
 from uuid import UUID
-from
+from typing import Optional
+
+from cognee.modules.users.models import User
+from cognee.modules.data.methods.get_authorized_existing_datasets import (
+    get_authorized_existing_datasets,
+)
 from ..models import Dataset
 
 
 async def get_authorized_dataset(
-
+    user: User, dataset_id: UUID, permission_type="read"
 ) -> Optional[Dataset]:
     """
     Get a specific dataset with permissions for a user.
@@ -18,6 +22,8 @@ async def get_authorized_dataset(
     Returns:
         Optional[Dataset]: dataset with permissions
     """
-
+    authorized_datasets = await get_authorized_existing_datasets(
+        [dataset_id], permission_type, user
+    )
 
-    return
+    return authorized_datasets[0] if authorized_datasets else None
```
cognee/modules/data/methods/get_authorized_dataset_by_name.py (new file)

```diff
@@ -0,0 +1,16 @@
+from typing import Optional
+
+from cognee.modules.users.models import User
+from cognee.modules.data.methods.get_authorized_existing_datasets import (
+    get_authorized_existing_datasets,
+)
+
+from ..models import Dataset
+
+
+async def get_authorized_dataset_by_name(
+    dataset_name: str, user: User, permission_type: str
+) -> Optional[Dataset]:
+    authorized_datasets = await get_authorized_existing_datasets([], permission_type, user)
+
+    return next((dataset for dataset in authorized_datasets if dataset.name == dataset_name), None)
```
cognee/modules/data/methods/load_or_create_datasets.py

```diff
@@ -1,12 +1,9 @@
 from typing import List, Union
 from uuid import UUID
 
-from cognee.infrastructure.databases.relational import get_relational_engine
 from cognee.modules.data.models import Dataset
-from cognee.modules.data.methods import create_dataset
-from cognee.modules.data.methods import get_unique_dataset_id
+from cognee.modules.data.methods import create_authorized_dataset
 from cognee.modules.data.exceptions import DatasetNotFoundError
-from cognee.modules.users.permissions.methods import give_permission_on_dataset
 
 
 async def load_or_create_datasets(
@@ -34,22 +31,7 @@ async def load_or_create_datasets(
         if isinstance(identifier, UUID):
             raise DatasetNotFoundError(f"Dataset with given UUID does not exist: {identifier}")
 
-
-        new_dataset = Dataset(
-            id=await get_unique_dataset_id(dataset_name=identifier, user=user),
-            name=identifier,
-            owner_id=user.id,
-        )
-
-        # Save dataset to database
-        db_engine = get_relational_engine()
-        async with db_engine.get_async_session() as session:
-            await create_dataset(identifier, user, session)
-
-        await give_permission_on_dataset(user, new_dataset.id, "read")
-        await give_permission_on_dataset(user, new_dataset.id, "write")
-        await give_permission_on_dataset(user, new_dataset.id, "delete")
-        await give_permission_on_dataset(user, new_dataset.id, "share")
+        new_dataset = await create_authorized_dataset(identifier, user)
 
         result.append(new_dataset)
 
```
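Taken together with `get_authorized_dataset_by_name` above, the extracted helper makes a get-or-create flow straightforward. A sketch under those assumptions; the wrapper function is hypothetical, while the imported names come from the hunks above.

```python
from cognee.modules.data.methods import (
    create_authorized_dataset,
    get_authorized_dataset_by_name,
)

async def get_or_create_dataset(name: str, user):
    # Reuse a dataset the user may write to; otherwise create it, which also
    # grants the owner read/write/delete/share permissions in one step.
    dataset = await get_authorized_dataset_by_name(name, user, "write")
    if dataset is None:
        dataset = await create_authorized_dataset(name, user)
    return dataset
```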
cognee/modules/data/processing/document_types/exceptions/exceptions.py

```diff
@@ -1,8 +1,8 @@
-from cognee.exceptions import CogneeApiError
+from cognee.exceptions import CogneeSystemError
 from fastapi import status
 
 
-class PyPdfInternalError(CogneeApiError):
+class PyPdfInternalError(CogneeSystemError):
     """Internal pypdf error"""
 
     def __init__(
```
cognee/modules/graph/cognee_graph/CogneeGraph.py

```diff
@@ -2,8 +2,11 @@ import time
 from cognee.shared.logging_utils import get_logger
 from typing import List, Dict, Union, Optional, Type
 
-from cognee.exceptions import InvalidValueError, EntityNotFoundError, EntityAlreadyExistsError
-
+from cognee.modules.graph.exceptions import (
+    EntityNotFoundError,
+    EntityAlreadyExistsError,
+    InvalidDimensionsError,
+)
 from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface
 from cognee.modules.graph.cognee_graph.CogneeGraphElements import Node, Edge
 from cognee.modules.graph.cognee_graph.CogneeAbstractGraph import CogneeAbstractGraph
@@ -66,8 +69,7 @@ class CogneeGraph(CogneeAbstractGraph):
         node_name: Optional[List[str]] = None,
     ) -> None:
         if node_dimension < 1 or edge_dimension < 1:
-            raise InvalidValueError(
-
+            raise InvalidDimensionsError()
         try:
             import time
 
```
cognee/modules/graph/cognee_graph/CogneeGraphElements.py

```diff
@@ -1,7 +1,6 @@
 import numpy as np
 from typing import List, Dict, Optional, Any, Union
-
-from cognee.exceptions import InvalidValueError
+from cognee.modules.graph.exceptions import InvalidDimensionsError, DimensionOutOfRangeError
 
 
 class Node:
@@ -24,7 +23,7 @@ class Node:
         self, node_id: str, attributes: Optional[Dict[str, Any]] = None, dimension: int = 1
     ):
         if dimension <= 0:
-            raise InvalidValueError(
+            raise InvalidDimensionsError()
         self.id = node_id
         self.attributes = attributes if attributes is not None else {}
         self.attributes["vector_distance"] = float("inf")
@@ -58,9 +57,7 @@ class Node:
 
     def is_node_alive_in_dimension(self, dimension: int) -> bool:
         if dimension < 0 or dimension >= len(self.status):
-            raise InvalidValueError(
-                message=f"Dimension {dimension} is out of range. Valid range is 0 to {len(self.status) - 1}."
-            )
+            raise DimensionOutOfRangeError(dimension=dimension, max_index=len(self.status) - 1)
         return self.status[dimension] == 1
 
     def add_attribute(self, key: str, value: Any) -> None:
@@ -110,7 +107,7 @@ class Edge:
         dimension: int = 1,
     ):
         if dimension <= 0:
-            raise InvalidValueError(
+            raise InvalidDimensionsError()
         self.node1 = node1
         self.node2 = node2
         self.attributes = attributes if attributes is not None else {}
@@ -120,9 +117,7 @@ class Edge:
 
     def is_edge_alive_in_dimension(self, dimension: int) -> bool:
         if dimension < 0 or dimension >= len(self.status):
-            raise InvalidValueError(
-                message=f"Dimension {dimension} is out of range. Valid range is 0 to {len(self.status) - 1}."
-            )
+            raise DimensionOutOfRangeError(dimension=dimension, max_index=len(self.status) - 1)
         return self.status[dimension] == 1
 
     def add_attribute(self, key: str, value: Any) -> None:
```
cognee/modules/graph/exceptions/exceptions.py

```diff
@@ -1,8 +1,8 @@
-from cognee.exceptions import CogneeApiError
+from cognee.exceptions import CogneeValidationError
 from fastapi import status
 
 
-class EntityNotFoundError(CogneeApiError):
+class EntityNotFoundError(CogneeValidationError):
     """Database returns nothing"""
 
     def __init__(
@@ -14,7 +14,7 @@ class EntityNotFoundError(CogneeApiError):
         super().__init__(message, name, status_code)
 
 
-class EntityAlreadyExistsError(CogneeApiError):
+class EntityAlreadyExistsError(CogneeValidationError):
     """Conflict detected, like trying to create a resource that already exists"""
 
     def __init__(
@@ -24,3 +24,25 @@ class EntityAlreadyExistsError(CogneeApiError):
             status_code=status.HTTP_409_CONFLICT,
         ):
         super().__init__(message, name, status_code)
+
+
+class InvalidDimensionsError(CogneeValidationError):
+    def __init__(
+        self,
+        name: str = "InvalidDimensionsError",
+        status_code: int = status.HTTP_400_BAD_REQUEST,
+    ):
+        message = "Dimensions must be positive integers."
+        super().__init__(message, name, status_code)
+
+
+class DimensionOutOfRangeError(CogneeValidationError):
+    def __init__(
+        self,
+        dimension: int,
+        max_index: int,
+        name: str = "DimensionOutOfRangeError",
+        status_code: int = status.HTTP_400_BAD_REQUEST,
+    ):
+        message = f"Dimension {dimension} is out of range. Valid range is 0 to {max_index}."
+        super().__init__(message, name, status_code)
```
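The two new graph exceptions carry their messages internally, so call sites shrink to a constructor call. A small sketch of the bounds check as `Node.is_node_alive_in_dimension` now performs it; the `status` list here is illustrative.

```python
from cognee.modules.graph.exceptions import DimensionOutOfRangeError

status = [1, 0, 1]  # per-dimension liveness flags, as on Node/Edge above
dimension = 5

if dimension < 0 or dimension >= len(status):
    # Renders as: "Dimension 5 is out of range. Valid range is 0 to 2."
    raise DimensionOutOfRangeError(dimension=dimension, max_index=len(status) - 1)
```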
cognee/modules/graph/methods/get_formatted_graph_data.py

```diff
@@ -3,10 +3,11 @@ from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.context_global_variables import set_database_global_context_variables
 from cognee.modules.data.exceptions.exceptions import DatasetNotFoundError
 from cognee.modules.data.methods import get_authorized_dataset
+from cognee.modules.users.models import User
 
 
-async def get_formatted_graph_data(dataset_id: UUID,
-    dataset = await get_authorized_dataset(
+async def get_formatted_graph_data(dataset_id: UUID, user: User):
+    dataset = await get_authorized_dataset(user, dataset_id)
     if not dataset:
         raise DatasetNotFoundError(message="Dataset not found.")
 
```
cognee/modules/ingestion/exceptions/exceptions.py

```diff
@@ -1,8 +1,8 @@
-from cognee.exceptions import CogneeApiError
+from cognee.exceptions import CogneeValidationError
 from fastapi import status
 
 
-class IngestionError(CogneeApiError):
+class IngestionError(CogneeValidationError):
     def __init__(
         self,
         message: str = "Type of data sent to classify not supported.",
```
cognee/modules/ontology/exceptions/exceptions.py

```diff
@@ -1,8 +1,8 @@
-from cognee.exceptions import CogneeApiError
+from cognee.exceptions import CogneeSystemError
 from fastapi import status
 
 
-class OntologyInitializationError(CogneeApiError):
+class OntologyInitializationError(CogneeSystemError):
     def __init__(
         self,
         message: str = "Ontology initialization failed",
@@ -12,7 +12,7 @@ class OntologyInitializationError(CogneeApiError):
         super().__init__(message, name, status_code)
 
 
-class FindClosestMatchError(CogneeApiError):
+class FindClosestMatchError(CogneeSystemError):
     def __init__(
         self,
         message: str = "Error in find_closest_match",
@@ -22,7 +22,7 @@ class FindClosestMatchError(CogneeApiError):
         super().__init__(message, name, status_code)
 
 
-class GetSubgraphError(CogneeApiError):
+class GetSubgraphError(CogneeSystemError):
     def __init__(
         self,
         message: str = "Failed to retrieve subgraph",
```
cognee/modules/pipelines/exceptions/exceptions.py

```diff
@@ -1,8 +1,8 @@
-from cognee.exceptions import CogneeApiError
+from cognee.exceptions import CogneeSystemError
 from fastapi import status
 
 
-class PipelineRunFailedError(CogneeApiError):
+class PipelineRunFailedError(CogneeSystemError):
     def __init__(
         self,
         message: str = "Pipeline run failed.",
```
cognee/modules/pipelines/exceptions/tasks.py (new file)

```diff
@@ -0,0 +1,18 @@
+from fastapi import status
+from cognee.exceptions import CogneeValidationError
+
+
+class WrongTaskTypeError(CogneeValidationError):
+    """
+    Raised when the tasks argument is not a list of Task class instances.
+    """
+
+    def __init__(
+        self,
+        message: str = "tasks argument must be a list, containing Task class instances.",
+        name: str = "WrongTaskTypeError",
+        status_code=status.HTTP_400_BAD_REQUEST,
+    ):
+        self.message = message
+        self.name = name
+        self.status_code = status_code
```
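`WrongTaskTypeError` pairs with the new `validate_pipeline_tasks` layer (added in `cognee/modules/pipelines/layers/validate_pipeline_tasks.py`, +20 lines), whose body is not shown in this diff. A plausible sketch of the kind of check it performs; the body and the `Task` import path are assumptions, only the two imported exception/function names come from the diff.

```python
from cognee.modules.pipelines.tasks.task import Task  # assumed import path
from cognee.modules.pipelines.exceptions.tasks import WrongTaskTypeError

def validate_pipeline_tasks(tasks):
    # Hypothetical body: accept only a list made up entirely of Task instances.
    if not isinstance(tasks, list) or not all(isinstance(task, Task) for task in tasks):
        raise WrongTaskTypeError()
```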
cognee/modules/pipelines/layers/__init__.py (new file)

```diff
@@ -0,0 +1 @@
+from .validate_pipeline_tasks import validate_pipeline_tasks
```
cognee/modules/pipelines/layers/check_pipeline_run_qualification.py (new file)

```diff
@@ -0,0 +1,59 @@
+from typing import Union, Optional
+from cognee.modules.data.models import Dataset
+from cognee.modules.data.models import Data
+from cognee.modules.pipelines.models import PipelineRunStatus
+from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status
+from cognee.modules.pipelines.methods import get_pipeline_run_by_dataset
+from cognee.shared.logging_utils import get_logger
+
+from cognee.modules.pipelines.models.PipelineRunInfo import (
+    PipelineRunCompleted,
+    PipelineRunStarted,
+)
+
+logger = get_logger(__name__)
+
+
+async def check_pipeline_run_qualification(
+    dataset: Dataset, data: list[Data], pipeline_name: str
+) -> Optional[Union[PipelineRunStarted, PipelineRunCompleted]]:
+    """
+    Determine whether the pipeline is currently being processed or was already processed.
+    If the pipeline was or is being processed, a value is returned and the current pipeline execution should be stopped.
+    If the pipeline is not or was not processed, there is no return value and pipeline processing can start.
+
+    Args:
+        dataset: Dataset object
+        data: List of Data
+        pipeline_name: pipeline name
+
+    Returns: Pipeline state if it is being processed or was already processed
+
+    """
+
+    # async with update_status_lock: TODO: Add UI lock to prevent multiple backend requests
+    if isinstance(dataset, Dataset):
+        task_status = await get_pipeline_status([dataset.id], pipeline_name)
+    else:
+        task_status = {}
+
+    if str(dataset.id) in task_status:
+        if task_status[str(dataset.id)] == PipelineRunStatus.DATASET_PROCESSING_STARTED:
+            logger.info("Dataset %s is already being processed.", dataset.id)
+            pipeline_run = await get_pipeline_run_by_dataset(dataset.id, pipeline_name)
+            return PipelineRunStarted(
+                pipeline_run_id=pipeline_run.pipeline_run_id,
+                dataset_id=dataset.id,
+                dataset_name=dataset.name,
+                payload=data,
+            )
+        elif task_status[str(dataset.id)] == PipelineRunStatus.DATASET_PROCESSING_COMPLETED:
+            logger.info("Dataset %s is already processed.", dataset.id)
+            pipeline_run = await get_pipeline_run_by_dataset(dataset.id, pipeline_name)
+            return PipelineRunCompleted(
+                pipeline_run_id=pipeline_run.pipeline_run_id,
+                dataset_id=dataset.id,
+                dataset_name=dataset.name,
+            )
+
+    return
```