cognee 0.2.3.dev0__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__main__.py +4 -0
- cognee/api/v1/add/add.py +18 -11
- cognee/api/v1/cognify/code_graph_pipeline.py +7 -1
- cognee/api/v1/cognify/cognify.py +22 -115
- cognee/api/v1/cognify/routers/get_cognify_router.py +11 -3
- cognee/api/v1/config/config.py +5 -13
- cognee/api/v1/datasets/routers/get_datasets_router.py +2 -2
- cognee/api/v1/delete/delete.py +1 -1
- cognee/api/v1/exceptions/__init__.py +13 -0
- cognee/api/v1/{delete → exceptions}/exceptions.py +15 -12
- cognee/api/v1/responses/default_tools.py +4 -0
- cognee/api/v1/responses/dispatch_function.py +6 -1
- cognee/api/v1/responses/models.py +1 -1
- cognee/api/v1/search/search.py +6 -7
- cognee/cli/__init__.py +10 -0
- cognee/cli/_cognee.py +180 -0
- cognee/cli/commands/__init__.py +1 -0
- cognee/cli/commands/add_command.py +80 -0
- cognee/cli/commands/cognify_command.py +128 -0
- cognee/cli/commands/config_command.py +225 -0
- cognee/cli/commands/delete_command.py +80 -0
- cognee/cli/commands/search_command.py +149 -0
- cognee/cli/config.py +33 -0
- cognee/cli/debug.py +21 -0
- cognee/cli/echo.py +45 -0
- cognee/cli/exceptions.py +23 -0
- cognee/cli/minimal_cli.py +97 -0
- cognee/cli/reference.py +26 -0
- cognee/cli/suppress_logging.py +12 -0
- cognee/eval_framework/corpus_builder/corpus_builder_executor.py +2 -2
- cognee/eval_framework/eval_config.py +1 -1
- cognee/exceptions/__init__.py +5 -5
- cognee/exceptions/exceptions.py +37 -17
- cognee/infrastructure/data/exceptions/__init__.py +7 -0
- cognee/infrastructure/data/exceptions/exceptions.py +22 -0
- cognee/infrastructure/data/utils/extract_keywords.py +3 -3
- cognee/infrastructure/databases/exceptions/__init__.py +3 -0
- cognee/infrastructure/databases/exceptions/exceptions.py +57 -9
- cognee/infrastructure/databases/graph/get_graph_engine.py +4 -9
- cognee/infrastructure/databases/graph/kuzu/adapter.py +64 -2
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +49 -0
- cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +15 -10
- cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +2 -2
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +4 -5
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -2
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +5 -3
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +17 -8
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +5 -5
- cognee/infrastructure/databases/vector/embeddings/config.py +2 -2
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +6 -6
- cognee/infrastructure/databases/vector/exceptions/exceptions.py +3 -3
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -2
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +4 -3
- cognee/infrastructure/files/utils/get_data_file_path.py +14 -9
- cognee/infrastructure/files/utils/get_file_metadata.py +2 -1
- cognee/infrastructure/llm/LLMGateway.py +14 -5
- cognee/infrastructure/llm/config.py +5 -5
- cognee/infrastructure/llm/exceptions.py +30 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +16 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_content_graph.py +19 -15
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +5 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +6 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -15
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +6 -4
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +9 -7
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +2 -2
- cognee/infrastructure/llm/tokenizer/HuggingFace/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/Mistral/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +6 -6
- cognee/infrastructure/llm/utils.py +7 -7
- cognee/modules/data/exceptions/exceptions.py +18 -5
- cognee/modules/data/methods/__init__.py +2 -0
- cognee/modules/data/methods/create_authorized_dataset.py +19 -0
- cognee/modules/data/methods/delete_data.py +2 -4
- cognee/modules/data/methods/get_authorized_dataset.py +11 -5
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +16 -0
- cognee/modules/data/methods/load_or_create_datasets.py +2 -20
- cognee/modules/data/processing/document_types/exceptions/exceptions.py +2 -2
- cognee/modules/graph/cognee_graph/CogneeGraph.py +6 -4
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +5 -10
- cognee/modules/graph/exceptions/__init__.py +2 -0
- cognee/modules/graph/exceptions/exceptions.py +25 -3
- cognee/modules/graph/methods/get_formatted_graph_data.py +3 -2
- cognee/modules/ingestion/exceptions/exceptions.py +2 -2
- cognee/modules/ontology/exceptions/exceptions.py +4 -4
- cognee/modules/pipelines/__init__.py +1 -1
- cognee/modules/pipelines/exceptions/exceptions.py +2 -2
- cognee/modules/pipelines/exceptions/tasks.py +18 -0
- cognee/modules/pipelines/layers/__init__.py +1 -0
- cognee/modules/pipelines/layers/check_pipeline_run_qualification.py +59 -0
- cognee/modules/pipelines/layers/pipeline_execution_mode.py +127 -0
- cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +12 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +34 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +55 -0
- cognee/modules/pipelines/layers/setup_and_check_environment.py +41 -0
- cognee/modules/pipelines/layers/validate_pipeline_tasks.py +20 -0
- cognee/modules/pipelines/methods/__init__.py +2 -0
- cognee/modules/pipelines/methods/get_pipeline_runs_by_dataset.py +34 -0
- cognee/modules/pipelines/methods/reset_pipeline_run_status.py +16 -0
- cognee/modules/pipelines/operations/__init__.py +0 -1
- cognee/modules/pipelines/operations/log_pipeline_run_initiated.py +1 -1
- cognee/modules/pipelines/operations/pipeline.py +23 -138
- cognee/modules/retrieval/base_feedback.py +11 -0
- cognee/modules/retrieval/cypher_search_retriever.py +1 -9
- cognee/modules/retrieval/exceptions/exceptions.py +12 -6
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +9 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +13 -6
- cognee/modules/retrieval/graph_completion_retriever.py +89 -5
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/natural_language_retriever.py +0 -4
- cognee/modules/retrieval/user_qa_feedback.py +83 -0
- cognee/modules/retrieval/utils/extract_uuid_from_node.py +18 -0
- cognee/modules/retrieval/utils/models.py +40 -0
- cognee/modules/search/exceptions/__init__.py +7 -0
- cognee/modules/search/exceptions/exceptions.py +15 -0
- cognee/modules/search/methods/search.py +47 -7
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/settings/get_settings.py +2 -2
- cognee/modules/users/exceptions/exceptions.py +6 -6
- cognee/shared/CodeGraphEntities.py +1 -0
- cognee/shared/exceptions/exceptions.py +2 -2
- cognee/shared/logging_utils.py +142 -31
- cognee/shared/utils.py +0 -1
- cognee/tasks/completion/exceptions/exceptions.py +3 -3
- cognee/tasks/documents/classify_documents.py +4 -0
- cognee/tasks/documents/exceptions/__init__.py +11 -0
- cognee/tasks/documents/exceptions/exceptions.py +36 -0
- cognee/tasks/documents/extract_chunks_from_documents.py +8 -2
- cognee/tasks/graph/exceptions/__init__.py +12 -0
- cognee/tasks/graph/exceptions/exceptions.py +41 -0
- cognee/tasks/graph/extract_graph_from_data.py +34 -2
- cognee/tasks/ingestion/exceptions/__init__.py +8 -0
- cognee/tasks/ingestion/exceptions/exceptions.py +12 -0
- cognee/tasks/ingestion/resolve_data_directories.py +5 -0
- cognee/tasks/repo_processor/get_local_dependencies.py +2 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +120 -48
- cognee/tasks/storage/add_data_points.py +41 -3
- cognee/tasks/storage/exceptions/__init__.py +9 -0
- cognee/tasks/storage/exceptions/exceptions.py +13 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/summarization/exceptions/__init__.py +9 -0
- cognee/tasks/summarization/exceptions/exceptions.py +14 -0
- cognee/tasks/summarization/summarize_text.py +8 -1
- cognee/tests/integration/cli/__init__.py +3 -0
- cognee/tests/integration/cli/test_cli_integration.py +331 -0
- cognee/tests/integration/documents/PdfDocument_test.py +2 -2
- cognee/tests/integration/documents/TextDocument_test.py +2 -4
- cognee/tests/integration/documents/UnstructuredDocument_test.py +5 -8
- cognee/tests/test_delete_by_id.py +1 -1
- cognee/tests/{test_deletion.py → test_delete_hard.py} +0 -37
- cognee/tests/test_delete_soft.py +85 -0
- cognee/tests/test_kuzu.py +2 -2
- cognee/tests/test_neo4j.py +2 -2
- cognee/tests/test_search_db.py +126 -7
- cognee/tests/unit/cli/__init__.py +3 -0
- cognee/tests/unit/cli/test_cli_commands.py +483 -0
- cognee/tests/unit/cli/test_cli_edge_cases.py +625 -0
- cognee/tests/unit/cli/test_cli_main.py +173 -0
- cognee/tests/unit/cli/test_cli_runner.py +62 -0
- cognee/tests/unit/cli/test_cli_utils.py +127 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +5 -5
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +3 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +3 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +3 -3
- cognee/tests/unit/modules/search/search_methods_test.py +4 -2
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/METADATA +7 -5
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/RECORD +172 -121
- cognee-0.2.4.dist-info/entry_points.txt +2 -0
- cognee/infrastructure/databases/exceptions/EmbeddingException.py +0 -20
- cognee/infrastructure/databases/graph/networkx/__init__.py +0 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +0 -1017
- cognee/infrastructure/pipeline/models/Operation.py +0 -60
- cognee/infrastructure/pipeline/models/__init__.py +0 -0
- cognee/notebooks/github_analysis_step_by_step.ipynb +0 -37
- cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py +0 -7
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/WHEEL +0 -0
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/licenses/NOTICE.md +0 -0
cognee/__main__.py
ADDED
cognee/api/v1/add/add.py
CHANGED
@@ -1,9 +1,15 @@
 from uuid import UUID
 from typing import Union, BinaryIO, List, Optional

-from cognee.modules.pipelines import Task
 from cognee.modules.users.models import User
-from cognee.modules.pipelines import cognee_pipeline
+from cognee.modules.pipelines import Task, run_pipeline
+from cognee.modules.pipelines.layers.resolve_authorized_user_dataset import (
+    resolve_authorized_user_dataset,
+)
+from cognee.modules.pipelines.layers.reset_dataset_pipeline_run_status import (
+    reset_dataset_pipeline_run_status,
+)
+from cognee.modules.engine.operations.setup import setup
 from cognee.tasks.ingestion import ingest_data, resolve_data_directories

@@ -128,28 +134,29 @@ async def add(

     Optional:
     - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama"
-    - LLM_MODEL: Model name (default: "gpt-
+    - LLM_MODEL: Model name (default: "gpt-5-mini")
     - DEFAULT_USER_EMAIL: Custom default user email
     - DEFAULT_USER_PASSWORD: Custom default user password
     - VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "pgvector"
-    - GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j"
+    - GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j"

-    Raises:
-        FileNotFoundError: If specified file paths don't exist
-        PermissionError: If user lacks access to files or dataset
-        UnsupportedFileTypeError: If file format cannot be processed
-        InvalidValueError: If LLM_API_KEY is not set or invalid
     """
     tasks = [
         Task(resolve_data_directories, include_subdirectories=True),
         Task(ingest_data, dataset_name, user, node_set, dataset_id, preferred_loaders),
     ]

+    await setup()
+
+    user, authorized_dataset = await resolve_authorized_user_dataset(dataset_id, dataset_name, user)
+
+    await reset_dataset_pipeline_run_status(authorized_dataset.id, user)
+
     pipeline_run_info = None

-    async for run_info in cognee_pipeline(
+    async for run_info in run_pipeline(
         tasks=tasks,
-        datasets=
+        datasets=[authorized_dataset.id],
         data=data,
         user=user,
         pipeline_name="add_pipeline",
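For reference, a minimal usage sketch of the reworked add() flow above; the text and dataset name are example values. The point of the change is that setup, dataset authorization, and pipeline-run status reset now happen inside add() before run_pipeline starts.

```python
# Hedged sketch of calling the reworked add(); "demo_dataset" is arbitrary.
# Inside add(), setup() runs, the dataset is resolved and authorized, its
# pipeline-run status is reset, and only then does run_pipeline ingest data.
import asyncio
import cognee

async def main():
    await cognee.add(
        "Knowledge graphs connect entities through typed relationships.",
        dataset_name="demo_dataset",
    )

asyncio.run(main())
```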
cognee/api/v1/cognify/code_graph_pipeline.py
CHANGED

@@ -40,8 +40,14 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
     user = await get_default_user()
     detailed_extraction = True

+    # Multi-language support: allow passing supported_languages
+    supported_languages = None  # defer to task defaults
     tasks = [
-        Task(
+        Task(
+            get_repo_file_dependencies,
+            detailed_extraction=detailed_extraction,
+            supported_languages=supported_languages,
+        ),
         # Task(summarize_code, task_config={"batch_size": 500}),  # This task takes a long time to complete
         Task(add_data_points, task_config={"batch_size": 30}),
     ]
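A hedged sketch of driving this pipeline; the repository path is a placeholder, and supported_languages is left to the task defaults, matching the diff above.

```python
# Sketch: run_code_graph_pipeline streams pipeline-run info, so it is
# consumed with async for; "/path/to/repo" is a placeholder.
import asyncio
from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline

async def main():
    async for run_info in run_code_graph_pipeline("/path/to/repo", include_docs=False):
        print(run_info)

asyncio.run(main())
```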
cognee/api/v1/cognify/cognify.py
CHANGED
@@ -7,12 +7,10 @@ from cognee.shared.logging_utils import get_logger
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.infrastructure.llm import get_max_chunk_tokens

-from cognee.modules.pipelines import cognee_pipeline
+from cognee.modules.pipelines import run_pipeline
 from cognee.modules.pipelines.tasks.task import Task
 from cognee.modules.chunking.TextChunker import TextChunker
 from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
-from cognee.modules.pipelines.models.PipelineRunInfo import PipelineRunCompleted, PipelineRunErrored
-from cognee.modules.pipelines.queues.pipeline_run_info_queues import push_to_queue
 from cognee.modules.users.models import User

 from cognee.tasks.documents import (

@@ -23,6 +21,7 @@ from cognee.tasks.documents import (
 from cognee.tasks.graph import extract_graph_from_data
 from cognee.tasks.storage import add_data_points
 from cognee.tasks.summarization import summarize_text
+from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor

 logger = get_logger("cognify")

@@ -40,6 +39,7 @@ async def cognify(
     graph_db_config: dict = None,
     run_in_background: bool = False,
     incremental_loading: bool = True,
+    custom_prompt: Optional[str] = None,
 ):
     """
     Transform ingested data into a structured knowledge graph.

@@ -91,7 +91,7 @@
             - LangchainChunker: Recursive character splitting with overlap
             Determines how documents are segmented for processing.
         chunk_size: Maximum tokens per chunk. Auto-calculated based on LLM if None.
-            Formula: min(
+            Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2)
             Default limits: ~512-8192 tokens depending on models.
             Smaller chunks = more granular but potentially fragmented knowledge.
         ontology_file_path: Path to RDF/OWL ontology file for domain-specific entity types.

@@ -102,6 +102,10 @@
             If False, waits for completion before returning.
             Background mode recommended for large datasets (>100MB).
             Use pipeline_run_id from return value to monitor progress.
+        custom_prompt: Optional custom prompt string to use for entity extraction and graph generation.
+            If provided, this prompt will be used instead of the default prompts for
+            knowledge graph extraction. The prompt should guide the LLM on how to
+            extract entities and relationships from the text content.

     Returns:
         Union[dict, list[PipelineRunInfo]]:

@@ -177,124 +181,25 @@
         - LLM_PROVIDER, LLM_MODEL, VECTOR_DB_PROVIDER, GRAPH_DATABASE_PROVIDER
         - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
         - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
-
-    Raises:
-        DatasetNotFoundError: If specified datasets don't exist
-        PermissionError: If user lacks processing rights
-        InvalidValueError: If LLM_API_KEY is not set
-        OntologyParsingError: If ontology file is malformed
-        ValueError: If chunks exceed max token limits (reduce chunk_size)
-        DatabaseNotCreatedError: If databases are not properly initialized
     """
-    tasks = await get_default_tasks(
-
-
-        return await run_cognify_as_background_process(
-            tasks=tasks,
-            user=user,
-            datasets=datasets,
-            vector_db_config=vector_db_config,
-            graph_db_config=graph_db_config,
-            incremental_loading=incremental_loading,
-        )
-    else:
-        return await run_cognify_blocking(
-            tasks=tasks,
-            user=user,
-            datasets=datasets,
-            vector_db_config=vector_db_config,
-            graph_db_config=graph_db_config,
-            incremental_loading=incremental_loading,
-        )
-
+    tasks = await get_default_tasks(
+        user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt
+    )

-
-
-    user,
-    datasets,
-    graph_db_config: dict = None,
-    vector_db_config: dict = False,
-    incremental_loading: bool = True,
-):
-    total_run_info = {}
+    # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
+    pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background)

-
+    # Run the run_pipeline in the background or blocking based on executor
+    return await pipeline_executor_func(
+        pipeline=run_pipeline,
         tasks=tasks,
-        datasets=datasets,
         user=user,
-
-        graph_db_config=graph_db_config,
+        datasets=datasets,
         vector_db_config=vector_db_config,
+        graph_db_config=graph_db_config,
         incremental_loading=incremental_loading,
-
-
-            total_run_info[run_info.dataset_id] = run_info
-        else:
-            total_run_info = run_info
-
-    return total_run_info
-
-
-async def run_cognify_as_background_process(
-    tasks,
-    user,
-    datasets,
-    graph_db_config: dict = None,
-    vector_db_config: dict = False,
-    incremental_loading: bool = True,
-):
-    # Convert dataset to list if it's a string
-    if isinstance(datasets, str):
-        datasets = [datasets]
-
-    # Store pipeline status for all pipelines
-    pipeline_run_started_info = {}
-
-    async def handle_rest_of_the_run(pipeline_list):
-        # Execute all provided pipelines one by one to avoid database write conflicts
-        # TODO: Convert to async gather task instead of for loop when Queue mechanism for database is created
-        for pipeline in pipeline_list:
-            while True:
-                try:
-                    pipeline_run_info = await anext(pipeline)
-
-                    push_to_queue(pipeline_run_info.pipeline_run_id, pipeline_run_info)
-
-                    if isinstance(pipeline_run_info, PipelineRunCompleted) or isinstance(
-                        pipeline_run_info, PipelineRunErrored
-                    ):
-                        break
-                except StopAsyncIteration:
-                    break
-
-    # Start all pipelines to get started status
-    pipeline_list = []
-    for dataset in datasets:
-        pipeline_run = cognee_pipeline(
-            tasks=tasks,
-            user=user,
-            datasets=dataset,
-            pipeline_name="cognify_pipeline",
-            graph_db_config=graph_db_config,
-            vector_db_config=vector_db_config,
-            incremental_loading=incremental_loading,
-        )
-
-        # Save dataset Pipeline run started info
-        run_info = await anext(pipeline_run)
-        pipeline_run_started_info[run_info.dataset_id] = run_info
-
-        if pipeline_run_started_info[run_info.dataset_id].payload:
-            # Remove payload info to avoid serialization
-            # TODO: Handle payload serialization
-            pipeline_run_started_info[run_info.dataset_id].payload = []
-
-        pipeline_list.append(pipeline_run)
-
-    # Send all started pipelines to execute one by one in background
-    asyncio.create_task(handle_rest_of_the_run(pipeline_list=pipeline_list))
-
-    return pipeline_run_started_info
+        pipeline_name="cognify_pipeline",
+    )


 async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's comment)

@@ -303,6 +208,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     chunker=TextChunker,
     chunk_size: int = None,
     ontology_file_path: Optional[str] = None,
+    custom_prompt: Optional[str] = None,
 ) -> list[Task]:
     default_tasks = [
         Task(classify_documents),

@@ -316,6 +222,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
             extract_graph_from_data,
             graph_model=graph_model,
             ontology_adapter=OntologyResolver(ontology_file=ontology_file_path),
+            custom_prompt=custom_prompt,
             task_config={"batch_size": 10},
         ),  # Generate knowledge graphs from the document chunks.
         Task(
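A minimal sketch of the slimmed-down cognify() call path, assuming data was added beforehand; run_in_background picks the executor returned by get_pipeline_executor(), and custom_prompt replaces the default graph-extraction prompts.

```python
# Hedged sketch of the new cognify() surface; dataset name and prompt are
# example values. run_in_background=False waits for completion, True returns
# started-run info while the pipeline continues in the background.
import asyncio
import cognee

async def main():
    await cognee.add("GraphRAG combines knowledge graphs with retrieval.", dataset_name="demo")
    run_info = await cognee.cognify(
        datasets=["demo"],
        run_in_background=False,
        custom_prompt="Extract entities focusing on technical concepts and their relationships.",
    )
    print(run_info)

asyncio.run(main())
```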
cognee/api/v1/cognify/routers/get_cognify_router.py
CHANGED

@@ -37,6 +37,9 @@ class CognifyPayloadDTO(InDTO):
     datasets: Optional[List[str]] = Field(default=None)
     dataset_ids: Optional[List[UUID]] = Field(default=None, examples=[[]])
     run_in_background: Optional[bool] = Field(default=False)
+    custom_prompt: Optional[str] = Field(
+        default=None, description="Custom prompt for entity extraction and graph generation"
+    )


 def get_cognify_router() -> APIRouter:

@@ -63,6 +66,7 @@ def get_cognify_router() -> APIRouter:
     - **datasets** (Optional[List[str]]): List of dataset names to process. Dataset names are resolved to datasets owned by the authenticated user.
     - **dataset_ids** (Optional[List[UUID]]): List of existing dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
     - **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking).
+    - **custom_prompt** (Optional[str]): Custom prompt for entity extraction and graph generation. If provided, this prompt will be used instead of the default prompts for knowledge graph extraction.

     ## Response
     - **Blocking execution**: Complete pipeline run information with entity counts, processing duration, and success/failure status

@@ -76,7 +80,8 @@ def get_cognify_router() -> APIRouter:
     ```json
     {
         "datasets": ["research_papers", "documentation"],
-        "run_in_background": false
+        "run_in_background": false,
+        "custom_prompt": "Extract entities focusing on technical concepts and their relationships. Identify key technologies, methodologies, and their interconnections."
     }
     ```

@@ -106,7 +111,10 @@ def get_cognify_router() -> APIRouter:
         datasets = payload.dataset_ids if payload.dataset_ids else payload.datasets

         cognify_run = await cognee_cognify(
-            datasets,
+            datasets,
+            user,
+            run_in_background=payload.run_in_background,
+            custom_prompt=payload.custom_prompt,
         )

         # If any cognify run errored return JSONResponse with proper error status code

@@ -164,7 +172,7 @@ def get_cognify_router() -> APIRouter:
                 {
                     "pipeline_run_id": str(pipeline_run_info.pipeline_run_id),
                     "status": pipeline_run_info.status,
-                    "payload": await get_formatted_graph_data(pipeline_run.dataset_id, user
+                    "payload": await get_formatted_graph_data(pipeline_run.dataset_id, user),
                 }
             )
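A hedged example of exercising this endpoint with the new field; the base URL, route prefix, and bearer token are assumptions for a locally running cognee API server.

```python
# Assumed local server and token; the JSON body mirrors the request example
# in the router docstring above.
import requests

response = requests.post(
    "http://localhost:8000/api/v1/cognify",
    headers={"Authorization": "Bearer <token>"},
    json={
        "datasets": ["research_papers"],
        "run_in_background": False,
        "custom_prompt": "Extract entities focusing on technical concepts.",
    },
)
print(response.status_code, response.json())
```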
cognee/api/v1/config/config.py
CHANGED
@@ -2,7 +2,6 @@

 import os
 from cognee.base_config import get_base_config
-from cognee.exceptions import InvalidValueError, InvalidAttributeError
 from cognee.modules.cognify.config import get_cognify_config
 from cognee.infrastructure.data.chunking.config import get_chunk_config
 from cognee.infrastructure.databases.vector import get_vectordb_config

@@ -11,6 +10,7 @@ from cognee.infrastructure.llm.config import (
     get_llm_config,
 )
 from cognee.infrastructure.databases.relational import get_relational_config, get_migration_config
+from cognee.api.v1.exceptions.exceptions import InvalidConfigAttributeError


 class config:

@@ -92,9 +92,7 @@ class config:
         if hasattr(llm_config, key):
             object.__setattr__(llm_config, key, value)
         else:
-            raise
-                message=f"'{key}' is not a valid attribute of the config."
-            )
+            raise InvalidConfigAttributeError(attribute=key)

     @staticmethod
     def set_chunk_strategy(chunk_strategy: object):

@@ -131,9 +129,7 @@ class config:
         if hasattr(relational_db_config, key):
             object.__setattr__(relational_db_config, key, value)
         else:
-            raise
-                message=f"'{key}' is not a valid attribute of the config."
-            )
+            raise InvalidConfigAttributeError(attribute=key)

     @staticmethod
     def set_migration_db_config(config_dict: dict):

@@ -145,9 +141,7 @@ class config:
         if hasattr(migration_db_config, key):
             object.__setattr__(migration_db_config, key, value)
         else:
-            raise
-                message=f"'{key}' is not a valid attribute of the config."
-            )
+            raise InvalidConfigAttributeError(attribute=key)

     @staticmethod
     def set_graph_db_config(config_dict: dict) -> None:

@@ -171,9 +165,7 @@ class config:
         if hasattr(vector_db_config, key):
             object.__setattr__(vector_db_config, key, value)
         else:
-
-                message=f"'{key}' is not a valid attribute of the config."
-            )
+            InvalidConfigAttributeError(attribute=key)

     @staticmethod
     def set_vector_db_key(db_key: str):
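The setter pattern repeated above condenses to one sketch; the helper name is illustrative, not part of cognee.

```python
# Sketch of the shared validation pattern: unknown keys now raise
# InvalidConfigAttributeError (HTTP 400) instead of the removed
# InvalidValueError/InvalidAttributeError pair. apply_config is illustrative.
from cognee.api.v1.exceptions import InvalidConfigAttributeError

def apply_config(config_obj, config_dict: dict) -> None:
    for key, value in config_dict.items():
        if hasattr(config_obj, key):
            object.__setattr__(config_obj, key, value)
        else:
            raise InvalidConfigAttributeError(attribute=key)
```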
cognee/api/v1/datasets/routers/get_datasets_router.py
CHANGED

@@ -13,7 +13,7 @@ from cognee.infrastructure.databases.relational import get_relational_engine
 from cognee.modules.data.methods import get_authorized_existing_datasets
 from cognee.modules.data.methods import create_dataset, get_datasets_by_name
 from cognee.shared.logging_utils import get_logger
-from cognee.api.v1.
+from cognee.api.v1.exceptions import DataNotFoundError, DatasetNotFoundError
 from cognee.modules.users.models import User
 from cognee.modules.users.methods import get_authenticated_user
 from cognee.modules.users.permissions.methods import (

@@ -284,7 +284,7 @@ def get_datasets_router() -> APIRouter:
     - **500 Internal Server Error**: Error retrieving graph data
     """

-    graph_data = await get_formatted_graph_data(dataset_id, user
+    graph_data = await get_formatted_graph_data(dataset_id, user)

     return graph_data
cognee/api/v1/delete/delete.py
CHANGED
@@ -16,7 +16,7 @@ from cognee.modules.users.methods import get_default_user
 from cognee.modules.data.methods import get_authorized_existing_datasets
 from cognee.context_global_variables import set_database_global_context_variables

-from cognee.api.v1.
+from cognee.api.v1.exceptions import (
     DocumentNotFoundError,
     DatasetNotFoundError,
     DocumentSubgraphNotFoundError,
cognee/api/v1/exceptions/__init__.py
ADDED

@@ -0,0 +1,13 @@
+"""
+Custom exceptions for the Cognee API.
+
+This module defines a set of exceptions for handling various data errors
+"""
+
+from .exceptions import (
+    InvalidConfigAttributeError,
+    DocumentNotFoundError,
+    DatasetNotFoundError,
+    DataNotFoundError,
+    DocumentSubgraphNotFoundError,
+)
cognee/api/v1/{delete → exceptions}/exceptions.py
CHANGED

@@ -1,10 +1,19 @@
-from cognee.exceptions import CogneeApiError
+from cognee.exceptions import CogneeConfigurationError, CogneeValidationError
 from fastapi import status


-class DocumentNotFoundError(CogneeApiError):
-
+class InvalidConfigAttributeError(CogneeConfigurationError):
+    def __init__(
+        self,
+        attribute: str,
+        name: str = "InvalidConfigAttributeError",
+        status_code: int = status.HTTP_400_BAD_REQUEST,
+    ):
+        message = f"'{attribute}' is not a valid attribute of the configuration."
+        super().__init__(message, name, status_code)

+
+class DocumentNotFoundError(CogneeValidationError):
     def __init__(
         self,
         message: str = "Document not found in database.",

@@ -14,9 +23,7 @@ class DocumentNotFoundError(CogneeApiError):
         super().__init__(message, name, status_code)


-class DatasetNotFoundError(CogneeApiError):
-    """Raised when a dataset cannot be found."""
-
+class DatasetNotFoundError(CogneeValidationError):
     def __init__(
         self,
         message: str = "Dataset not found.",

@@ -26,9 +33,7 @@ class DatasetNotFoundError(CogneeApiError):
         super().__init__(message, name, status_code)


-class DataNotFoundError(CogneeApiError):
-    """Raised when a dataset cannot be found."""
-
+class DataNotFoundError(CogneeValidationError):
     def __init__(
         self,
         message: str = "Data not found.",

@@ -38,9 +43,7 @@ class DataNotFoundError(CogneeApiError):
         super().__init__(message, name, status_code)


-class DocumentSubgraphNotFoundError(CogneeApiError):
-    """Raised when a document's subgraph cannot be found in the graph database."""
-
+class DocumentSubgraphNotFoundError(CogneeValidationError):
     def __init__(
         self,
         message: str = "Document subgraph not found in graph database.",
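A sketch of raising the reworked exceptions: each class now carries a default message, name, and HTTP status code that the API layer can surface directly. The lookup helper here is hypothetical.

```python
# Hedged sketch: find_dataset is a hypothetical lookup callable; the message
# override is optional because DatasetNotFoundError ships defaults for
# message, name, and status_code.
from cognee.api.v1.exceptions import DatasetNotFoundError

async def require_dataset(dataset_id, find_dataset):
    dataset = await find_dataset(dataset_id)
    if dataset is None:
        raise DatasetNotFoundError(message=f"Dataset '{dataset_id}' not found.")
    return dataset
```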
cognee/api/v1/responses/default_tools.py
CHANGED

@@ -49,6 +49,10 @@ DEFAULT_TOOLS = [
                     "type": "string",
                     "description": "Path to a custom ontology file",
                 },
+                "custom_prompt": {
+                    "type": "string",
+                    "description": "Custom prompt for entity extraction and graph generation. If provided, this prompt will be used instead of the default prompts.",
+                },
             },
             "required": ["text"],
         },
cognee/api/v1/responses/dispatch_function.py
CHANGED

@@ -88,11 +88,16 @@ async def handle_cognify(arguments: Dict[str, Any], user) -> str:
     """Handle cognify function call"""
     text = arguments.get("text")
     ontology_file_path = arguments.get("ontology_file_path")
+    custom_prompt = arguments.get("custom_prompt")

     if text:
         await add(data=text, user=user)

-    await cognify(
+    await cognify(
+        user=user,
+        ontology_file_path=ontology_file_path if ontology_file_path else None,
+        custom_prompt=custom_prompt,
+    )

     return (
         "Text successfully converted into knowledge graph."
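A hedged sketch of the tool-call path above: the dispatcher pulls optional keys from the arguments dict (matching the DEFAULT_TOOLS schema) and forwards them to cognify().

```python
# Sketch of invoking handle_cognify() directly; the argument keys follow the
# DEFAULT_TOOLS schema, and the default user is used for simplicity.
import asyncio
from cognee.api.v1.responses.dispatch_function import handle_cognify
from cognee.modules.users.methods import get_default_user

async def main():
    user = await get_default_user()
    result = await handle_cognify(
        {
            "text": "Cognee builds knowledge graphs from documents.",
            "custom_prompt": "Focus on technologies and their relationships.",
        },
        user,
    )
    print(result)

asyncio.run(main())
```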
cognee/api/v1/responses/models.py
CHANGED

@@ -70,7 +70,7 @@ class ResponseRequest(InDTO):
     tool_choice: Optional[Union[str, Dict[str, Any]]] = "auto"
     user: Optional[str] = None
     temperature: Optional[float] = 1.0
-
+    max_completion_tokens: Optional[int] = None


 class ToolCallOutput(BaseModel):
cognee/api/v1/search/search.py
CHANGED
@@ -19,6 +19,8 @@ async def search(
     top_k: int = 10,
     node_type: Optional[Type] = None,
     node_name: Optional[List[str]] = None,
+    save_interaction: bool = False,
+    last_k: Optional[int] = None,
 ) -> list:
     """
     Search and query the knowledge graph for insights, information, and connections.

@@ -107,6 +109,8 @@ async def search(

         node_name: Filter results to specific named entities (for targeted search).

+        save_interaction: Save interaction (query, context, answer connected to triplet endpoints) results into the graph or not
+
     Returns:
         list: Search results in format determined by query_type:

@@ -158,13 +162,6 @@ async def search(
         - VECTOR_DB_PROVIDER: Must match what was used during cognify
         - GRAPH_DATABASE_PROVIDER: Must match what was used during cognify

-    Raises:
-        DatasetNotFoundError: If specified datasets don't exist or aren't accessible
-        PermissionDeniedError: If user lacks read access to requested datasets
-        NoDataError: If no relevant data found for the search query
-        InvalidValueError: If LLM_API_KEY is not set (for LLM-based search types)
-        ValueError: If query_text is empty or search parameters are invalid
-        CollectionNotFoundError: If vector collection not found (data not processed)
     """
     # We use lists from now on for datasets
     if isinstance(datasets, UUID) or isinstance(datasets, str):

@@ -189,6 +186,8 @@ async def search(
         top_k=top_k,
         node_type=node_type,
         node_name=node_name,
+        save_interaction=save_interaction,
+        last_k=last_k,
     )

     return filtered_search_results
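A minimal sketch of the extended search() call: save_interaction persists the query, context, and answer back into the graph, while last_k (left undocumented in this diff, assumed to bound recent-interaction history) is simply passed through to the underlying search method.

```python
# Hedged sketch of search() with the new parameters; the query text is an
# example, and SearchType.GRAPH_COMPLETION is one of the existing modes.
import asyncio
import cognee
from cognee.modules.search.types import SearchType

async def main():
    results = await cognee.search(
        query_text="How do the ingested documents relate to each other?",
        query_type=SearchType.GRAPH_COMPLETION,
        save_interaction=True,
        last_k=5,
    )
    print(results)

asyncio.run(main())
```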
cognee/cli/__init__.py
ADDED