cognee 0.2.3.dev1__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__main__.py +4 -0
- cognee/api/v1/add/add.py +18 -6
- cognee/api/v1/cognify/code_graph_pipeline.py +7 -1
- cognee/api/v1/cognify/cognify.py +22 -107
- cognee/api/v1/cognify/routers/get_cognify_router.py +11 -3
- cognee/api/v1/datasets/routers/get_datasets_router.py +1 -1
- cognee/api/v1/responses/default_tools.py +4 -0
- cognee/api/v1/responses/dispatch_function.py +6 -1
- cognee/api/v1/responses/models.py +1 -1
- cognee/api/v1/search/search.py +6 -0
- cognee/cli/__init__.py +10 -0
- cognee/cli/_cognee.py +180 -0
- cognee/cli/commands/__init__.py +1 -0
- cognee/cli/commands/add_command.py +80 -0
- cognee/cli/commands/cognify_command.py +128 -0
- cognee/cli/commands/config_command.py +225 -0
- cognee/cli/commands/delete_command.py +80 -0
- cognee/cli/commands/search_command.py +149 -0
- cognee/cli/config.py +33 -0
- cognee/cli/debug.py +21 -0
- cognee/cli/echo.py +45 -0
- cognee/cli/exceptions.py +23 -0
- cognee/cli/minimal_cli.py +97 -0
- cognee/cli/reference.py +26 -0
- cognee/cli/suppress_logging.py +12 -0
- cognee/eval_framework/corpus_builder/corpus_builder_executor.py +2 -2
- cognee/eval_framework/eval_config.py +1 -1
- cognee/infrastructure/databases/graph/get_graph_engine.py +4 -9
- cognee/infrastructure/databases/graph/kuzu/adapter.py +64 -2
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +49 -0
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +5 -3
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +16 -7
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +5 -5
- cognee/infrastructure/databases/vector/embeddings/config.py +2 -2
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +6 -6
- cognee/infrastructure/files/utils/get_data_file_path.py +14 -9
- cognee/infrastructure/files/utils/get_file_metadata.py +2 -1
- cognee/infrastructure/llm/LLMGateway.py +14 -5
- cognee/infrastructure/llm/config.py +5 -5
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +16 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_content_graph.py +19 -15
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +3 -3
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +3 -3
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +14 -8
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +6 -4
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +2 -2
- cognee/infrastructure/llm/tokenizer/HuggingFace/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/Mistral/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +6 -6
- cognee/infrastructure/llm/utils.py +7 -7
- cognee/modules/data/methods/__init__.py +2 -0
- cognee/modules/data/methods/create_authorized_dataset.py +19 -0
- cognee/modules/data/methods/get_authorized_dataset.py +11 -5
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +16 -0
- cognee/modules/data/methods/load_or_create_datasets.py +2 -20
- cognee/modules/graph/methods/get_formatted_graph_data.py +3 -2
- cognee/modules/pipelines/__init__.py +1 -1
- cognee/modules/pipelines/exceptions/tasks.py +18 -0
- cognee/modules/pipelines/layers/__init__.py +1 -0
- cognee/modules/pipelines/layers/check_pipeline_run_qualification.py +59 -0
- cognee/modules/pipelines/layers/pipeline_execution_mode.py +127 -0
- cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +12 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +34 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +55 -0
- cognee/modules/pipelines/layers/setup_and_check_environment.py +41 -0
- cognee/modules/pipelines/layers/validate_pipeline_tasks.py +20 -0
- cognee/modules/pipelines/methods/__init__.py +2 -0
- cognee/modules/pipelines/methods/get_pipeline_runs_by_dataset.py +34 -0
- cognee/modules/pipelines/methods/reset_pipeline_run_status.py +16 -0
- cognee/modules/pipelines/operations/__init__.py +0 -1
- cognee/modules/pipelines/operations/log_pipeline_run_initiated.py +1 -1
- cognee/modules/pipelines/operations/pipeline.py +23 -138
- cognee/modules/retrieval/base_feedback.py +11 -0
- cognee/modules/retrieval/cypher_search_retriever.py +1 -9
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +9 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +13 -6
- cognee/modules/retrieval/graph_completion_retriever.py +89 -5
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/natural_language_retriever.py +0 -4
- cognee/modules/retrieval/user_qa_feedback.py +83 -0
- cognee/modules/retrieval/utils/extract_uuid_from_node.py +18 -0
- cognee/modules/retrieval/utils/models.py +40 -0
- cognee/modules/search/methods/search.py +46 -5
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/settings/get_settings.py +2 -2
- cognee/shared/CodeGraphEntities.py +1 -0
- cognee/shared/logging_utils.py +142 -31
- cognee/shared/utils.py +0 -1
- cognee/tasks/graph/extract_graph_from_data.py +6 -2
- cognee/tasks/repo_processor/get_local_dependencies.py +2 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +120 -48
- cognee/tasks/storage/add_data_points.py +33 -3
- cognee/tests/integration/cli/__init__.py +3 -0
- cognee/tests/integration/cli/test_cli_integration.py +331 -0
- cognee/tests/integration/documents/PdfDocument_test.py +2 -2
- cognee/tests/integration/documents/TextDocument_test.py +2 -4
- cognee/tests/integration/documents/UnstructuredDocument_test.py +5 -8
- cognee/tests/{test_deletion.py → test_delete_hard.py} +0 -37
- cognee/tests/test_delete_soft.py +85 -0
- cognee/tests/test_kuzu.py +2 -2
- cognee/tests/test_neo4j.py +2 -2
- cognee/tests/test_search_db.py +126 -7
- cognee/tests/unit/cli/__init__.py +3 -0
- cognee/tests/unit/cli/test_cli_commands.py +483 -0
- cognee/tests/unit/cli/test_cli_edge_cases.py +625 -0
- cognee/tests/unit/cli/test_cli_main.py +173 -0
- cognee/tests/unit/cli/test_cli_runner.py +62 -0
- cognee/tests/unit/cli/test_cli_utils.py +127 -0
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +3 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +3 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +3 -3
- cognee/tests/unit/modules/search/search_methods_test.py +2 -0
- {cognee-0.2.3.dev1.dist-info → cognee-0.2.4.dist-info}/METADATA +7 -5
- {cognee-0.2.3.dev1.dist-info → cognee-0.2.4.dist-info}/RECORD +120 -83
- cognee-0.2.4.dist-info/entry_points.txt +2 -0
- cognee/infrastructure/databases/graph/networkx/__init__.py +0 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +0 -1017
- cognee/infrastructure/pipeline/models/Operation.py +0 -60
- cognee/infrastructure/pipeline/models/__init__.py +0 -0
- cognee/notebooks/github_analysis_step_by_step.ipynb +0 -37
- cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py +0 -7
- {cognee-0.2.3.dev1.dist-info → cognee-0.2.4.dist-info}/WHEEL +0 -0
- {cognee-0.2.3.dev1.dist-info → cognee-0.2.4.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.3.dev1.dist-info → cognee-0.2.4.dist-info}/licenses/NOTICE.md +0 -0
cognee/__main__.py
ADDED
cognee/api/v1/add/add.py
CHANGED
|
@@ -1,9 +1,15 @@
|
|
|
1
1
|
from uuid import UUID
|
|
2
2
|
from typing import Union, BinaryIO, List, Optional
|
|
3
3
|
|
|
4
|
-
from cognee.modules.pipelines import Task
|
|
5
4
|
from cognee.modules.users.models import User
|
|
6
|
-
from cognee.modules.pipelines import cognee_pipeline
|
|
5
|
+
from cognee.modules.pipelines import Task, run_pipeline
|
|
6
|
+
from cognee.modules.pipelines.layers.resolve_authorized_user_dataset import (
|
|
7
|
+
resolve_authorized_user_dataset,
|
|
8
|
+
)
|
|
9
|
+
from cognee.modules.pipelines.layers.reset_dataset_pipeline_run_status import (
|
|
10
|
+
reset_dataset_pipeline_run_status,
|
|
11
|
+
)
|
|
12
|
+
from cognee.modules.engine.operations.setup import setup
|
|
7
13
|
from cognee.tasks.ingestion import ingest_data, resolve_data_directories
|
|
8
14
|
|
|
9
15
|
|
|
@@ -128,11 +134,11 @@ async def add(
|
|
|
128
134
|
|
|
129
135
|
Optional:
|
|
130
136
|
- LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama"
|
|
131
|
-
- LLM_MODEL: Model name (default: "gpt-
|
|
137
|
+
- LLM_MODEL: Model name (default: "gpt-5-mini")
|
|
132
138
|
- DEFAULT_USER_EMAIL: Custom default user email
|
|
133
139
|
- DEFAULT_USER_PASSWORD: Custom default user password
|
|
134
140
|
- VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "pgvector"
|
|
135
|
-
- GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j"
|
|
141
|
+
- GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j"
|
|
136
142
|
|
|
137
143
|
"""
|
|
138
144
|
tasks = [
|
|
@@ -140,11 +146,17 @@ async def add(
|
|
|
140
146
|
Task(ingest_data, dataset_name, user, node_set, dataset_id, preferred_loaders),
|
|
141
147
|
]
|
|
142
148
|
|
|
149
|
+
await setup()
|
|
150
|
+
|
|
151
|
+
user, authorized_dataset = await resolve_authorized_user_dataset(dataset_id, dataset_name, user)
|
|
152
|
+
|
|
153
|
+
await reset_dataset_pipeline_run_status(authorized_dataset.id, user)
|
|
154
|
+
|
|
143
155
|
pipeline_run_info = None
|
|
144
156
|
|
|
145
|
-
async for run_info in cognee_pipeline(
|
|
157
|
+
async for run_info in run_pipeline(
|
|
146
158
|
tasks=tasks,
|
|
147
|
-
datasets=
|
|
159
|
+
datasets=[authorized_dataset.id],
|
|
148
160
|
data=data,
|
|
149
161
|
user=user,
|
|
150
162
|
pipeline_name="add_pipeline",
|
|
cognee/api/v1/cognify/code_graph_pipeline.py
CHANGED
|
@@ -40,8 +40,14 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
|
|
|
40
40
|
user = await get_default_user()
|
|
41
41
|
detailed_extraction = True
|
|
42
42
|
|
|
43
|
+
# Multi-language support: allow passing supported_languages
|
|
44
|
+
supported_languages = None # defer to task defaults
|
|
43
45
|
tasks = [
|
|
44
|
-
Task(
|
|
46
|
+
Task(
|
|
47
|
+
get_repo_file_dependencies,
|
|
48
|
+
detailed_extraction=detailed_extraction,
|
|
49
|
+
supported_languages=supported_languages,
|
|
50
|
+
),
|
|
45
51
|
# Task(summarize_code, task_config={"batch_size": 500}), # This task takes a long time to complete
|
|
46
52
|
Task(add_data_points, task_config={"batch_size": 30}),
|
|
47
53
|
]
|
cognee/api/v1/cognify/cognify.py
CHANGED
|
@@ -7,12 +7,10 @@ from cognee.shared.logging_utils import get_logger
|
|
|
7
7
|
from cognee.shared.data_models import KnowledgeGraph
|
|
8
8
|
from cognee.infrastructure.llm import get_max_chunk_tokens
|
|
9
9
|
|
|
10
|
-
from cognee.modules.pipelines import cognee_pipeline
|
|
10
|
+
from cognee.modules.pipelines import run_pipeline
|
|
11
11
|
from cognee.modules.pipelines.tasks.task import Task
|
|
12
12
|
from cognee.modules.chunking.TextChunker import TextChunker
|
|
13
13
|
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
|
|
14
|
-
from cognee.modules.pipelines.models.PipelineRunInfo import PipelineRunCompleted, PipelineRunErrored
|
|
15
|
-
from cognee.modules.pipelines.queues.pipeline_run_info_queues import push_to_queue
|
|
16
14
|
from cognee.modules.users.models import User
|
|
17
15
|
|
|
18
16
|
from cognee.tasks.documents import (
|
|
@@ -23,6 +21,7 @@ from cognee.tasks.documents import (
|
|
|
23
21
|
from cognee.tasks.graph import extract_graph_from_data
|
|
24
22
|
from cognee.tasks.storage import add_data_points
|
|
25
23
|
from cognee.tasks.summarization import summarize_text
|
|
24
|
+
from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor
|
|
26
25
|
|
|
27
26
|
logger = get_logger("cognify")
|
|
28
27
|
|
|
@@ -40,6 +39,7 @@ async def cognify(
|
|
|
40
39
|
graph_db_config: dict = None,
|
|
41
40
|
run_in_background: bool = False,
|
|
42
41
|
incremental_loading: bool = True,
|
|
42
|
+
custom_prompt: Optional[str] = None,
|
|
43
43
|
):
|
|
44
44
|
"""
|
|
45
45
|
Transform ingested data into a structured knowledge graph.
|
|
@@ -91,7 +91,7 @@ async def cognify(
|
|
|
91
91
|
- LangchainChunker: Recursive character splitting with overlap
|
|
92
92
|
Determines how documents are segmented for processing.
|
|
93
93
|
chunk_size: Maximum tokens per chunk. Auto-calculated based on LLM if None.
|
|
94
|
-
Formula: min(
|
|
94
|
+
Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2)
|
|
95
95
|
Default limits: ~512-8192 tokens depending on models.
|
|
96
96
|
Smaller chunks = more granular but potentially fragmented knowledge.
|
|
97
97
|
ontology_file_path: Path to RDF/OWL ontology file for domain-specific entity types.
|
|
@@ -102,6 +102,10 @@ async def cognify(
|
|
|
102
102
|
If False, waits for completion before returning.
|
|
103
103
|
Background mode recommended for large datasets (>100MB).
|
|
104
104
|
Use pipeline_run_id from return value to monitor progress.
|
|
105
|
+
custom_prompt: Optional custom prompt string to use for entity extraction and graph generation.
|
|
106
|
+
If provided, this prompt will be used instead of the default prompts for
|
|
107
|
+
knowledge graph extraction. The prompt should guide the LLM on how to
|
|
108
|
+
extract entities and relationships from the text content.
|
|
105
109
|
|
|
106
110
|
Returns:
|
|
107
111
|
Union[dict, list[PipelineRunInfo]]:
|
|
@@ -178,115 +182,24 @@ async def cognify(
|
|
|
178
182
|
- LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
|
|
179
183
|
- LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
|
|
180
184
|
"""
|
|
181
|
-
tasks = await get_default_tasks(
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
return await run_cognify_as_background_process(
|
|
185
|
-
tasks=tasks,
|
|
186
|
-
user=user,
|
|
187
|
-
datasets=datasets,
|
|
188
|
-
vector_db_config=vector_db_config,
|
|
189
|
-
graph_db_config=graph_db_config,
|
|
190
|
-
incremental_loading=incremental_loading,
|
|
191
|
-
)
|
|
192
|
-
else:
|
|
193
|
-
return await run_cognify_blocking(
|
|
194
|
-
tasks=tasks,
|
|
195
|
-
user=user,
|
|
196
|
-
datasets=datasets,
|
|
197
|
-
vector_db_config=vector_db_config,
|
|
198
|
-
graph_db_config=graph_db_config,
|
|
199
|
-
incremental_loading=incremental_loading,
|
|
200
|
-
)
|
|
201
|
-
|
|
185
|
+
tasks = await get_default_tasks(
|
|
186
|
+
user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt
|
|
187
|
+
)
|
|
202
188
|
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
user,
|
|
206
|
-
datasets,
|
|
207
|
-
graph_db_config: dict = None,
|
|
208
|
-
vector_db_config: dict = False,
|
|
209
|
-
incremental_loading: bool = True,
|
|
210
|
-
):
|
|
211
|
-
total_run_info = {}
|
|
189
|
+
# By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
|
|
190
|
+
pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background)
|
|
212
191
|
|
|
213
|
-
|
|
192
|
+
# Run the run_pipeline in the background or blocking based on executor
|
|
193
|
+
return await pipeline_executor_func(
|
|
194
|
+
pipeline=run_pipeline,
|
|
214
195
|
tasks=tasks,
|
|
215
|
-
datasets=datasets,
|
|
216
196
|
user=user,
|
|
217
|
-
|
|
218
|
-
graph_db_config=graph_db_config,
|
|
197
|
+
datasets=datasets,
|
|
219
198
|
vector_db_config=vector_db_config,
|
|
199
|
+
graph_db_config=graph_db_config,
|
|
220
200
|
incremental_loading=incremental_loading,
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
total_run_info[run_info.dataset_id] = run_info
|
|
224
|
-
else:
|
|
225
|
-
total_run_info = run_info
|
|
226
|
-
|
|
227
|
-
return total_run_info
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
async def run_cognify_as_background_process(
|
|
231
|
-
tasks,
|
|
232
|
-
user,
|
|
233
|
-
datasets,
|
|
234
|
-
graph_db_config: dict = None,
|
|
235
|
-
vector_db_config: dict = False,
|
|
236
|
-
incremental_loading: bool = True,
|
|
237
|
-
):
|
|
238
|
-
# Convert dataset to list if it's a string
|
|
239
|
-
if isinstance(datasets, str):
|
|
240
|
-
datasets = [datasets]
|
|
241
|
-
|
|
242
|
-
# Store pipeline status for all pipelines
|
|
243
|
-
pipeline_run_started_info = {}
|
|
244
|
-
|
|
245
|
-
async def handle_rest_of_the_run(pipeline_list):
|
|
246
|
-
# Execute all provided pipelines one by one to avoid database write conflicts
|
|
247
|
-
# TODO: Convert to async gather task instead of for loop when Queue mechanism for database is created
|
|
248
|
-
for pipeline in pipeline_list:
|
|
249
|
-
while True:
|
|
250
|
-
try:
|
|
251
|
-
pipeline_run_info = await anext(pipeline)
|
|
252
|
-
|
|
253
|
-
push_to_queue(pipeline_run_info.pipeline_run_id, pipeline_run_info)
|
|
254
|
-
|
|
255
|
-
if isinstance(pipeline_run_info, PipelineRunCompleted) or isinstance(
|
|
256
|
-
pipeline_run_info, PipelineRunErrored
|
|
257
|
-
):
|
|
258
|
-
break
|
|
259
|
-
except StopAsyncIteration:
|
|
260
|
-
break
|
|
261
|
-
|
|
262
|
-
# Start all pipelines to get started status
|
|
263
|
-
pipeline_list = []
|
|
264
|
-
for dataset in datasets:
|
|
265
|
-
pipeline_run = cognee_pipeline(
|
|
266
|
-
tasks=tasks,
|
|
267
|
-
user=user,
|
|
268
|
-
datasets=dataset,
|
|
269
|
-
pipeline_name="cognify_pipeline",
|
|
270
|
-
graph_db_config=graph_db_config,
|
|
271
|
-
vector_db_config=vector_db_config,
|
|
272
|
-
incremental_loading=incremental_loading,
|
|
273
|
-
)
|
|
274
|
-
|
|
275
|
-
# Save dataset Pipeline run started info
|
|
276
|
-
run_info = await anext(pipeline_run)
|
|
277
|
-
pipeline_run_started_info[run_info.dataset_id] = run_info
|
|
278
|
-
|
|
279
|
-
if pipeline_run_started_info[run_info.dataset_id].payload:
|
|
280
|
-
# Remove payload info to avoid serialization
|
|
281
|
-
# TODO: Handle payload serialization
|
|
282
|
-
pipeline_run_started_info[run_info.dataset_id].payload = []
|
|
283
|
-
|
|
284
|
-
pipeline_list.append(pipeline_run)
|
|
285
|
-
|
|
286
|
-
# Send all started pipelines to execute one by one in background
|
|
287
|
-
asyncio.create_task(handle_rest_of_the_run(pipeline_list=pipeline_list))
|
|
288
|
-
|
|
289
|
-
return pipeline_run_started_info
|
|
201
|
+
pipeline_name="cognify_pipeline",
|
|
202
|
+
)
|
|
290
203
|
|
|
291
204
|
|
|
292
205
|
async def get_default_tasks( # TODO: Find out a better way to do this (Boris's comment)
|
|
@@ -295,6 +208,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
|
|
|
295
208
|
chunker=TextChunker,
|
|
296
209
|
chunk_size: int = None,
|
|
297
210
|
ontology_file_path: Optional[str] = None,
|
|
211
|
+
custom_prompt: Optional[str] = None,
|
|
298
212
|
) -> list[Task]:
|
|
299
213
|
default_tasks = [
|
|
300
214
|
Task(classify_documents),
|
|
@@ -308,6 +222,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
|
|
|
308
222
|
extract_graph_from_data,
|
|
309
223
|
graph_model=graph_model,
|
|
310
224
|
ontology_adapter=OntologyResolver(ontology_file=ontology_file_path),
|
|
225
|
+
custom_prompt=custom_prompt,
|
|
311
226
|
task_config={"batch_size": 10},
|
|
312
227
|
), # Generate knowledge graphs from the document chunks.
|
|
313
228
|
Task(
|
|
cognee/api/v1/cognify/routers/get_cognify_router.py
CHANGED
|
@@ -37,6 +37,9 @@ class CognifyPayloadDTO(InDTO):
|
|
|
37
37
|
datasets: Optional[List[str]] = Field(default=None)
|
|
38
38
|
dataset_ids: Optional[List[UUID]] = Field(default=None, examples=[[]])
|
|
39
39
|
run_in_background: Optional[bool] = Field(default=False)
|
|
40
|
+
custom_prompt: Optional[str] = Field(
|
|
41
|
+
default=None, description="Custom prompt for entity extraction and graph generation"
|
|
42
|
+
)
|
|
40
43
|
|
|
41
44
|
|
|
42
45
|
def get_cognify_router() -> APIRouter:
|
|
@@ -63,6 +66,7 @@ def get_cognify_router() -> APIRouter:
|
|
|
63
66
|
- **datasets** (Optional[List[str]]): List of dataset names to process. Dataset names are resolved to datasets owned by the authenticated user.
|
|
64
67
|
- **dataset_ids** (Optional[List[UUID]]): List of existing dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
|
|
65
68
|
- **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking).
|
|
69
|
+
- **custom_prompt** (Optional[str]): Custom prompt for entity extraction and graph generation. If provided, this prompt will be used instead of the default prompts for knowledge graph extraction.
|
|
66
70
|
|
|
67
71
|
## Response
|
|
68
72
|
- **Blocking execution**: Complete pipeline run information with entity counts, processing duration, and success/failure status
|
|
@@ -76,7 +80,8 @@ def get_cognify_router() -> APIRouter:
|
|
|
76
80
|
```json
|
|
77
81
|
{
|
|
78
82
|
"datasets": ["research_papers", "documentation"],
|
|
79
|
-
"run_in_background": false
|
|
83
|
+
"run_in_background": false,
|
|
84
|
+
"custom_prompt": "Extract entities focusing on technical concepts and their relationships. Identify key technologies, methodologies, and their interconnections."
|
|
80
85
|
}
|
|
81
86
|
```
|
|
82
87
|
|
|
@@ -106,7 +111,10 @@ def get_cognify_router() -> APIRouter:
|
|
|
106
111
|
datasets = payload.dataset_ids if payload.dataset_ids else payload.datasets
|
|
107
112
|
|
|
108
113
|
cognify_run = await cognee_cognify(
|
|
109
|
-
datasets,
|
|
114
|
+
datasets,
|
|
115
|
+
user,
|
|
116
|
+
run_in_background=payload.run_in_background,
|
|
117
|
+
custom_prompt=payload.custom_prompt,
|
|
110
118
|
)
|
|
111
119
|
|
|
112
120
|
# If any cognify run errored return JSONResponse with proper error status code
|
|
@@ -164,7 +172,7 @@ def get_cognify_router() -> APIRouter:
|
|
|
164
172
|
{
|
|
165
173
|
"pipeline_run_id": str(pipeline_run_info.pipeline_run_id),
|
|
166
174
|
"status": pipeline_run_info.status,
|
|
167
|
-
"payload": await get_formatted_graph_data(pipeline_run.dataset_id, user
|
|
175
|
+
"payload": await get_formatted_graph_data(pipeline_run.dataset_id, user),
|
|
168
176
|
}
|
|
169
177
|
)
|
|
170
178
|
|
|
cognee/api/v1/datasets/routers/get_datasets_router.py
CHANGED
|
@@ -284,7 +284,7 @@ def get_datasets_router() -> APIRouter:
|
|
|
284
284
|
- **500 Internal Server Error**: Error retrieving graph data
|
|
285
285
|
"""
|
|
286
286
|
|
|
287
|
-
graph_data = await get_formatted_graph_data(dataset_id, user
|
|
287
|
+
graph_data = await get_formatted_graph_data(dataset_id, user)
|
|
288
288
|
|
|
289
289
|
return graph_data
|
|
290
290
|
|
|
cognee/api/v1/responses/default_tools.py
CHANGED
|
@@ -49,6 +49,10 @@ DEFAULT_TOOLS = [
|
|
|
49
49
|
"type": "string",
|
|
50
50
|
"description": "Path to a custom ontology file",
|
|
51
51
|
},
|
|
52
|
+
"custom_prompt": {
|
|
53
|
+
"type": "string",
|
|
54
|
+
"description": "Custom prompt for entity extraction and graph generation. If provided, this prompt will be used instead of the default prompts.",
|
|
55
|
+
},
|
|
52
56
|
},
|
|
53
57
|
"required": ["text"],
|
|
54
58
|
},
|
|
cognee/api/v1/responses/dispatch_function.py
CHANGED
|
@@ -88,11 +88,16 @@ async def handle_cognify(arguments: Dict[str, Any], user) -> str:
|
|
|
88
88
|
"""Handle cognify function call"""
|
|
89
89
|
text = arguments.get("text")
|
|
90
90
|
ontology_file_path = arguments.get("ontology_file_path")
|
|
91
|
+
custom_prompt = arguments.get("custom_prompt")
|
|
91
92
|
|
|
92
93
|
if text:
|
|
93
94
|
await add(data=text, user=user)
|
|
94
95
|
|
|
95
|
-
await cognify(
|
|
96
|
+
await cognify(
|
|
97
|
+
user=user,
|
|
98
|
+
ontology_file_path=ontology_file_path if ontology_file_path else None,
|
|
99
|
+
custom_prompt=custom_prompt,
|
|
100
|
+
)
|
|
96
101
|
|
|
97
102
|
return (
|
|
98
103
|
"Text successfully converted into knowledge graph."
|
|
cognee/api/v1/responses/models.py
CHANGED
|
@@ -70,7 +70,7 @@ class ResponseRequest(InDTO):
|
|
|
70
70
|
tool_choice: Optional[Union[str, Dict[str, Any]]] = "auto"
|
|
71
71
|
user: Optional[str] = None
|
|
72
72
|
temperature: Optional[float] = 1.0
|
|
73
|
-
|
|
73
|
+
max_completion_tokens: Optional[int] = None
|
|
74
74
|
|
|
75
75
|
|
|
76
76
|
class ToolCallOutput(BaseModel):
|
cognee/api/v1/search/search.py
CHANGED
|
@@ -19,6 +19,8 @@ async def search(
|
|
|
19
19
|
top_k: int = 10,
|
|
20
20
|
node_type: Optional[Type] = None,
|
|
21
21
|
node_name: Optional[List[str]] = None,
|
|
22
|
+
save_interaction: bool = False,
|
|
23
|
+
last_k: Optional[int] = None,
|
|
22
24
|
) -> list:
|
|
23
25
|
"""
|
|
24
26
|
Search and query the knowledge graph for insights, information, and connections.
|
|
@@ -107,6 +109,8 @@ async def search(
|
|
|
107
109
|
|
|
108
110
|
node_name: Filter results to specific named entities (for targeted search).
|
|
109
111
|
|
|
112
|
+
save_interaction: Save interaction (query, context, answer connected to triplet endpoints) results into the graph or not
|
|
113
|
+
|
|
110
114
|
Returns:
|
|
111
115
|
list: Search results in format determined by query_type:
|
|
112
116
|
|
|
@@ -182,6 +186,8 @@ async def search(
|
|
|
182
186
|
top_k=top_k,
|
|
183
187
|
node_type=node_type,
|
|
184
188
|
node_name=node_name,
|
|
189
|
+
save_interaction=save_interaction,
|
|
190
|
+
last_k=last_k,
|
|
185
191
|
)
|
|
186
192
|
|
|
187
193
|
return filtered_search_results
|
cognee/cli/__init__.py
ADDED
cognee/cli/_cognee.py
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import os
|
|
3
|
+
import argparse
|
|
4
|
+
from typing import Any, Sequence, Dict, Type, cast, List
|
|
5
|
+
import click
|
|
6
|
+
|
|
7
|
+
try:
|
|
8
|
+
import rich_argparse
|
|
9
|
+
from rich.markdown import Markdown
|
|
10
|
+
|
|
11
|
+
HAS_RICH = True
|
|
12
|
+
except ImportError:
|
|
13
|
+
HAS_RICH = False
|
|
14
|
+
|
|
15
|
+
from cognee.cli import SupportsCliCommand, DEFAULT_DOCS_URL
|
|
16
|
+
from cognee.cli.config import CLI_DESCRIPTION
|
|
17
|
+
from cognee.cli import debug
|
|
18
|
+
import cognee.cli.echo as fmt
|
|
19
|
+
from cognee.cli.exceptions import CliCommandException
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
ACTION_EXECUTED = False
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def print_help(parser: argparse.ArgumentParser) -> None:
|
|
26
|
+
if not ACTION_EXECUTED:
|
|
27
|
+
parser.print_help()
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class DebugAction(argparse.Action):
|
|
31
|
+
def __init__(
|
|
32
|
+
self,
|
|
33
|
+
option_strings: Sequence[str],
|
|
34
|
+
dest: Any = argparse.SUPPRESS,
|
|
35
|
+
default: Any = argparse.SUPPRESS,
|
|
36
|
+
help: str = None,
|
|
37
|
+
) -> None:
|
|
38
|
+
super(DebugAction, self).__init__(
|
|
39
|
+
option_strings=option_strings, dest=dest, default=default, nargs=0, help=help
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
def __call__(
|
|
43
|
+
self,
|
|
44
|
+
parser: argparse.ArgumentParser,
|
|
45
|
+
namespace: argparse.Namespace,
|
|
46
|
+
values: Any,
|
|
47
|
+
option_string: str = None,
|
|
48
|
+
) -> None:
|
|
49
|
+
# Enable debug mode for stack traces
|
|
50
|
+
debug.enable_debug()
|
|
51
|
+
fmt.note("Debug mode enabled. Full stack traces will be shown.")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# Debug functionality is now in cognee.cli.debug module
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _discover_commands() -> List[Type[SupportsCliCommand]]:
|
|
58
|
+
"""Discover all available CLI commands"""
|
|
59
|
+
# Import commands dynamically to avoid early cognee initialization
|
|
60
|
+
commands = []
|
|
61
|
+
|
|
62
|
+
command_modules = [
|
|
63
|
+
("cognee.cli.commands.add_command", "AddCommand"),
|
|
64
|
+
("cognee.cli.commands.search_command", "SearchCommand"),
|
|
65
|
+
("cognee.cli.commands.cognify_command", "CognifyCommand"),
|
|
66
|
+
("cognee.cli.commands.delete_command", "DeleteCommand"),
|
|
67
|
+
("cognee.cli.commands.config_command", "ConfigCommand"),
|
|
68
|
+
]
|
|
69
|
+
|
|
70
|
+
for module_path, class_name in command_modules:
|
|
71
|
+
try:
|
|
72
|
+
module = __import__(module_path, fromlist=[class_name])
|
|
73
|
+
command_class = getattr(module, class_name)
|
|
74
|
+
commands.append(command_class)
|
|
75
|
+
except (ImportError, AttributeError) as e:
|
|
76
|
+
fmt.warning(f"Failed to load command {class_name}: {e}")
|
|
77
|
+
|
|
78
|
+
return commands
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _create_parser() -> tuple[argparse.ArgumentParser, Dict[str, SupportsCliCommand]]:
|
|
82
|
+
parser = argparse.ArgumentParser(
|
|
83
|
+
description=f"{CLI_DESCRIPTION} Further help is available at {DEFAULT_DOCS_URL}."
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
# Get version dynamically
|
|
87
|
+
try:
|
|
88
|
+
from cognee.version import get_cognee_version
|
|
89
|
+
|
|
90
|
+
version = get_cognee_version()
|
|
91
|
+
except ImportError:
|
|
92
|
+
version = "unknown"
|
|
93
|
+
|
|
94
|
+
parser.add_argument("--version", action="version", version=f"cognee {version}")
|
|
95
|
+
parser.add_argument(
|
|
96
|
+
"--debug",
|
|
97
|
+
action=DebugAction,
|
|
98
|
+
help="Enable debug mode to show full stack traces on exceptions",
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
subparsers = parser.add_subparsers(title="Available commands", dest="command")
|
|
102
|
+
|
|
103
|
+
# Discover and install commands
|
|
104
|
+
command_classes = _discover_commands()
|
|
105
|
+
installed_commands: Dict[str, SupportsCliCommand] = {}
|
|
106
|
+
|
|
107
|
+
for command_class in command_classes:
|
|
108
|
+
command = command_class()
|
|
109
|
+
if command.command_string in installed_commands:
|
|
110
|
+
continue
|
|
111
|
+
|
|
112
|
+
command_parser = subparsers.add_parser(
|
|
113
|
+
command.command_string,
|
|
114
|
+
help=command.help_string,
|
|
115
|
+
description=command.description if hasattr(command, "description") else None,
|
|
116
|
+
)
|
|
117
|
+
command.configure_parser(command_parser)
|
|
118
|
+
installed_commands[command.command_string] = command
|
|
119
|
+
|
|
120
|
+
# Add rich formatting if available
|
|
121
|
+
if HAS_RICH:
|
|
122
|
+
|
|
123
|
+
def add_formatter_class(parser: argparse.ArgumentParser) -> None:
|
|
124
|
+
parser.formatter_class = rich_argparse.RichHelpFormatter
|
|
125
|
+
|
|
126
|
+
if parser.description:
|
|
127
|
+
parser.description = Markdown(parser.description, style="argparse.text")
|
|
128
|
+
for action in parser._actions:
|
|
129
|
+
if isinstance(action, argparse._SubParsersAction):
|
|
130
|
+
for _subcmd, subparser in action.choices.items():
|
|
131
|
+
add_formatter_class(subparser)
|
|
132
|
+
|
|
133
|
+
add_formatter_class(parser)
|
|
134
|
+
|
|
135
|
+
return parser, installed_commands
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def main() -> int:
|
|
139
|
+
"""Main CLI entry point"""
|
|
140
|
+
parser, installed_commands = _create_parser()
|
|
141
|
+
args = parser.parse_args()
|
|
142
|
+
|
|
143
|
+
if cmd := installed_commands.get(args.command):
|
|
144
|
+
try:
|
|
145
|
+
cmd.execute(args)
|
|
146
|
+
except Exception as ex:
|
|
147
|
+
docs_url = cmd.docs_url if hasattr(cmd, "docs_url") else DEFAULT_DOCS_URL
|
|
148
|
+
error_code = -1
|
|
149
|
+
raiseable_exception = ex
|
|
150
|
+
|
|
151
|
+
# Handle CLI-specific exceptions
|
|
152
|
+
if isinstance(ex, CliCommandException):
|
|
153
|
+
error_code = ex.error_code
|
|
154
|
+
docs_url = ex.docs_url or docs_url
|
|
155
|
+
raiseable_exception = ex.raiseable_exception
|
|
156
|
+
|
|
157
|
+
# Print exception
|
|
158
|
+
if raiseable_exception:
|
|
159
|
+
fmt.error(str(ex))
|
|
160
|
+
|
|
161
|
+
fmt.note(f"Please refer to our docs at '{docs_url}' for further assistance.")
|
|
162
|
+
|
|
163
|
+
if debug.is_debug_enabled() and raiseable_exception:
|
|
164
|
+
raise raiseable_exception
|
|
165
|
+
|
|
166
|
+
return error_code
|
|
167
|
+
else:
|
|
168
|
+
print_help(parser)
|
|
169
|
+
return -1
|
|
170
|
+
|
|
171
|
+
return 0
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _main() -> None:
|
|
175
|
+
"""Script entry point"""
|
|
176
|
+
sys.exit(main())
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
if __name__ == "__main__":
|
|
180
|
+
sys.exit(main())
|
|
cognee/cli/commands/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# CLI Commands package
|
|
cognee/cli/commands/add_command.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import asyncio
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from cognee.cli.reference import SupportsCliCommand
|
|
6
|
+
from cognee.cli import DEFAULT_DOCS_URL
|
|
7
|
+
import cognee.cli.echo as fmt
|
|
8
|
+
from cognee.cli.exceptions import CliCommandException, CliCommandInnerException
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class AddCommand(SupportsCliCommand):
|
|
12
|
+
command_string = "add"
|
|
13
|
+
help_string = "Add data to Cognee for knowledge graph processing"
|
|
14
|
+
docs_url = DEFAULT_DOCS_URL
|
|
15
|
+
description = """
|
|
16
|
+
Add data to Cognee for knowledge graph processing.
|
|
17
|
+
|
|
18
|
+
This is the first step in the Cognee workflow - it ingests raw data and prepares it
|
|
19
|
+
for processing. The function accepts various data formats including text, files, and
|
|
20
|
+
binary streams, then stores them in a specified dataset for further processing.
|
|
21
|
+
|
|
22
|
+
Supported Input Types:
|
|
23
|
+
- **Text strings**: Direct text content
|
|
24
|
+
- **File paths**: Local file paths (absolute paths starting with "/")
|
|
25
|
+
- **File URLs**: "file:///absolute/path" or "file://relative/path"
|
|
26
|
+
- **S3 paths**: "s3://bucket-name/path/to/file"
|
|
27
|
+
- **Lists**: Multiple files or text strings in a single call
|
|
28
|
+
|
|
29
|
+
Supported File Formats:
|
|
30
|
+
- Text files (.txt, .md, .csv)
|
|
31
|
+
- PDFs (.pdf)
|
|
32
|
+
- Images (.png, .jpg, .jpeg) - extracted via OCR/vision models
|
|
33
|
+
- Audio files (.mp3, .wav) - transcribed to text
|
|
34
|
+
- Code files (.py, .js, .ts, etc.) - parsed for structure and content
|
|
35
|
+
- Office documents (.docx, .pptx)
|
|
36
|
+
|
|
37
|
+
After adding data, use `cognee cognify` to process it into knowledge graphs.
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
def configure_parser(self, parser: argparse.ArgumentParser) -> None:
|
|
41
|
+
parser.add_argument(
|
|
42
|
+
"data",
|
|
43
|
+
nargs="+",
|
|
44
|
+
help="Data to add: text content, file paths (/path/to/file), file URLs (file://path), S3 paths (s3://bucket/file), or mix of these",
|
|
45
|
+
)
|
|
46
|
+
parser.add_argument(
|
|
47
|
+
"--dataset-name",
|
|
48
|
+
"-d",
|
|
49
|
+
default="main_dataset",
|
|
50
|
+
help="Dataset name to organize your data (default: main_dataset)",
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
def execute(self, args: argparse.Namespace) -> None:
|
|
54
|
+
try:
|
|
55
|
+
# Import cognee here to avoid circular imports
|
|
56
|
+
import cognee
|
|
57
|
+
|
|
58
|
+
fmt.echo(f"Adding {len(args.data)} item(s) to dataset '{args.dataset_name}'...")
|
|
59
|
+
|
|
60
|
+
# Run the async add function
|
|
61
|
+
async def run_add():
|
|
62
|
+
try:
|
|
63
|
+
# Pass all data items as a list to cognee.add if multiple items
|
|
64
|
+
if len(args.data) == 1:
|
|
65
|
+
data_to_add = args.data[0]
|
|
66
|
+
else:
|
|
67
|
+
data_to_add = args.data
|
|
68
|
+
|
|
69
|
+
fmt.echo("Processing data...")
|
|
70
|
+
await cognee.add(data=data_to_add, dataset_name=args.dataset_name)
|
|
71
|
+
fmt.success(f"Successfully added data to dataset '{args.dataset_name}'")
|
|
72
|
+
except Exception as e:
|
|
73
|
+
raise CliCommandInnerException(f"Failed to add data: {str(e)}")
|
|
74
|
+
|
|
75
|
+
asyncio.run(run_add())
|
|
76
|
+
|
|
77
|
+
except Exception as e:
|
|
78
|
+
if isinstance(e, CliCommandInnerException):
|
|
79
|
+
raise CliCommandException(str(e), error_code=1)
|
|
80
|
+
raise CliCommandException(f"Error adding data: {str(e)}", error_code=1)
|