cognee 0.2.3.dev0__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179)
  1. cognee/__main__.py +4 -0
  2. cognee/api/v1/add/add.py +18 -11
  3. cognee/api/v1/cognify/code_graph_pipeline.py +7 -1
  4. cognee/api/v1/cognify/cognify.py +22 -115
  5. cognee/api/v1/cognify/routers/get_cognify_router.py +11 -3
  6. cognee/api/v1/config/config.py +5 -13
  7. cognee/api/v1/datasets/routers/get_datasets_router.py +2 -2
  8. cognee/api/v1/delete/delete.py +1 -1
  9. cognee/api/v1/exceptions/__init__.py +13 -0
  10. cognee/api/v1/{delete → exceptions}/exceptions.py +15 -12
  11. cognee/api/v1/responses/default_tools.py +4 -0
  12. cognee/api/v1/responses/dispatch_function.py +6 -1
  13. cognee/api/v1/responses/models.py +1 -1
  14. cognee/api/v1/search/search.py +6 -7
  15. cognee/cli/__init__.py +10 -0
  16. cognee/cli/_cognee.py +180 -0
  17. cognee/cli/commands/__init__.py +1 -0
  18. cognee/cli/commands/add_command.py +80 -0
  19. cognee/cli/commands/cognify_command.py +128 -0
  20. cognee/cli/commands/config_command.py +225 -0
  21. cognee/cli/commands/delete_command.py +80 -0
  22. cognee/cli/commands/search_command.py +149 -0
  23. cognee/cli/config.py +33 -0
  24. cognee/cli/debug.py +21 -0
  25. cognee/cli/echo.py +45 -0
  26. cognee/cli/exceptions.py +23 -0
  27. cognee/cli/minimal_cli.py +97 -0
  28. cognee/cli/reference.py +26 -0
  29. cognee/cli/suppress_logging.py +12 -0
  30. cognee/eval_framework/corpus_builder/corpus_builder_executor.py +2 -2
  31. cognee/eval_framework/eval_config.py +1 -1
  32. cognee/exceptions/__init__.py +5 -5
  33. cognee/exceptions/exceptions.py +37 -17
  34. cognee/infrastructure/data/exceptions/__init__.py +7 -0
  35. cognee/infrastructure/data/exceptions/exceptions.py +22 -0
  36. cognee/infrastructure/data/utils/extract_keywords.py +3 -3
  37. cognee/infrastructure/databases/exceptions/__init__.py +3 -0
  38. cognee/infrastructure/databases/exceptions/exceptions.py +57 -9
  39. cognee/infrastructure/databases/graph/get_graph_engine.py +4 -9
  40. cognee/infrastructure/databases/graph/kuzu/adapter.py +64 -2
  41. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +49 -0
  42. cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +15 -10
  43. cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +2 -2
  44. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +4 -5
  45. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -2
  46. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +5 -3
  47. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +17 -8
  48. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +5 -5
  49. cognee/infrastructure/databases/vector/embeddings/config.py +2 -2
  50. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +6 -6
  51. cognee/infrastructure/databases/vector/exceptions/exceptions.py +3 -3
  52. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -2
  53. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +4 -3
  54. cognee/infrastructure/files/utils/get_data_file_path.py +14 -9
  55. cognee/infrastructure/files/utils/get_file_metadata.py +2 -1
  56. cognee/infrastructure/llm/LLMGateway.py +14 -5
  57. cognee/infrastructure/llm/config.py +5 -5
  58. cognee/infrastructure/llm/exceptions.py +30 -2
  59. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +16 -5
  60. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_content_graph.py +19 -15
  61. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +5 -5
  62. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +6 -6
  63. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +2 -2
  64. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -15
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +6 -4
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +9 -7
  67. cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +2 -2
  68. cognee/infrastructure/llm/tokenizer/HuggingFace/adapter.py +3 -3
  69. cognee/infrastructure/llm/tokenizer/Mistral/adapter.py +3 -3
  70. cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +6 -6
  71. cognee/infrastructure/llm/utils.py +7 -7
  72. cognee/modules/data/exceptions/exceptions.py +18 -5
  73. cognee/modules/data/methods/__init__.py +2 -0
  74. cognee/modules/data/methods/create_authorized_dataset.py +19 -0
  75. cognee/modules/data/methods/delete_data.py +2 -4
  76. cognee/modules/data/methods/get_authorized_dataset.py +11 -5
  77. cognee/modules/data/methods/get_authorized_dataset_by_name.py +16 -0
  78. cognee/modules/data/methods/load_or_create_datasets.py +2 -20
  79. cognee/modules/data/processing/document_types/exceptions/exceptions.py +2 -2
  80. cognee/modules/graph/cognee_graph/CogneeGraph.py +6 -4
  81. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +5 -10
  82. cognee/modules/graph/exceptions/__init__.py +2 -0
  83. cognee/modules/graph/exceptions/exceptions.py +25 -3
  84. cognee/modules/graph/methods/get_formatted_graph_data.py +3 -2
  85. cognee/modules/ingestion/exceptions/exceptions.py +2 -2
  86. cognee/modules/ontology/exceptions/exceptions.py +4 -4
  87. cognee/modules/pipelines/__init__.py +1 -1
  88. cognee/modules/pipelines/exceptions/exceptions.py +2 -2
  89. cognee/modules/pipelines/exceptions/tasks.py +18 -0
  90. cognee/modules/pipelines/layers/__init__.py +1 -0
  91. cognee/modules/pipelines/layers/check_pipeline_run_qualification.py +59 -0
  92. cognee/modules/pipelines/layers/pipeline_execution_mode.py +127 -0
  93. cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +12 -0
  94. cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +34 -0
  95. cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +55 -0
  96. cognee/modules/pipelines/layers/setup_and_check_environment.py +41 -0
  97. cognee/modules/pipelines/layers/validate_pipeline_tasks.py +20 -0
  98. cognee/modules/pipelines/methods/__init__.py +2 -0
  99. cognee/modules/pipelines/methods/get_pipeline_runs_by_dataset.py +34 -0
  100. cognee/modules/pipelines/methods/reset_pipeline_run_status.py +16 -0
  101. cognee/modules/pipelines/operations/__init__.py +0 -1
  102. cognee/modules/pipelines/operations/log_pipeline_run_initiated.py +1 -1
  103. cognee/modules/pipelines/operations/pipeline.py +23 -138
  104. cognee/modules/retrieval/base_feedback.py +11 -0
  105. cognee/modules/retrieval/cypher_search_retriever.py +1 -9
  106. cognee/modules/retrieval/exceptions/exceptions.py +12 -6
  107. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +9 -2
  108. cognee/modules/retrieval/graph_completion_cot_retriever.py +13 -6
  109. cognee/modules/retrieval/graph_completion_retriever.py +89 -5
  110. cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
  111. cognee/modules/retrieval/natural_language_retriever.py +0 -4
  112. cognee/modules/retrieval/user_qa_feedback.py +83 -0
  113. cognee/modules/retrieval/utils/extract_uuid_from_node.py +18 -0
  114. cognee/modules/retrieval/utils/models.py +40 -0
  115. cognee/modules/search/exceptions/__init__.py +7 -0
  116. cognee/modules/search/exceptions/exceptions.py +15 -0
  117. cognee/modules/search/methods/search.py +47 -7
  118. cognee/modules/search/types/SearchType.py +1 -0
  119. cognee/modules/settings/get_settings.py +2 -2
  120. cognee/modules/users/exceptions/exceptions.py +6 -6
  121. cognee/shared/CodeGraphEntities.py +1 -0
  122. cognee/shared/exceptions/exceptions.py +2 -2
  123. cognee/shared/logging_utils.py +142 -31
  124. cognee/shared/utils.py +0 -1
  125. cognee/tasks/completion/exceptions/exceptions.py +3 -3
  126. cognee/tasks/documents/classify_documents.py +4 -0
  127. cognee/tasks/documents/exceptions/__init__.py +11 -0
  128. cognee/tasks/documents/exceptions/exceptions.py +36 -0
  129. cognee/tasks/documents/extract_chunks_from_documents.py +8 -2
  130. cognee/tasks/graph/exceptions/__init__.py +12 -0
  131. cognee/tasks/graph/exceptions/exceptions.py +41 -0
  132. cognee/tasks/graph/extract_graph_from_data.py +34 -2
  133. cognee/tasks/ingestion/exceptions/__init__.py +8 -0
  134. cognee/tasks/ingestion/exceptions/exceptions.py +12 -0
  135. cognee/tasks/ingestion/resolve_data_directories.py +5 -0
  136. cognee/tasks/repo_processor/get_local_dependencies.py +2 -0
  137. cognee/tasks/repo_processor/get_repo_file_dependencies.py +120 -48
  138. cognee/tasks/storage/add_data_points.py +41 -3
  139. cognee/tasks/storage/exceptions/__init__.py +9 -0
  140. cognee/tasks/storage/exceptions/exceptions.py +13 -0
  141. cognee/tasks/storage/index_data_points.py +1 -1
  142. cognee/tasks/summarization/exceptions/__init__.py +9 -0
  143. cognee/tasks/summarization/exceptions/exceptions.py +14 -0
  144. cognee/tasks/summarization/summarize_text.py +8 -1
  145. cognee/tests/integration/cli/__init__.py +3 -0
  146. cognee/tests/integration/cli/test_cli_integration.py +331 -0
  147. cognee/tests/integration/documents/PdfDocument_test.py +2 -2
  148. cognee/tests/integration/documents/TextDocument_test.py +2 -4
  149. cognee/tests/integration/documents/UnstructuredDocument_test.py +5 -8
  150. cognee/tests/test_delete_by_id.py +1 -1
  151. cognee/tests/{test_deletion.py → test_delete_hard.py} +0 -37
  152. cognee/tests/test_delete_soft.py +85 -0
  153. cognee/tests/test_kuzu.py +2 -2
  154. cognee/tests/test_neo4j.py +2 -2
  155. cognee/tests/test_search_db.py +126 -7
  156. cognee/tests/unit/cli/__init__.py +3 -0
  157. cognee/tests/unit/cli/test_cli_commands.py +483 -0
  158. cognee/tests/unit/cli/test_cli_edge_cases.py +625 -0
  159. cognee/tests/unit/cli/test_cli_main.py +173 -0
  160. cognee/tests/unit/cli/test_cli_runner.py +62 -0
  161. cognee/tests/unit/cli/test_cli_utils.py +127 -0
  162. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +5 -5
  163. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +3 -3
  164. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +3 -3
  165. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +3 -3
  166. cognee/tests/unit/modules/search/search_methods_test.py +4 -2
  167. {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/METADATA +7 -5
  168. {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/RECORD +172 -121
  169. cognee-0.2.4.dist-info/entry_points.txt +2 -0
  170. cognee/infrastructure/databases/exceptions/EmbeddingException.py +0 -20
  171. cognee/infrastructure/databases/graph/networkx/__init__.py +0 -0
  172. cognee/infrastructure/databases/graph/networkx/adapter.py +0 -1017
  173. cognee/infrastructure/pipeline/models/Operation.py +0 -60
  174. cognee/infrastructure/pipeline/models/__init__.py +0 -0
  175. cognee/notebooks/github_analysis_step_by_step.ipynb +0 -37
  176. cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py +0 -7
  177. {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/WHEEL +0 -0
  178. {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/licenses/LICENSE +0 -0
  179. {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/licenses/NOTICE.md +0 -0
cognee/__main__.py ADDED
@@ -0,0 +1,4 @@
+from cognee.cli._cognee import main
+
+if __name__ == "__main__":
+    main()
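
With this entry module (plus the console script registered in cognee-0.2.4.dist-info/entry_points.txt), the CLI becomes reachable via `python -m cognee`. A minimal smoke test; the only assumption is that the CLI follows the usual argparse convention of responding to `--help`:

```python
# Invoke the new module runner in a subprocess; assumes the CLI
# responds to the conventional --help flag (not shown in this diff).
import subprocess
import sys

subprocess.run([sys.executable, "-m", "cognee", "--help"], check=True)
```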
cognee/api/v1/add/add.py CHANGED
@@ -1,9 +1,15 @@
 from uuid import UUID
 from typing import Union, BinaryIO, List, Optional
 
-from cognee.modules.pipelines import Task
 from cognee.modules.users.models import User
-from cognee.modules.pipelines import cognee_pipeline
+from cognee.modules.pipelines import Task, run_pipeline
+from cognee.modules.pipelines.layers.resolve_authorized_user_dataset import (
+    resolve_authorized_user_dataset,
+)
+from cognee.modules.pipelines.layers.reset_dataset_pipeline_run_status import (
+    reset_dataset_pipeline_run_status,
+)
+from cognee.modules.engine.operations.setup import setup
 from cognee.tasks.ingestion import ingest_data, resolve_data_directories
 
 
@@ -128,28 +134,29 @@ async def add(
 
     Optional:
     - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama"
-    - LLM_MODEL: Model name (default: "gpt-4o-mini")
+    - LLM_MODEL: Model name (default: "gpt-5-mini")
     - DEFAULT_USER_EMAIL: Custom default user email
     - DEFAULT_USER_PASSWORD: Custom default user password
     - VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "pgvector"
-    - GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j", "networkx"
+    - GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j"
 
-    Raises:
-        FileNotFoundError: If specified file paths don't exist
-        PermissionError: If user lacks access to files or dataset
-        UnsupportedFileTypeError: If file format cannot be processed
-        InvalidValueError: If LLM_API_KEY is not set or invalid
    """
    tasks = [
        Task(resolve_data_directories, include_subdirectories=True),
        Task(ingest_data, dataset_name, user, node_set, dataset_id, preferred_loaders),
    ]
 
+    await setup()
+
+    user, authorized_dataset = await resolve_authorized_user_dataset(dataset_id, dataset_name, user)
+
+    await reset_dataset_pipeline_run_status(authorized_dataset.id, user)
+
     pipeline_run_info = None
 
-    async for run_info in cognee_pipeline(
+    async for run_info in run_pipeline(
         tasks=tasks,
-        datasets=dataset_id if dataset_id else dataset_name,
+        datasets=[authorized_dataset.id],
         data=data,
         user=user,
         pipeline_name="add_pipeline",
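
From the caller's side the public API is unchanged; dataset authorization and pipeline-run-status reset now happen inside add() before run_pipeline starts. A minimal usage sketch (the text and dataset name are illustrative):

```python
import asyncio

import cognee


async def main():
    # add() now resolves the authorized dataset and resets its
    # pipeline run status before executing the add_pipeline.
    await cognee.add(
        "Natural language processing links computers and human language.",
        dataset_name="my_dataset",
    )


asyncio.run(main())
```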
cognee/api/v1/cognify/code_graph_pipeline.py CHANGED
@@ -40,8 +40,14 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
     user = await get_default_user()
     detailed_extraction = True
 
+    # Multi-language support: allow passing supported_languages
+    supported_languages = None  # defer to task defaults
     tasks = [
-        Task(get_repo_file_dependencies, detailed_extraction=detailed_extraction),
+        Task(
+            get_repo_file_dependencies,
+            detailed_extraction=detailed_extraction,
+            supported_languages=supported_languages,
+        ),
         # Task(summarize_code, task_config={"batch_size": 500}), # This task takes a long time to complete
         Task(add_data_points, task_config={"batch_size": 30}),
     ]
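
A usage sketch of the pipeline above, consuming run_code_graph_pipeline as an async generator; the repo path is illustrative, and supported_languages is left to the task defaults as in the diff:

```python
import asyncio

from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline


async def main():
    # Each iteration yields pipeline run info as the code graph is built.
    async for run_info in run_code_graph_pipeline("/path/to/repo", include_docs=False):
        print(run_info)


asyncio.run(main())
```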
cognee/api/v1/cognify/cognify.py CHANGED
@@ -7,12 +7,10 @@ from cognee.shared.logging_utils import get_logger
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.infrastructure.llm import get_max_chunk_tokens
 
-from cognee.modules.pipelines import cognee_pipeline
+from cognee.modules.pipelines import run_pipeline
 from cognee.modules.pipelines.tasks.task import Task
 from cognee.modules.chunking.TextChunker import TextChunker
 from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
-from cognee.modules.pipelines.models.PipelineRunInfo import PipelineRunCompleted, PipelineRunErrored
-from cognee.modules.pipelines.queues.pipeline_run_info_queues import push_to_queue
 from cognee.modules.users.models import User
 
 from cognee.tasks.documents import (
@@ -23,6 +21,7 @@ from cognee.tasks.documents import (
 from cognee.tasks.graph import extract_graph_from_data
 from cognee.tasks.storage import add_data_points
 from cognee.tasks.summarization import summarize_text
+from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor
 
 logger = get_logger("cognify")
 
@@ -40,6 +39,7 @@ async def cognify(
     graph_db_config: dict = None,
     run_in_background: bool = False,
     incremental_loading: bool = True,
+    custom_prompt: Optional[str] = None,
 ):
     """
     Transform ingested data into a structured knowledge graph.
@@ -91,7 +91,7 @@ async def cognify(
             - LangchainChunker: Recursive character splitting with overlap
             Determines how documents are segmented for processing.
         chunk_size: Maximum tokens per chunk. Auto-calculated based on LLM if None.
-            Formula: min(embedding_max_tokens, llm_max_tokens // 2)
+            Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2)
             Default limits: ~512-8192 tokens depending on models.
             Smaller chunks = more granular but potentially fragmented knowledge.
         ontology_file_path: Path to RDF/OWL ontology file for domain-specific entity types.
@@ -102,6 +102,10 @@ async def cognify(
             If False, waits for completion before returning.
             Background mode recommended for large datasets (>100MB).
             Use pipeline_run_id from return value to monitor progress.
+        custom_prompt: Optional custom prompt string to use for entity extraction and graph generation.
+            If provided, this prompt will be used instead of the default prompts for
+            knowledge graph extraction. The prompt should guide the LLM on how to
+            extract entities and relationships from the text content.
 
     Returns:
         Union[dict, list[PipelineRunInfo]]:
@@ -177,124 +181,25 @@ async def cognify(
         - LLM_PROVIDER, LLM_MODEL, VECTOR_DB_PROVIDER, GRAPH_DATABASE_PROVIDER
         - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
         - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
-
-    Raises:
-        DatasetNotFoundError: If specified datasets don't exist
-        PermissionError: If user lacks processing rights
-        InvalidValueError: If LLM_API_KEY is not set
-        OntologyParsingError: If ontology file is malformed
-        ValueError: If chunks exceed max token limits (reduce chunk_size)
-        DatabaseNotCreatedError: If databases are not properly initialized
     """
-    tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path)
-
-    if run_in_background:
-        return await run_cognify_as_background_process(
-            tasks=tasks,
-            user=user,
-            datasets=datasets,
-            vector_db_config=vector_db_config,
-            graph_db_config=graph_db_config,
-            incremental_loading=incremental_loading,
-        )
-    else:
-        return await run_cognify_blocking(
-            tasks=tasks,
-            user=user,
-            datasets=datasets,
-            vector_db_config=vector_db_config,
-            graph_db_config=graph_db_config,
-            incremental_loading=incremental_loading,
-        )
-
+    tasks = await get_default_tasks(
+        user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt
+    )
 
-async def run_cognify_blocking(
-    tasks,
-    user,
-    datasets,
-    graph_db_config: dict = None,
-    vector_db_config: dict = False,
-    incremental_loading: bool = True,
-):
-    total_run_info = {}
+    # get_pipeline_executor returns a function that either runs run_pipeline in the background or blocks until it completes
+    pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background)
 
-    async for run_info in cognee_pipeline(
+    # Run run_pipeline in the background or blocking, depending on the executor
+    return await pipeline_executor_func(
+        pipeline=run_pipeline,
         tasks=tasks,
-        datasets=datasets,
         user=user,
-        pipeline_name="cognify_pipeline",
-        graph_db_config=graph_db_config,
+        datasets=datasets,
         vector_db_config=vector_db_config,
+        graph_db_config=graph_db_config,
         incremental_loading=incremental_loading,
-    ):
-        if run_info.dataset_id:
-            total_run_info[run_info.dataset_id] = run_info
-        else:
-            total_run_info = run_info
-
-    return total_run_info
-
-
-async def run_cognify_as_background_process(
-    tasks,
-    user,
-    datasets,
-    graph_db_config: dict = None,
-    vector_db_config: dict = False,
-    incremental_loading: bool = True,
-):
-    # Convert dataset to list if it's a string
-    if isinstance(datasets, str):
-        datasets = [datasets]
-
-    # Store pipeline status for all pipelines
-    pipeline_run_started_info = {}
-
-    async def handle_rest_of_the_run(pipeline_list):
-        # Execute all provided pipelines one by one to avoid database write conflicts
-        # TODO: Convert to async gather task instead of for loop when Queue mechanism for database is created
-        for pipeline in pipeline_list:
-            while True:
-                try:
-                    pipeline_run_info = await anext(pipeline)
-
-                    push_to_queue(pipeline_run_info.pipeline_run_id, pipeline_run_info)
-
-                    if isinstance(pipeline_run_info, PipelineRunCompleted) or isinstance(
-                        pipeline_run_info, PipelineRunErrored
-                    ):
-                        break
-                except StopAsyncIteration:
-                    break
-
-    # Start all pipelines to get started status
-    pipeline_list = []
-    for dataset in datasets:
-        pipeline_run = cognee_pipeline(
-            tasks=tasks,
-            user=user,
-            datasets=dataset,
-            pipeline_name="cognify_pipeline",
-            graph_db_config=graph_db_config,
-            vector_db_config=vector_db_config,
-            incremental_loading=incremental_loading,
-        )
-
-        # Save dataset Pipeline run started info
-        run_info = await anext(pipeline_run)
-        pipeline_run_started_info[run_info.dataset_id] = run_info
-
-        if pipeline_run_started_info[run_info.dataset_id].payload:
-            # Remove payload info to avoid serialization
-            # TODO: Handle payload serialization
-            pipeline_run_started_info[run_info.dataset_id].payload = []
-
-        pipeline_list.append(pipeline_run)
-
-    # Send all started pipelines to execute one by one in background
-    asyncio.create_task(handle_rest_of_the_run(pipeline_list=pipeline_list))
-
-    return pipeline_run_started_info
+        pipeline_name="cognify_pipeline",
+    )
 
 
 async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's comment)
@@ -303,6 +208,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     chunker=TextChunker,
     chunk_size: int = None,
     ontology_file_path: Optional[str] = None,
+    custom_prompt: Optional[str] = None,
 ) -> list[Task]:
     default_tasks = [
         Task(classify_documents),
@@ -316,6 +222,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
         Task(
             extract_graph_from_data,
             graph_model=graph_model,
             ontology_adapter=OntologyResolver(ontology_file=ontology_file_path),
+            custom_prompt=custom_prompt,
             task_config={"batch_size": 10},
         ),  # Generate knowledge graphs from the document chunks.
         Task(
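
A sketch of the new custom_prompt parameter from the caller's side; the dataset name and prompt text are illustrative:

```python
import asyncio

import cognee


async def main():
    # The custom prompt replaces the default knowledge-graph extraction prompts.
    await cognee.cognify(
        datasets=["research_papers"],
        custom_prompt=(
            "Extract entities focusing on technical concepts "
            "and their relationships."
        ),
    )


asyncio.run(main())
```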
cognee/api/v1/cognify/routers/get_cognify_router.py CHANGED
@@ -37,6 +37,9 @@ class CognifyPayloadDTO(InDTO):
     datasets: Optional[List[str]] = Field(default=None)
     dataset_ids: Optional[List[UUID]] = Field(default=None, examples=[[]])
     run_in_background: Optional[bool] = Field(default=False)
+    custom_prompt: Optional[str] = Field(
+        default=None, description="Custom prompt for entity extraction and graph generation"
+    )
 
 
 def get_cognify_router() -> APIRouter:
@@ -63,6 +66,7 @@ def get_cognify_router() -> APIRouter:
     - **datasets** (Optional[List[str]]): List of dataset names to process. Dataset names are resolved to datasets owned by the authenticated user.
     - **dataset_ids** (Optional[List[UUID]]): List of existing dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
     - **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking).
+    - **custom_prompt** (Optional[str]): Custom prompt for entity extraction and graph generation. If provided, this prompt will be used instead of the default prompts for knowledge graph extraction.
 
     ## Response
     - **Blocking execution**: Complete pipeline run information with entity counts, processing duration, and success/failure status
@@ -76,7 +80,8 @@ def get_cognify_router() -> APIRouter:
     ```json
     {
         "datasets": ["research_papers", "documentation"],
-        "run_in_background": false
+        "run_in_background": false,
+        "custom_prompt": "Extract entities focusing on technical concepts and their relationships. Identify key technologies, methodologies, and their interconnections."
     }
     ```
 
@@ -106,7 +111,10 @@ def get_cognify_router() -> APIRouter:
         datasets = payload.dataset_ids if payload.dataset_ids else payload.datasets
 
         cognify_run = await cognee_cognify(
-            datasets, user, run_in_background=payload.run_in_background
+            datasets,
+            user,
+            run_in_background=payload.run_in_background,
+            custom_prompt=payload.custom_prompt,
         )
 
         # If any cognify run errored return JSONResponse with proper error status code
@@ -164,7 +172,7 @@ def get_cognify_router() -> APIRouter:
                 {
                     "pipeline_run_id": str(pipeline_run_info.pipeline_run_id),
                     "status": pipeline_run_info.status,
-                    "payload": await get_formatted_graph_data(pipeline_run.dataset_id, user.id),
+                    "payload": await get_formatted_graph_data(pipeline_run.dataset_id, user),
                 }
             )
 
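
For HTTP clients the new field simply rides along in the request body. A hypothetical call; the mount path /api/v1/cognify and the bearer-token header are assumptions about how get_cognify_router() is served, not something this diff shows:

```python
import httpx

# Hypothetical endpoint and token; adjust to your deployment.
response = httpx.post(
    "http://localhost:8000/api/v1/cognify",
    json={
        "datasets": ["research_papers"],
        "run_in_background": False,
        "custom_prompt": "Extract entities focusing on technical concepts.",
    },
    headers={"Authorization": "Bearer <token>"},
)
print(response.status_code, response.json())
```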
cognee/api/v1/config/config.py CHANGED
@@ -2,7 +2,6 @@
 
 import os
 from cognee.base_config import get_base_config
-from cognee.exceptions import InvalidValueError, InvalidAttributeError
 from cognee.modules.cognify.config import get_cognify_config
 from cognee.infrastructure.data.chunking.config import get_chunk_config
 from cognee.infrastructure.databases.vector import get_vectordb_config
@@ -11,6 +10,7 @@ from cognee.infrastructure.llm.config import (
     get_llm_config,
 )
 from cognee.infrastructure.databases.relational import get_relational_config, get_migration_config
+from cognee.api.v1.exceptions.exceptions import InvalidConfigAttributeError
 
 
 class config:
@@ -92,9 +92,7 @@ class config:
             if hasattr(llm_config, key):
                 object.__setattr__(llm_config, key, value)
             else:
-                raise InvalidAttributeError(
-                    message=f"'{key}' is not a valid attribute of the config."
-                )
+                raise InvalidConfigAttributeError(attribute=key)
 
     @staticmethod
     def set_chunk_strategy(chunk_strategy: object):
@@ -131,9 +129,7 @@ class config:
             if hasattr(relational_db_config, key):
                 object.__setattr__(relational_db_config, key, value)
             else:
-                raise InvalidAttributeError(
-                    message=f"'{key}' is not a valid attribute of the config."
-                )
+                raise InvalidConfigAttributeError(attribute=key)
 
     @staticmethod
     def set_migration_db_config(config_dict: dict):
@@ -145,9 +141,7 @@ class config:
             if hasattr(migration_db_config, key):
                 object.__setattr__(migration_db_config, key, value)
             else:
-                raise InvalidAttributeError(
-                    message=f"'{key}' is not a valid attribute of the config."
-                )
+                raise InvalidConfigAttributeError(attribute=key)
 
     @staticmethod
     def set_graph_db_config(config_dict: dict) -> None:
@@ -171,9 +165,7 @@ class config:
             if hasattr(vector_db_config, key):
                 object.__setattr__(vector_db_config, key, value)
             else:
-                raise InvalidAttributeError(
-                    message=f"'{key}' is not a valid attribute of the config."
-                )
+                raise InvalidConfigAttributeError(attribute=key)
 
     @staticmethod
     def set_vector_db_key(db_key: str):
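
Callers that previously caught InvalidAttributeError should now catch the new exception; a minimal sketch (the invalid key is illustrative):

```python
import cognee
from cognee.api.v1.exceptions import InvalidConfigAttributeError

try:
    # 'no_such_key' is not an attribute of the LLM config, so this raises.
    cognee.config.set_llm_config({"no_such_key": "value"})
except InvalidConfigAttributeError as error:
    print(error)  # 'no_such_key' is not a valid attribute of the configuration.
```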
cognee/api/v1/datasets/routers/get_datasets_router.py CHANGED
@@ -13,7 +13,7 @@ from cognee.infrastructure.databases.relational import get_relational_engine
 from cognee.modules.data.methods import get_authorized_existing_datasets
 from cognee.modules.data.methods import create_dataset, get_datasets_by_name
 from cognee.shared.logging_utils import get_logger
-from cognee.api.v1.delete.exceptions import DataNotFoundError, DatasetNotFoundError
+from cognee.api.v1.exceptions import DataNotFoundError, DatasetNotFoundError
 from cognee.modules.users.models import User
 from cognee.modules.users.methods import get_authenticated_user
 from cognee.modules.users.permissions.methods import (
@@ -284,7 +284,7 @@ def get_datasets_router() -> APIRouter:
     - **500 Internal Server Error**: Error retrieving graph data
     """
 
-    graph_data = await get_formatted_graph_data(dataset_id, user.id)
+    graph_data = await get_formatted_graph_data(dataset_id, user)
 
     return graph_data
 
cognee/api/v1/delete/delete.py CHANGED
@@ -16,7 +16,7 @@ from cognee.modules.users.methods import get_default_user
 from cognee.modules.data.methods import get_authorized_existing_datasets
 from cognee.context_global_variables import set_database_global_context_variables
 
-from cognee.api.v1.delete.exceptions import (
+from cognee.api.v1.exceptions import (
     DocumentNotFoundError,
     DatasetNotFoundError,
     DocumentSubgraphNotFoundError,
cognee/api/v1/exceptions/__init__.py ADDED
@@ -0,0 +1,13 @@
+"""
+Custom exceptions for the Cognee API.
+
+This module defines a set of exceptions for handling various data errors
+"""
+
+from .exceptions import (
+    InvalidConfigAttributeError,
+    DocumentNotFoundError,
+    DatasetNotFoundError,
+    DataNotFoundError,
+    DocumentSubgraphNotFoundError,
+)
cognee/api/v1/{delete → exceptions}/exceptions.py RENAMED
@@ -1,10 +1,19 @@
-from cognee.exceptions import CogneeApiError
+from cognee.exceptions import CogneeConfigurationError, CogneeValidationError
 from fastapi import status
 
 
-class DocumentNotFoundError(CogneeApiError):
-    """Raised when a document cannot be found in the database."""
+class InvalidConfigAttributeError(CogneeConfigurationError):
+    def __init__(
+        self,
+        attribute: str,
+        name: str = "InvalidConfigAttributeError",
+        status_code: int = status.HTTP_400_BAD_REQUEST,
+    ):
+        message = f"'{attribute}' is not a valid attribute of the configuration."
+        super().__init__(message, name, status_code)
 
+
+class DocumentNotFoundError(CogneeValidationError):
     def __init__(
         self,
         message: str = "Document not found in database.",
@@ -14,9 +23,7 @@ class DocumentNotFoundError(CogneeApiError):
         super().__init__(message, name, status_code)
 
 
-class DatasetNotFoundError(CogneeApiError):
-    """Raised when a dataset cannot be found."""
-
+class DatasetNotFoundError(CogneeValidationError):
     def __init__(
         self,
         message: str = "Dataset not found.",
@@ -26,9 +33,7 @@ class DatasetNotFoundError(CogneeApiError):
         super().__init__(message, name, status_code)
 
 
-class DataNotFoundError(CogneeApiError):
-    """Raised when a dataset cannot be found."""
-
+class DataNotFoundError(CogneeValidationError):
     def __init__(
         self,
         message: str = "Data not found.",
@@ -38,9 +43,7 @@ class DataNotFoundError(CogneeApiError):
         super().__init__(message, name, status_code)
 
 
-class DocumentSubgraphNotFoundError(CogneeApiError):
-    """Raised when a document's subgraph cannot be found in the graph database."""
-
+class DocumentSubgraphNotFoundError(CogneeValidationError):
     def __init__(
         self,
         message: str = "Document subgraph not found in graph database.",
cognee/api/v1/responses/default_tools.py CHANGED
@@ -49,6 +49,10 @@ DEFAULT_TOOLS = [
                 "type": "string",
                 "description": "Path to a custom ontology file",
             },
+            "custom_prompt": {
+                "type": "string",
+                "description": "Custom prompt for entity extraction and graph generation. If provided, this prompt will be used instead of the default prompts.",
+            },
         },
         "required": ["text"],
     },
cognee/api/v1/responses/dispatch_function.py CHANGED
@@ -88,11 +88,16 @@ async def handle_cognify(arguments: Dict[str, Any], user) -> str:
     """Handle cognify function call"""
     text = arguments.get("text")
     ontology_file_path = arguments.get("ontology_file_path")
+    custom_prompt = arguments.get("custom_prompt")
 
     if text:
         await add(data=text, user=user)
 
-    await cognify(user=user, ontology_file_path=ontology_file_path if ontology_file_path else None)
+    await cognify(
+        user=user,
+        ontology_file_path=ontology_file_path if ontology_file_path else None,
+        custom_prompt=custom_prompt,
+    )
 
     return (
         "Text successfully converted into knowledge graph."
cognee/api/v1/responses/models.py CHANGED
@@ -70,7 +70,7 @@ class ResponseRequest(InDTO):
     tool_choice: Optional[Union[str, Dict[str, Any]]] = "auto"
     user: Optional[str] = None
     temperature: Optional[float] = 1.0
-    max_tokens: Optional[int] = None
+    max_completion_tokens: Optional[int] = None
 
 
 class ToolCallOutput(BaseModel):
cognee/api/v1/search/search.py CHANGED
@@ -19,6 +19,8 @@ async def search(
     top_k: int = 10,
     node_type: Optional[Type] = None,
     node_name: Optional[List[str]] = None,
+    save_interaction: bool = False,
+    last_k: Optional[int] = None,
 ) -> list:
     """
     Search and query the knowledge graph for insights, information, and connections.
@@ -107,6 +109,8 @@ async def search(
 
         node_name: Filter results to specific named entities (for targeted search).
 
+        save_interaction: Whether to save the interaction (query, context, and answer connected to triplet endpoints) into the graph.
+
     Returns:
         list: Search results in format determined by query_type:
 
@@ -158,13 +162,6 @@ async def search(
         - VECTOR_DB_PROVIDER: Must match what was used during cognify
         - GRAPH_DATABASE_PROVIDER: Must match what was used during cognify
 
-    Raises:
-        DatasetNotFoundError: If specified datasets don't exist or aren't accessible
-        PermissionDeniedError: If user lacks read access to requested datasets
-        NoDataError: If no relevant data found for the search query
-        InvalidValueError: If LLM_API_KEY is not set (for LLM-based search types)
-        ValueError: If query_text is empty or search parameters are invalid
-        CollectionNotFoundError: If vector collection not found (data not processed)
     """
     # We use lists from now on for datasets
     if isinstance(datasets, UUID) or isinstance(datasets, str):
@@ -189,6 +186,8 @@ async def search(
         top_k=top_k,
         node_type=node_type,
         node_name=node_name,
+        save_interaction=save_interaction,
+        last_k=last_k,
     )
 
     return filtered_search_results
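
A sketch of the new flags from the caller's side; the query text and the GRAPH_COMPLETION search type are illustrative:

```python
import asyncio

import cognee
from cognee.modules.search.types import SearchType


async def main():
    results = await cognee.search(
        query_text="How do NLP and knowledge graphs relate?",
        query_type=SearchType.GRAPH_COMPLETION,
        save_interaction=True,  # write query/context/answer back into the graph
        last_k=5,
    )
    print(results)


asyncio.run(main())
```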
cognee/cli/__init__.py ADDED
@@ -0,0 +1,10 @@
+from cognee.cli.reference import SupportsCliCommand
+from cognee.cli.exceptions import CliCommandException
+
+DEFAULT_DOCS_URL = "https://docs.cognee.ai"
+
+__all__ = [
+    "SupportsCliCommand",
+    "CliCommandException",
+    "DEFAULT_DOCS_URL",
+]