cognee 0.2.3.dev1__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. cognee/__main__.py +4 -0
  2. cognee/api/v1/add/add.py +18 -6
  3. cognee/api/v1/cognify/code_graph_pipeline.py +7 -1
  4. cognee/api/v1/cognify/cognify.py +22 -107
  5. cognee/api/v1/cognify/routers/get_cognify_router.py +11 -3
  6. cognee/api/v1/datasets/routers/get_datasets_router.py +1 -1
  7. cognee/api/v1/responses/default_tools.py +4 -0
  8. cognee/api/v1/responses/dispatch_function.py +6 -1
  9. cognee/api/v1/responses/models.py +1 -1
  10. cognee/api/v1/search/search.py +6 -0
  11. cognee/cli/__init__.py +10 -0
  12. cognee/cli/_cognee.py +180 -0
  13. cognee/cli/commands/__init__.py +1 -0
  14. cognee/cli/commands/add_command.py +80 -0
  15. cognee/cli/commands/cognify_command.py +128 -0
  16. cognee/cli/commands/config_command.py +225 -0
  17. cognee/cli/commands/delete_command.py +80 -0
  18. cognee/cli/commands/search_command.py +149 -0
  19. cognee/cli/config.py +33 -0
  20. cognee/cli/debug.py +21 -0
  21. cognee/cli/echo.py +45 -0
  22. cognee/cli/exceptions.py +23 -0
  23. cognee/cli/minimal_cli.py +97 -0
  24. cognee/cli/reference.py +26 -0
  25. cognee/cli/suppress_logging.py +12 -0
  26. cognee/eval_framework/corpus_builder/corpus_builder_executor.py +2 -2
  27. cognee/eval_framework/eval_config.py +1 -1
  28. cognee/infrastructure/databases/graph/get_graph_engine.py +4 -9
  29. cognee/infrastructure/databases/graph/kuzu/adapter.py +64 -2
  30. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +49 -0
  31. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +5 -3
  32. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +16 -7
  33. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +5 -5
  34. cognee/infrastructure/databases/vector/embeddings/config.py +2 -2
  35. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +6 -6
  36. cognee/infrastructure/files/utils/get_data_file_path.py +14 -9
  37. cognee/infrastructure/files/utils/get_file_metadata.py +2 -1
  38. cognee/infrastructure/llm/LLMGateway.py +14 -5
  39. cognee/infrastructure/llm/config.py +5 -5
  40. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +16 -5
  41. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_content_graph.py +19 -15
  42. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +3 -3
  43. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +3 -3
  44. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +2 -2
  45. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +14 -8
  46. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +6 -4
  47. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +3 -3
  48. cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +2 -2
  49. cognee/infrastructure/llm/tokenizer/HuggingFace/adapter.py +3 -3
  50. cognee/infrastructure/llm/tokenizer/Mistral/adapter.py +3 -3
  51. cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +6 -6
  52. cognee/infrastructure/llm/utils.py +7 -7
  53. cognee/modules/data/methods/__init__.py +2 -0
  54. cognee/modules/data/methods/create_authorized_dataset.py +19 -0
  55. cognee/modules/data/methods/get_authorized_dataset.py +11 -5
  56. cognee/modules/data/methods/get_authorized_dataset_by_name.py +16 -0
  57. cognee/modules/data/methods/load_or_create_datasets.py +2 -20
  58. cognee/modules/graph/methods/get_formatted_graph_data.py +3 -2
  59. cognee/modules/pipelines/__init__.py +1 -1
  60. cognee/modules/pipelines/exceptions/tasks.py +18 -0
  61. cognee/modules/pipelines/layers/__init__.py +1 -0
  62. cognee/modules/pipelines/layers/check_pipeline_run_qualification.py +59 -0
  63. cognee/modules/pipelines/layers/pipeline_execution_mode.py +127 -0
  64. cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +12 -0
  65. cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +34 -0
  66. cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +55 -0
  67. cognee/modules/pipelines/layers/setup_and_check_environment.py +41 -0
  68. cognee/modules/pipelines/layers/validate_pipeline_tasks.py +20 -0
  69. cognee/modules/pipelines/methods/__init__.py +2 -0
  70. cognee/modules/pipelines/methods/get_pipeline_runs_by_dataset.py +34 -0
  71. cognee/modules/pipelines/methods/reset_pipeline_run_status.py +16 -0
  72. cognee/modules/pipelines/operations/__init__.py +0 -1
  73. cognee/modules/pipelines/operations/log_pipeline_run_initiated.py +1 -1
  74. cognee/modules/pipelines/operations/pipeline.py +23 -138
  75. cognee/modules/retrieval/base_feedback.py +11 -0
  76. cognee/modules/retrieval/cypher_search_retriever.py +1 -9
  77. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +9 -2
  78. cognee/modules/retrieval/graph_completion_cot_retriever.py +13 -6
  79. cognee/modules/retrieval/graph_completion_retriever.py +89 -5
  80. cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
  81. cognee/modules/retrieval/natural_language_retriever.py +0 -4
  82. cognee/modules/retrieval/user_qa_feedback.py +83 -0
  83. cognee/modules/retrieval/utils/extract_uuid_from_node.py +18 -0
  84. cognee/modules/retrieval/utils/models.py +40 -0
  85. cognee/modules/search/methods/search.py +46 -5
  86. cognee/modules/search/types/SearchType.py +1 -0
  87. cognee/modules/settings/get_settings.py +2 -2
  88. cognee/shared/CodeGraphEntities.py +1 -0
  89. cognee/shared/logging_utils.py +142 -31
  90. cognee/shared/utils.py +0 -1
  91. cognee/tasks/graph/extract_graph_from_data.py +6 -2
  92. cognee/tasks/repo_processor/get_local_dependencies.py +2 -0
  93. cognee/tasks/repo_processor/get_repo_file_dependencies.py +120 -48
  94. cognee/tasks/storage/add_data_points.py +33 -3
  95. cognee/tests/integration/cli/__init__.py +3 -0
  96. cognee/tests/integration/cli/test_cli_integration.py +331 -0
  97. cognee/tests/integration/documents/PdfDocument_test.py +2 -2
  98. cognee/tests/integration/documents/TextDocument_test.py +2 -4
  99. cognee/tests/integration/documents/UnstructuredDocument_test.py +5 -8
  100. cognee/tests/{test_deletion.py → test_delete_hard.py} +0 -37
  101. cognee/tests/test_delete_soft.py +85 -0
  102. cognee/tests/test_kuzu.py +2 -2
  103. cognee/tests/test_neo4j.py +2 -2
  104. cognee/tests/test_search_db.py +126 -7
  105. cognee/tests/unit/cli/__init__.py +3 -0
  106. cognee/tests/unit/cli/test_cli_commands.py +483 -0
  107. cognee/tests/unit/cli/test_cli_edge_cases.py +625 -0
  108. cognee/tests/unit/cli/test_cli_main.py +173 -0
  109. cognee/tests/unit/cli/test_cli_runner.py +62 -0
  110. cognee/tests/unit/cli/test_cli_utils.py +127 -0
  111. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +3 -3
  112. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +3 -3
  113. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +3 -3
  114. cognee/tests/unit/modules/search/search_methods_test.py +2 -0
  115. {cognee-0.2.3.dev1.dist-info → cognee-0.2.4.dist-info}/METADATA +7 -5
  116. {cognee-0.2.3.dev1.dist-info → cognee-0.2.4.dist-info}/RECORD +120 -83
  117. cognee-0.2.4.dist-info/entry_points.txt +2 -0
  118. cognee/infrastructure/databases/graph/networkx/__init__.py +0 -0
  119. cognee/infrastructure/databases/graph/networkx/adapter.py +0 -1017
  120. cognee/infrastructure/pipeline/models/Operation.py +0 -60
  121. cognee/infrastructure/pipeline/models/__init__.py +0 -0
  122. cognee/notebooks/github_analysis_step_by_step.ipynb +0 -37
  123. cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py +0 -7
  124. {cognee-0.2.3.dev1.dist-info → cognee-0.2.4.dist-info}/WHEEL +0 -0
  125. {cognee-0.2.3.dev1.dist-info → cognee-0.2.4.dist-info}/licenses/LICENSE +0 -0
  126. {cognee-0.2.3.dev1.dist-info → cognee-0.2.4.dist-info}/licenses/NOTICE.md +0 -0
cognee/__main__.py ADDED
@@ -0,0 +1,4 @@
1
+ from cognee.cli._cognee import main
2
+
3
+ if __name__ == "__main__":
4
+ main()
cognee/api/v1/add/add.py CHANGED
@@ -1,9 +1,15 @@
1
1
  from uuid import UUID
2
2
  from typing import Union, BinaryIO, List, Optional
3
3
 
4
- from cognee.modules.pipelines import Task
5
4
  from cognee.modules.users.models import User
6
- from cognee.modules.pipelines import cognee_pipeline
5
+ from cognee.modules.pipelines import Task, run_pipeline
6
+ from cognee.modules.pipelines.layers.resolve_authorized_user_dataset import (
7
+ resolve_authorized_user_dataset,
8
+ )
9
+ from cognee.modules.pipelines.layers.reset_dataset_pipeline_run_status import (
10
+ reset_dataset_pipeline_run_status,
11
+ )
12
+ from cognee.modules.engine.operations.setup import setup
7
13
  from cognee.tasks.ingestion import ingest_data, resolve_data_directories
8
14
 
9
15
 
@@ -128,11 +134,11 @@ async def add(
128
134
 
129
135
  Optional:
130
136
  - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama"
131
- - LLM_MODEL: Model name (default: "gpt-4o-mini")
137
+ - LLM_MODEL: Model name (default: "gpt-5-mini")
132
138
  - DEFAULT_USER_EMAIL: Custom default user email
133
139
  - DEFAULT_USER_PASSWORD: Custom default user password
134
140
  - VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "pgvector"
135
- - GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j", "networkx"
141
+ - GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j"
136
142
 
137
143
  """
138
144
  tasks = [
@@ -140,11 +146,17 @@ async def add(
140
146
  Task(ingest_data, dataset_name, user, node_set, dataset_id, preferred_loaders),
141
147
  ]
142
148
 
149
+ await setup()
150
+
151
+ user, authorized_dataset = await resolve_authorized_user_dataset(dataset_id, dataset_name, user)
152
+
153
+ await reset_dataset_pipeline_run_status(authorized_dataset.id, user)
154
+
143
155
  pipeline_run_info = None
144
156
 
145
- async for run_info in cognee_pipeline(
157
+ async for run_info in run_pipeline(
146
158
  tasks=tasks,
147
- datasets=dataset_id if dataset_id else dataset_name,
159
+ datasets=[authorized_dataset.id],
148
160
  data=data,
149
161
  user=user,
150
162
  pipeline_name="add_pipeline",
@@ -40,8 +40,14 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
40
40
  user = await get_default_user()
41
41
  detailed_extraction = True
42
42
 
43
+ # Multi-language support: allow passing supported_languages
44
+ supported_languages = None # defer to task defaults
43
45
  tasks = [
44
- Task(get_repo_file_dependencies, detailed_extraction=detailed_extraction),
46
+ Task(
47
+ get_repo_file_dependencies,
48
+ detailed_extraction=detailed_extraction,
49
+ supported_languages=supported_languages,
50
+ ),
45
51
  # Task(summarize_code, task_config={"batch_size": 500}), # This task takes a long time to complete
46
52
  Task(add_data_points, task_config={"batch_size": 30}),
47
53
  ]
@@ -7,12 +7,10 @@ from cognee.shared.logging_utils import get_logger
7
7
  from cognee.shared.data_models import KnowledgeGraph
8
8
  from cognee.infrastructure.llm import get_max_chunk_tokens
9
9
 
10
- from cognee.modules.pipelines import cognee_pipeline
10
+ from cognee.modules.pipelines import run_pipeline
11
11
  from cognee.modules.pipelines.tasks.task import Task
12
12
  from cognee.modules.chunking.TextChunker import TextChunker
13
13
  from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
14
- from cognee.modules.pipelines.models.PipelineRunInfo import PipelineRunCompleted, PipelineRunErrored
15
- from cognee.modules.pipelines.queues.pipeline_run_info_queues import push_to_queue
16
14
  from cognee.modules.users.models import User
17
15
 
18
16
  from cognee.tasks.documents import (
@@ -23,6 +21,7 @@ from cognee.tasks.documents import (
23
21
  from cognee.tasks.graph import extract_graph_from_data
24
22
  from cognee.tasks.storage import add_data_points
25
23
  from cognee.tasks.summarization import summarize_text
24
+ from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor
26
25
 
27
26
  logger = get_logger("cognify")
28
27
 
@@ -40,6 +39,7 @@ async def cognify(
40
39
  graph_db_config: dict = None,
41
40
  run_in_background: bool = False,
42
41
  incremental_loading: bool = True,
42
+ custom_prompt: Optional[str] = None,
43
43
  ):
44
44
  """
45
45
  Transform ingested data into a structured knowledge graph.
@@ -91,7 +91,7 @@ async def cognify(
91
91
  - LangchainChunker: Recursive character splitting with overlap
92
92
  Determines how documents are segmented for processing.
93
93
  chunk_size: Maximum tokens per chunk. Auto-calculated based on LLM if None.
94
- Formula: min(embedding_max_tokens, llm_max_tokens // 2)
94
+ Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2)
95
95
  Default limits: ~512-8192 tokens depending on models.
96
96
  Smaller chunks = more granular but potentially fragmented knowledge.
97
97
  ontology_file_path: Path to RDF/OWL ontology file for domain-specific entity types.
@@ -102,6 +102,10 @@ async def cognify(
102
102
  If False, waits for completion before returning.
103
103
  Background mode recommended for large datasets (>100MB).
104
104
  Use pipeline_run_id from return value to monitor progress.
105
+ custom_prompt: Optional custom prompt string to use for entity extraction and graph generation.
106
+ If provided, this prompt will be used instead of the default prompts for
107
+ knowledge graph extraction. The prompt should guide the LLM on how to
108
+ extract entities and relationships from the text content.
105
109
 
106
110
  Returns:
107
111
  Union[dict, list[PipelineRunInfo]]:
@@ -178,115 +182,24 @@ async def cognify(
178
182
  - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
179
183
  - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
180
184
  """
181
- tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path)
182
-
183
- if run_in_background:
184
- return await run_cognify_as_background_process(
185
- tasks=tasks,
186
- user=user,
187
- datasets=datasets,
188
- vector_db_config=vector_db_config,
189
- graph_db_config=graph_db_config,
190
- incremental_loading=incremental_loading,
191
- )
192
- else:
193
- return await run_cognify_blocking(
194
- tasks=tasks,
195
- user=user,
196
- datasets=datasets,
197
- vector_db_config=vector_db_config,
198
- graph_db_config=graph_db_config,
199
- incremental_loading=incremental_loading,
200
- )
201
-
185
+ tasks = await get_default_tasks(
186
+ user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt
187
+ )
202
188
 
203
- async def run_cognify_blocking(
204
- tasks,
205
- user,
206
- datasets,
207
- graph_db_config: dict = None,
208
- vector_db_config: dict = False,
209
- incremental_loading: bool = True,
210
- ):
211
- total_run_info = {}
189
+ # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
190
+ pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background)
212
191
 
213
- async for run_info in cognee_pipeline(
192
+ # Run the run_pipeline in the background or blocking based on executor
193
+ return await pipeline_executor_func(
194
+ pipeline=run_pipeline,
214
195
  tasks=tasks,
215
- datasets=datasets,
216
196
  user=user,
217
- pipeline_name="cognify_pipeline",
218
- graph_db_config=graph_db_config,
197
+ datasets=datasets,
219
198
  vector_db_config=vector_db_config,
199
+ graph_db_config=graph_db_config,
220
200
  incremental_loading=incremental_loading,
221
- ):
222
- if run_info.dataset_id:
223
- total_run_info[run_info.dataset_id] = run_info
224
- else:
225
- total_run_info = run_info
226
-
227
- return total_run_info
228
-
229
-
230
- async def run_cognify_as_background_process(
231
- tasks,
232
- user,
233
- datasets,
234
- graph_db_config: dict = None,
235
- vector_db_config: dict = False,
236
- incremental_loading: bool = True,
237
- ):
238
- # Convert dataset to list if it's a string
239
- if isinstance(datasets, str):
240
- datasets = [datasets]
241
-
242
- # Store pipeline status for all pipelines
243
- pipeline_run_started_info = {}
244
-
245
- async def handle_rest_of_the_run(pipeline_list):
246
- # Execute all provided pipelines one by one to avoid database write conflicts
247
- # TODO: Convert to async gather task instead of for loop when Queue mechanism for database is created
248
- for pipeline in pipeline_list:
249
- while True:
250
- try:
251
- pipeline_run_info = await anext(pipeline)
252
-
253
- push_to_queue(pipeline_run_info.pipeline_run_id, pipeline_run_info)
254
-
255
- if isinstance(pipeline_run_info, PipelineRunCompleted) or isinstance(
256
- pipeline_run_info, PipelineRunErrored
257
- ):
258
- break
259
- except StopAsyncIteration:
260
- break
261
-
262
- # Start all pipelines to get started status
263
- pipeline_list = []
264
- for dataset in datasets:
265
- pipeline_run = cognee_pipeline(
266
- tasks=tasks,
267
- user=user,
268
- datasets=dataset,
269
- pipeline_name="cognify_pipeline",
270
- graph_db_config=graph_db_config,
271
- vector_db_config=vector_db_config,
272
- incremental_loading=incremental_loading,
273
- )
274
-
275
- # Save dataset Pipeline run started info
276
- run_info = await anext(pipeline_run)
277
- pipeline_run_started_info[run_info.dataset_id] = run_info
278
-
279
- if pipeline_run_started_info[run_info.dataset_id].payload:
280
- # Remove payload info to avoid serialization
281
- # TODO: Handle payload serialization
282
- pipeline_run_started_info[run_info.dataset_id].payload = []
283
-
284
- pipeline_list.append(pipeline_run)
285
-
286
- # Send all started pipelines to execute one by one in background
287
- asyncio.create_task(handle_rest_of_the_run(pipeline_list=pipeline_list))
288
-
289
- return pipeline_run_started_info
201
+ pipeline_name="cognify_pipeline",
202
+ )
290
203
 
291
204
 
292
205
  async def get_default_tasks( # TODO: Find out a better way to do this (Boris's comment)
@@ -295,6 +208,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
295
208
  chunker=TextChunker,
296
209
  chunk_size: int = None,
297
210
  ontology_file_path: Optional[str] = None,
211
+ custom_prompt: Optional[str] = None,
298
212
  ) -> list[Task]:
299
213
  default_tasks = [
300
214
  Task(classify_documents),
@@ -308,6 +222,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
308
222
  extract_graph_from_data,
309
223
  graph_model=graph_model,
310
224
  ontology_adapter=OntologyResolver(ontology_file=ontology_file_path),
225
+ custom_prompt=custom_prompt,
311
226
  task_config={"batch_size": 10},
312
227
  ), # Generate knowledge graphs from the document chunks.
313
228
  Task(
@@ -37,6 +37,9 @@ class CognifyPayloadDTO(InDTO):
37
37
  datasets: Optional[List[str]] = Field(default=None)
38
38
  dataset_ids: Optional[List[UUID]] = Field(default=None, examples=[[]])
39
39
  run_in_background: Optional[bool] = Field(default=False)
40
+ custom_prompt: Optional[str] = Field(
41
+ default=None, description="Custom prompt for entity extraction and graph generation"
42
+ )
40
43
 
41
44
 
42
45
  def get_cognify_router() -> APIRouter:
@@ -63,6 +66,7 @@ def get_cognify_router() -> APIRouter:
63
66
  - **datasets** (Optional[List[str]]): List of dataset names to process. Dataset names are resolved to datasets owned by the authenticated user.
64
67
  - **dataset_ids** (Optional[List[UUID]]): List of existing dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
65
68
  - **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking).
69
+ - **custom_prompt** (Optional[str]): Custom prompt for entity extraction and graph generation. If provided, this prompt will be used instead of the default prompts for knowledge graph extraction.
66
70
 
67
71
  ## Response
68
72
  - **Blocking execution**: Complete pipeline run information with entity counts, processing duration, and success/failure status
@@ -76,7 +80,8 @@ def get_cognify_router() -> APIRouter:
76
80
  ```json
77
81
  {
78
82
  "datasets": ["research_papers", "documentation"],
79
- "run_in_background": false
83
+ "run_in_background": false,
84
+ "custom_prompt": "Extract entities focusing on technical concepts and their relationships. Identify key technologies, methodologies, and their interconnections."
80
85
  }
81
86
  ```
82
87
 
@@ -106,7 +111,10 @@ def get_cognify_router() -> APIRouter:
106
111
  datasets = payload.dataset_ids if payload.dataset_ids else payload.datasets
107
112
 
108
113
  cognify_run = await cognee_cognify(
109
- datasets, user, run_in_background=payload.run_in_background
114
+ datasets,
115
+ user,
116
+ run_in_background=payload.run_in_background,
117
+ custom_prompt=payload.custom_prompt,
110
118
  )
111
119
 
112
120
  # If any cognify run errored return JSONResponse with proper error status code
@@ -164,7 +172,7 @@ def get_cognify_router() -> APIRouter:
164
172
  {
165
173
  "pipeline_run_id": str(pipeline_run_info.pipeline_run_id),
166
174
  "status": pipeline_run_info.status,
167
- "payload": await get_formatted_graph_data(pipeline_run.dataset_id, user.id),
175
+ "payload": await get_formatted_graph_data(pipeline_run.dataset_id, user),
168
176
  }
169
177
  )
170
178
 
@@ -284,7 +284,7 @@ def get_datasets_router() -> APIRouter:
284
284
  - **500 Internal Server Error**: Error retrieving graph data
285
285
  """
286
286
 
287
- graph_data = await get_formatted_graph_data(dataset_id, user.id)
287
+ graph_data = await get_formatted_graph_data(dataset_id, user)
288
288
 
289
289
  return graph_data
290
290
 
@@ -49,6 +49,10 @@ DEFAULT_TOOLS = [
49
49
  "type": "string",
50
50
  "description": "Path to a custom ontology file",
51
51
  },
52
+ "custom_prompt": {
53
+ "type": "string",
54
+ "description": "Custom prompt for entity extraction and graph generation. If provided, this prompt will be used instead of the default prompts.",
55
+ },
52
56
  },
53
57
  "required": ["text"],
54
58
  },
@@ -88,11 +88,16 @@ async def handle_cognify(arguments: Dict[str, Any], user) -> str:
88
88
  """Handle cognify function call"""
89
89
  text = arguments.get("text")
90
90
  ontology_file_path = arguments.get("ontology_file_path")
91
+ custom_prompt = arguments.get("custom_prompt")
91
92
 
92
93
  if text:
93
94
  await add(data=text, user=user)
94
95
 
95
- await cognify(user=user, ontology_file_path=ontology_file_path if ontology_file_path else None)
96
+ await cognify(
97
+ user=user,
98
+ ontology_file_path=ontology_file_path if ontology_file_path else None,
99
+ custom_prompt=custom_prompt,
100
+ )
96
101
 
97
102
  return (
98
103
  "Text successfully converted into knowledge graph."
@@ -70,7 +70,7 @@ class ResponseRequest(InDTO):
70
70
  tool_choice: Optional[Union[str, Dict[str, Any]]] = "auto"
71
71
  user: Optional[str] = None
72
72
  temperature: Optional[float] = 1.0
73
- max_tokens: Optional[int] = None
73
+ max_completion_tokens: Optional[int] = None
74
74
 
75
75
 
76
76
  class ToolCallOutput(BaseModel):
@@ -19,6 +19,8 @@ async def search(
19
19
  top_k: int = 10,
20
20
  node_type: Optional[Type] = None,
21
21
  node_name: Optional[List[str]] = None,
22
+ save_interaction: bool = False,
23
+ last_k: Optional[int] = None,
22
24
  ) -> list:
23
25
  """
24
26
  Search and query the knowledge graph for insights, information, and connections.
@@ -107,6 +109,8 @@ async def search(
107
109
 
108
110
  node_name: Filter results to specific named entities (for targeted search).
109
111
 
112
+ save_interaction: Save interaction (query, context, answer connected to triplet endpoints) results into the graph or not
113
+
110
114
  Returns:
111
115
  list: Search results in format determined by query_type:
112
116
 
@@ -182,6 +186,8 @@ async def search(
182
186
  top_k=top_k,
183
187
  node_type=node_type,
184
188
  node_name=node_name,
189
+ save_interaction=save_interaction,
190
+ last_k=last_k,
185
191
  )
186
192
 
187
193
  return filtered_search_results
cognee/cli/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ from cognee.cli.reference import SupportsCliCommand
2
+ from cognee.cli.exceptions import CliCommandException
3
+
4
+ DEFAULT_DOCS_URL = "https://docs.cognee.ai"
5
+
6
+ __all__ = [
7
+ "SupportsCliCommand",
8
+ "CliCommandException",
9
+ "DEFAULT_DOCS_URL",
10
+ ]
cognee/cli/_cognee.py ADDED
@@ -0,0 +1,180 @@
1
+ import sys
2
+ import os
3
+ import argparse
4
+ from typing import Any, Sequence, Dict, Type, cast, List
5
+ import click
6
+
7
+ try:
8
+ import rich_argparse
9
+ from rich.markdown import Markdown
10
+
11
+ HAS_RICH = True
12
+ except ImportError:
13
+ HAS_RICH = False
14
+
15
+ from cognee.cli import SupportsCliCommand, DEFAULT_DOCS_URL
16
+ from cognee.cli.config import CLI_DESCRIPTION
17
+ from cognee.cli import debug
18
+ import cognee.cli.echo as fmt
19
+ from cognee.cli.exceptions import CliCommandException
20
+
21
+
22
+ ACTION_EXECUTED = False
23
+
24
+
25
+ def print_help(parser: argparse.ArgumentParser) -> None:
26
+ if not ACTION_EXECUTED:
27
+ parser.print_help()
28
+
29
+
30
+ class DebugAction(argparse.Action):
31
+ def __init__(
32
+ self,
33
+ option_strings: Sequence[str],
34
+ dest: Any = argparse.SUPPRESS,
35
+ default: Any = argparse.SUPPRESS,
36
+ help: str = None,
37
+ ) -> None:
38
+ super(DebugAction, self).__init__(
39
+ option_strings=option_strings, dest=dest, default=default, nargs=0, help=help
40
+ )
41
+
42
+ def __call__(
43
+ self,
44
+ parser: argparse.ArgumentParser,
45
+ namespace: argparse.Namespace,
46
+ values: Any,
47
+ option_string: str = None,
48
+ ) -> None:
49
+ # Enable debug mode for stack traces
50
+ debug.enable_debug()
51
+ fmt.note("Debug mode enabled. Full stack traces will be shown.")
52
+
53
+
54
+ # Debug functionality is now in cognee.cli.debug module
55
+
56
+
57
+ def _discover_commands() -> List[Type[SupportsCliCommand]]:
58
+ """Discover all available CLI commands"""
59
+ # Import commands dynamically to avoid early cognee initialization
60
+ commands = []
61
+
62
+ command_modules = [
63
+ ("cognee.cli.commands.add_command", "AddCommand"),
64
+ ("cognee.cli.commands.search_command", "SearchCommand"),
65
+ ("cognee.cli.commands.cognify_command", "CognifyCommand"),
66
+ ("cognee.cli.commands.delete_command", "DeleteCommand"),
67
+ ("cognee.cli.commands.config_command", "ConfigCommand"),
68
+ ]
69
+
70
+ for module_path, class_name in command_modules:
71
+ try:
72
+ module = __import__(module_path, fromlist=[class_name])
73
+ command_class = getattr(module, class_name)
74
+ commands.append(command_class)
75
+ except (ImportError, AttributeError) as e:
76
+ fmt.warning(f"Failed to load command {class_name}: {e}")
77
+
78
+ return commands
79
+
80
+
81
+ def _create_parser() -> tuple[argparse.ArgumentParser, Dict[str, SupportsCliCommand]]:
82
+ parser = argparse.ArgumentParser(
83
+ description=f"{CLI_DESCRIPTION} Further help is available at {DEFAULT_DOCS_URL}."
84
+ )
85
+
86
+ # Get version dynamically
87
+ try:
88
+ from cognee.version import get_cognee_version
89
+
90
+ version = get_cognee_version()
91
+ except ImportError:
92
+ version = "unknown"
93
+
94
+ parser.add_argument("--version", action="version", version=f"cognee {version}")
95
+ parser.add_argument(
96
+ "--debug",
97
+ action=DebugAction,
98
+ help="Enable debug mode to show full stack traces on exceptions",
99
+ )
100
+
101
+ subparsers = parser.add_subparsers(title="Available commands", dest="command")
102
+
103
+ # Discover and install commands
104
+ command_classes = _discover_commands()
105
+ installed_commands: Dict[str, SupportsCliCommand] = {}
106
+
107
+ for command_class in command_classes:
108
+ command = command_class()
109
+ if command.command_string in installed_commands:
110
+ continue
111
+
112
+ command_parser = subparsers.add_parser(
113
+ command.command_string,
114
+ help=command.help_string,
115
+ description=command.description if hasattr(command, "description") else None,
116
+ )
117
+ command.configure_parser(command_parser)
118
+ installed_commands[command.command_string] = command
119
+
120
+ # Add rich formatting if available
121
+ if HAS_RICH:
122
+
123
+ def add_formatter_class(parser: argparse.ArgumentParser) -> None:
124
+ parser.formatter_class = rich_argparse.RichHelpFormatter
125
+
126
+ if parser.description:
127
+ parser.description = Markdown(parser.description, style="argparse.text")
128
+ for action in parser._actions:
129
+ if isinstance(action, argparse._SubParsersAction):
130
+ for _subcmd, subparser in action.choices.items():
131
+ add_formatter_class(subparser)
132
+
133
+ add_formatter_class(parser)
134
+
135
+ return parser, installed_commands
136
+
137
+
138
+ def main() -> int:
139
+ """Main CLI entry point"""
140
+ parser, installed_commands = _create_parser()
141
+ args = parser.parse_args()
142
+
143
+ if cmd := installed_commands.get(args.command):
144
+ try:
145
+ cmd.execute(args)
146
+ except Exception as ex:
147
+ docs_url = cmd.docs_url if hasattr(cmd, "docs_url") else DEFAULT_DOCS_URL
148
+ error_code = -1
149
+ raiseable_exception = ex
150
+
151
+ # Handle CLI-specific exceptions
152
+ if isinstance(ex, CliCommandException):
153
+ error_code = ex.error_code
154
+ docs_url = ex.docs_url or docs_url
155
+ raiseable_exception = ex.raiseable_exception
156
+
157
+ # Print exception
158
+ if raiseable_exception:
159
+ fmt.error(str(ex))
160
+
161
+ fmt.note(f"Please refer to our docs at '{docs_url}' for further assistance.")
162
+
163
+ if debug.is_debug_enabled() and raiseable_exception:
164
+ raise raiseable_exception
165
+
166
+ return error_code
167
+ else:
168
+ print_help(parser)
169
+ return -1
170
+
171
+ return 0
172
+
173
+
174
+ def _main() -> None:
175
+ """Script entry point"""
176
+ sys.exit(main())
177
+
178
+
179
+ if __name__ == "__main__":
180
+ sys.exit(main())
@@ -0,0 +1 @@
1
+ # CLI Commands package
@@ -0,0 +1,80 @@
1
+ import argparse
2
+ import asyncio
3
+ from typing import Optional
4
+
5
+ from cognee.cli.reference import SupportsCliCommand
6
+ from cognee.cli import DEFAULT_DOCS_URL
7
+ import cognee.cli.echo as fmt
8
+ from cognee.cli.exceptions import CliCommandException, CliCommandInnerException
9
+
10
+
11
+ class AddCommand(SupportsCliCommand):
12
+ command_string = "add"
13
+ help_string = "Add data to Cognee for knowledge graph processing"
14
+ docs_url = DEFAULT_DOCS_URL
15
+ description = """
16
+ Add data to Cognee for knowledge graph processing.
17
+
18
+ This is the first step in the Cognee workflow - it ingests raw data and prepares it
19
+ for processing. The function accepts various data formats including text, files, and
20
+ binary streams, then stores them in a specified dataset for further processing.
21
+
22
+ Supported Input Types:
23
+ - **Text strings**: Direct text content
24
+ - **File paths**: Local file paths (absolute paths starting with "/")
25
+ - **File URLs**: "file:///absolute/path" or "file://relative/path"
26
+ - **S3 paths**: "s3://bucket-name/path/to/file"
27
+ - **Lists**: Multiple files or text strings in a single call
28
+
29
+ Supported File Formats:
30
+ - Text files (.txt, .md, .csv)
31
+ - PDFs (.pdf)
32
+ - Images (.png, .jpg, .jpeg) - extracted via OCR/vision models
33
+ - Audio files (.mp3, .wav) - transcribed to text
34
+ - Code files (.py, .js, .ts, etc.) - parsed for structure and content
35
+ - Office documents (.docx, .pptx)
36
+
37
+ After adding data, use `cognee cognify` to process it into knowledge graphs.
38
+ """
39
+
40
+ def configure_parser(self, parser: argparse.ArgumentParser) -> None:
41
+ parser.add_argument(
42
+ "data",
43
+ nargs="+",
44
+ help="Data to add: text content, file paths (/path/to/file), file URLs (file://path), S3 paths (s3://bucket/file), or mix of these",
45
+ )
46
+ parser.add_argument(
47
+ "--dataset-name",
48
+ "-d",
49
+ default="main_dataset",
50
+ help="Dataset name to organize your data (default: main_dataset)",
51
+ )
52
+
53
+ def execute(self, args: argparse.Namespace) -> None:
54
+ try:
55
+ # Import cognee here to avoid circular imports
56
+ import cognee
57
+
58
+ fmt.echo(f"Adding {len(args.data)} item(s) to dataset '{args.dataset_name}'...")
59
+
60
+ # Run the async add function
61
+ async def run_add():
62
+ try:
63
+ # Pass all data items as a list to cognee.add if multiple items
64
+ if len(args.data) == 1:
65
+ data_to_add = args.data[0]
66
+ else:
67
+ data_to_add = args.data
68
+
69
+ fmt.echo("Processing data...")
70
+ await cognee.add(data=data_to_add, dataset_name=args.dataset_name)
71
+ fmt.success(f"Successfully added data to dataset '{args.dataset_name}'")
72
+ except Exception as e:
73
+ raise CliCommandInnerException(f"Failed to add data: {str(e)}")
74
+
75
+ asyncio.run(run_add())
76
+
77
+ except Exception as e:
78
+ if isinstance(e, CliCommandInnerException):
79
+ raise CliCommandException(str(e), error_code=1)
80
+ raise CliCommandException(f"Error adding data: {str(e)}", error_code=1)