cognee 0.5.0__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. cognee/api/client.py +5 -1
  2. cognee/api/v1/add/add.py +1 -2
  3. cognee/api/v1/cognify/code_graph_pipeline.py +119 -0
  4. cognee/api/v1/cognify/cognify.py +16 -24
  5. cognee/api/v1/cognify/routers/__init__.py +1 -0
  6. cognee/api/v1/cognify/routers/get_code_pipeline_router.py +90 -0
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +1 -3
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
  9. cognee/api/v1/ontologies/ontologies.py +37 -12
  10. cognee/api/v1/ontologies/routers/get_ontology_router.py +25 -27
  11. cognee/api/v1/search/search.py +0 -4
  12. cognee/api/v1/ui/ui.py +68 -38
  13. cognee/context_global_variables.py +16 -61
  14. cognee/eval_framework/answer_generation/answer_generation_executor.py +0 -10
  15. cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
  16. cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +2 -0
  17. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
  18. cognee/eval_framework/eval_config.py +2 -2
  19. cognee/eval_framework/modal_run_eval.py +28 -16
  20. cognee/infrastructure/databases/graph/config.py +0 -3
  21. cognee/infrastructure/databases/graph/get_graph_engine.py +0 -1
  22. cognee/infrastructure/databases/graph/graph_db_interface.py +0 -15
  23. cognee/infrastructure/databases/graph/kuzu/adapter.py +0 -228
  24. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +1 -80
  25. cognee/infrastructure/databases/utils/__init__.py +0 -3
  26. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +48 -62
  27. cognee/infrastructure/databases/vector/config.py +0 -2
  28. cognee/infrastructure/databases/vector/create_vector_engine.py +0 -1
  29. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +6 -8
  30. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +7 -9
  31. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +10 -11
  32. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +544 -0
  33. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -2
  34. cognee/infrastructure/databases/vector/vector_db_interface.py +0 -35
  35. cognee/infrastructure/files/storage/s3_config.py +0 -2
  36. cognee/infrastructure/llm/LLMGateway.py +2 -5
  37. cognee/infrastructure/llm/config.py +0 -35
  38. cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
  39. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +8 -23
  40. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +16 -17
  41. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +37 -40
  42. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +36 -39
  43. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +1 -19
  44. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +9 -11
  45. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +21 -23
  46. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +34 -42
  47. cognee/modules/cognify/config.py +0 -2
  48. cognee/modules/data/deletion/prune_system.py +2 -52
  49. cognee/modules/data/methods/delete_dataset.py +0 -26
  50. cognee/modules/engine/models/__init__.py +0 -1
  51. cognee/modules/graph/cognee_graph/CogneeGraph.py +37 -85
  52. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +3 -8
  53. cognee/modules/memify/memify.py +7 -1
  54. cognee/modules/pipelines/operations/pipeline.py +2 -18
  55. cognee/modules/retrieval/__init__.py +1 -1
  56. cognee/modules/retrieval/code_retriever.py +232 -0
  57. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -4
  58. cognee/modules/retrieval/graph_completion_cot_retriever.py +0 -4
  59. cognee/modules/retrieval/graph_completion_retriever.py +0 -10
  60. cognee/modules/retrieval/graph_summary_completion_retriever.py +0 -4
  61. cognee/modules/retrieval/temporal_retriever.py +0 -4
  62. cognee/modules/retrieval/utils/brute_force_triplet_search.py +10 -42
  63. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +1 -8
  64. cognee/modules/search/methods/get_search_type_tools.py +8 -54
  65. cognee/modules/search/methods/no_access_control_search.py +0 -4
  66. cognee/modules/search/methods/search.py +0 -21
  67. cognee/modules/search/types/SearchType.py +1 -1
  68. cognee/modules/settings/get_settings.py +0 -19
  69. cognee/modules/users/methods/get_authenticated_user.py +2 -2
  70. cognee/modules/users/models/DatasetDatabase.py +3 -15
  71. cognee/shared/logging_utils.py +0 -4
  72. cognee/tasks/code/enrich_dependency_graph_checker.py +35 -0
  73. cognee/tasks/code/get_local_dependencies_checker.py +20 -0
  74. cognee/tasks/code/get_repo_dependency_graph_checker.py +35 -0
  75. cognee/tasks/documents/__init__.py +1 -0
  76. cognee/tasks/documents/check_permissions_on_dataset.py +26 -0
  77. cognee/tasks/graph/extract_graph_from_data.py +10 -9
  78. cognee/tasks/repo_processor/__init__.py +2 -0
  79. cognee/tasks/repo_processor/get_local_dependencies.py +335 -0
  80. cognee/tasks/repo_processor/get_non_code_files.py +158 -0
  81. cognee/tasks/repo_processor/get_repo_file_dependencies.py +243 -0
  82. cognee/tasks/storage/add_data_points.py +2 -142
  83. cognee/tests/test_cognee_server_start.py +4 -2
  84. cognee/tests/test_conversation_history.py +1 -23
  85. cognee/tests/test_delete_bmw_example.py +60 -0
  86. cognee/tests/test_search_db.py +1 -37
  87. cognee/tests/unit/api/test_ontology_endpoint.py +89 -77
  88. cognee/tests/unit/infrastructure/mock_embedding_engine.py +7 -3
  89. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -0
  90. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
  91. cognee/tests/unit/modules/graph/cognee_graph_test.py +0 -406
  92. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/METADATA +89 -76
  93. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/RECORD +97 -118
  94. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/WHEEL +1 -1
  95. cognee/api/v1/ui/node_setup.py +0 -360
  96. cognee/api/v1/ui/npm_utils.py +0 -50
  97. cognee/eval_framework/Dockerfile +0 -29
  98. cognee/infrastructure/databases/dataset_database_handler/__init__.py +0 -3
  99. cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +0 -80
  100. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +0 -18
  101. cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +0 -10
  102. cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +0 -81
  103. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +0 -168
  104. cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +0 -10
  105. cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +0 -10
  106. cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +0 -30
  107. cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +0 -50
  108. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +0 -5
  109. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +0 -153
  110. cognee/memify_pipelines/create_triplet_embeddings.py +0 -53
  111. cognee/modules/engine/models/Triplet.py +0 -9
  112. cognee/modules/retrieval/register_retriever.py +0 -10
  113. cognee/modules/retrieval/registered_community_retrievers.py +0 -1
  114. cognee/modules/retrieval/triplet_retriever.py +0 -182
  115. cognee/shared/rate_limiting.py +0 -30
  116. cognee/tasks/memify/get_triplet_datapoints.py +0 -289
  117. cognee/tests/integration/retrieval/test_triplet_retriever.py +0 -84
  118. cognee/tests/integration/tasks/test_add_data_points.py +0 -139
  119. cognee/tests/integration/tasks/test_get_triplet_datapoints.py +0 -69
  120. cognee/tests/test_dataset_database_handler.py +0 -137
  121. cognee/tests/test_dataset_delete.py +0 -76
  122. cognee/tests/test_edge_centered_payload.py +0 -170
  123. cognee/tests/test_pipeline_cache.py +0 -164
  124. cognee/tests/unit/infrastructure/llm/test_llm_config.py +0 -46
  125. cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +0 -214
  126. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +0 -608
  127. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +0 -83
  128. cognee/tests/unit/tasks/storage/test_add_data_points.py +0 -288
  129. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/entry_points.txt +0 -0
  130. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/LICENSE +0 -0
  131. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
cognee/api/client.py CHANGED
@@ -21,7 +21,7 @@ from cognee.api.v1.notebooks.routers import get_notebooks_router
  from cognee.api.v1.permissions.routers import get_permissions_router
  from cognee.api.v1.settings.routers import get_settings_router
  from cognee.api.v1.datasets.routers import get_datasets_router
- from cognee.api.v1.cognify.routers import get_cognify_router
+ from cognee.api.v1.cognify.routers import get_code_pipeline_router, get_cognify_router
  from cognee.api.v1.search.routers import get_search_router
  from cognee.api.v1.ontologies.routers.get_ontology_router import get_ontology_router
  from cognee.api.v1.memify.routers import get_memify_router
@@ -278,6 +278,10 @@ app.include_router(get_responses_router(), prefix="/api/v1/responses", tags=["re

  app.include_router(get_sync_router(), prefix="/api/v1/sync", tags=["sync"])

+ codegraph_routes = get_code_pipeline_router()
+ if codegraph_routes:
+     app.include_router(codegraph_routes, prefix="/api/v1/code-pipeline", tags=["code-pipeline"])
+
  app.include_router(
      get_users_router(),
      prefix="/api/v1/users",
cognee/api/v1/add/add.py CHANGED
@@ -155,7 +155,7 @@ async def add(
      - LLM_API_KEY: API key for your LLM provider (OpenAI, Anthropic, etc.)

      Optional:
-     - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama", "mistral", "bedrock"
+     - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama", "mistral"
      - LLM_MODEL: Model name (default: "gpt-5-mini")
      - DEFAULT_USER_EMAIL: Custom default user email
      - DEFAULT_USER_PASSWORD: Custom default user password
@@ -205,7 +205,6 @@ async def add(
          pipeline_name="add_pipeline",
          vector_db_config=vector_db_config,
          graph_db_config=graph_db_config,
-         use_pipeline_cache=True,
          incremental_loading=incremental_loading,
          data_per_batch=data_per_batch,
      ):
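A minimal configuration sketch (not part of the diff; values are placeholders): the docstring above lists the environment variables the add pipeline reads, and after this change "bedrock" is no longer an accepted LLM_PROVIDER value.

import os

os.environ["LLM_API_KEY"] = "<your-provider-api-key>"  # required
os.environ["LLM_PROVIDER"] = "openai"  # one of: openai, anthropic, gemini, ollama, mistral
os.environ["LLM_MODEL"] = "gpt-5-mini"  # optional; documented default shown above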
cognee/api/v1/cognify/code_graph_pipeline.py ADDED
@@ -0,0 +1,119 @@
+ import os
+ import pathlib
+ import asyncio
+ from typing import Optional
+ from cognee.shared.logging_utils import get_logger, setup_logging
+ from cognee.modules.observability.get_observe import get_observe
+
+ from cognee.api.v1.search import SearchType, search
+ from cognee.api.v1.visualize.visualize import visualize_graph
+ from cognee.modules.cognify.config import get_cognify_config
+ from cognee.modules.pipelines import run_tasks
+ from cognee.modules.pipelines.tasks.task import Task
+ from cognee.modules.users.methods import get_default_user
+ from cognee.shared.data_models import KnowledgeGraph
+ from cognee.modules.data.methods import create_dataset
+ from cognee.tasks.documents import classify_documents, extract_chunks_from_documents
+ from cognee.tasks.graph import extract_graph_from_data
+ from cognee.tasks.ingestion import ingest_data
+ from cognee.tasks.repo_processor import get_non_py_files, get_repo_file_dependencies
+
+ from cognee.tasks.storage import add_data_points
+ from cognee.tasks.summarization import summarize_text
+ from cognee.infrastructure.llm import get_max_chunk_tokens
+ from cognee.infrastructure.databases.relational import get_relational_engine
+
+ observe = get_observe()
+
+ logger = get_logger("code_graph_pipeline")
+
+
+ @observe
+ async def run_code_graph_pipeline(
+     repo_path,
+     include_docs=False,
+     excluded_paths: Optional[list[str]] = None,
+     supported_languages: Optional[list[str]] = None,
+ ):
+     import cognee
+     from cognee.low_level import setup
+
+     await cognee.prune.prune_data()
+     await cognee.prune.prune_system(metadata=True)
+     await setup()
+
+     cognee_config = get_cognify_config()
+     user = await get_default_user()
+     detailed_extraction = True
+
+     tasks = [
+         Task(
+             get_repo_file_dependencies,
+             detailed_extraction=detailed_extraction,
+             supported_languages=supported_languages,
+             excluded_paths=excluded_paths,
+         ),
+         # Task(summarize_code, task_config={"batch_size": 500}), # This task takes a long time to complete
+         Task(add_data_points, task_config={"batch_size": 30}),
+     ]
+
+     if include_docs:
+         # This tasks take a long time to complete
+         non_code_tasks = [
+             Task(get_non_py_files, task_config={"batch_size": 50}),
+             Task(ingest_data, dataset_name="repo_docs", user=user),
+             Task(classify_documents),
+             Task(extract_chunks_from_documents, max_chunk_size=get_max_chunk_tokens()),
+             Task(
+                 extract_graph_from_data,
+                 graph_model=KnowledgeGraph,
+                 task_config={"batch_size": 50},
+             ),
+             Task(
+                 summarize_text,
+                 summarization_model=cognee_config.summarization_model,
+                 task_config={"batch_size": 50},
+             ),
+         ]
+
+     dataset_name = "codebase"
+
+     # Save dataset to database
+     db_engine = get_relational_engine()
+     async with db_engine.get_async_session() as session:
+         dataset = await create_dataset(dataset_name, user, session)
+
+     if include_docs:
+         non_code_pipeline_run = run_tasks(
+             non_code_tasks, dataset.id, repo_path, user, "cognify_pipeline"
+         )
+         async for run_status in non_code_pipeline_run:
+             yield run_status
+
+     async for run_status in run_tasks(
+         tasks, dataset.id, repo_path, user, "cognify_code_pipeline", incremental_loading=False
+     ):
+         yield run_status
+
+
+ if __name__ == "__main__":
+
+     async def main():
+         async for run_status in run_code_graph_pipeline("REPO_PATH"):
+             print(f"{run_status.pipeline_run_id}: {run_status.status}")
+
+         file_path = os.path.join(
+             pathlib.Path(__file__).parent, ".artifacts", "graph_visualization.html"
+         )
+         await visualize_graph(file_path)
+
+         search_results = await search(
+             query_type=SearchType.CODE,
+             query_text="How is Relationship weight calculated?",
+         )
+
+         for file in search_results:
+             print(file["name"])
+
+     logger = setup_logging(name="code_graph_pipeline")
+     asyncio.run(main())
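A usage sketch (not part of the diff; the repository path is a placeholder): run_code_graph_pipeline added above is an async generator, so callers drive it with async for, mirroring the __main__ block in the new module.

import asyncio

from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline


async def index_repository(repo_path: str):
    # Each yielded item is a pipeline run status object (see the __main__ block above)
    async for run_status in run_code_graph_pipeline(repo_path, include_docs=False):
        print(f"{run_status.pipeline_run_id}: {run_status.status}")


asyncio.run(index_repository("/path/to/local/repo"))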
cognee/api/v1/cognify/cognify.py CHANGED
@@ -3,7 +3,6 @@ from pydantic import BaseModel
  from typing import Union, Optional
  from uuid import UUID

- from cognee.modules.cognify.config import get_cognify_config
  from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
  from cognee.shared.logging_utils import get_logger
  from cognee.shared.data_models import KnowledgeGraph
@@ -20,6 +19,7 @@ from cognee.modules.ontology.get_default_ontology_resolver import (
  from cognee.modules.users.models import User

  from cognee.tasks.documents import (
+     check_permissions_on_dataset,
      classify_documents,
      extract_chunks_from_documents,
  )
@@ -53,7 +53,6 @@ async def cognify(
      custom_prompt: Optional[str] = None,
      temporal_cognify: bool = False,
      data_per_batch: int = 20,
-     **kwargs,
  ):
      """
      Transform ingested data into a structured knowledge graph.
@@ -79,11 +78,12 @@

      Processing Pipeline:
      1. **Document Classification**: Identifies document types and structures
-     2. **Text Chunking**: Breaks content into semantically meaningful segments
-     3. **Entity Extraction**: Identifies key concepts, people, places, organizations
-     4. **Relationship Detection**: Discovers connections between entities
-     5. **Graph Construction**: Builds semantic knowledge graph with embeddings
-     6. **Content Summarization**: Creates hierarchical summaries for navigation
+     2. **Permission Validation**: Ensures user has processing rights
+     3. **Text Chunking**: Breaks content into semantically meaningful segments
+     4. **Entity Extraction**: Identifies key concepts, people, places, organizations
+     5. **Relationship Detection**: Discovers connections between entities
+     6. **Graph Construction**: Builds semantic knowledge graph with embeddings
+     7. **Content Summarization**: Creates hierarchical summaries for navigation

      Graph Model Customization:
      The `graph_model` parameter allows custom knowledge structures:
@@ -224,7 +224,6 @@
          config=config,
          custom_prompt=custom_prompt,
          chunks_per_batch=chunks_per_batch,
-         **kwargs,
      )

      # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
@@ -239,7 +238,6 @@
          vector_db_config=vector_db_config,
          graph_db_config=graph_db_config,
          incremental_loading=incremental_loading,
-         use_pipeline_cache=True,
          pipeline_name="cognify_pipeline",
          data_per_batch=data_per_batch,
      )
@@ -253,7 +251,6 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
      config: Config = None,
      custom_prompt: Optional[str] = None,
      chunks_per_batch: int = 100,
-     **kwargs,
  ) -> list[Task]:
      if config is None:
          ontology_config = get_ontology_env_config()
@@ -275,11 +272,9 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
      if chunks_per_batch is None:
          chunks_per_batch = 100

-     cognify_config = get_cognify_config()
-     embed_triplets = cognify_config.triplet_embedding
-
      default_tasks = [
          Task(classify_documents),
+         Task(check_permissions_on_dataset, user=user, permissions=["write"]),
          Task(
              extract_chunks_from_documents,
              max_chunk_size=chunk_size or get_max_chunk_tokens(),
@@ -291,17 +286,12 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
              config=config,
              custom_prompt=custom_prompt,
              task_config={"batch_size": chunks_per_batch},
-             **kwargs,
          ), # Generate knowledge graphs from the document chunks.
          Task(
              summarize_text,
              task_config={"batch_size": chunks_per_batch},
          ),
-         Task(
-             add_data_points,
-             embed_triplets=embed_triplets,
-             task_config={"batch_size": chunks_per_batch},
-         ),
+         Task(add_data_points, task_config={"batch_size": chunks_per_batch}),
      ]

      return default_tasks
@@ -315,13 +305,14 @@ async def get_temporal_tasks(

      The pipeline includes:
      1. Document classification.
-     2. Document chunking with a specified or default chunk size.
-     3. Event and timestamp extraction from chunks.
-     4. Knowledge graph extraction from events.
-     5. Batched insertion of data points.
+     2. Dataset permission checks (requires "write" access).
+     3. Document chunking with a specified or default chunk size.
+     4. Event and timestamp extraction from chunks.
+     5. Knowledge graph extraction from events.
+     6. Batched insertion of data points.

      Args:
-         user (User, optional): The user requesting task execution.
+         user (User, optional): The user requesting task execution, used for permission checks.
          chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
          chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
          chunks_per_batch (int, optional): Number of chunks to process in a single batch in Cognify
@@ -334,6 +325,7 @@ async def get_temporal_tasks(

      temporal_tasks = [
          Task(classify_documents),
+         Task(check_permissions_on_dataset, user=user, permissions=["write"]),
          Task(
              extract_chunks_from_documents,
              max_chunk_size=chunk_size or get_max_chunk_tokens(),
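A minimal end-user sketch (assuming data is ingested first; the sample text is illustrative): with this change cognify() no longer accepts **kwargs, and the default task list runs the new check_permissions_on_dataset task between document classification and chunking.

import asyncio

import cognee


async def main():
    # Ingest a document, then build the knowledge graph with the default cognify task list
    await cognee.add("Cognee turns documents into a knowledge graph.")
    await cognee.cognify()


asyncio.run(main())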
cognee/api/v1/cognify/routers/__init__.py CHANGED
@@ -1 +1,2 @@
  from .get_cognify_router import get_cognify_router
+ from .get_code_pipeline_router import get_code_pipeline_router
cognee/api/v1/cognify/routers/get_code_pipeline_router.py ADDED
@@ -0,0 +1,90 @@
+ import json
+ from cognee.shared.logging_utils import get_logger
+ from fastapi import APIRouter
+ from fastapi.responses import JSONResponse
+ from cognee.api.DTO import InDTO
+ from cognee.modules.retrieval.code_retriever import CodeRetriever
+ from cognee.modules.storage.utils import JSONEncoder
+
+
+ logger = get_logger()
+
+
+ class CodePipelineIndexPayloadDTO(InDTO):
+     repo_path: str
+     include_docs: bool = False
+
+
+ class CodePipelineRetrievePayloadDTO(InDTO):
+     query: str
+     full_input: str
+
+
+ def get_code_pipeline_router() -> APIRouter:
+     try:
+         import cognee.api.v1.cognify.code_graph_pipeline
+     except ModuleNotFoundError:
+         logger.error("codegraph dependencies not found. Skipping codegraph API routes.")
+         return None
+
+     router = APIRouter()
+
+     @router.post("/index", response_model=None)
+     async def code_pipeline_index(payload: CodePipelineIndexPayloadDTO):
+         """
+         Run indexation on a code repository.
+
+         This endpoint processes a code repository to create a knowledge graph
+         of the codebase structure, dependencies, and relationships.
+
+         ## Request Parameters
+         - **repo_path** (str): Path to the code repository
+         - **include_docs** (bool): Whether to include documentation files (default: false)
+
+         ## Response
+         No content returned. Processing results are logged.
+
+         ## Error Codes
+         - **409 Conflict**: Error during indexation process
+         """
+         from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline
+
+         try:
+             async for result in run_code_graph_pipeline(payload.repo_path, payload.include_docs):
+                 logger.info(result)
+         except Exception as error:
+             return JSONResponse(status_code=409, content={"error": str(error)})
+
+     @router.post("/retrieve", response_model=list[dict])
+     async def code_pipeline_retrieve(payload: CodePipelineRetrievePayloadDTO):
+         """
+         Retrieve context from the code knowledge graph.
+
+         This endpoint searches the indexed code repository to find relevant
+         context based on the provided query.
+
+         ## Request Parameters
+         - **query** (str): Search query for code context
+         - **full_input** (str): Full input text for processing
+
+         ## Response
+         Returns a list of relevant code files and context as JSON.
+
+         ## Error Codes
+         - **409 Conflict**: Error during retrieval process
+         """
+         try:
+             query = (
+                 payload.full_input.replace("cognee ", "")
+                 if payload.full_input.startswith("cognee ")
+                 else payload.full_input
+             )
+
+             retriever = CodeRetriever()
+             retrieved_files = await retriever.get_context(query)
+
+             return json.dumps(retrieved_files, cls=JSONEncoder)
+         except Exception as error:
+             return JSONResponse(status_code=409, content={"error": str(error)})
+
+     return router
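A client-side sketch (assumptions: the server runs locally on port 8000, authentication is omitted, and the JSON field casing follows the DTO declarations above, although InDTO may apply different aliasing): client.py mounts this router at /api/v1/code-pipeline, so the two endpoints can be exercised roughly as follows.

import asyncio

import httpx


async def main():
    async with httpx.AsyncClient(base_url="http://localhost:8000", timeout=None) as client:
        # Kick off indexation of a repository (results are logged on the server side)
        await client.post(
            "/api/v1/code-pipeline/index",
            json={"repo_path": "/path/to/local/repo", "include_docs": False},  # field names assumed from the DTO
        )
        # Ask for code context related to a query
        response = await client.post(
            "/api/v1/code-pipeline/retrieve",
            json={"query": "relationship weight", "full_input": "cognee relationship weight"},
        )
        print(response.json())


asyncio.run(main())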
cognee/api/v1/cognify/routers/get_cognify_router.py CHANGED
@@ -42,9 +42,7 @@ class CognifyPayloadDTO(InDTO):
          default="", description="Custom prompt for entity extraction and graph generation"
      )
      ontology_key: Optional[List[str]] = Field(
-         default=None,
-         examples=[[]],
-         description="Reference to one or more previously uploaded ontologies",
+         default=None, description="Reference to one or more previously uploaded ontologies"
      )


cognee/api/v1/datasets/routers/get_datasets_router.py CHANGED
@@ -208,14 +208,14 @@ def get_datasets_router() -> APIRouter:
              },
          )

-         from cognee.modules.data.methods import delete_dataset
+         from cognee.modules.data.methods import get_dataset, delete_dataset

-         dataset = await get_authorized_existing_datasets([dataset_id], "delete", user)
+         dataset = await get_dataset(user.id, dataset_id)

          if dataset is None:
              raise DatasetNotFoundError(message=f"Dataset ({str(dataset_id)}) not found.")

-         await delete_dataset(dataset[0])
+         await delete_dataset(dataset)

      @router.delete(
          "/{dataset_id}/data/{data_id}",
cognee/api/v1/ontologies/ontologies.py CHANGED
@@ -5,7 +5,6 @@ from pathlib import Path
  from datetime import datetime, timezone
  from typing import Optional, List
  from dataclasses import dataclass
- from fastapi import UploadFile


  @dataclass
@@ -46,10 +45,8 @@ class OntologyService:
              json.dump(metadata, f, indent=2)

      async def upload_ontology(
-         self, ontology_key: str, file: UploadFile, user, description: Optional[str] = None
+         self, ontology_key: str, file, user, description: Optional[str] = None
      ) -> OntologyMetadata:
-         if not file.filename:
-             raise ValueError("File must have a filename")
          if not file.filename.lower().endswith(".owl"):
              raise ValueError("File must be in .owl format")

@@ -60,6 +57,8 @@
              raise ValueError(f"Ontology key '{ontology_key}' already exists")

          content = await file.read()
+         if len(content) > 10 * 1024 * 1024:
+             raise ValueError("File size exceeds 10MB limit")

          file_path = user_dir / f"{ontology_key}.owl"
          with open(file_path, "wb") as f:
@@ -83,11 +82,7 @@
          )

      async def upload_ontologies(
-         self,
-         ontology_key: List[str],
-         files: List[UploadFile],
-         user,
-         descriptions: Optional[List[str]] = None,
+         self, ontology_key: List[str], files: List, user, descriptions: Optional[List[str]] = None
      ) -> List[OntologyMetadata]:
          """
          Upload ontology files with their respective keys.
@@ -110,17 +105,47 @@
          if len(set(ontology_key)) != len(ontology_key):
              raise ValueError("Duplicate ontology keys not allowed")

+         if descriptions and len(descriptions) != len(files):
+             raise ValueError("Number of descriptions must match number of files")
+
          results = []
+         user_dir = self._get_user_dir(str(user.id))
+         metadata = self._load_metadata(user_dir)

          for i, (key, file) in enumerate(zip(ontology_key, files)):
+             if key in metadata:
+                 raise ValueError(f"Ontology key '{key}' already exists")
+
+             if not file.filename.lower().endswith(".owl"):
+                 raise ValueError(f"File '{file.filename}' must be in .owl format")
+
+             content = await file.read()
+             if len(content) > 10 * 1024 * 1024:
+                 raise ValueError(f"File '{file.filename}' exceeds 10MB limit")
+
+             file_path = user_dir / f"{key}.owl"
+             with open(file_path, "wb") as f:
+                 f.write(content)
+
+             ontology_metadata = {
+                 "filename": file.filename,
+                 "size_bytes": len(content),
+                 "uploaded_at": datetime.now(timezone.utc).isoformat(),
+                 "description": descriptions[i] if descriptions else None,
+             }
+             metadata[key] = ontology_metadata
+
              results.append(
-                 await self.upload_ontology(
+                 OntologyMetadata(
                      ontology_key=key,
-                     file=file,
-                     user=user,
+                     filename=file.filename,
+                     size_bytes=len(content),
+                     uploaded_at=ontology_metadata["uploaded_at"],
                      description=descriptions[i] if descriptions else None,
                  )
              )
+
+         self._save_metadata(user_dir, metadata)
          return results

      def get_ontology_contents(self, ontology_key: List[str], user) -> List[str]:
cognee/api/v1/ontologies/routers/get_ontology_router.py CHANGED
@@ -1,4 +1,4 @@
- from fastapi import APIRouter, File, Form, UploadFile, Depends, Request
+ from fastapi import APIRouter, File, Form, UploadFile, Depends, HTTPException
  from fastapi.responses import JSONResponse
  from typing import Optional, List

@@ -15,25 +15,28 @@ def get_ontology_router() -> APIRouter:

      @router.post("", response_model=dict)
      async def upload_ontology(
-         request: Request,
          ontology_key: str = Form(...),
-         ontology_file: UploadFile = File(...),
-         description: Optional[str] = Form(None),
+         ontology_file: List[UploadFile] = File(...),
+         descriptions: Optional[str] = Form(None),
          user: User = Depends(get_authenticated_user),
      ):
          """
-         Upload a single ontology file for later use in cognify operations.
+         Upload ontology files with their respective keys for later use in cognify operations.
+
+         Supports both single and multiple file uploads:
+         - Single file: ontology_key=["key"], ontology_file=[file]
+         - Multiple files: ontology_key=["key1", "key2"], ontology_file=[file1, file2]

          ## Request Parameters
-         - **ontology_key** (str): User-defined identifier for the ontology.
-         - **ontology_file** (UploadFile): Single OWL format ontology file
-         - **description** (Optional[str]): Optional description for the ontology.
+         - **ontology_key** (str): JSON array string of user-defined identifiers for the ontologies
+         - **ontology_file** (List[UploadFile]): OWL format ontology files
+         - **descriptions** (Optional[str]): JSON array string of optional descriptions

          ## Response
-         Returns metadata about the uploaded ontology including key, filename, size, and upload timestamp.
+         Returns metadata about uploaded ontologies including keys, filenames, sizes, and upload timestamps.

          ## Error Codes
-         - **400 Bad Request**: Invalid file format, duplicate key, multiple files uploaded
+         - **400 Bad Request**: Invalid file format, duplicate keys, array length mismatches, file size exceeded
          - **500 Internal Server Error**: File system or processing errors
          """
          send_telemetry(
@@ -46,22 +49,16 @@
          )

          try:
-             # Enforce: exactly one uploaded file for "ontology_file"
-             form = await request.form()
-             uploaded_files = form.getlist("ontology_file")
-             if len(uploaded_files) != 1:
-                 raise ValueError("Only one ontology_file is allowed")
-
-             if ontology_key.strip().startswith(("[", "{")):
-                 raise ValueError("ontology_key must be a string")
-             if description is not None and description.strip().startswith(("[", "{")):
-                 raise ValueError("description must be a string")
-
-             result = await ontology_service.upload_ontology(
-                 ontology_key=ontology_key,
-                 file=ontology_file,
-                 user=user,
-                 description=description,
+             import json
+
+             ontology_keys = json.loads(ontology_key)
+             description_list = json.loads(descriptions) if descriptions else None
+
+             if not isinstance(ontology_keys, list):
+                 raise ValueError("ontology_key must be a JSON array")
+
+             results = await ontology_service.upload_ontologies(
+                 ontology_keys, ontology_file, user, description_list
              )

              return {
@@ -73,9 +70,10 @@
                          "uploaded_at": result.uploaded_at,
                          "description": result.description,
                      }
+                     for result in results
                  ]
              }
-         except ValueError as e:
+         except (json.JSONDecodeError, ValueError) as e:
              return JSONResponse(status_code=400, content={"error": str(e)})
          except Exception as e:
              return JSONResponse(status_code=500, content={"error": str(e)})
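A hedged upload sketch (assumptions: the ontology router is mounted at /api/v1/ontologies, the server runs locally, and auth and error handling are omitted): the reworked endpoint expects ontology_key and descriptions as JSON-encoded array strings, while ontology_file carries one or more .owl files.

import json

import httpx

# The mount path below is an assumption; adjust it to wherever get_ontology_router is included.
files = [
    ("ontology_file", ("animals.owl", open("animals.owl", "rb"), "application/rdf+xml")),
    ("ontology_file", ("plants.owl", open("plants.owl", "rb"), "application/rdf+xml")),
]
data = {
    "ontology_key": json.dumps(["animals", "plants"]),  # JSON array string, one key per file
    "descriptions": json.dumps(["Animal taxonomy", "Plant taxonomy"]),
}
response = httpx.post("http://localhost:8000/api/v1/ontologies", data=data, files=files)
print(response.json())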
cognee/api/v1/search/search.py CHANGED
@@ -31,8 +31,6 @@ async def search(
      only_context: bool = False,
      use_combined_context: bool = False,
      session_id: Optional[str] = None,
-     wide_search_top_k: Optional[int] = 100,
-     triplet_distance_penalty: Optional[float] = 3.5,
  ) -> Union[List[SearchResult], CombinedSearchResult]:
      """
      Search and query the knowledge graph for insights, information, and connections.
@@ -202,8 +200,6 @@ async def search(
          only_context=only_context,
          use_combined_context=use_combined_context,
          session_id=session_id,
-         wide_search_top_k=wide_search_top_k,
-         triplet_distance_penalty=triplet_distance_penalty,
      )

      return filtered_search_results