cognee 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. cognee/__init__.py +1 -0
  2. cognee/api/client.py +9 -5
  3. cognee/api/v1/add/add.py +2 -1
  4. cognee/api/v1/add/routers/get_add_router.py +3 -1
  5. cognee/api/v1/cognify/cognify.py +24 -16
  6. cognee/api/v1/cognify/routers/__init__.py +0 -1
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +30 -1
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
  9. cognee/api/v1/ontologies/__init__.py +4 -0
  10. cognee/api/v1/ontologies/ontologies.py +158 -0
  11. cognee/api/v1/ontologies/routers/__init__.py +0 -0
  12. cognee/api/v1/ontologies/routers/get_ontology_router.py +109 -0
  13. cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
  14. cognee/api/v1/search/search.py +4 -0
  15. cognee/api/v1/ui/node_setup.py +360 -0
  16. cognee/api/v1/ui/npm_utils.py +50 -0
  17. cognee/api/v1/ui/ui.py +38 -68
  18. cognee/cli/commands/cognify_command.py +8 -1
  19. cognee/cli/config.py +1 -1
  20. cognee/context_global_variables.py +86 -9
  21. cognee/eval_framework/Dockerfile +29 -0
  22. cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
  23. cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
  24. cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
  25. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
  26. cognee/eval_framework/eval_config.py +2 -2
  27. cognee/eval_framework/modal_run_eval.py +16 -28
  28. cognee/infrastructure/databases/cache/config.py +3 -1
  29. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
  30. cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
  31. cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
  32. cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
  33. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
  34. cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
  35. cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
  36. cognee/infrastructure/databases/graph/config.py +7 -0
  37. cognee/infrastructure/databases/graph/get_graph_engine.py +3 -0
  38. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
  39. cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
  40. cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
  41. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
  42. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
  43. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
  44. cognee/infrastructure/databases/utils/__init__.py +3 -0
  45. cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
  46. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +66 -18
  47. cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
  48. cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
  49. cognee/infrastructure/databases/vector/config.py +5 -0
  50. cognee/infrastructure/databases/vector/create_vector_engine.py +6 -1
  51. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
  52. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
  53. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -10
  54. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
  55. cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
  57. cognee/infrastructure/engine/models/Edge.py +13 -1
  58. cognee/infrastructure/files/storage/s3_config.py +2 -0
  59. cognee/infrastructure/files/utils/guess_file_type.py +4 -0
  60. cognee/infrastructure/llm/LLMGateway.py +5 -2
  61. cognee/infrastructure/llm/config.py +37 -0
  62. cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
  64. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +22 -18
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +47 -38
  68. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +46 -37
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +20 -10
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +23 -11
  71. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +36 -23
  72. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +47 -36
  73. cognee/infrastructure/loaders/LoaderEngine.py +1 -0
  74. cognee/infrastructure/loaders/core/__init__.py +2 -1
  75. cognee/infrastructure/loaders/core/csv_loader.py +93 -0
  76. cognee/infrastructure/loaders/core/text_loader.py +1 -2
  77. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
  78. cognee/infrastructure/loaders/supported_loaders.py +2 -1
  79. cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
  80. cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
  81. cognee/modules/chunking/CsvChunker.py +35 -0
  82. cognee/modules/chunking/models/DocumentChunk.py +2 -1
  83. cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
  84. cognee/modules/cognify/config.py +2 -0
  85. cognee/modules/data/deletion/prune_system.py +52 -2
  86. cognee/modules/data/methods/__init__.py +1 -0
  87. cognee/modules/data/methods/create_dataset.py +4 -2
  88. cognee/modules/data/methods/delete_dataset.py +26 -0
  89. cognee/modules/data/methods/get_dataset_ids.py +5 -1
  90. cognee/modules/data/methods/get_unique_data_id.py +68 -0
  91. cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
  92. cognee/modules/data/models/Dataset.py +2 -0
  93. cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
  94. cognee/modules/data/processing/document_types/__init__.py +1 -0
  95. cognee/modules/engine/models/Triplet.py +9 -0
  96. cognee/modules/engine/models/__init__.py +1 -0
  97. cognee/modules/graph/cognee_graph/CogneeGraph.py +89 -39
  98. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
  99. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
  100. cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
  101. cognee/modules/ingestion/identify.py +4 -4
  102. cognee/modules/memify/memify.py +1 -7
  103. cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
  104. cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
  105. cognee/modules/pipelines/operations/pipeline.py +18 -2
  106. cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
  107. cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
  108. cognee/modules/retrieval/__init__.py +1 -1
  109. cognee/modules/retrieval/base_graph_retriever.py +7 -3
  110. cognee/modules/retrieval/base_retriever.py +7 -3
  111. cognee/modules/retrieval/completion_retriever.py +11 -4
  112. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +10 -2
  113. cognee/modules/retrieval/graph_completion_cot_retriever.py +18 -51
  114. cognee/modules/retrieval/graph_completion_retriever.py +14 -1
  115. cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
  116. cognee/modules/retrieval/register_retriever.py +10 -0
  117. cognee/modules/retrieval/registered_community_retrievers.py +1 -0
  118. cognee/modules/retrieval/temporal_retriever.py +13 -2
  119. cognee/modules/retrieval/triplet_retriever.py +182 -0
  120. cognee/modules/retrieval/utils/brute_force_triplet_search.py +43 -11
  121. cognee/modules/retrieval/utils/completion.py +2 -22
  122. cognee/modules/run_custom_pipeline/__init__.py +1 -0
  123. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +76 -0
  124. cognee/modules/search/methods/get_search_type_tools.py +54 -8
  125. cognee/modules/search/methods/no_access_control_search.py +4 -0
  126. cognee/modules/search/methods/search.py +26 -3
  127. cognee/modules/search/types/SearchType.py +1 -1
  128. cognee/modules/settings/get_settings.py +19 -0
  129. cognee/modules/users/methods/create_user.py +12 -27
  130. cognee/modules/users/methods/get_authenticated_user.py +3 -2
  131. cognee/modules/users/methods/get_default_user.py +4 -2
  132. cognee/modules/users/methods/get_user.py +1 -1
  133. cognee/modules/users/methods/get_user_by_email.py +1 -1
  134. cognee/modules/users/models/DatasetDatabase.py +24 -3
  135. cognee/modules/users/models/Tenant.py +6 -7
  136. cognee/modules/users/models/User.py +6 -5
  137. cognee/modules/users/models/UserTenant.py +12 -0
  138. cognee/modules/users/models/__init__.py +1 -0
  139. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
  140. cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
  141. cognee/modules/users/tenants/methods/__init__.py +1 -0
  142. cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
  143. cognee/modules/users/tenants/methods/create_tenant.py +22 -8
  144. cognee/modules/users/tenants/methods/select_tenant.py +62 -0
  145. cognee/shared/logging_utils.py +6 -0
  146. cognee/shared/rate_limiting.py +30 -0
  147. cognee/tasks/chunks/__init__.py +1 -0
  148. cognee/tasks/chunks/chunk_by_row.py +94 -0
  149. cognee/tasks/documents/__init__.py +0 -1
  150. cognee/tasks/documents/classify_documents.py +2 -0
  151. cognee/tasks/feedback/generate_improved_answers.py +3 -3
  152. cognee/tasks/graph/extract_graph_from_data.py +9 -10
  153. cognee/tasks/ingestion/ingest_data.py +1 -1
  154. cognee/tasks/memify/__init__.py +2 -0
  155. cognee/tasks/memify/cognify_session.py +41 -0
  156. cognee/tasks/memify/extract_user_sessions.py +73 -0
  157. cognee/tasks/memify/get_triplet_datapoints.py +289 -0
  158. cognee/tasks/storage/add_data_points.py +142 -2
  159. cognee/tasks/storage/index_data_points.py +33 -22
  160. cognee/tasks/storage/index_graph_edges.py +37 -57
  161. cognee/tests/integration/documents/CsvDocument_test.py +70 -0
  162. cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
  163. cognee/tests/integration/tasks/test_add_data_points.py +139 -0
  164. cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
  165. cognee/tests/integration/web_url_crawler/test_default_url_crawler.py +1 -1
  166. cognee/tests/integration/web_url_crawler/test_tavily_crawler.py +1 -1
  167. cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py +13 -27
  168. cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
  169. cognee/tests/test_add_docling_document.py +2 -2
  170. cognee/tests/test_cognee_server_start.py +84 -3
  171. cognee/tests/test_conversation_history.py +68 -5
  172. cognee/tests/test_data/example_with_header.csv +3 -0
  173. cognee/tests/test_dataset_database_handler.py +137 -0
  174. cognee/tests/test_dataset_delete.py +76 -0
  175. cognee/tests/test_edge_centered_payload.py +170 -0
  176. cognee/tests/test_edge_ingestion.py +27 -0
  177. cognee/tests/test_feedback_enrichment.py +1 -1
  178. cognee/tests/test_library.py +6 -4
  179. cognee/tests/test_load.py +62 -0
  180. cognee/tests/test_multi_tenancy.py +165 -0
  181. cognee/tests/test_parallel_databases.py +2 -0
  182. cognee/tests/test_pipeline_cache.py +164 -0
  183. cognee/tests/test_relational_db_migration.py +54 -2
  184. cognee/tests/test_search_db.py +44 -2
  185. cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
  186. cognee/tests/unit/api/test_ontology_endpoint.py +252 -0
  187. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
  188. cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
  189. cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
  190. cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
  191. cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
  192. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
  193. cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
  194. cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
  195. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
  196. cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
  197. cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
  198. cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
  199. cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
  200. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
  201. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
  202. cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
  203. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
  204. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
  205. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
  206. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
  207. cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
  208. cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
  209. cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
  210. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/METADATA +11 -6
  211. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/RECORD +215 -163
  212. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/WHEEL +1 -1
  213. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/entry_points.txt +0 -1
  214. cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
  215. cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
  216. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
  217. cognee/modules/retrieval/code_retriever.py +0 -232
  218. cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
  219. cognee/tasks/code/get_local_dependencies_checker.py +0 -20
  220. cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
  221. cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
  222. cognee/tasks/repo_processor/__init__.py +0 -2
  223. cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
  224. cognee/tasks/repo_processor/get_non_code_files.py +0 -158
  225. cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
  226. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/licenses/LICENSE +0 -0
  227. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/licenses/NOTICE.md +0 -0
cognee/__init__.py CHANGED
@@ -19,6 +19,7 @@ from .api.v1.add import add
19
19
  from .api.v1.delete import delete
20
20
  from .api.v1.cognify import cognify
21
21
  from .modules.memify import memify
22
+ from .modules.run_custom_pipeline import run_custom_pipeline
22
23
  from .api.v1.update import update
23
24
  from .api.v1.config.config import config
24
25
  from .api.v1.datasets.datasets import datasets
cognee/api/client.py CHANGED
@@ -21,8 +21,9 @@ from cognee.api.v1.notebooks.routers import get_notebooks_router
21
21
  from cognee.api.v1.permissions.routers import get_permissions_router
22
22
  from cognee.api.v1.settings.routers import get_settings_router
23
23
  from cognee.api.v1.datasets.routers import get_datasets_router
24
- from cognee.api.v1.cognify.routers import get_code_pipeline_router, get_cognify_router
24
+ from cognee.api.v1.cognify.routers import get_cognify_router
25
25
  from cognee.api.v1.search.routers import get_search_router
26
+ from cognee.api.v1.ontologies.routers.get_ontology_router import get_ontology_router
26
27
  from cognee.api.v1.memify.routers import get_memify_router
27
28
  from cognee.api.v1.add.routers import get_add_router
28
29
  from cognee.api.v1.delete.routers import get_delete_router
@@ -39,6 +40,8 @@ from cognee.api.v1.users.routers import (
39
40
  )
40
41
  from cognee.modules.users.methods.get_authenticated_user import REQUIRE_AUTHENTICATION
41
42
 
43
+ # Ensure application logging is configured for container stdout/stderr
44
+ setup_logging()
42
45
  logger = get_logger()
43
46
 
44
47
  if os.getenv("ENV", "prod") == "prod":
@@ -74,6 +77,9 @@ async def lifespan(app: FastAPI):
74
77
 
75
78
  await get_default_user()
76
79
 
80
+ # Emit a clear startup message for docker logs
81
+ logger.info("Backend server has started")
82
+
77
83
  yield
78
84
 
79
85
 
@@ -258,6 +264,8 @@ app.include_router(
258
264
 
259
265
  app.include_router(get_datasets_router(), prefix="/api/v1/datasets", tags=["datasets"])
260
266
 
267
+ app.include_router(get_ontology_router(), prefix="/api/v1/ontologies", tags=["ontologies"])
268
+
261
269
  app.include_router(get_settings_router(), prefix="/api/v1/settings", tags=["settings"])
262
270
 
263
271
  app.include_router(get_visualize_router(), prefix="/api/v1/visualize", tags=["visualize"])
@@ -270,10 +278,6 @@ app.include_router(get_responses_router(), prefix="/api/v1/responses", tags=["re
270
278
 
271
279
  app.include_router(get_sync_router(), prefix="/api/v1/sync", tags=["sync"])
272
280
 
273
- codegraph_routes = get_code_pipeline_router()
274
- if codegraph_routes:
275
- app.include_router(codegraph_routes, prefix="/api/v1/code-pipeline", tags=["code-pipeline"])
276
-
277
281
  app.include_router(
278
282
  get_users_router(),
279
283
  prefix="/api/v1/users",
cognee/api/v1/add/add.py CHANGED
@@ -155,7 +155,7 @@ async def add(
155
155
  - LLM_API_KEY: API key for your LLM provider (OpenAI, Anthropic, etc.)
156
156
 
157
157
  Optional:
158
- - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama", "mistral"
158
+ - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama", "mistral", "bedrock"
159
159
  - LLM_MODEL: Model name (default: "gpt-5-mini")
160
160
  - DEFAULT_USER_EMAIL: Custom default user email
161
161
  - DEFAULT_USER_PASSWORD: Custom default user password
@@ -205,6 +205,7 @@ async def add(
205
205
  pipeline_name="add_pipeline",
206
206
  vector_db_config=vector_db_config,
207
207
  graph_db_config=graph_db_config,
208
+ use_pipeline_cache=True,
208
209
  incremental_loading=incremental_loading,
209
210
  data_per_batch=data_per_batch,
210
211
  ):
@@ -82,7 +82,9 @@ def get_add_router() -> APIRouter:
82
82
  datasetName,
83
83
  user=user,
84
84
  dataset_id=datasetId,
85
- node_set=node_set if node_set else None,
85
+ node_set=node_set
86
+ if node_set != [""]
87
+ else None, # Transform default node_set endpoint value to None
86
88
  )
87
89
 
88
90
  if isinstance(add_run, PipelineRunErrored):
@@ -3,6 +3,7 @@ from pydantic import BaseModel
3
3
  from typing import Union, Optional
4
4
  from uuid import UUID
5
5
 
6
+ from cognee.modules.cognify.config import get_cognify_config
6
7
  from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
7
8
  from cognee.shared.logging_utils import get_logger
8
9
  from cognee.shared.data_models import KnowledgeGraph
@@ -19,7 +20,6 @@ from cognee.modules.ontology.get_default_ontology_resolver import (
19
20
  from cognee.modules.users.models import User
20
21
 
21
22
  from cognee.tasks.documents import (
22
- check_permissions_on_dataset,
23
23
  classify_documents,
24
24
  extract_chunks_from_documents,
25
25
  )
@@ -53,6 +53,7 @@ async def cognify(
53
53
  custom_prompt: Optional[str] = None,
54
54
  temporal_cognify: bool = False,
55
55
  data_per_batch: int = 20,
56
+ **kwargs,
56
57
  ):
57
58
  """
58
59
  Transform ingested data into a structured knowledge graph.
@@ -78,12 +79,11 @@ async def cognify(
78
79
 
79
80
  Processing Pipeline:
80
81
  1. **Document Classification**: Identifies document types and structures
81
- 2. **Permission Validation**: Ensures user has processing rights
82
- 3. **Text Chunking**: Breaks content into semantically meaningful segments
83
- 4. **Entity Extraction**: Identifies key concepts, people, places, organizations
84
- 5. **Relationship Detection**: Discovers connections between entities
85
- 6. **Graph Construction**: Builds semantic knowledge graph with embeddings
86
- 7. **Content Summarization**: Creates hierarchical summaries for navigation
82
+ 2. **Text Chunking**: Breaks content into semantically meaningful segments
83
+ 3. **Entity Extraction**: Identifies key concepts, people, places, organizations
84
+ 4. **Relationship Detection**: Discovers connections between entities
85
+ 5. **Graph Construction**: Builds semantic knowledge graph with embeddings
86
+ 6. **Content Summarization**: Creates hierarchical summaries for navigation
87
87
 
88
88
  Graph Model Customization:
89
89
  The `graph_model` parameter allows custom knowledge structures:
@@ -224,6 +224,7 @@ async def cognify(
224
224
  config=config,
225
225
  custom_prompt=custom_prompt,
226
226
  chunks_per_batch=chunks_per_batch,
227
+ **kwargs,
227
228
  )
228
229
 
229
230
  # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
@@ -238,6 +239,7 @@ async def cognify(
238
239
  vector_db_config=vector_db_config,
239
240
  graph_db_config=graph_db_config,
240
241
  incremental_loading=incremental_loading,
242
+ use_pipeline_cache=True,
241
243
  pipeline_name="cognify_pipeline",
242
244
  data_per_batch=data_per_batch,
243
245
  )
@@ -251,6 +253,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
251
253
  config: Config = None,
252
254
  custom_prompt: Optional[str] = None,
253
255
  chunks_per_batch: int = 100,
256
+ **kwargs,
254
257
  ) -> list[Task]:
255
258
  if config is None:
256
259
  ontology_config = get_ontology_env_config()
@@ -272,9 +275,11 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
272
275
  if chunks_per_batch is None:
273
276
  chunks_per_batch = 100
274
277
 
278
+ cognify_config = get_cognify_config()
279
+ embed_triplets = cognify_config.triplet_embedding
280
+
275
281
  default_tasks = [
276
282
  Task(classify_documents),
277
- Task(check_permissions_on_dataset, user=user, permissions=["write"]),
278
283
  Task(
279
284
  extract_chunks_from_documents,
280
285
  max_chunk_size=chunk_size or get_max_chunk_tokens(),
@@ -286,12 +291,17 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
286
291
  config=config,
287
292
  custom_prompt=custom_prompt,
288
293
  task_config={"batch_size": chunks_per_batch},
294
+ **kwargs,
289
295
  ), # Generate knowledge graphs from the document chunks.
290
296
  Task(
291
297
  summarize_text,
292
298
  task_config={"batch_size": chunks_per_batch},
293
299
  ),
294
- Task(add_data_points, task_config={"batch_size": chunks_per_batch}),
300
+ Task(
301
+ add_data_points,
302
+ embed_triplets=embed_triplets,
303
+ task_config={"batch_size": chunks_per_batch},
304
+ ),
295
305
  ]
296
306
 
297
307
  return default_tasks
@@ -305,14 +315,13 @@ async def get_temporal_tasks(
305
315
 
306
316
  The pipeline includes:
307
317
  1. Document classification.
308
- 2. Dataset permission checks (requires "write" access).
309
- 3. Document chunking with a specified or default chunk size.
310
- 4. Event and timestamp extraction from chunks.
311
- 5. Knowledge graph extraction from events.
312
- 6. Batched insertion of data points.
318
+ 2. Document chunking with a specified or default chunk size.
319
+ 3. Event and timestamp extraction from chunks.
320
+ 4. Knowledge graph extraction from events.
321
+ 5. Batched insertion of data points.
313
322
 
314
323
  Args:
315
- user (User, optional): The user requesting task execution, used for permission checks.
324
+ user (User, optional): The user requesting task execution.
316
325
  chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
317
326
  chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
318
327
  chunks_per_batch (int, optional): Number of chunks to process in a single batch in Cognify
@@ -325,7 +334,6 @@ async def get_temporal_tasks(
325
334
 
326
335
  temporal_tasks = [
327
336
  Task(classify_documents),
328
- Task(check_permissions_on_dataset, user=user, permissions=["write"]),
329
337
  Task(
330
338
  extract_chunks_from_documents,
331
339
  max_chunk_size=chunk_size or get_max_chunk_tokens(),
@@ -1,2 +1 @@
1
1
  from .get_cognify_router import get_cognify_router
2
- from .get_code_pipeline_router import get_code_pipeline_router
@@ -41,6 +41,11 @@ class CognifyPayloadDTO(InDTO):
41
41
  custom_prompt: Optional[str] = Field(
42
42
  default="", description="Custom prompt for entity extraction and graph generation"
43
43
  )
44
+ ontology_key: Optional[List[str]] = Field(
45
+ default=None,
46
+ examples=[[]],
47
+ description="Reference to one or more previously uploaded ontologies",
48
+ )
44
49
 
45
50
 
46
51
  def get_cognify_router() -> APIRouter:
@@ -68,6 +73,7 @@ def get_cognify_router() -> APIRouter:
68
73
  - **dataset_ids** (Optional[List[UUID]]): List of existing dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
69
74
  - **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking).
70
75
  - **custom_prompt** (Optional[str]): Custom prompt for entity extraction and graph generation. If provided, this prompt will be used instead of the default prompts for knowledge graph extraction.
76
+ - **ontology_key** (Optional[List[str]]): Reference to one or more previously uploaded ontology files to use for knowledge graph construction.
71
77
 
72
78
  ## Response
73
79
  - **Blocking execution**: Complete pipeline run information with entity counts, processing duration, and success/failure status
@@ -82,7 +88,8 @@ def get_cognify_router() -> APIRouter:
82
88
  {
83
89
  "datasets": ["research_papers", "documentation"],
84
90
  "run_in_background": false,
85
- "custom_prompt": "Extract entities focusing on technical concepts and their relationships. Identify key technologies, methodologies, and their interconnections."
91
+ "custom_prompt": "Extract entities focusing on technical concepts and their relationships. Identify key technologies, methodologies, and their interconnections.",
92
+ "ontology_key": ["medical_ontology_v1"]
86
93
  }
87
94
  ```
88
95
 
@@ -108,13 +115,35 @@ def get_cognify_router() -> APIRouter:
108
115
  )
109
116
 
110
117
  from cognee.api.v1.cognify import cognify as cognee_cognify
118
+ from cognee.api.v1.ontologies.ontologies import OntologyService
111
119
 
112
120
  try:
113
121
  datasets = payload.dataset_ids if payload.dataset_ids else payload.datasets
122
+ config_to_use = None
123
+
124
+ if payload.ontology_key:
125
+ ontology_service = OntologyService()
126
+ ontology_contents = ontology_service.get_ontology_contents(
127
+ payload.ontology_key, user
128
+ )
129
+
130
+ from cognee.modules.ontology.ontology_config import Config
131
+ from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import (
132
+ RDFLibOntologyResolver,
133
+ )
134
+ from io import StringIO
135
+
136
+ ontology_streams = [StringIO(content) for content in ontology_contents]
137
+ config_to_use: Config = {
138
+ "ontology_config": {
139
+ "ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_streams)
140
+ }
141
+ }
114
142
 
115
143
  cognify_run = await cognee_cognify(
116
144
  datasets,
117
145
  user,
146
+ config=config_to_use,
118
147
  run_in_background=payload.run_in_background,
119
148
  custom_prompt=payload.custom_prompt,
120
149
  )
@@ -208,14 +208,14 @@ def get_datasets_router() -> APIRouter:
208
208
  },
209
209
  )
210
210
 
211
- from cognee.modules.data.methods import get_dataset, delete_dataset
211
+ from cognee.modules.data.methods import delete_dataset
212
212
 
213
- dataset = await get_dataset(user.id, dataset_id)
213
+ dataset = await get_authorized_existing_datasets([dataset_id], "delete", user)
214
214
 
215
215
  if dataset is None:
216
216
  raise DatasetNotFoundError(message=f"Dataset ({str(dataset_id)}) not found.")
217
217
 
218
- await delete_dataset(dataset)
218
+ await delete_dataset(dataset[0])
219
219
 
220
220
  @router.delete(
221
221
  "/{dataset_id}/data/{data_id}",
@@ -0,0 +1,4 @@
1
+ from .ontologies import OntologyService
2
+ from .routers.get_ontology_router import get_ontology_router
3
+
4
+ __all__ = ["OntologyService", "get_ontology_router"]
@@ -0,0 +1,158 @@
1
+ import os
2
+ import json
3
+ import tempfile
4
+ from pathlib import Path
5
+ from datetime import datetime, timezone
6
+ from typing import Optional, List
7
+ from dataclasses import dataclass
8
+ from fastapi import UploadFile
9
+
10
+
11
@dataclass
class OntologyMetadata:
    """Metadata describing a single uploaded ontology file."""

    # User-chosen unique identifier the ontology is stored under.
    ontology_key: str
    # Original filename of the uploaded file (e.g. "schema.owl").
    filename: str
    # Size of the stored file content in bytes.
    size_bytes: int
    # UTC upload timestamp in ISO-8601 format.
    uploaded_at: str
    # Optional human-readable description supplied at upload time.
    description: Optional[str] = None
18
+
19
+
20
class OntologyService:
    """Filesystem-backed store for user-uploaded OWL ontology files.

    Files are kept under ``<tempdir>/ontologies/<user_id>/`` as
    ``<ontology_key>.owl``, next to a ``metadata.json`` index that maps each
    ontology key to its upload metadata (filename, size, timestamp,
    description). Storage is partitioned per user id, so one user cannot
    read or overwrite another user's ontologies.
    """

    @property
    def base_dir(self) -> Path:
        """Root directory for all users' ontology storage."""
        # Resolved on every access so a changed temp dir is picked up.
        return Path(tempfile.gettempdir()) / "ontologies"

    @staticmethod
    def _validate_ontology_key(ontology_key: str) -> None:
        """Reject keys that are empty or could escape the user's directory.

        The key is interpolated directly into a filesystem path
        (``<user_dir>/<key>.owl``), so path separators or ``..`` segments
        would allow writing or reading outside the per-user directory.

        Raises:
            ValueError: If the key is empty or contains path-unsafe parts.
        """
        if not ontology_key or not ontology_key.strip():
            raise ValueError("Ontology key must be a non-empty string")
        if "/" in ontology_key or "\\" in ontology_key or ".." in ontology_key:
            raise ValueError(f"Invalid ontology key '{ontology_key}'")

    def _get_user_dir(self, user_id: str) -> Path:
        """Return the per-user storage directory, creating it if needed."""
        user_dir = self.base_dir / str(user_id)
        user_dir.mkdir(parents=True, exist_ok=True)
        return user_dir

    def _get_metadata_path(self, user_dir: Path) -> Path:
        """Return the path of the per-user metadata index file."""
        return user_dir / "metadata.json"

    def _load_metadata(self, user_dir: Path) -> dict:
        """Load the user's metadata index; a missing file means no ontologies."""
        metadata_path = self._get_metadata_path(user_dir)
        if metadata_path.exists():
            with open(metadata_path, "r") as f:
                return json.load(f)
        return {}

    def _save_metadata(self, user_dir: Path, metadata: dict):
        """Persist the user's metadata index as pretty-printed JSON."""
        metadata_path = self._get_metadata_path(user_dir)
        with open(metadata_path, "w") as f:
            json.dump(metadata, f, indent=2)

    async def upload_ontology(
        self, ontology_key: str, file: UploadFile, user, description: Optional[str] = None
    ) -> OntologyMetadata:
        """Store a single uploaded .owl file under ``ontology_key``.

        Args:
            ontology_key: Unique, path-safe identifier for the ontology.
            file: Uploaded file; must carry a ``.owl`` filename.
            user: Authenticated user (only ``user.id`` is used here).
            description: Optional human-readable description.

        Returns:
            OntologyMetadata for the stored file.

        Raises:
            ValueError: If the key is invalid or already exists, or if the
                file has no filename / is not an .owl file.
        """
        # Validate the key before it is ever used to build a filesystem path
        # (prevents path traversal such as "../../other-user").
        self._validate_ontology_key(ontology_key)

        if not file.filename:
            raise ValueError("File must have a filename")
        if not file.filename.lower().endswith(".owl"):
            raise ValueError("File must be in .owl format")

        user_dir = self._get_user_dir(str(user.id))
        metadata = self._load_metadata(user_dir)

        if ontology_key in metadata:
            raise ValueError(f"Ontology key '{ontology_key}' already exists")

        content = await file.read()

        file_path = user_dir / f"{ontology_key}.owl"
        with open(file_path, "wb") as f:
            f.write(content)

        # Update the index only after the file write succeeded, so the
        # metadata never references a missing file.
        ontology_metadata = {
            "filename": file.filename,
            "size_bytes": len(content),
            "uploaded_at": datetime.now(timezone.utc).isoformat(),
            "description": description,
        }
        metadata[ontology_key] = ontology_metadata
        self._save_metadata(user_dir, metadata)

        return OntologyMetadata(
            ontology_key=ontology_key,
            filename=file.filename,
            size_bytes=len(content),
            uploaded_at=ontology_metadata["uploaded_at"],
            description=description,
        )

    async def upload_ontologies(
        self,
        ontology_key: List[str],
        files: List[UploadFile],
        user,
        descriptions: Optional[List[str]] = None,
    ) -> List[OntologyMetadata]:
        """
        Upload ontology files with their respective keys.

        Args:
            ontology_key: List of unique keys for each ontology
            files: List of UploadFile objects (same length as keys)
            user: Authenticated user
            descriptions: Optional list of descriptions, one per file
                (same length as files when provided)

        Returns:
            List of OntologyMetadata objects for uploaded files

        Raises:
            ValueError: If keys duplicate, file format invalid, or array
                lengths don't match

        Note:
            Uploads are processed sequentially and are not atomic: if one
            upload fails, files uploaded before it remain stored.
        """
        if len(ontology_key) != len(files):
            raise ValueError("Number of keys must match number of files")

        if len(set(ontology_key)) != len(ontology_key):
            raise ValueError("Duplicate ontology keys not allowed")

        # Fail fast on a mismatched descriptions list instead of raising an
        # IndexError halfway through (which would leave a partial upload).
        if descriptions is not None and len(descriptions) != len(files):
            raise ValueError("Number of descriptions must match number of files")

        results = []
        for index, (key, file) in enumerate(zip(ontology_key, files)):
            results.append(
                await self.upload_ontology(
                    ontology_key=key,
                    file=file,
                    user=user,
                    description=descriptions[index] if descriptions else None,
                )
            )
        return results

    def get_ontology_contents(self, ontology_key: List[str], user) -> List[str]:
        """
        Retrieve ontology content for one or more keys.

        Args:
            ontology_key: List of ontology keys to retrieve (can contain single item)
            user: Authenticated user

        Returns:
            List of ontology content strings

        Raises:
            ValueError: If any ontology key not found
        """
        user_dir = self._get_user_dir(str(user.id))
        metadata = self._load_metadata(user_dir)

        contents = []
        for key in ontology_key:
            # Defense in depth: never build a path from an unvalidated key,
            # even if one somehow ended up in the metadata index.
            self._validate_ontology_key(key)

            if key not in metadata:
                raise ValueError(f"Ontology key '{key}' not found")

            file_path = user_dir / f"{key}.owl"
            if not file_path.exists():
                raise ValueError(f"Ontology file for key '{key}' not found")

            with open(file_path, "r", encoding="utf-8") as f:
                contents.append(f.read())
        return contents

    def list_ontologies(self, user) -> dict:
        """Return the metadata index for all of the user's ontologies."""
        user_dir = self._get_user_dir(str(user.id))
        return self._load_metadata(user_dir)
File without changes
@@ -0,0 +1,109 @@
1
+ from fastapi import APIRouter, File, Form, UploadFile, Depends, Request
2
+ from fastapi.responses import JSONResponse
3
+ from typing import Optional, List
4
+
5
+ from cognee.modules.users.models import User
6
+ from cognee.modules.users.methods import get_authenticated_user
7
+ from cognee.shared.utils import send_telemetry
8
+ from cognee import __version__ as cognee_version
9
+ from ..ontologies import OntologyService
10
+
11
+
12
# Router factory: builds the /ontologies API router with upload + list endpoints.
def get_ontology_router() -> APIRouter:
    router = APIRouter()
    # One shared service instance for both endpoints; it holds no per-request
    # state (all paths are derived from the authenticated user on each call).
    ontology_service = OntologyService()

    @router.post("", response_model=dict)
    async def upload_ontology(
        request: Request,
        ontology_key: str = Form(...),
        ontology_file: UploadFile = File(...),
        description: Optional[str] = Form(None),
        user: User = Depends(get_authenticated_user),
    ):
        """
        Upload a single ontology file for later use in cognify operations.

        ## Request Parameters
        - **ontology_key** (str): User-defined identifier for the ontology.
        - **ontology_file** (UploadFile): Single OWL format ontology file
        - **description** (Optional[str]): Optional description for the ontology.

        ## Response
        Returns metadata about the uploaded ontology including key, filename, size, and upload timestamp.

        ## Error Codes
        - **400 Bad Request**: Invalid file format, duplicate key, multiple files uploaded
        - **500 Internal Server Error**: File system or processing errors
        """
        send_telemetry(
            "Ontology Upload API Endpoint Invoked",
            user.id,
            additional_properties={
                "endpoint": "POST /api/v1/ontologies",
                "cognee_version": cognee_version,
            },
        )

        try:
            # Enforce: exactly one uploaded file for "ontology_file".
            # The raw form is inspected because the bound `ontology_file`
            # parameter alone can't reveal that the field was repeated.
            form = await request.form()
            uploaded_files = form.getlist("ontology_file")
            if len(uploaded_files) != 1:
                raise ValueError("Only one ontology_file is allowed")

            # Presumably guards against clients posting JSON-serialized
            # arrays/objects in these form fields — TODO confirm intent.
            if ontology_key.strip().startswith(("[", "{")):
                raise ValueError("ontology_key must be a string")
            if description is not None and description.strip().startswith(("[", "{")):
                raise ValueError("description must be a string")

            # Delegate validation + storage to the service layer.
            result = await ontology_service.upload_ontology(
                ontology_key=ontology_key,
                file=ontology_file,
                user=user,
                description=description,
            )

            # Response is shaped as a list for symmetry with bulk uploads.
            return {
                "uploaded_ontologies": [
                    {
                        "ontology_key": result.ontology_key,
                        "filename": result.filename,
                        "size_bytes": result.size_bytes,
                        "uploaded_at": result.uploaded_at,
                        "description": result.description,
                    }
                ]
            }
        except ValueError as e:
            # Service raises ValueError for all client-side problems (bad
            # format, duplicate key, missing key) — surface as 400.
            return JSONResponse(status_code=400, content={"error": str(e)})
        except Exception as e:
            # NOTE(review): str(e) is returned to the client verbatim and may
            # leak internal details — consider a generic message + server log.
            return JSONResponse(status_code=500, content={"error": str(e)})

    @router.get("", response_model=dict)
    async def list_ontologies(user: User = Depends(get_authenticated_user)):
        """
        List all uploaded ontologies for the authenticated user.

        ## Response
        Returns a dictionary mapping ontology keys to their metadata including filename, size, and upload timestamp.

        ## Error Codes
        - **500 Internal Server Error**: File system or processing errors
        """
        send_telemetry(
            "Ontology List API Endpoint Invoked",
            user.id,
            additional_properties={
                "endpoint": "GET /api/v1/ontologies",
                "cognee_version": cognee_version,
            },
        )

        try:
            # Returns the raw metadata index ({} when nothing was uploaded).
            metadata = ontology_service.list_ontologies(user)
            return metadata
        except Exception as e:
            # NOTE(review): same error-detail leak concern as the upload route.
            return JSONResponse(status_code=500, content={"error": str(e)})

    return router
@@ -1,15 +1,20 @@
1
1
  from uuid import UUID
2
- from typing import List
2
+ from typing import List, Union
3
3
 
4
4
  from fastapi import APIRouter, Depends
5
5
  from fastapi.responses import JSONResponse
6
6
 
7
7
  from cognee.modules.users.models import User
8
+ from cognee.api.DTO import InDTO
8
9
  from cognee.modules.users.methods import get_authenticated_user
9
10
  from cognee.shared.utils import send_telemetry
10
11
  from cognee import __version__ as cognee_version
11
12
 
12
13
 
14
class SelectTenantDTO(InDTO):
    """Request body for selecting the caller's active tenant."""

    # Target tenant; None selects the user's default single-user tenant.
    tenant_id: UUID | None = None
16
+
17
+
13
18
  def get_permissions_router() -> APIRouter:
14
19
  permissions_router = APIRouter()
15
20
 
@@ -226,4 +231,39 @@ def get_permissions_router() -> APIRouter:
226
231
  status_code=200, content={"message": "Tenant created.", "tenant_id": str(tenant_id)}
227
232
  )
228
233
 
234
    @permissions_router.post("/tenants/select")
    async def select_tenant(payload: SelectTenantDTO, user: User = Depends(get_authenticated_user)):
        """
        Select current tenant.

        This endpoint selects a tenant with the specified UUID. Tenants are used
        to organize users and resources in multi-tenant environments, providing
        isolation and access control between different groups or organizations.

        Sending a null/None value as tenant_id selects the user's default
        single-user tenant.

        ## Request Parameters
        - **tenant_id** (Union[UUID, None]): UUID of the tenant to select, If null/None is provided use the default single user tenant

        ## Response
        Returns a success message along with selected tenant id.
        """
        # NOTE(review): the logged endpoint string embeds the tenant id, but
        # the actual route is POST /v1/permissions/tenants/select — confirm
        # whether the telemetry path is intentional.
        send_telemetry(
            "Permissions API Endpoint Invoked",
            user.id,
            additional_properties={
                "endpoint": f"POST /v1/permissions/tenants/{str(payload.tenant_id)}",
                "tenant_id": str(payload.tenant_id),
            },
        )

        # Imported locally — presumably to avoid a circular import at module
        # load time; verify against the module graph.
        from cognee.modules.users.tenants.methods import select_tenant as select_tenant_method

        await select_tenant_method(user_id=user.id, tenant_id=payload.tenant_id)

        # When tenant_id is None the response echoes the literal "None";
        # the selected default tenant's actual id is not returned.
        return JSONResponse(
            status_code=200,
            content={"message": "Tenant selected.", "tenant_id": str(payload.tenant_id)},
        )
268
+
229
269
  return permissions_router
@@ -31,6 +31,8 @@ async def search(
31
31
  only_context: bool = False,
32
32
  use_combined_context: bool = False,
33
33
  session_id: Optional[str] = None,
34
+ wide_search_top_k: Optional[int] = 100,
35
+ triplet_distance_penalty: Optional[float] = 3.5,
34
36
  ) -> Union[List[SearchResult], CombinedSearchResult]:
35
37
  """
36
38
  Search and query the knowledge graph for insights, information, and connections.
@@ -200,6 +202,8 @@ async def search(
200
202
  only_context=only_context,
201
203
  use_combined_context=use_combined_context,
202
204
  session_id=session_id,
205
+ wide_search_top_k=wide_search_top_k,
206
+ triplet_distance_penalty=triplet_distance_penalty,
203
207
  )
204
208
 
205
209
  return filtered_search_results