cognee 0.3.6__py3-none-any.whl → 0.3.7.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. cognee/__init__.py +1 -0
  2. cognee/api/health.py +2 -12
  3. cognee/api/v1/add/add.py +46 -6
  4. cognee/api/v1/add/routers/get_add_router.py +11 -2
  5. cognee/api/v1/cognify/cognify.py +29 -9
  6. cognee/api/v1/cognify/routers/get_cognify_router.py +2 -1
  7. cognee/api/v1/datasets/datasets.py +11 -0
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +8 -0
  9. cognee/api/v1/delete/routers/get_delete_router.py +2 -0
  10. cognee/api/v1/memify/routers/get_memify_router.py +2 -1
  11. cognee/api/v1/permissions/routers/get_permissions_router.py +6 -0
  12. cognee/api/v1/responses/default_tools.py +0 -1
  13. cognee/api/v1/responses/dispatch_function.py +1 -1
  14. cognee/api/v1/responses/routers/default_tools.py +0 -1
  15. cognee/api/v1/search/routers/get_search_router.py +3 -3
  16. cognee/api/v1/search/search.py +11 -9
  17. cognee/api/v1/settings/routers/get_settings_router.py +7 -1
  18. cognee/api/v1/sync/routers/get_sync_router.py +3 -0
  19. cognee/api/v1/ui/ui.py +45 -16
  20. cognee/api/v1/update/routers/get_update_router.py +3 -1
  21. cognee/api/v1/update/update.py +3 -3
  22. cognee/api/v1/users/routers/get_visualize_router.py +2 -0
  23. cognee/cli/_cognee.py +61 -10
  24. cognee/cli/commands/add_command.py +3 -3
  25. cognee/cli/commands/cognify_command.py +3 -3
  26. cognee/cli/commands/config_command.py +9 -7
  27. cognee/cli/commands/delete_command.py +3 -3
  28. cognee/cli/commands/search_command.py +3 -7
  29. cognee/cli/config.py +0 -1
  30. cognee/context_global_variables.py +5 -0
  31. cognee/exceptions/exceptions.py +1 -1
  32. cognee/infrastructure/databases/cache/__init__.py +2 -0
  33. cognee/infrastructure/databases/cache/cache_db_interface.py +79 -0
  34. cognee/infrastructure/databases/cache/config.py +44 -0
  35. cognee/infrastructure/databases/cache/get_cache_engine.py +67 -0
  36. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +243 -0
  37. cognee/infrastructure/databases/exceptions/__init__.py +1 -0
  38. cognee/infrastructure/databases/exceptions/exceptions.py +18 -2
  39. cognee/infrastructure/databases/graph/get_graph_engine.py +1 -1
  40. cognee/infrastructure/databases/graph/graph_db_interface.py +5 -0
  41. cognee/infrastructure/databases/graph/kuzu/adapter.py +76 -47
  42. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +13 -3
  43. cognee/infrastructure/databases/graph/neo4j_driver/deadlock_retry.py +1 -1
  44. cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +1 -1
  45. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +1 -1
  46. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +21 -3
  47. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +17 -10
  48. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +17 -4
  49. cognee/infrastructure/databases/vector/embeddings/config.py +2 -3
  50. cognee/infrastructure/databases/vector/exceptions/exceptions.py +1 -1
  51. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -1
  52. cognee/infrastructure/files/exceptions.py +1 -1
  53. cognee/infrastructure/files/storage/LocalFileStorage.py +9 -9
  54. cognee/infrastructure/files/storage/S3FileStorage.py +11 -11
  55. cognee/infrastructure/files/utils/guess_file_type.py +6 -0
  56. cognee/infrastructure/llm/prompts/feedback_reaction_prompt.txt +14 -0
  57. cognee/infrastructure/llm/prompts/feedback_report_prompt.txt +13 -0
  58. cognee/infrastructure/llm/prompts/feedback_user_context_prompt.txt +5 -0
  59. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +0 -5
  60. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +19 -9
  61. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +17 -5
  62. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +17 -5
  63. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +32 -0
  64. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/__init__.py +0 -0
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +109 -0
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +33 -8
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +40 -18
  68. cognee/infrastructure/loaders/LoaderEngine.py +27 -7
  69. cognee/infrastructure/loaders/external/__init__.py +7 -0
  70. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +2 -8
  71. cognee/infrastructure/loaders/external/beautiful_soup_loader.py +310 -0
  72. cognee/infrastructure/loaders/supported_loaders.py +7 -0
  73. cognee/modules/data/exceptions/exceptions.py +1 -1
  74. cognee/modules/data/methods/__init__.py +3 -0
  75. cognee/modules/data/methods/get_dataset_data.py +4 -1
  76. cognee/modules/data/methods/has_dataset_data.py +21 -0
  77. cognee/modules/engine/models/TableRow.py +0 -1
  78. cognee/modules/ingestion/save_data_to_file.py +9 -2
  79. cognee/modules/pipelines/exceptions/exceptions.py +1 -1
  80. cognee/modules/pipelines/operations/pipeline.py +12 -1
  81. cognee/modules/pipelines/operations/run_tasks.py +25 -197
  82. cognee/modules/pipelines/operations/run_tasks_base.py +7 -0
  83. cognee/modules/pipelines/operations/run_tasks_data_item.py +260 -0
  84. cognee/modules/pipelines/operations/run_tasks_distributed.py +121 -38
  85. cognee/modules/pipelines/operations/run_tasks_with_telemetry.py +9 -1
  86. cognee/modules/retrieval/EntityCompletionRetriever.py +48 -8
  87. cognee/modules/retrieval/base_graph_retriever.py +3 -1
  88. cognee/modules/retrieval/base_retriever.py +3 -1
  89. cognee/modules/retrieval/chunks_retriever.py +5 -1
  90. cognee/modules/retrieval/code_retriever.py +20 -2
  91. cognee/modules/retrieval/completion_retriever.py +50 -9
  92. cognee/modules/retrieval/cypher_search_retriever.py +11 -1
  93. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +47 -8
  94. cognee/modules/retrieval/graph_completion_cot_retriever.py +152 -22
  95. cognee/modules/retrieval/graph_completion_retriever.py +54 -10
  96. cognee/modules/retrieval/lexical_retriever.py +20 -2
  97. cognee/modules/retrieval/natural_language_retriever.py +10 -1
  98. cognee/modules/retrieval/summaries_retriever.py +5 -1
  99. cognee/modules/retrieval/temporal_retriever.py +62 -10
  100. cognee/modules/retrieval/user_qa_feedback.py +3 -2
  101. cognee/modules/retrieval/utils/completion.py +30 -4
  102. cognee/modules/retrieval/utils/description_to_codepart_search.py +1 -1
  103. cognee/modules/retrieval/utils/session_cache.py +156 -0
  104. cognee/modules/search/methods/get_search_type_tools.py +0 -5
  105. cognee/modules/search/methods/no_access_control_search.py +12 -1
  106. cognee/modules/search/methods/search.py +51 -5
  107. cognee/modules/search/types/SearchType.py +0 -1
  108. cognee/modules/settings/get_settings.py +23 -0
  109. cognee/modules/users/methods/get_authenticated_user.py +3 -1
  110. cognee/modules/users/methods/get_default_user.py +1 -6
  111. cognee/modules/users/roles/methods/create_role.py +2 -2
  112. cognee/modules/users/tenants/methods/create_tenant.py +2 -2
  113. cognee/shared/exceptions/exceptions.py +1 -1
  114. cognee/shared/logging_utils.py +18 -11
  115. cognee/shared/utils.py +24 -2
  116. cognee/tasks/codingagents/coding_rule_associations.py +1 -2
  117. cognee/tasks/documents/exceptions/exceptions.py +1 -1
  118. cognee/tasks/feedback/__init__.py +13 -0
  119. cognee/tasks/feedback/create_enrichments.py +84 -0
  120. cognee/tasks/feedback/extract_feedback_interactions.py +230 -0
  121. cognee/tasks/feedback/generate_improved_answers.py +130 -0
  122. cognee/tasks/feedback/link_enrichments_to_feedback.py +67 -0
  123. cognee/tasks/feedback/models.py +26 -0
  124. cognee/tasks/graph/extract_graph_from_data.py +2 -0
  125. cognee/tasks/ingestion/data_item_to_text_file.py +3 -3
  126. cognee/tasks/ingestion/ingest_data.py +11 -5
  127. cognee/tasks/ingestion/save_data_item_to_storage.py +12 -1
  128. cognee/tasks/storage/add_data_points.py +3 -10
  129. cognee/tasks/storage/index_data_points.py +19 -14
  130. cognee/tasks/storage/index_graph_edges.py +25 -11
  131. cognee/tasks/web_scraper/__init__.py +34 -0
  132. cognee/tasks/web_scraper/config.py +26 -0
  133. cognee/tasks/web_scraper/default_url_crawler.py +446 -0
  134. cognee/tasks/web_scraper/models.py +46 -0
  135. cognee/tasks/web_scraper/types.py +4 -0
  136. cognee/tasks/web_scraper/utils.py +142 -0
  137. cognee/tasks/web_scraper/web_scraper_task.py +396 -0
  138. cognee/tests/cli_tests/cli_unit_tests/test_cli_utils.py +0 -1
  139. cognee/tests/integration/web_url_crawler/test_default_url_crawler.py +13 -0
  140. cognee/tests/integration/web_url_crawler/test_tavily_crawler.py +19 -0
  141. cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py +344 -0
  142. cognee/tests/subprocesses/reader.py +25 -0
  143. cognee/tests/subprocesses/simple_cognify_1.py +31 -0
  144. cognee/tests/subprocesses/simple_cognify_2.py +31 -0
  145. cognee/tests/subprocesses/writer.py +32 -0
  146. cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py +0 -2
  147. cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py +8 -3
  148. cognee/tests/tasks/entity_extraction/entity_extraction_test.py +89 -0
  149. cognee/tests/tasks/web_scraping/web_scraping_test.py +172 -0
  150. cognee/tests/test_add_docling_document.py +56 -0
  151. cognee/tests/test_chromadb.py +7 -11
  152. cognee/tests/test_concurrent_subprocess_access.py +76 -0
  153. cognee/tests/test_conversation_history.py +240 -0
  154. cognee/tests/test_feedback_enrichment.py +174 -0
  155. cognee/tests/test_kuzu.py +27 -15
  156. cognee/tests/test_lancedb.py +7 -11
  157. cognee/tests/test_library.py +32 -2
  158. cognee/tests/test_neo4j.py +24 -16
  159. cognee/tests/test_neptune_analytics_vector.py +7 -11
  160. cognee/tests/test_permissions.py +9 -13
  161. cognee/tests/test_pgvector.py +4 -4
  162. cognee/tests/test_remote_kuzu.py +8 -11
  163. cognee/tests/test_s3_file_storage.py +1 -1
  164. cognee/tests/test_search_db.py +6 -8
  165. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +89 -0
  166. cognee/tests/unit/modules/retrieval/conversation_history_test.py +154 -0
  167. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +51 -0
  168. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/METADATA +21 -6
  169. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/RECORD +178 -139
  170. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/entry_points.txt +1 -0
  171. distributed/Dockerfile +0 -3
  172. distributed/entrypoint.py +21 -9
  173. distributed/signal.py +5 -0
  174. distributed/workers/data_point_saving_worker.py +64 -34
  175. distributed/workers/graph_saving_worker.py +71 -47
  176. cognee/infrastructure/databases/graph/memgraph/memgraph_adapter.py +0 -1116
  177. cognee/modules/retrieval/insights_retriever.py +0 -133
  178. cognee/tests/test_memgraph.py +0 -109
  179. cognee/tests/unit/modules/retrieval/insights_retriever_test.py +0 -251
  180. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/WHEEL +0 -0
  181. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/licenses/LICENSE +0 -0
  182. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/licenses/NOTICE.md +0 -0
cognee/__init__.py CHANGED
@@ -19,6 +19,7 @@ from .api.v1.add import add
 from .api.v1.delete import delete
 from .api.v1.cognify import cognify
 from .modules.memify import memify
+from .api.v1.update import update
 from .api.v1.config.config import config
 from .api.v1.datasets.datasets import datasets
 from .api.v1.prune import prune
cognee/api/health.py CHANGED
@@ -241,16 +241,6 @@ class HealthChecker:
         """Get comprehensive health status."""
         components = {}

-        # Critical services
-        critical_components = [
-            "relational_db",
-            "vector_db",
-            "graph_db",
-            "file_storage",
-            "llm_provider",
-            "embedding_service",
-        ]
-
         critical_checks = [
             ("relational_db", self.check_relational_db()),
             ("vector_db", self.check_vector_db()),
@@ -296,11 +286,11 @@
             else:
                 components[name] = result

+        critical_comps = [check[0] for check in critical_checks]
         # Determine overall status
         critical_unhealthy = any(
-            comp.status == HealthStatus.UNHEALTHY
+            comp.status == HealthStatus.UNHEALTHY and name in critical_comps
            for name, comp in components.items()
-            if name in critical_components
        )

        has_degraded = any(comp.status == HealthStatus.DEGRADED for comp in components.values())
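
The second hunk derives the critical-component names from `critical_checks` itself, so the name list can no longer drift out of sync with the checks. A standalone sketch of that pattern, with made-up check coroutines (not cognee's actual health checks):

```
import asyncio

async def ok() -> bool:
    return True

# (name, coroutine) pairs act as the single source of truth.
critical_checks = [("relational_db", ok()), ("vector_db", ok())]
critical_comps = [check[0] for check in critical_checks]  # derived, never drifts

async def main():
    results = await asyncio.gather(*(check for _, check in critical_checks))
    print(dict(zip(critical_comps, results)))  # {'relational_db': True, 'vector_db': True}

asyncio.run(main())
```
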
cognee/api/v1/add/add.py CHANGED
@@ -1,6 +1,5 @@
 from uuid import UUID
-from typing import Union, BinaryIO, List, Optional
-
+from typing import Union, BinaryIO, List, Optional, Any
 from cognee.modules.users.models import User
 from cognee.modules.pipelines import Task, run_pipeline
 from cognee.modules.pipelines.layers.resolve_authorized_user_dataset import (
@@ -11,6 +10,9 @@ from cognee.modules.pipelines.layers.reset_dataset_pipeline_run_status import (
 )
 from cognee.modules.engine.operations.setup import setup
 from cognee.tasks.ingestion import ingest_data, resolve_data_directories
+from cognee.shared.logging_utils import get_logger
+
+logger = get_logger()


 async def add(
@@ -21,14 +23,15 @@ async def add(
     vector_db_config: dict = None,
     graph_db_config: dict = None,
     dataset_id: Optional[UUID] = None,
-    preferred_loaders: List[str] = None,
+    preferred_loaders: Optional[List[Union[str, dict[str, dict[str, Any]]]]] = None,
     incremental_loading: bool = True,
+    data_per_batch: Optional[int] = 20,
 ):
     """
     Add data to Cognee for knowledge graph processing.

     This is the first step in the Cognee workflow - it ingests raw data and prepares it
-    for processing. The function accepts various data formats including text, files, and
+    for processing. The function accepts various data formats including text, files, URLs, and
     binary streams, then stores them in a specified dataset for further processing.

     Prerequisites:
@@ -68,6 +71,7 @@ async def add(
            - S3 path: "s3://my-bucket/documents/file.pdf"
            - List of mixed types: ["text content", "/path/file.pdf", "file://doc.txt", file_handle]
            - Binary file object: open("file.txt", "rb")
+           - URL: a web link (http or https)
        dataset_name: Name of the dataset to store data in. Defaults to "main_dataset".
            Create separate datasets to organize different knowledge domains.
        user: User object for authentication and permissions. Uses default user if None.
@@ -78,6 +82,9 @@
        vector_db_config: Optional configuration for vector database (for custom setups).
        graph_db_config: Optional configuration for graph database (for custom setups).
        dataset_id: Optional specific dataset UUID to use instead of dataset_name.
+       extraction_rules: Optional dictionary of rules (e.g., CSS selectors, XPath) for extracting specific content from web pages using BeautifulSoup.
+       tavily_config: Optional configuration for the Tavily API, including API key and extraction settings.
+       soup_crawler_config: Optional configuration for the BeautifulSoup crawler, specifying concurrency, crawl delay, and extraction rules.

    Returns:
        PipelineRunInfo: Information about the ingestion pipeline execution including:
@@ -126,6 +133,21 @@

        # Add a single file
        await cognee.add("/home/user/documents/analysis.pdf")
+
+       # Add a single URL, extracted with the BeautifulSoup ingestion method
+       extraction_rules = {
+           "title": "h1",
+           "description": "p",
+           "more_info": "a[href*='more-info']"
+       }
+       await cognee.add("https://example.com", extraction_rules=extraction_rules)
+
+       # Add a single URL, extracted with the Tavily ingestion method
+       # (make sure TAVILY_API_KEY is set as an environment variable)
+       await cognee.add("https://example.com")
+
+       # Add multiple URLs
+       await cognee.add(["https://example.com", "https://books.toscrape.com"])
    ```

    Environment Variables:
@@ -133,17 +155,34 @@
        - LLM_API_KEY: API key for your LLM provider (OpenAI, Anthropic, etc.)

        Optional:
-       - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama"
+       - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama", "mistral"
        - LLM_MODEL: Model name (default: "gpt-5-mini")
        - DEFAULT_USER_EMAIL: Custom default user email
        - DEFAULT_USER_PASSWORD: Custom default user password
        - VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "pgvector"
        - GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j"
+       - TAVILY_API_KEY: API key for the Tavily web extraction service

    """
+    if preferred_loaders is not None:
+        transformed = {}
+        for item in preferred_loaders:
+            if isinstance(item, dict):
+                transformed.update(item)
+            else:
+                transformed[item] = {}
+        preferred_loaders = transformed
+
     tasks = [
         Task(resolve_data_directories, include_subdirectories=True),
-        Task(ingest_data, dataset_name, user, node_set, dataset_id, preferred_loaders),
+        Task(
+            ingest_data,
+            dataset_name,
+            user,
+            node_set,
+            dataset_id,
+            preferred_loaders,
+        ),
     ]

     await setup()
@@ -167,6 +206,7 @@
        vector_db_config=vector_db_config,
        graph_db_config=graph_db_config,
        incremental_loading=incremental_loading,
+       data_per_batch=data_per_batch,
    ):
        pipeline_run_info = run_info

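A note on the new `preferred_loaders` shape: the normalization above turns plain strings into `{name: {}}` entries and merges single-entry dicts as-is, so both forms can be mixed in one call. A minimal sketch; the loader names shown are illustrative, not necessarily ones that ship with cognee:

```
import asyncio
import cognee

async def main():
    await cognee.add(
        "https://example.com",
        preferred_loaders=[
            "text_loader",  # bare name -> normalized to {"text_loader": {}}
            {"beautiful_soup_loader": {"crawl_delay": 1.0}},  # name with config, merged as-is
        ],
    )

asyncio.run(main())
```
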
cognee/api/v1/add/routers/get_add_router.py CHANGED
@@ -10,6 +10,7 @@ from cognee.modules.users.methods import get_authenticated_user
 from cognee.shared.utils import send_telemetry
 from cognee.modules.pipelines.models import PipelineRunErrored
 from cognee.shared.logging_utils import get_logger
+from cognee import __version__ as cognee_version

 logger = get_logger()

@@ -63,7 +64,11 @@ def get_add_router() -> APIRouter:
         send_telemetry(
             "Add API Endpoint Invoked",
             user.id,
-            additional_properties={"endpoint": "POST /v1/add", "node_set": node_set},
+            additional_properties={
+                "endpoint": "POST /v1/add",
+                "node_set": node_set,
+                "cognee_version": cognee_version,
+            },
         )

         from cognee.api.v1.add import add as cognee_add
@@ -73,7 +78,11 @@ def get_add_router() -> APIRouter:

         try:
             add_run = await cognee_add(
-                data, datasetName, user=user, dataset_id=datasetId, node_set=node_set
+                data,
+                datasetName,
+                user=user,
+                dataset_id=datasetId,
+                node_set=node_set if node_set else None,
             )

             if isinstance(add_run, PipelineRunErrored):
cognee/api/v1/cognify/cognify.py CHANGED
@@ -44,6 +44,7 @@ async def cognify(
     graph_model: BaseModel = KnowledgeGraph,
     chunker=TextChunker,
     chunk_size: int = None,
+    chunks_per_batch: int = None,
     config: Config = None,
     vector_db_config: dict = None,
     graph_db_config: dict = None,
@@ -51,6 +52,7 @@
     incremental_loading: bool = True,
     custom_prompt: Optional[str] = None,
     temporal_cognify: bool = False,
+    data_per_batch: int = 20,
 ):
     """
     Transform ingested data into a structured knowledge graph.
@@ -105,6 +107,7 @@
            Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2)
            Default limits: ~512-8192 tokens depending on models.
            Smaller chunks = more granular but potentially fragmented knowledge.
+       chunks_per_batch: Number of chunks to process in a single batch in Cognify tasks.
        vector_db_config: Custom vector database configuration for embeddings storage.
        graph_db_config: Custom graph database configuration for relationship storage.
        run_in_background: If True, starts processing asynchronously and returns immediately.
@@ -148,7 +151,7 @@
        # 2. Get entity relationships and connections
        relationships = await cognee.search(
            "connections between concepts",
-           query_type=SearchType.INSIGHTS
+           query_type=SearchType.GRAPH_COMPLETION
        )

        # 3. Find relevant document chunks
@@ -209,10 +212,18 @@
     }

     if temporal_cognify:
-        tasks = await get_temporal_tasks(user, chunker, chunk_size)
+        tasks = await get_temporal_tasks(
+            user=user, chunker=chunker, chunk_size=chunk_size, chunks_per_batch=chunks_per_batch
+        )
     else:
         tasks = await get_default_tasks(
-            user, graph_model, chunker, chunk_size, config, custom_prompt
+            user=user,
+            graph_model=graph_model,
+            chunker=chunker,
+            chunk_size=chunk_size,
+            config=config,
+            custom_prompt=custom_prompt,
+            chunks_per_batch=chunks_per_batch,
         )

     # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
@@ -228,6 +239,7 @@
         graph_db_config=graph_db_config,
         incremental_loading=incremental_loading,
         pipeline_name="cognify_pipeline",
+        data_per_batch=data_per_batch,
     )


@@ -238,6 +250,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     chunk_size: int = None,
     config: Config = None,
     custom_prompt: Optional[str] = None,
+    chunks_per_batch: int = 100,
 ) -> list[Task]:
     if config is None:
         ontology_config = get_ontology_env_config()
@@ -256,6 +269,9 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
            "ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
        }

+    if chunks_per_batch is None:
+        chunks_per_batch = 100
+
     default_tasks = [
         Task(classify_documents),
         Task(check_permissions_on_dataset, user=user, permissions=["write"]),
@@ -269,20 +285,20 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
             graph_model=graph_model,
             config=config,
             custom_prompt=custom_prompt,
-            task_config={"batch_size": 10},
+            task_config={"batch_size": chunks_per_batch},
         ),  # Generate knowledge graphs from the document chunks.
         Task(
             summarize_text,
-            task_config={"batch_size": 10},
+            task_config={"batch_size": chunks_per_batch},
         ),
-        Task(add_data_points, task_config={"batch_size": 10}),
+        Task(add_data_points, task_config={"batch_size": chunks_per_batch}),
     ]

     return default_tasks


 async def get_temporal_tasks(
-    user: User = None, chunker=TextChunker, chunk_size: int = None
+    user: User = None, chunker=TextChunker, chunk_size: int = None, chunks_per_batch: int = 10
 ) -> list[Task]:
     """
     Builds and returns a list of temporal processing tasks to be executed in sequence.
@@ -299,10 +315,14 @@ async def get_temporal_tasks(
        user (User, optional): The user requesting task execution, used for permission checks.
        chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
        chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
+       chunks_per_batch (int, optional): Number of chunks to process in a single batch in Cognify.

    Returns:
        list[Task]: A list of Task objects representing the temporal processing pipeline.
    """
+    if chunks_per_batch is None:
+        chunks_per_batch = 10
+
     temporal_tasks = [
         Task(classify_documents),
         Task(check_permissions_on_dataset, user=user, permissions=["write"]),
@@ -311,9 +331,9 @@
             max_chunk_size=chunk_size or get_max_chunk_tokens(),
             chunker=chunker,
         ),
-        Task(extract_events_and_timestamps, task_config={"chunk_size": 10}),
+        Task(extract_events_and_timestamps, task_config={"batch_size": chunks_per_batch}),
         Task(extract_knowledge_graph_from_events),
-        Task(add_data_points, task_config={"batch_size": 10}),
+        Task(add_data_points, task_config={"batch_size": chunks_per_batch}),
     ]

     return temporal_tasks
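
Per the defaults above, the standard pipeline batches 100 chunks per task invocation and the temporal pipeline 10, while `data_per_batch` caps how many data items run through the pipeline together. A hedged sketch of tuning both knobs from the public API:

```
import asyncio
import cognee

async def main():
    await cognee.add("Some text about events in 1969.")
    # Larger chunk batches mean fewer, bigger task invocations;
    # data_per_batch limits how many data items are processed per pipeline batch.
    await cognee.cognify(chunks_per_batch=50, data_per_batch=10)

asyncio.run(main())
```
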
cognee/api/v1/cognify/routers/get_cognify_router.py CHANGED
@@ -29,7 +29,7 @@ from cognee.modules.pipelines.queues.pipeline_run_info_queues import (
 )
 from cognee.shared.logging_utils import get_logger
 from cognee.shared.utils import send_telemetry
-
+from cognee import __version__ as cognee_version

 logger = get_logger("api.cognify")

@@ -98,6 +98,7 @@ def get_cognify_router() -> APIRouter:
             user.id,
             additional_properties={
                 "endpoint": "POST /v1/cognify",
+                "cognee_version": cognee_version,
             },
         )

cognee/api/v1/datasets/datasets.py CHANGED
@@ -1,4 +1,5 @@
 from uuid import UUID
+from cognee.modules.data.methods import has_dataset_data
 from cognee.modules.users.methods import get_default_user
 from cognee.modules.ingestion import discover_directory_datasets
 from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status
@@ -26,6 +27,16 @@ class datasets:

         return await get_dataset_data(dataset.id)

+    @staticmethod
+    async def has_data(dataset_id: str) -> bool:
+        from cognee.modules.data.methods import get_dataset
+
+        user = await get_default_user()
+
+        dataset = await get_dataset(user.id, dataset_id)
+
+        return await has_dataset_data(dataset.id)
+
     @staticmethod
     async def get_status(dataset_ids: list[UUID]) -> dict:
         return await get_pipeline_status(dataset_ids, pipeline_name="cognify_pipeline")
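
The new `datasets.has_data` helper makes it cheap to guard against re-ingestion. A minimal usage sketch, assuming a dataset already exists (the UUID string is a placeholder):

```
import asyncio
from uuid import UUID

import cognee

async def main(dataset_id: str):
    # True once the dataset contains at least one ingested data item.
    if await cognee.datasets.has_data(dataset_id):
        print("Dataset already populated; skipping add().")
    else:
        await cognee.add("fresh content", dataset_id=UUID(dataset_id))

asyncio.run(main("00000000-0000-0000-0000-000000000000"))  # placeholder UUID
```
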
cognee/api/v1/datasets/routers/get_datasets_router.py CHANGED
@@ -24,6 +24,7 @@ from cognee.modules.users.permissions.methods import (
 from cognee.modules.graph.methods import get_formatted_graph_data
 from cognee.modules.pipelines.models import PipelineRunStatus
 from cognee.shared.utils import send_telemetry
+from cognee import __version__ as cognee_version

 logger = get_logger()

@@ -100,6 +101,7 @@ def get_datasets_router() -> APIRouter:
             user.id,
             additional_properties={
                 "endpoint": "GET /v1/datasets",
+                "cognee_version": cognee_version,
             },
         )

@@ -147,6 +149,7 @@
             user.id,
             additional_properties={
                 "endpoint": "POST /v1/datasets",
+                "cognee_version": cognee_version,
             },
         )

@@ -201,6 +204,7 @@
             additional_properties={
                 "endpoint": f"DELETE /v1/datasets/{str(dataset_id)}",
                 "dataset_id": str(dataset_id),
+                "cognee_version": cognee_version,
             },
         )

@@ -246,6 +250,7 @@
                 "endpoint": f"DELETE /v1/datasets/{str(dataset_id)}/data/{str(data_id)}",
                 "dataset_id": str(dataset_id),
                 "data_id": str(data_id),
+                "cognee_version": cognee_version,
             },
         )

@@ -327,6 +332,7 @@
             additional_properties={
                 "endpoint": f"GET /v1/datasets/{str(dataset_id)}/data",
                 "dataset_id": str(dataset_id),
+                "cognee_version": cognee_version,
             },
         )

@@ -387,6 +393,7 @@
             additional_properties={
                 "endpoint": "GET /v1/datasets/status",
                 "datasets": [str(dataset_id) for dataset_id in datasets],
+                "cognee_version": cognee_version,
             },
         )

@@ -433,6 +440,7 @@
                 "endpoint": f"GET /v1/datasets/{str(dataset_id)}/data/{str(data_id)}/raw",
                 "dataset_id": str(dataset_id),
                 "data_id": str(data_id),
+                "cognee_version": cognee_version,
             },
         )

cognee/api/v1/delete/routers/get_delete_router.py CHANGED
@@ -6,6 +6,7 @@ from cognee.shared.logging_utils import get_logger
 from cognee.modules.users.models import User
 from cognee.modules.users.methods import get_authenticated_user
 from cognee.shared.utils import send_telemetry
+from cognee import __version__ as cognee_version

 logger = get_logger()

@@ -39,6 +40,7 @@ def get_delete_router() -> APIRouter:
                 "endpoint": "DELETE /v1/delete",
                 "dataset_id": str(dataset_id),
                 "data_id": str(data_id),
+                "cognee_version": cognee_version,
             },
         )

cognee/api/v1/memify/routers/get_memify_router.py CHANGED
@@ -12,6 +12,7 @@ from cognee.modules.users.methods import get_authenticated_user
 from cognee.shared.utils import send_telemetry
 from cognee.modules.pipelines.models import PipelineRunErrored
 from cognee.shared.logging_utils import get_logger
+from cognee import __version__ as cognee_version

 logger = get_logger()

@@ -73,7 +74,7 @@ def get_memify_router() -> APIRouter:
         send_telemetry(
             "Memify API Endpoint Invoked",
             user.id,
-            additional_properties={"endpoint": "POST /v1/memify"},
+            additional_properties={"endpoint": "POST /v1/memify", "cognee_version": cognee_version},
         )

         if not payload.dataset_id and not payload.dataset_name:
cognee/api/v1/permissions/routers/get_permissions_router.py CHANGED
@@ -7,6 +7,7 @@ from fastapi.responses import JSONResponse
 from cognee.modules.users.models import User
 from cognee.modules.users.methods import get_authenticated_user
 from cognee.shared.utils import send_telemetry
+from cognee import __version__ as cognee_version


 def get_permissions_router() -> APIRouter:
@@ -48,6 +49,7 @@ def get_permissions_router() -> APIRouter:
                 "endpoint": f"POST /v1/permissions/datasets/{str(principal_id)}",
                 "dataset_ids": str(dataset_ids),
                 "principal_id": str(principal_id),
+                "cognee_version": cognee_version,
             },
         )

@@ -89,6 +91,7 @@
             additional_properties={
                 "endpoint": "POST /v1/permissions/roles",
                 "role_name": role_name,
+                "cognee_version": cognee_version,
             },
         )

@@ -133,6 +136,7 @@
                 "endpoint": f"POST /v1/permissions/users/{str(user_id)}/roles",
                 "user_id": str(user_id),
                 "role_id": str(role_id),
+                "cognee_version": cognee_version,
             },
         )

@@ -175,6 +179,7 @@
                 "endpoint": f"POST /v1/permissions/users/{str(user_id)}/tenants",
                 "user_id": str(user_id),
                 "tenant_id": str(tenant_id),
+                "cognee_version": cognee_version,
             },
         )

@@ -209,6 +214,7 @@
             additional_properties={
                 "endpoint": "POST /v1/permissions/tenants",
                 "tenant_name": tenant_name,
+                "cognee_version": cognee_version,
             },
         )

cognee/api/v1/responses/default_tools.py CHANGED
@@ -14,7 +14,6 @@ DEFAULT_TOOLS = [
                 "type": "string",
                 "description": "Type of search to perform",
                 "enum": [
-                    "INSIGHTS",
                     "CODE",
                     "GRAPH_COMPLETION",
                     "NATURAL_LANGUAGE",
cognee/api/v1/responses/dispatch_function.py CHANGED
@@ -59,7 +59,7 @@ async def handle_search(arguments: Dict[str, Any], user) -> list:
     valid_search_types = (
         search_tool["parameters"]["properties"]["search_type"]["enum"]
         if search_tool
-        else ["INSIGHTS", "CODE", "GRAPH_COMPLETION", "NATURAL_LANGUAGE"]
+        else ["CODE", "GRAPH_COMPLETION", "NATURAL_LANGUAGE"]
     )

     if search_type_str not in valid_search_types:
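
Since `INSIGHTS` is no longer an accepted search type anywhere in this release (the cognify docstring now points to `GRAPH_COMPLETION` instead), older callers need a mapping step. A small illustrative client-side shim, not part of the package:

```
# Hypothetical shim for callers that still send "INSIGHTS".
def migrate_search_type(requested: str) -> str:
    # GRAPH_COMPLETION is the closest surviving type per this release's docs.
    return "GRAPH_COMPLETION" if requested == "INSIGHTS" else requested

assert migrate_search_type("INSIGHTS") == "GRAPH_COMPLETION"
assert migrate_search_type("CODE") == "CODE"
```
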
cognee/api/v1/responses/routers/default_tools.py CHANGED
@@ -14,7 +14,6 @@ DEFAULT_TOOLS = [
                 "type": "string",
                 "description": "Type of search to perform",
                 "enum": [
-                    "INSIGHTS",
                     "CODE",
                     "GRAPH_COMPLETION",
                     "NATURAL_LANGUAGE",
cognee/api/v1/search/routers/get_search_router.py CHANGED
@@ -13,6 +13,7 @@ from cognee.modules.users.models import User
 from cognee.modules.search.operations import get_history
 from cognee.modules.users.methods import get_authenticated_user
 from cognee.shared.utils import send_telemetry
+from cognee import __version__ as cognee_version


 # Note: Datasets sent by name will only map to datasets owned by the request sender
@@ -61,9 +62,7 @@ def get_search_router() -> APIRouter:
         send_telemetry(
             "Search API Endpoint Invoked",
             user.id,
-            additional_properties={
-                "endpoint": "GET /v1/search",
-            },
+            additional_properties={"endpoint": "GET /v1/search", "cognee_version": cognee_version},
         )

         try:
@@ -118,6 +117,7 @@
                 "top_k": payload.top_k,
                 "only_context": payload.only_context,
                 "use_combined_context": payload.use_combined_context,
+                "cognee_version": cognee_version,
             },
         )

cognee/api/v1/search/search.py CHANGED
@@ -1,6 +1,7 @@
 from uuid import UUID
 from typing import Union, Optional, List, Type

+from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.modules.engine.models.node_set import NodeSet
 from cognee.modules.users.models import User
 from cognee.modules.search.types import SearchResult, SearchType, CombinedSearchResult
@@ -8,6 +9,10 @@ from cognee.modules.users.methods import get_default_user
 from cognee.modules.search.methods import search as search_function
 from cognee.modules.data.methods import get_authorized_existing_datasets
 from cognee.modules.data.exceptions import DatasetNotFoundError
+from cognee.context_global_variables import set_session_user_context_variable
+from cognee.shared.logging_utils import get_logger
+
+logger = get_logger()


 async def search(
@@ -25,6 +30,7 @@ async def search(
     last_k: Optional[int] = 1,
     only_context: bool = False,
     use_combined_context: bool = False,
+    session_id: Optional[str] = None,
 ) -> Union[List[SearchResult], CombinedSearchResult]:
     """
     Search and query the knowledge graph for insights, information, and connections.
@@ -52,11 +58,6 @@
            Best for: Direct document retrieval, specific fact-finding.
            Returns: LLM responses based on relevant text chunks.

-       **INSIGHTS**:
-           Structured entity relationships and semantic connections.
-           Best for: Understanding concept relationships, knowledge mapping.
-           Returns: Formatted relationship data and entity connections.
-
        **CHUNKS**:
            Raw text segments that match the query semantically.
            Best for: Finding specific passages, citations, exact content.
@@ -118,15 +119,14 @@

        save_interaction: Save interaction (query, context, answer connected to triplet endpoints) results into the graph or not

+       session_id: Optional session identifier for caching Q&A interactions. Defaults to 'default_session' if None.
+
    Returns:
        list: Search results in format determined by query_type:

        **GRAPH_COMPLETION/RAG_COMPLETION**:
            [List of conversational AI response strings]

-       **INSIGHTS**:
-           [List of formatted relationship descriptions and entity connections]
-
        **CHUNKS**:
            [List of relevant text passages with source metadata]

@@ -146,7 +146,6 @@
    Performance & Optimization:
        - **GRAPH_COMPLETION**: Slower but most intelligent, uses LLM + graph context
        - **RAG_COMPLETION**: Medium speed, uses LLM + document chunks (no graph traversal)
-       - **INSIGHTS**: Fast, returns structured relationships without LLM processing
        - **CHUNKS**: Fastest, pure vector similarity search without LLM
        - **SUMMARIES**: Fast, returns pre-computed summaries
        - **CODE**: Medium speed, specialized for code understanding
@@ -177,6 +176,8 @@
     if user is None:
         user = await get_default_user()

+    await set_session_user_context_variable(user)
+
     # Transform string based datasets to UUID - String based datasets can only be found for current user
     if datasets is not None and [all(isinstance(dataset, str) for dataset in datasets)]:
         datasets = await get_authorized_existing_datasets(datasets, "read", user)
@@ -198,6 +199,7 @@
         last_k=last_k,
         only_context=only_context,
         use_combined_context=use_combined_context,
+        session_id=session_id,
     )

     return filtered_search_results
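
With `session_id` now threaded through to the search layer (alongside the new session cache in `cognee/modules/retrieval/utils/session_cache.py`), repeated questions in one session can share cached Q&A context. A minimal usage sketch; the session identifier is illustrative:

```
import asyncio
import cognee
from cognee.modules.search.types import SearchType

async def main():
    # Interactions sharing a session_id are cached together;
    # omitting it falls back to 'default_session' per the docstring above.
    results = await cognee.search(
        "What are the main topics?",
        query_type=SearchType.GRAPH_COMPLETION,
        session_id="support-chat-42",
    )
    print(results)

asyncio.run(main())
```
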
cognee/api/v1/settings/routers/get_settings_router.py CHANGED
@@ -21,7 +21,13 @@ class SettingsDTO(OutDTO):


 class LLMConfigInputDTO(InDTO):
-    provider: Union[Literal["openai"], Literal["ollama"], Literal["anthropic"], Literal["gemini"]]
+    provider: Union[
+        Literal["openai"],
+        Literal["ollama"],
+        Literal["anthropic"],
+        Literal["gemini"],
+        Literal["mistral"],
+    ]
     model: str
     api_key: str
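
For reference, a request body the settings endpoint should now accept with the widened provider union; the field names simply mirror `LLMConfigInputDTO`, and the model name is illustrative:

```
# Hypothetical payload for the LLM settings endpoint after this change.
payload = {
    "provider": "mistral",            # newly accepted literal
    "model": "mistral-large-latest",  # illustrative model name
    "api_key": "YOUR_MISTRAL_API_KEY",
}
```
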