cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. cognee/__init__.py +2 -0
  2. cognee/alembic/README +1 -0
  3. cognee/alembic/env.py +107 -0
  4. cognee/alembic/script.py.mako +26 -0
  5. cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
  6. cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
  7. cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
  8. cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
  9. cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
  10. cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
  11. cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
  12. cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
  13. cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
  14. cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
  15. cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
  16. cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
  17. cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
  18. cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
  19. cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
  20. cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
  21. cognee/alembic.ini +117 -0
  22. cognee/api/v1/add/routers/get_add_router.py +2 -0
  23. cognee/api/v1/cognify/cognify.py +11 -6
  24. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
  25. cognee/api/v1/config/config.py +60 -0
  26. cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
  27. cognee/api/v1/memify/routers/get_memify_router.py +2 -0
  28. cognee/api/v1/search/routers/get_search_router.py +21 -6
  29. cognee/api/v1/search/search.py +25 -5
  30. cognee/api/v1/sync/routers/get_sync_router.py +3 -3
  31. cognee/cli/commands/add_command.py +1 -1
  32. cognee/cli/commands/cognify_command.py +6 -0
  33. cognee/cli/commands/config_command.py +1 -1
  34. cognee/context_global_variables.py +5 -1
  35. cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
  36. cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
  37. cognee/infrastructure/databases/cache/config.py +6 -0
  38. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
  39. cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
  40. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
  41. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
  42. cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
  43. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
  44. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
  45. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
  46. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
  47. cognee/infrastructure/databases/vector/config.py +6 -0
  48. cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
  49. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
  50. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
  51. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
  52. cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
  53. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
  54. cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
  55. cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
  57. cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
  58. cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
  59. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
  60. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
  61. cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
  62. cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
  63. cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
  64. cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
  65. cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
  66. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
  67. cognee/infrastructure/llm/prompts/test.txt +1 -1
  68. cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
  71. cognee/modules/chunking/models/DocumentChunk.py +0 -1
  72. cognee/modules/cognify/config.py +2 -0
  73. cognee/modules/data/models/Data.py +1 -0
  74. cognee/modules/engine/models/Entity.py +0 -1
  75. cognee/modules/engine/operations/setup.py +6 -0
  76. cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
  77. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
  78. cognee/modules/graph/utils/__init__.py +1 -0
  79. cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
  80. cognee/modules/notebooks/methods/__init__.py +1 -0
  81. cognee/modules/notebooks/methods/create_notebook.py +0 -34
  82. cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
  83. cognee/modules/notebooks/methods/get_notebooks.py +12 -8
  84. cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
  85. cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
  86. cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
  87. cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
  88. cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
  89. cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
  90. cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
  91. cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
  92. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
  93. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
  94. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
  95. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
  96. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
  97. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
  98. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
  99. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
  100. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
  101. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
  102. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
  103. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
  104. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
  105. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
  106. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
  107. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
  108. cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
  109. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
  110. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
  111. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
  112. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
  113. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
  114. cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
  115. cognee/modules/retrieval/__init__.py +0 -1
  116. cognee/modules/retrieval/base_retriever.py +66 -10
  117. cognee/modules/retrieval/chunks_retriever.py +57 -49
  118. cognee/modules/retrieval/coding_rules_retriever.py +12 -5
  119. cognee/modules/retrieval/completion_retriever.py +29 -28
  120. cognee/modules/retrieval/cypher_search_retriever.py +25 -20
  121. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
  122. cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
  123. cognee/modules/retrieval/graph_completion_retriever.py +78 -63
  124. cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
  125. cognee/modules/retrieval/lexical_retriever.py +34 -12
  126. cognee/modules/retrieval/natural_language_retriever.py +18 -15
  127. cognee/modules/retrieval/summaries_retriever.py +51 -34
  128. cognee/modules/retrieval/temporal_retriever.py +59 -49
  129. cognee/modules/retrieval/triplet_retriever.py +31 -32
  130. cognee/modules/retrieval/utils/access_tracking.py +88 -0
  131. cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
  132. cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
  133. cognee/modules/search/methods/__init__.py +1 -0
  134. cognee/modules/search/methods/get_retriever_output.py +53 -0
  135. cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
  136. cognee/modules/search/methods/search.py +90 -215
  137. cognee/modules/search/models/SearchResultPayload.py +67 -0
  138. cognee/modules/search/types/SearchResult.py +1 -8
  139. cognee/modules/search/types/SearchType.py +1 -2
  140. cognee/modules/search/types/__init__.py +1 -1
  141. cognee/modules/search/utils/__init__.py +1 -2
  142. cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
  143. cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
  144. cognee/modules/users/authentication/default/default_transport.py +11 -1
  145. cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
  146. cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
  147. cognee/modules/users/methods/create_user.py +0 -9
  148. cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
  149. cognee/modules/visualization/cognee_network_visualization.py +1 -1
  150. cognee/run_migrations.py +48 -0
  151. cognee/shared/exceptions/__init__.py +1 -3
  152. cognee/shared/exceptions/exceptions.py +11 -1
  153. cognee/shared/usage_logger.py +332 -0
  154. cognee/shared/utils.py +12 -5
  155. cognee/tasks/chunks/__init__.py +9 -0
  156. cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
  157. cognee/tasks/graph/__init__.py +7 -0
  158. cognee/tasks/memify/__init__.py +8 -0
  159. cognee/tasks/memify/extract_usage_frequency.py +613 -0
  160. cognee/tasks/summarization/models.py +0 -2
  161. cognee/tasks/temporal_graph/__init__.py +0 -1
  162. cognee/tasks/translation/__init__.py +96 -0
  163. cognee/tasks/translation/config.py +110 -0
  164. cognee/tasks/translation/detect_language.py +190 -0
  165. cognee/tasks/translation/exceptions.py +62 -0
  166. cognee/tasks/translation/models.py +72 -0
  167. cognee/tasks/translation/providers/__init__.py +44 -0
  168. cognee/tasks/translation/providers/azure_provider.py +192 -0
  169. cognee/tasks/translation/providers/base.py +85 -0
  170. cognee/tasks/translation/providers/google_provider.py +158 -0
  171. cognee/tasks/translation/providers/llm_provider.py +143 -0
  172. cognee/tasks/translation/translate_content.py +282 -0
  173. cognee/tasks/web_scraper/default_url_crawler.py +6 -2
  174. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
  175. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
  176. cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
  177. cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
  178. cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
  179. cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
  180. cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
  181. cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
  182. cognee/tests/integration/retrieval/test_structured_output.py +62 -18
  183. cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
  184. cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
  185. cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
  186. cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
  187. cognee/tests/tasks/translation/README.md +147 -0
  188. cognee/tests/tasks/translation/__init__.py +1 -0
  189. cognee/tests/tasks/translation/config_test.py +93 -0
  190. cognee/tests/tasks/translation/detect_language_test.py +118 -0
  191. cognee/tests/tasks/translation/providers_test.py +151 -0
  192. cognee/tests/tasks/translation/translate_content_test.py +213 -0
  193. cognee/tests/test_chromadb.py +1 -1
  194. cognee/tests/test_cleanup_unused_data.py +165 -0
  195. cognee/tests/test_delete_by_id.py +6 -6
  196. cognee/tests/test_extract_usage_frequency.py +308 -0
  197. cognee/tests/test_kuzu.py +17 -7
  198. cognee/tests/test_lancedb.py +3 -1
  199. cognee/tests/test_library.py +1 -1
  200. cognee/tests/test_neo4j.py +17 -7
  201. cognee/tests/test_neptune_analytics_vector.py +3 -1
  202. cognee/tests/test_permissions.py +172 -187
  203. cognee/tests/test_pgvector.py +3 -1
  204. cognee/tests/test_relational_db_migration.py +15 -1
  205. cognee/tests/test_remote_kuzu.py +3 -1
  206. cognee/tests/test_s3_file_storage.py +1 -1
  207. cognee/tests/test_search_db.py +97 -110
  208. cognee/tests/test_usage_logger_e2e.py +268 -0
  209. cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
  210. cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
  211. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
  212. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
  213. cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
  214. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
  215. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
  216. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
  217. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
  218. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
  219. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
  220. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
  221. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
  222. cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
  223. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
  224. cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
  225. cognee/tests/unit/modules/search/test_search.py +176 -0
  226. cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
  227. cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
  228. cognee/tests/unit/shared/test_usage_logger.py +241 -0
  229. cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
  230. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/METADATA +22 -17
  231. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/RECORD +235 -147
  232. cognee/api/.env.example +0 -5
  233. cognee/modules/retrieval/base_graph_retriever.py +0 -24
  234. cognee/modules/search/methods/get_search_type_tools.py +0 -223
  235. cognee/modules/search/methods/no_access_control_search.py +0 -62
  236. cognee/modules/search/utils/prepare_search_result.py +0 -63
  237. cognee/tests/test_feedback_enrichment.py +0 -174
  238. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/WHEEL +0 -0
  239. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/entry_points.txt +0 -0
  240. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/LICENSE +0 -0
  241. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/NOTICE.md +0 -0
cognee/alembic.ini ADDED
@@ -0,0 +1,117 @@
1
+ # A generic, single database configuration.
2
+
3
+ [alembic]
4
+ # path to migration scripts
5
+ # Use forward slashes (/) also on windows to provide an os agnostic path
6
+ script_location = alembic
7
+
8
+ # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
9
+ # Uncomment the line below if you want the files to be prepended with date and time
10
+ # see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
11
+ # for all available tokens
12
+ # file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
13
+
14
+ # sys.path path, will be prepended to sys.path if present.
15
+ # defaults to the current working directory.
16
+ prepend_sys_path = .
17
+
18
+ # timezone to use when rendering the date within the migration file
19
+ # as well as the filename.
20
+ # If specified, requires python>=3.9 or the backports.zoneinfo library.
21
+ # Any required deps can be installed by adding `alembic[tz]` to the pip requirements
22
+ # string value is passed to ZoneInfo()
23
+ # leave blank for localtime
24
+ # timezone =
25
+
26
+ # max length of characters to apply to the "slug" field
27
+ # truncate_slug_length = 40
28
+
29
+ # set to 'true' to run the environment during
30
+ # the 'revision' command, regardless of autogenerate
31
+ # revision_environment = false
32
+
33
+ # set to 'true' to allow .pyc and .pyo files without
34
+ # a source .py file to be detected as revisions in the
35
+ # versions/ directory
36
+ # sourceless = false
37
+
38
+ # version location specification; This defaults
39
+ # to alembic/versions. When using multiple version
40
+ # directories, initial revisions must be specified with --version-path.
41
+ # The path separator used here should be the separator specified by "version_path_separator" below.
42
+ # version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
43
+
44
+ # version path separator; As mentioned above, this is the character used to split
45
+ # version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
46
+ # If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
47
+ # Valid values for version_path_separator are:
48
+ #
49
+ # version_path_separator = :
50
+ # version_path_separator = ;
51
+ # version_path_separator = space
52
+ # version_path_separator = newline
53
+ version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
54
+
55
+ # set to 'true' to search source files recursively
56
+ # in each "version_locations" directory
57
+ # new in Alembic version 1.10
58
+ # recursive_version_locations = false
59
+
60
+ # the output encoding used when revision files
61
+ # are written from script.py.mako
62
+ # output_encoding = utf-8
63
+
64
+ sqlalchemy.url = %(SQLALCHEMY_DATABASE_URI)s
65
+
66
+
67
+ [post_write_hooks]
68
+ # post_write_hooks defines scripts or Python functions that are run
69
+ # on newly generated revision scripts. See the documentation for further
70
+ # detail and examples
71
+
72
+ # format using "black" - use the console_scripts runner, against the "black" entrypoint
73
+ # hooks = black
74
+ # black.type = console_scripts
75
+ # black.entrypoint = black
76
+ # black.options = -l 79 REVISION_SCRIPT_FILENAME
77
+
78
+ # lint with attempts to fix using "ruff" - use the exec runner, execute a binary
79
+ # hooks = ruff
80
+ # ruff.type = exec
81
+ # ruff.executable = %(here)s/.venv/bin/ruff
82
+ # ruff.options = --fix REVISION_SCRIPT_FILENAME
83
+
84
+ # Logging configuration
85
+ [loggers]
86
+ keys = root,sqlalchemy,alembic
87
+
88
+ [handlers]
89
+ keys = console
90
+
91
+ [formatters]
92
+ keys = generic
93
+
94
+ [logger_root]
95
+ level = WARN
96
+ handlers = console
97
+ qualname =
98
+
99
+ [logger_sqlalchemy]
100
+ level = WARN
101
+ handlers =
102
+ qualname = sqlalchemy.engine
103
+
104
+ [logger_alembic]
105
+ level = WARN
106
+ handlers =
107
+ qualname = alembic
108
+
109
+ [handler_console]
110
+ class = StreamHandler
111
+ args = (sys.stderr,)
112
+ level = NOTSET
113
+ formatter = generic
114
+
115
+ [formatter_generic]
116
+ format = %(levelname)-5.5s [%(name)s] %(message)s
117
+ datefmt = %H:%M:%S
@@ -10,6 +10,7 @@ from cognee.modules.users.methods import get_authenticated_user
10
10
  from cognee.shared.utils import send_telemetry
11
11
  from cognee.modules.pipelines.models import PipelineRunErrored
12
12
  from cognee.shared.logging_utils import get_logger
13
+ from cognee.shared.usage_logger import log_usage
13
14
  from cognee import __version__ as cognee_version
14
15
 
15
16
  logger = get_logger()
@@ -19,6 +20,7 @@ def get_add_router() -> APIRouter:
19
20
  router = APIRouter()
20
21
 
21
22
  @router.post("", response_model=dict)
23
+ @log_usage(function_name="POST /v1/add", log_type="api_endpoint")
22
24
  async def add(
23
25
  data: List[UploadFile] = File(default=None),
24
26
  datasetName: Optional[str] = Form(default=None),
@@ -252,7 +252,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
252
252
  chunk_size: int = None,
253
253
  config: Config = None,
254
254
  custom_prompt: Optional[str] = None,
255
- chunks_per_batch: int = 100,
255
+ chunks_per_batch: int = None,
256
256
  **kwargs,
257
257
  ) -> list[Task]:
258
258
  if config is None:
@@ -272,12 +272,14 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
272
272
  "ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
273
273
  }
274
274
 
275
- if chunks_per_batch is None:
276
- chunks_per_batch = 100
277
-
278
275
  cognify_config = get_cognify_config()
279
276
  embed_triplets = cognify_config.triplet_embedding
280
277
 
278
+ if chunks_per_batch is None:
279
+ chunks_per_batch = (
280
+ cognify_config.chunks_per_batch if cognify_config.chunks_per_batch is not None else 100
281
+ )
282
+
281
283
  default_tasks = [
282
284
  Task(classify_documents),
283
285
  Task(
@@ -308,7 +310,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
308
310
 
309
311
 
310
312
  async def get_temporal_tasks(
311
- user: User = None, chunker=TextChunker, chunk_size: int = None, chunks_per_batch: int = 10
313
+ user: User = None, chunker=TextChunker, chunk_size: int = None, chunks_per_batch: int = None
312
314
  ) -> list[Task]:
313
315
  """
314
316
  Builds and returns a list of temporal processing tasks to be executed in sequence.
@@ -330,7 +332,10 @@ async def get_temporal_tasks(
330
332
  list[Task]: A list of Task objects representing the temporal processing pipeline.
331
333
  """
332
334
  if chunks_per_batch is None:
333
- chunks_per_batch = 10
335
+ from cognee.modules.cognify.config import get_cognify_config
336
+
337
+ configured = get_cognify_config().chunks_per_batch
338
+ chunks_per_batch = configured if configured is not None else 10
334
339
 
335
340
  temporal_tasks = [
336
341
  Task(classify_documents),
@@ -29,6 +29,7 @@ from cognee.modules.pipelines.queues.pipeline_run_info_queues import (
29
29
  )
30
30
  from cognee.shared.logging_utils import get_logger
31
31
  from cognee.shared.utils import send_telemetry
32
+ from cognee.shared.usage_logger import log_usage
32
33
  from cognee import __version__ as cognee_version
33
34
 
34
35
  logger = get_logger("api.cognify")
@@ -46,12 +47,18 @@ class CognifyPayloadDTO(InDTO):
46
47
  examples=[[]],
47
48
  description="Reference to one or more previously uploaded ontologies",
48
49
  )
50
+ chunks_per_batch: Optional[int] = Field(
51
+ default=None,
52
+ description="Number of chunks to process per task batch in Cognify (overrides default).",
53
+ examples=[10, 20, 50, 100],
54
+ )
49
55
 
50
56
 
51
57
  def get_cognify_router() -> APIRouter:
52
58
  router = APIRouter()
53
59
 
54
60
  @router.post("", response_model=dict)
61
+ @log_usage(function_name="POST /v1/cognify", log_type="api_endpoint")
55
62
  async def cognify(payload: CognifyPayloadDTO, user: User = Depends(get_authenticated_user)):
56
63
  """
57
64
  Transform datasets into structured knowledge graphs through cognitive processing.
@@ -146,6 +153,7 @@ def get_cognify_router() -> APIRouter:
146
153
  config=config_to_use,
147
154
  run_in_background=payload.run_in_background,
148
155
  custom_prompt=payload.custom_prompt,
156
+ chunks_per_batch=payload.chunks_per_batch,
149
157
  )
150
158
 
151
159
  # If any cognify run errored return JSONResponse with proper error status code
@@ -10,6 +10,7 @@ from cognee.infrastructure.llm.config import (
10
10
  get_llm_config,
11
11
  )
12
12
  from cognee.infrastructure.databases.relational import get_relational_config, get_migration_config
13
+ from cognee.tasks.translation.config import get_translation_config
13
14
  from cognee.api.v1.exceptions.exceptions import InvalidConfigAttributeError
14
15
 
15
16
 
@@ -176,3 +177,62 @@ class config:
176
177
  def set_vector_db_url(db_url: str):
177
178
  vector_db_config = get_vectordb_config()
178
179
  vector_db_config.vector_db_url = db_url
180
+
181
+ # Translation configuration methods
182
+
183
+ @staticmethod
184
+ def set_translation_provider(provider: str):
185
+ """Set the translation provider (llm, google, azure)."""
186
+ translation_config = get_translation_config()
187
+ translation_config.translation_provider = provider
188
+
189
+ @staticmethod
190
+ def set_translation_target_language(target_language: str):
191
+ """Set the default target language for translations."""
192
+ translation_config = get_translation_config()
193
+ translation_config.target_language = target_language
194
+
195
+ @staticmethod
196
+ def set_translation_config(config_dict: dict):
197
+ """
198
+ Updates the translation config with values from config_dict.
199
+ """
200
+ translation_config = get_translation_config()
201
+ for key, value in config_dict.items():
202
+ if hasattr(translation_config, key):
203
+ object.__setattr__(translation_config, key, value)
204
+ else:
205
+ raise InvalidConfigAttributeError(attribute=key)
206
+
207
+ def set(key: str, value):
208
+ """
209
+ Generic setter that maps configuration keys to their specific setter methods.
210
+ This enables CLI commands like 'cognee config set llm_api_key <value>'.
211
+ """
212
+ # Map configuration keys to their setter methods
213
+ setter_mapping = {
214
+ "llm_provider": "set_llm_provider",
215
+ "llm_model": "set_llm_model",
216
+ "llm_api_key": "set_llm_api_key",
217
+ "llm_endpoint": "set_llm_endpoint",
218
+ "graph_database_provider": "set_graph_database_provider",
219
+ "vector_db_provider": "set_vector_db_provider",
220
+ "vector_db_url": "set_vector_db_url",
221
+ "vector_db_key": "set_vector_db_key",
222
+ "chunk_size": "set_chunk_size",
223
+ "chunk_overlap": "set_chunk_overlap",
224
+ "chunk_strategy": "set_chunk_strategy",
225
+ "chunk_engine": "set_chunk_engine",
226
+ "classification_model": "set_classification_model",
227
+ "summarization_model": "set_summarization_model",
228
+ "graph_model": "set_graph_model",
229
+ "system_root_directory": "system_root_directory",
230
+ "data_root_directory": "data_root_directory",
231
+ }
232
+
233
+ if key not in setter_mapping:
234
+ raise InvalidConfigAttributeError(attribute=key)
235
+
236
+ method_name = setter_mapping[key]
237
+ method = getattr(config, method_name)
238
+ method(value)
@@ -7,7 +7,9 @@ from fastapi import status
7
7
  from fastapi import APIRouter
8
8
  from fastapi.encoders import jsonable_encoder
9
9
  from fastapi import HTTPException, Query, Depends
10
- from fastapi.responses import JSONResponse, FileResponse
10
+ from fastapi.responses import JSONResponse, FileResponse, StreamingResponse, Response
11
+ from urllib.parse import urlparse
12
+ from pathlib import Path
11
13
 
12
14
  from cognee.api.DTO import InDTO, OutDTO
13
15
  from cognee.infrastructure.databases.relational import get_relational_engine
@@ -415,7 +417,7 @@ def get_datasets_router() -> APIRouter:
415
417
  @router.get("/{dataset_id}/data/{data_id}/raw", response_class=FileResponse)
416
418
  async def get_raw_data(
417
419
  dataset_id: UUID, data_id: UUID, user: User = Depends(get_authenticated_user)
418
- ):
420
+ ) -> Response:
419
421
  """
420
422
  Download the raw data file for a specific data item.
421
423
 
@@ -476,6 +478,46 @@ def get_datasets_router() -> APIRouter:
476
478
  message=f"Data ({data_id}) not found in dataset ({dataset_id})."
477
479
  )
478
480
 
479
- return data.raw_data_location
481
+ raw_location = data.raw_data_location
482
+ parsed_uri = urlparse(raw_location)
483
+
484
+ if parsed_uri.scheme == "s3":
485
+ from cognee.infrastructure.files.utils.open_data_file import open_data_file
486
+ from cognee.infrastructure.utils.run_async import run_async
487
+
488
+ download_name = Path(parsed_uri.path).name or data.name
489
+ media_type = data.mime_type or "application/octet-stream"
490
+
491
+ async def file_iterator(chunk_size: int = 1024 * 1024):
492
+ async with open_data_file(raw_location, mode="rb") as file:
493
+ while True:
494
+ chunk = await run_async(file.read, chunk_size)
495
+ if not chunk:
496
+ break
497
+ yield chunk
498
+
499
+ return StreamingResponse(
500
+ file_iterator(),
501
+ media_type=media_type,
502
+ headers={"Content-Disposition": f'attachment; filename="{download_name}"'},
503
+ )
504
+
505
+ if parsed_uri.scheme in ("file", "") or (
506
+ len(parsed_uri.scheme) == 1 and parsed_uri.scheme.isalpha()
507
+ ):
508
+ from cognee.infrastructure.files.utils.get_data_file_path import get_data_file_path
509
+
510
+ file_path = get_data_file_path(raw_location)
511
+ path = Path(file_path)
512
+
513
+ if not path.is_file():
514
+ raise DataNotFoundError(message=f"Raw file not found on disk for data ({data_id}).")
515
+
516
+ return FileResponse(path=path)
517
+
518
+ raise HTTPException(
519
+ status_code=status.HTTP_501_NOT_IMPLEMENTED,
520
+ detail=f"Storage scheme '{parsed_uri.scheme}' not supported for direct download.",
521
+ )
480
522
 
481
523
  return router
@@ -12,6 +12,7 @@ from cognee.modules.users.methods import get_authenticated_user
12
12
  from cognee.shared.utils import send_telemetry
13
13
  from cognee.modules.pipelines.models import PipelineRunErrored
14
14
  from cognee.shared.logging_utils import get_logger
15
+ from cognee.shared.usage_logger import log_usage
15
16
  from cognee import __version__ as cognee_version
16
17
 
17
18
  logger = get_logger()
@@ -35,6 +36,7 @@ def get_memify_router() -> APIRouter:
35
36
  router = APIRouter()
36
37
 
37
38
  @router.post("", response_model=dict)
39
+ @log_usage(function_name="POST /v1/memify", log_type="api_endpoint")
38
40
  async def memify(payload: MemifyPayloadDTO, user: User = Depends(get_authenticated_user)):
39
41
  """
40
42
  Enrichment pipeline in Cognee, can work with already built graphs. If no data is provided existing knowledge graph will be used as data,
@@ -6,14 +6,17 @@ from fastapi import Depends, APIRouter
6
6
  from fastapi.responses import JSONResponse
7
7
  from fastapi.encoders import jsonable_encoder
8
8
 
9
- from cognee.modules.search.types import SearchType, SearchResult, CombinedSearchResult
9
+ from cognee.modules.search.types import SearchType, SearchResult
10
10
  from cognee.api.DTO import InDTO, OutDTO
11
- from cognee.modules.users.exceptions.exceptions import PermissionDeniedError
11
+ from cognee.modules.users.exceptions.exceptions import PermissionDeniedError, UserNotFoundError
12
12
  from cognee.modules.users.models import User
13
13
  from cognee.modules.search.operations import get_history
14
14
  from cognee.modules.users.methods import get_authenticated_user
15
15
  from cognee.shared.utils import send_telemetry
16
+ from cognee.shared.usage_logger import log_usage
16
17
  from cognee import __version__ as cognee_version
18
+ from cognee.infrastructure.databases.exceptions import DatabaseNotCreatedError
19
+ from cognee.exceptions import CogneeValidationError
17
20
 
18
21
 
19
22
  # Note: Datasets sent by name will only map to datasets owned by the request sender
@@ -29,7 +32,7 @@ class SearchPayloadDTO(InDTO):
29
32
  node_name: Optional[list[str]] = Field(default=None, example=[])
30
33
  top_k: Optional[int] = Field(default=10)
31
34
  only_context: bool = Field(default=False)
32
- use_combined_context: bool = Field(default=False)
35
+ verbose: bool = Field(default=False)
33
36
 
34
37
 
35
38
  def get_search_router() -> APIRouter:
@@ -72,7 +75,8 @@ def get_search_router() -> APIRouter:
72
75
  except Exception as error:
73
76
  return JSONResponse(status_code=500, content={"error": str(error)})
74
77
 
75
- @router.post("", response_model=Union[List[SearchResult], CombinedSearchResult, List])
78
+ @router.post("", response_model=Union[List[SearchResult], List])
79
+ @log_usage(function_name="POST /v1/search", log_type="api_endpoint")
76
80
  async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticated_user)):
77
81
  """
78
82
  Search for nodes in the graph database.
@@ -116,7 +120,7 @@ def get_search_router() -> APIRouter:
116
120
  "node_name": payload.node_name,
117
121
  "top_k": payload.top_k,
118
122
  "only_context": payload.only_context,
119
- "use_combined_context": payload.use_combined_context,
123
+ "verbose": payload.verbose,
120
124
  "cognee_version": cognee_version,
121
125
  },
122
126
  )
@@ -133,11 +137,22 @@ def get_search_router() -> APIRouter:
133
137
  system_prompt=payload.system_prompt,
134
138
  node_name=payload.node_name,
135
139
  top_k=payload.top_k,
140
+ verbose=payload.verbose,
136
141
  only_context=payload.only_context,
137
- use_combined_context=payload.use_combined_context,
138
142
  )
139
143
 
140
144
  return jsonable_encoder(results)
145
+ except (DatabaseNotCreatedError, UserNotFoundError, CogneeValidationError) as e:
146
+ # Return a clear 422 with actionable guidance instead of leaking a stacktrace
147
+ status_code = getattr(e, "status_code", 422)
148
+ return JSONResponse(
149
+ status_code=status_code,
150
+ content={
151
+ "error": "Search prerequisites not met",
152
+ "detail": str(e),
153
+ "hint": "Run `await cognee.add(...)` then `await cognee.cognify()` before searching.",
154
+ },
155
+ )
141
156
  except PermissionDeniedError:
142
157
  return []
143
158
  except Exception as error:
@@ -4,13 +4,16 @@ from typing import Union, Optional, List, Type
4
4
  from cognee.infrastructure.databases.graph import get_graph_engine
5
5
  from cognee.modules.engine.models.node_set import NodeSet
6
6
  from cognee.modules.users.models import User
7
- from cognee.modules.search.types import SearchResult, SearchType, CombinedSearchResult
7
+ from cognee.modules.search.types import SearchResult, SearchType
8
8
  from cognee.modules.users.methods import get_default_user
9
9
  from cognee.modules.search.methods import search as search_function
10
10
  from cognee.modules.data.methods import get_authorized_existing_datasets
11
11
  from cognee.modules.data.exceptions import DatasetNotFoundError
12
12
  from cognee.context_global_variables import set_session_user_context_variable
13
13
  from cognee.shared.logging_utils import get_logger
14
+ from cognee.infrastructure.databases.exceptions import DatabaseNotCreatedError
15
+ from cognee.exceptions import CogneeValidationError
16
+ from cognee.modules.users.exceptions.exceptions import UserNotFoundError
14
17
 
15
18
  logger = get_logger()
16
19
 
@@ -29,11 +32,12 @@ async def search(
29
32
  save_interaction: bool = False,
30
33
  last_k: Optional[int] = 1,
31
34
  only_context: bool = False,
32
- use_combined_context: bool = False,
33
35
  session_id: Optional[str] = None,
34
36
  wide_search_top_k: Optional[int] = 100,
35
37
  triplet_distance_penalty: Optional[float] = 3.5,
36
- ) -> Union[List[SearchResult], CombinedSearchResult]:
38
+ verbose: bool = False,
39
+ retriever_specific_config: Optional[dict] = None,
40
+ ) -> List[SearchResult]:
37
41
  """
38
42
  Search and query the knowledge graph for insights, information, and connections.
39
43
 
@@ -123,6 +127,10 @@ async def search(
123
127
 
124
128
  session_id: Optional session identifier for caching Q&A interactions. Defaults to 'default_session' if None.
125
129
 
130
+ verbose: If True, returns detailed result information including graph representation (when possible).
131
+
132
+ retriever_specific_config: Optional dictionary of additional configuration parameters specific to the retriever being used.
133
+
126
134
  Returns:
127
135
  list: Search results in format determined by query_type:
128
136
 
@@ -176,7 +184,18 @@ async def search(
176
184
  datasets = [datasets]
177
185
 
178
186
  if user is None:
179
- user = await get_default_user()
187
+ try:
188
+ user = await get_default_user()
189
+ except (DatabaseNotCreatedError, UserNotFoundError) as error:
190
+ # Provide a clear, actionable message instead of surfacing low-level stacktraces
191
+ raise CogneeValidationError(
192
+ message=(
193
+ "Search prerequisites not met: no database/default user found. "
194
+ "Initialize Cognee before searching by:\n"
195
+ "• running `await cognee.add(...)` followed by `await cognee.cognify()`."
196
+ ),
197
+ name="SearchPreconditionError",
198
+ ) from error
180
199
 
181
200
  await set_session_user_context_variable(user)
182
201
 
@@ -200,10 +219,11 @@ async def search(
200
219
  save_interaction=save_interaction,
201
220
  last_k=last_k,
202
221
  only_context=only_context,
203
- use_combined_context=use_combined_context,
204
222
  session_id=session_id,
205
223
  wide_search_top_k=wide_search_top_k,
206
224
  triplet_distance_penalty=triplet_distance_penalty,
225
+ verbose=verbose,
226
+ retriever_specific_config=retriever_specific_config,
207
227
  )
208
228
 
209
229
  return filtered_search_results
@@ -71,7 +71,7 @@ def get_sync_router() -> APIRouter:
71
71
  -H "Content-Type: application/json" \\
72
72
  -H "Cookie: auth_token=your-token" \\
73
73
  -d '{"dataset_ids": ["123e4567-e89b-12d3-a456-426614174000", "456e7890-e12b-34c5-d678-901234567000"]}'
74
-
74
+
75
75
  # Sync all user datasets (empty request body or null dataset_ids)
76
76
  curl -X POST "http://localhost:8000/api/v1/sync" \\
77
77
  -H "Content-Type: application/json" \\
@@ -88,7 +88,7 @@ def get_sync_router() -> APIRouter:
88
88
  - **413 Payload Too Large**: Dataset too large for current cloud plan
89
89
  - **429 Too Many Requests**: Rate limit exceeded
90
90
 
91
- ## Notes
91
+ ## Notes
92
92
  - Sync operations run in the background - you get an immediate response
93
93
  - Use the returned run_id to track progress (status API coming soon)
94
94
  - Large datasets are automatically chunked for efficient transfer
@@ -179,7 +179,7 @@ def get_sync_router() -> APIRouter:
179
179
  ```
180
180
 
181
181
  ## Example Responses
182
-
182
+
183
183
  **No running syncs:**
184
184
  ```json
185
185
  {
@@ -21,7 +21,7 @@ binary streams, then stores them in a specified dataset for further processing.
21
21
 
22
22
  Supported Input Types:
23
23
  - **Text strings**: Direct text content
24
- - **File paths**: Local file paths (absolute paths starting with "/")
24
+ - **File paths**: Local file paths (absolute paths starting with "/")
25
25
  - **File URLs**: "file:///absolute/path" or "file://relative/path"
26
26
  - **S3 paths**: "s3://bucket-name/path/to/file"
27
27
  - **Lists**: Multiple files or text strings in a single call
@@ -62,6 +62,11 @@ After successful cognify processing, use `cognee search` to query the knowledge
62
62
  parser.add_argument(
63
63
  "--verbose", "-v", action="store_true", help="Show detailed progress information"
64
64
  )
65
+ parser.add_argument(
66
+ "--chunks-per-batch",
67
+ type=int,
68
+ help="Number of chunks to process per task batch (try 50 for large single documents).",
69
+ )
65
70
 
66
71
  def execute(self, args: argparse.Namespace) -> None:
67
72
  try:
@@ -111,6 +116,7 @@ After successful cognify processing, use `cognee search` to query the knowledge
111
116
  chunk_size=args.chunk_size,
112
117
  ontology_file_path=args.ontology_file,
113
118
  run_in_background=args.background,
119
+ chunks_per_batch=getattr(args, "chunks_per_batch", None),
114
120
  )
115
121
  return result
116
122
  except Exception as e:
@@ -17,7 +17,7 @@ The `cognee config` command allows you to view and modify configuration settings
17
17
 
18
18
  You can:
19
19
  - View all current configuration settings
20
- - Get specific configuration values
20
+ - Get specific configuration values
21
21
  - Set configuration values
22
22
  - Unset (reset to default) specific configuration values
23
23
  - Reset all configuration to defaults
@@ -121,13 +121,17 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_
121
121
  )
122
122
 
123
123
  # Set vector and graph database configuration based on dataset database information
124
- # TODO: Add better handling of vector and graph config accross Cognee.
124
+ # TODO: Add better handling of vector and graph config across Cognee.
125
125
  # LRU_CACHE takes into account order of inputs, if order of inputs is changed it will be registered as a new DB adapter
126
126
  vector_config = {
127
127
  "vector_db_provider": dataset_database.vector_database_provider,
128
128
  "vector_db_url": dataset_database.vector_database_url,
129
129
  "vector_db_key": dataset_database.vector_database_key,
130
130
  "vector_db_name": dataset_database.vector_database_name,
131
+ "vector_db_port": dataset_database.vector_database_connection_info.get("port", ""),
132
+ "vector_db_host": dataset_database.vector_database_connection_info.get("host", ""),
133
+ "vector_db_username": dataset_database.vector_database_connection_info.get("username", ""),
134
+ "vector_db_password": dataset_database.vector_database_connection_info.get("password", ""),
131
135
  }
132
136
 
133
137
  graph_config = {
@@ -32,16 +32,15 @@ class AnswerGeneratorExecutor:
32
32
  query_text = instance["question"]
33
33
  correct_answer = instance["answer"]
34
34
 
35
- retrieval_context = await retriever.get_context(query_text)
36
- search_results = await retriever.get_completion(query_text, retrieval_context)
35
+ retrieved_objects = await retriever.get_retrieved_objects(query_text)
36
+ retrieval_context = await retriever.get_context_from_objects(
37
+ query_text, retrieved_objects
38
+ )
39
+ search_results = await retriever.get_completion_from_context(
40
+ query_text, retrieved_objects, retrieval_context
41
+ )
37
42
 
38
43
  ############
39
- #:TODO This is a quick fix until we don't structure retriever results properly but lets not leave it like this...this is needed now due to the changed combined retriever structure..
40
- if isinstance(retrieval_context, list):
41
- retrieval_context = await retriever.convert_retrieved_objects_to_context(
42
- triplets=retrieval_context
43
- )
44
-
45
44
  if isinstance(search_results, str):
46
45
  search_results = [search_results]
47
46
  #############