cognee 0.5.1__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (265) hide show
  1. cognee/__init__.py +2 -0
  2. cognee/alembic/README +1 -0
  3. cognee/alembic/env.py +107 -0
  4. cognee/alembic/script.py.mako +26 -0
  5. cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
  6. cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
  7. cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
  8. cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
  9. cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
  10. cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
  11. cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
  12. cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
  13. cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
  14. cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
  15. cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
  16. cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
  17. cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
  18. cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
  19. cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
  20. cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
  21. cognee/alembic.ini +117 -0
  22. cognee/api/v1/add/add.py +2 -1
  23. cognee/api/v1/add/routers/get_add_router.py +2 -0
  24. cognee/api/v1/cognify/cognify.py +11 -6
  25. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
  26. cognee/api/v1/config/config.py +60 -0
  27. cognee/api/v1/datasets/routers/get_datasets_router.py +46 -3
  28. cognee/api/v1/memify/routers/get_memify_router.py +3 -0
  29. cognee/api/v1/search/routers/get_search_router.py +21 -6
  30. cognee/api/v1/search/search.py +21 -5
  31. cognee/api/v1/sync/routers/get_sync_router.py +3 -3
  32. cognee/cli/commands/add_command.py +1 -1
  33. cognee/cli/commands/cognify_command.py +6 -0
  34. cognee/cli/commands/config_command.py +1 -1
  35. cognee/context_global_variables.py +5 -1
  36. cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
  37. cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
  38. cognee/infrastructure/databases/cache/config.py +6 -0
  39. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
  40. cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
  41. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
  42. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
  43. cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
  44. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
  45. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
  46. cognee/infrastructure/databases/relational/config.py +16 -1
  47. cognee/infrastructure/databases/relational/create_relational_engine.py +13 -3
  48. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +26 -3
  49. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
  50. cognee/infrastructure/databases/vector/config.py +6 -0
  51. cognee/infrastructure/databases/vector/create_vector_engine.py +70 -16
  52. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
  53. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
  54. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
  55. cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
  56. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
  57. cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
  58. cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
  59. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
  60. cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
  61. cognee/infrastructure/llm/LLMGateway.py +0 -13
  62. cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
  63. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
  64. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
  65. cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
  66. cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
  67. cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
  68. cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
  69. cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
  70. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
  71. cognee/infrastructure/llm/prompts/test.txt +1 -1
  72. cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
  73. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +17 -12
  74. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +31 -25
  75. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +132 -7
  76. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +29 -5
  77. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
  78. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +2 -6
  79. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +58 -13
  80. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +0 -1
  81. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +25 -131
  82. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/types.py +10 -0
  83. cognee/modules/chunking/models/DocumentChunk.py +0 -1
  84. cognee/modules/cognify/config.py +2 -0
  85. cognee/modules/data/models/Data.py +3 -1
  86. cognee/modules/engine/models/Entity.py +0 -1
  87. cognee/modules/engine/operations/setup.py +6 -0
  88. cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
  89. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
  90. cognee/modules/graph/utils/__init__.py +1 -0
  91. cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
  92. cognee/modules/notebooks/methods/__init__.py +1 -0
  93. cognee/modules/notebooks/methods/create_notebook.py +0 -34
  94. cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
  95. cognee/modules/notebooks/methods/get_notebooks.py +12 -8
  96. cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
  97. cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
  98. cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
  99. cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
  100. cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
  101. cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
  102. cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
  103. cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
  104. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
  105. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
  106. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
  107. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
  108. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
  109. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
  110. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
  111. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
  112. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
  113. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
  114. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
  115. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
  116. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
  117. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
  118. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
  119. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
  120. cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
  121. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
  122. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
  123. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
  124. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
  125. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
  126. cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
  127. cognee/modules/retrieval/__init__.py +0 -1
  128. cognee/modules/retrieval/base_retriever.py +66 -10
  129. cognee/modules/retrieval/chunks_retriever.py +57 -49
  130. cognee/modules/retrieval/coding_rules_retriever.py +12 -5
  131. cognee/modules/retrieval/completion_retriever.py +29 -28
  132. cognee/modules/retrieval/cypher_search_retriever.py +25 -20
  133. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
  134. cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
  135. cognee/modules/retrieval/graph_completion_retriever.py +78 -63
  136. cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
  137. cognee/modules/retrieval/lexical_retriever.py +34 -12
  138. cognee/modules/retrieval/natural_language_retriever.py +18 -15
  139. cognee/modules/retrieval/summaries_retriever.py +51 -34
  140. cognee/modules/retrieval/temporal_retriever.py +59 -49
  141. cognee/modules/retrieval/triplet_retriever.py +32 -33
  142. cognee/modules/retrieval/utils/access_tracking.py +88 -0
  143. cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -103
  144. cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
  145. cognee/modules/search/methods/__init__.py +1 -0
  146. cognee/modules/search/methods/get_retriever_output.py +53 -0
  147. cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
  148. cognee/modules/search/methods/search.py +90 -222
  149. cognee/modules/search/models/SearchResultPayload.py +67 -0
  150. cognee/modules/search/types/SearchResult.py +1 -8
  151. cognee/modules/search/types/SearchType.py +1 -2
  152. cognee/modules/search/types/__init__.py +1 -1
  153. cognee/modules/search/utils/__init__.py +1 -2
  154. cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
  155. cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
  156. cognee/modules/users/authentication/default/default_transport.py +11 -1
  157. cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
  158. cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
  159. cognee/modules/users/methods/create_user.py +0 -9
  160. cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
  161. cognee/modules/visualization/cognee_network_visualization.py +1 -1
  162. cognee/run_migrations.py +48 -0
  163. cognee/shared/exceptions/__init__.py +1 -3
  164. cognee/shared/exceptions/exceptions.py +11 -1
  165. cognee/shared/usage_logger.py +332 -0
  166. cognee/shared/utils.py +12 -5
  167. cognee/tasks/chunks/__init__.py +9 -0
  168. cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
  169. cognee/tasks/graph/__init__.py +7 -0
  170. cognee/tasks/ingestion/data_item.py +8 -0
  171. cognee/tasks/ingestion/ingest_data.py +12 -1
  172. cognee/tasks/ingestion/save_data_item_to_storage.py +5 -0
  173. cognee/tasks/memify/__init__.py +8 -0
  174. cognee/tasks/memify/extract_usage_frequency.py +613 -0
  175. cognee/tasks/summarization/models.py +0 -2
  176. cognee/tasks/temporal_graph/__init__.py +0 -1
  177. cognee/tasks/translation/__init__.py +96 -0
  178. cognee/tasks/translation/config.py +110 -0
  179. cognee/tasks/translation/detect_language.py +190 -0
  180. cognee/tasks/translation/exceptions.py +62 -0
  181. cognee/tasks/translation/models.py +72 -0
  182. cognee/tasks/translation/providers/__init__.py +44 -0
  183. cognee/tasks/translation/providers/azure_provider.py +192 -0
  184. cognee/tasks/translation/providers/base.py +85 -0
  185. cognee/tasks/translation/providers/google_provider.py +158 -0
  186. cognee/tasks/translation/providers/llm_provider.py +143 -0
  187. cognee/tasks/translation/translate_content.py +282 -0
  188. cognee/tasks/web_scraper/default_url_crawler.py +6 -2
  189. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
  190. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
  191. cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
  192. cognee/tests/integration/retrieval/test_chunks_retriever.py +351 -0
  193. cognee/tests/integration/retrieval/test_graph_completion_retriever.py +276 -0
  194. cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +228 -0
  195. cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +217 -0
  196. cognee/tests/integration/retrieval/test_rag_completion_retriever.py +319 -0
  197. cognee/tests/integration/retrieval/test_structured_output.py +258 -0
  198. cognee/tests/integration/retrieval/test_summaries_retriever.py +195 -0
  199. cognee/tests/integration/retrieval/test_temporal_retriever.py +336 -0
  200. cognee/tests/integration/retrieval/test_triplet_retriever.py +45 -1
  201. cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
  202. cognee/tests/tasks/translation/README.md +147 -0
  203. cognee/tests/tasks/translation/__init__.py +1 -0
  204. cognee/tests/tasks/translation/config_test.py +93 -0
  205. cognee/tests/tasks/translation/detect_language_test.py +118 -0
  206. cognee/tests/tasks/translation/providers_test.py +151 -0
  207. cognee/tests/tasks/translation/translate_content_test.py +213 -0
  208. cognee/tests/test_chromadb.py +1 -1
  209. cognee/tests/test_cleanup_unused_data.py +165 -0
  210. cognee/tests/test_custom_data_label.py +68 -0
  211. cognee/tests/test_delete_by_id.py +6 -6
  212. cognee/tests/test_extract_usage_frequency.py +308 -0
  213. cognee/tests/test_kuzu.py +17 -7
  214. cognee/tests/test_lancedb.py +3 -1
  215. cognee/tests/test_library.py +1 -1
  216. cognee/tests/test_neo4j.py +17 -7
  217. cognee/tests/test_neptune_analytics_vector.py +3 -1
  218. cognee/tests/test_permissions.py +172 -187
  219. cognee/tests/test_pgvector.py +3 -1
  220. cognee/tests/test_relational_db_migration.py +15 -1
  221. cognee/tests/test_remote_kuzu.py +3 -1
  222. cognee/tests/test_s3_file_storage.py +1 -1
  223. cognee/tests/test_search_db.py +345 -205
  224. cognee/tests/test_usage_logger_e2e.py +268 -0
  225. cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
  226. cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
  227. cognee/tests/unit/eval_framework/benchmark_adapters_test.py +25 -0
  228. cognee/tests/unit/eval_framework/corpus_builder_test.py +33 -4
  229. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
  230. cognee/tests/unit/infrastructure/databases/relational/test_RelationalConfig.py +69 -0
  231. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
  232. cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
  233. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +122 -168
  234. cognee/tests/unit/modules/retrieval/conversation_history_test.py +338 -0
  235. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +486 -157
  236. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +693 -155
  237. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +619 -200
  238. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +300 -171
  239. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +184 -155
  240. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +544 -79
  241. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +476 -28
  242. cognee/tests/unit/modules/retrieval/test_completion.py +343 -0
  243. cognee/tests/unit/modules/retrieval/test_graph_summary_completion_retriever.py +157 -0
  244. cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
  245. cognee/tests/unit/modules/retrieval/test_user_qa_feedback.py +312 -0
  246. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +267 -7
  247. cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
  248. cognee/tests/unit/modules/search/test_search.py +96 -20
  249. cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
  250. cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
  251. cognee/tests/unit/shared/test_usage_logger.py +241 -0
  252. cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
  253. {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/METADATA +22 -17
  254. {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/RECORD +258 -157
  255. cognee/api/.env.example +0 -5
  256. cognee/modules/retrieval/base_graph_retriever.py +0 -24
  257. cognee/modules/search/methods/get_search_type_tools.py +0 -223
  258. cognee/modules/search/methods/no_access_control_search.py +0 -62
  259. cognee/modules/search/utils/prepare_search_result.py +0 -63
  260. cognee/tests/test_feedback_enrichment.py +0 -174
  261. cognee/tests/unit/modules/retrieval/structured_output_test.py +0 -204
  262. {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/WHEEL +0 -0
  263. {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/entry_points.txt +0 -0
  264. {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/licenses/LICENSE +0 -0
  265. {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,282 @@
1
+ import asyncio
2
+ from typing import List, Optional
3
+ from uuid import uuid5
4
+
5
+ from cognee.modules.chunking.models import DocumentChunk
6
+ from cognee.shared.logging_utils import get_logger
7
+
8
+ from .config import get_translation_config, TranslationProviderType
9
+ from .detect_language import detect_language_async, LanguageDetectionResult
10
+ from .exceptions import TranslationError, LanguageDetectionError
11
+ from .models import TranslatedContent, LanguageMetadata
12
+ from .providers import get_translation_provider, TranslationResult
13
+
14
+ logger = get_logger(__name__)
15
+
16
+
17
async def translate_content(
    data_chunks: List[DocumentChunk],
    target_language: Optional[str] = None,
    translation_provider: Optional[TranslationProviderType] = None,
    confidence_threshold: Optional[float] = None,
    skip_if_target_language: bool = True,
    preserve_original: bool = True,
) -> List[DocumentChunk]:
    """
    Translate chunks that are not in the target language.

    For every chunk with non-empty text the language is detected first. Chunks
    that need translation are sent to the selected provider and their text is
    replaced with the translated text.

    Args:
        data_chunks: List of DocumentChunk objects to process.
        target_language: Target language code. Falls back to the translation
            config when not provided (or empty).
        translation_provider: Translation service to use ("llm", "google", "azure").
            Falls back to the translation config when not provided.
        confidence_threshold: Minimum confidence for language detection (0.0 to 1.0).
            Falls back to the translation config only when ``None``; an explicit
            ``0.0`` is honoured.
        skip_if_target_language: If True, chunks whose detection result says no
            translation is required are left untranslated (language metadata is
            still attached). If False, such chunks are sent to the provider anyway.
        preserve_original: If True, the original text is stored in
            TranslatedContent.original_text; otherwise an empty string is stored.

    Returns:
        List with the same chunks, in input order. Translated chunks carry
        LanguageMetadata and TranslatedContent objects in their 'contains' list.

    Raises:
        TranslationError: If ``data_chunks`` is not a list.

    Note:
        This function mutates the input chunks in-place:
        - chunk.text is replaced with the translated text
        - chunk.contains is updated with LanguageMetadata and TranslatedContent
        Errors raised while processing a single chunk are logged and the chunk
        is still appended to the result, so one failing chunk does not abort
        the whole batch.

    Example:
        ```python
        from cognee.tasks.translation import translate_content

        # Translate chunks using default settings
        translated_chunks = await translate_content(chunks)

        # Translate with specific provider
        translated_chunks = await translate_content(
            chunks,
            translation_provider="llm",
            confidence_threshold=0.9
        )
        ```
    """
    if not isinstance(data_chunks, list):
        raise TranslationError("data_chunks must be a list")

    if len(data_chunks) == 0:
        return data_chunks

    # Resolve effective settings: explicit arguments win over configuration.
    config = get_translation_config()
    provider_name = translation_provider or config.translation_provider
    target_lang = target_language or config.target_language
    # Compare against None instead of relying on truthiness: 0.0 is a valid
    # threshold and must not silently fall back to the configured default.
    threshold = (
        confidence_threshold
        if confidence_threshold is not None
        else config.confidence_threshold
    )

    logger.info(
        f"Starting translation task for {len(data_chunks)} chunks "
        f"using {provider_name} provider, target language: {target_lang}"
    )

    # Get the translation provider
    provider = get_translation_provider(provider_name)

    # Process chunks
    processed_chunks = []
    total_chunks = len(data_chunks)

    for chunk_index, chunk in enumerate(data_chunks):
        # Log progress for large batches
        if chunk_index > 0 and chunk_index % 100 == 0:
            logger.info(f"Translation progress: {chunk_index}/{total_chunks} chunks processed")

        # Nothing to detect or translate for chunks without text.
        if not hasattr(chunk, "text") or not chunk.text:
            processed_chunks.append(chunk)
            continue

        try:
            # Detect language
            detection = await detect_language_async(chunk.text, target_lang, threshold)

            # Create language metadata
            language_metadata = LanguageMetadata(
                id=uuid5(chunk.id, "LanguageMetadata"),
                content_id=chunk.id,
                detected_language=detection.language_code,
                language_confidence=detection.confidence,
                requires_translation=detection.requires_translation,
                character_count=detection.character_count,
                language_name=detection.language_name,
            )

            # Skip if already in target language
            if not detection.requires_translation:
                if skip_if_target_language:
                    logger.debug(
                        f"Skipping chunk {chunk.id}: already in target language "
                        f"({detection.language_code})"
                    )
                    # Add language metadata to chunk
                    _add_to_chunk_contains(chunk, language_metadata)
                    processed_chunks.append(chunk)
                    continue

            # Translate the content
            logger.debug(
                f"Translating chunk {chunk.id} from {detection.language_code} to {target_lang}"
            )

            translation_result = await provider.translate(
                text=chunk.text,
                target_language=target_lang,
                source_language=detection.language_code,
            )

            # Create TranslatedContent data point. This must happen before
            # chunk.text is overwritten so original_text holds the source text.
            translated_content = TranslatedContent(
                id=uuid5(chunk.id, "TranslatedContent"),
                original_chunk_id=chunk.id,
                original_text=chunk.text if preserve_original else "",
                translated_text=translation_result.translated_text,
                source_language=translation_result.source_language,
                target_language=translation_result.target_language,
                translation_provider=translation_result.provider,
                confidence_score=translation_result.confidence_score,
                translated_from=chunk,
            )

            # Update chunk text with translated content
            chunk.text = translation_result.translated_text

            # Add metadata to chunk's contains list
            _add_to_chunk_contains(chunk, language_metadata)
            _add_to_chunk_contains(chunk, translated_content)

            processed_chunks.append(chunk)

            logger.debug(
                f"Successfully translated chunk {chunk.id}: "
                f"{detection.language_code} -> {target_lang}"
            )

        except LanguageDetectionError as e:
            logger.warning(f"Language detection failed for chunk {chunk.id}: {e}")
            processed_chunks.append(chunk)
        except TranslationError as e:
            logger.error(f"Translation failed for chunk {chunk.id}: {e}")
            processed_chunks.append(chunk)
        except Exception as e:
            # Deliberately broad: a single bad chunk must not abort the batch.
            logger.error(f"Unexpected error processing chunk {chunk.id}: {e}")
            processed_chunks.append(chunk)

    logger.info(f"Translation task completed for {len(processed_chunks)} chunks")
    return processed_chunks
180
+
181
+
182
def _add_to_chunk_contains(chunk: DocumentChunk, item) -> None:
    """Append ``item`` to ``chunk.contains``, creating the list first when it is ``None``."""
    if chunk.contains is not None:
        chunk.contains.append(item)
        return

    # No list yet: start one on the chunk, then append through the attribute.
    chunk.contains = []
    chunk.contains.append(item)
187
+
188
+
189
async def translate_text(
    text: str,
    target_language: str = None,
    translation_provider: TranslationProviderType = None,
    source_language: Optional[str] = None,
) -> TranslationResult:
    """
    Translate one text string without building DocumentChunk objects.

    Args:
        text: The text to translate
        target_language: Target language code.
            If not provided, the value from the translation config is used
        translation_provider: Translation service to use.
            If not provided, the value from the translation config is used
        source_language: Source language code, passed through to the provider
            unchanged (may be None)

    Returns:
        The TranslationResult produced by the provider's ``translate`` call

    Example:
        ```python
        from cognee.tasks.translation import translate_text

        result = await translate_text(
            "Bonjour le monde!",
            target_language="en"
        )
        print(result.translated_text)  # "Hello world!"
        print(result.source_language)  # "fr"
        ```
    """
    settings = get_translation_config()

    # Explicit arguments take precedence; falsy values fall back to the config.
    if translation_provider:
        chosen_provider = translation_provider
    else:
        chosen_provider = settings.translation_provider

    if target_language:
        destination = target_language
    else:
        destination = settings.target_language

    translator = get_translation_provider(chosen_provider)
    result = await translator.translate(
        text=text,
        target_language=destination,
        source_language=source_language,
    )
    return result
235
+
236
+
237
async def batch_translate_texts(
    texts: List[str],
    target_language: str = None,
    translation_provider: TranslationProviderType = None,
    source_language: Optional[str] = None,
) -> List[TranslationResult]:
    """
    Translate several text strings with a single provider batch call.

    The whole list is handed to the provider's ``translate_batch`` method.

    Args:
        texts: List of texts to translate
        target_language: Target language code.
            If not provided, the value from the translation config is used
        translation_provider: Translation service to use.
            If not provided, the value from the translation config is used
        source_language: Source language code, passed through to the provider
            unchanged (may be None)

    Returns:
        The list of TranslationResult objects returned by the provider

    Example:
        ```python
        from cognee.tasks.translation import batch_translate_texts

        results = await batch_translate_texts(
            ["Hola", "¿Cómo estás?", "Adiós"],
            target_language="en"
        )
        for result in results:
            print(f"{result.source_language}: {result.translated_text}")
        ```
    """
    settings = get_translation_config()

    # Explicit arguments take precedence; falsy values fall back to the config.
    if translation_provider:
        chosen_provider = translation_provider
    else:
        chosen_provider = settings.translation_provider

    if target_language:
        destination = target_language
    else:
        destination = settings.target_language

    translator = get_translation_provider(chosen_provider)
    results = await translator.translate_batch(
        texts=texts,
        target_language=destination,
        source_language=source_language,
    )
    return results
@@ -73,7 +73,11 @@ class DefaultUrlCrawler:
73
73
  self.timeout = timeout
74
74
  self.max_retries = max_retries
75
75
  self.retry_delay_factor = retry_delay_factor
76
- self.headers = headers or {"User-Agent": "Cognee-Scraper/1.0"}
76
+ self.headers = headers or {
77
+ "User-Agent": "Cognee-Scraper/1.0 (hello@cognee.ai)",
78
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
79
+ "Accept-Language": "en-US,en;q=0.9",
80
+ }
77
81
  self.robots_cache_ttl = robots_cache_ttl
78
82
  self._last_request_time_per_domain: Dict[str, float] = {}
79
83
  self._robots_cache: Dict[str, RobotsTxtCache] = {}
@@ -288,7 +292,7 @@ class DefaultUrlCrawler:
288
292
  while True:
289
293
  try:
290
294
  await self._respect_rate_limit(url, crawl_delay)
291
- resp = await self._client.get(url)
295
+ resp = await self._client.get(url, headers=self.headers)
292
296
  resp.raise_for_status()
293
297
  logger.info(
294
298
  f"Successfully fetched {url} (status={resp.status_code}, size={len(resp.text)} bytes)"
@@ -238,6 +238,7 @@ class TestCognifyCommand:
238
238
  ontology_file_path=None,
239
239
  chunker=TextChunker,
240
240
  run_in_background=False,
241
+ chunks_per_batch=None,
241
242
  )
242
243
 
243
244
  @patch("cognee.cli.commands.cognify_command.asyncio.run")
@@ -262,6 +262,7 @@ class TestCognifyCommandEdgeCases:
262
262
  ontology_file_path=None,
263
263
  chunker=TextChunker,
264
264
  run_in_background=False,
265
+ chunks_per_batch=None,
265
266
  )
266
267
 
267
268
  @patch("cognee.cli.commands.cognify_command.asyncio.run", side_effect=_mock_run)
@@ -295,6 +296,7 @@ class TestCognifyCommandEdgeCases:
295
296
  ontology_file_path="/nonexistent/path/ontology.owl",
296
297
  chunker=TextChunker,
297
298
  run_in_background=False,
299
+ chunks_per_batch=None,
298
300
  )
299
301
 
300
302
  @patch("cognee.cli.commands.cognify_command.asyncio.run")
@@ -373,6 +375,7 @@ class TestCognifyCommandEdgeCases:
373
375
  ontology_file_path=None,
374
376
  chunker=TextChunker,
375
377
  run_in_background=False,
378
+ chunks_per_batch=None,
376
379
  )
377
380
 
378
381
 
@@ -0,0 +1,62 @@
1
+ import pathlib
2
+
3
+ import pytest
4
+ import pytest_asyncio
5
+ import cognee
6
+
7
+ from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
8
+ from cognee.modules.retrieval.utils.brute_force_triplet_search import brute_force_triplet_search
9
+
10
+
11
@pytest_asyncio.fixture
async def clean_environment():
    """Configure isolated storage and prune cognee data/system state before and after the test.

    Storage roots are pointed at test-specific directories four levels above
    this file. Teardown is best-effort: a failing prune is logged as a warning
    instead of being silently discarded, and it does not fail the test.
    """
    import logging

    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
    system_directory_path = str(base_dir / ".cognee_system/test_brute_force_triplet_search_e2e")
    data_directory_path = str(base_dir / ".data_storage/test_brute_force_triplet_search_e2e")

    cognee.config.system_root_directory(system_directory_path)
    cognee.config.data_root_directory(data_directory_path)

    # Start from a clean slate so earlier runs cannot influence the assertions.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    yield

    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception as cleanup_error:
        # Keep teardown non-fatal, but leave a trace so leftover state that
        # could affect later tests is diagnosable.
        logging.getLogger(__name__).warning(
            "Cleanup after test_brute_force_triplet_search_e2e failed: %s", cleanup_error
        )
31
+
32
+
33
@pytest.mark.asyncio
async def test_brute_force_triplet_search_end_to_end(clean_environment):
    """Add and cognify a short text, then run triplet search with one query and with a query batch."""

    text = """
    Cognee is an open-source AI memory engine that structures data into searchable formats for use with AI agents.
    The company focuses on persistent memory systems using knowledge graphs and vector search.
    It is a Berlin-based startup building infrastructure for context-aware AI applications.
    NLP systems can use Cognee to store and retrieve structured information.
    """

    await cognee.add(text)
    await cognee.cognify()

    # Single-query mode: expects a non-empty flat list of Edge objects.
    single_hits = await brute_force_triplet_search(
        query="What can NLP systems use Cognee for?",
        top_k=1,
    )
    assert isinstance(single_hits, list)
    assert single_hits
    for hit in single_hits:
        assert isinstance(hit, Edge)

    # Batch mode: expects one non-empty list of Edge objects per query.
    questions = ["What is Cognee?", "What is the company's focus?"]
    batched_hits = await brute_force_triplet_search(query_batch=questions, top_k=1)

    assert isinstance(batched_hits, list)
    assert len(batched_hits) == len(questions)
    for hits_for_question in batched_hits:
        assert isinstance(hits_for_question, list)
    for hits_for_question in batched_hits:
        assert hits_for_question
    for hits_for_question in batched_hits:
        for hit in hits_for_question:
            assert isinstance(hit, Edge)