cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. cognee/__init__.py +2 -0
  2. cognee/alembic/README +1 -0
  3. cognee/alembic/env.py +107 -0
  4. cognee/alembic/script.py.mako +26 -0
  5. cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
  6. cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
  7. cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
  8. cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
  9. cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
  10. cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
  11. cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
  12. cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
  13. cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
  14. cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
  15. cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
  16. cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
  17. cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
  18. cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
  19. cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
  20. cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
  21. cognee/alembic.ini +117 -0
  22. cognee/api/v1/add/routers/get_add_router.py +2 -0
  23. cognee/api/v1/cognify/cognify.py +11 -6
  24. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
  25. cognee/api/v1/config/config.py +60 -0
  26. cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
  27. cognee/api/v1/memify/routers/get_memify_router.py +2 -0
  28. cognee/api/v1/search/routers/get_search_router.py +21 -6
  29. cognee/api/v1/search/search.py +25 -5
  30. cognee/api/v1/sync/routers/get_sync_router.py +3 -3
  31. cognee/cli/commands/add_command.py +1 -1
  32. cognee/cli/commands/cognify_command.py +6 -0
  33. cognee/cli/commands/config_command.py +1 -1
  34. cognee/context_global_variables.py +5 -1
  35. cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
  36. cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
  37. cognee/infrastructure/databases/cache/config.py +6 -0
  38. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
  39. cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
  40. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
  41. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
  42. cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
  43. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
  44. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
  45. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
  46. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
  47. cognee/infrastructure/databases/vector/config.py +6 -0
  48. cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
  49. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
  50. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
  51. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
  52. cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
  53. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
  54. cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
  55. cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
  57. cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
  58. cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
  59. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
  60. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
  61. cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
  62. cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
  63. cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
  64. cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
  65. cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
  66. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
  67. cognee/infrastructure/llm/prompts/test.txt +1 -1
  68. cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
  71. cognee/modules/chunking/models/DocumentChunk.py +0 -1
  72. cognee/modules/cognify/config.py +2 -0
  73. cognee/modules/data/models/Data.py +1 -0
  74. cognee/modules/engine/models/Entity.py +0 -1
  75. cognee/modules/engine/operations/setup.py +6 -0
  76. cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
  77. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
  78. cognee/modules/graph/utils/__init__.py +1 -0
  79. cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
  80. cognee/modules/notebooks/methods/__init__.py +1 -0
  81. cognee/modules/notebooks/methods/create_notebook.py +0 -34
  82. cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
  83. cognee/modules/notebooks/methods/get_notebooks.py +12 -8
  84. cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
  85. cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
  86. cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
  87. cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
  88. cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
  89. cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
  90. cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
  91. cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
  92. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
  93. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
  94. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
  95. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
  96. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
  97. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
  98. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
  99. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
  100. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
  101. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
  102. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
  103. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
  104. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
  105. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
  106. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
  107. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
  108. cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
  109. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
  110. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
  111. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
  112. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
  113. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
  114. cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
  115. cognee/modules/retrieval/__init__.py +0 -1
  116. cognee/modules/retrieval/base_retriever.py +66 -10
  117. cognee/modules/retrieval/chunks_retriever.py +57 -49
  118. cognee/modules/retrieval/coding_rules_retriever.py +12 -5
  119. cognee/modules/retrieval/completion_retriever.py +29 -28
  120. cognee/modules/retrieval/cypher_search_retriever.py +25 -20
  121. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
  122. cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
  123. cognee/modules/retrieval/graph_completion_retriever.py +78 -63
  124. cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
  125. cognee/modules/retrieval/lexical_retriever.py +34 -12
  126. cognee/modules/retrieval/natural_language_retriever.py +18 -15
  127. cognee/modules/retrieval/summaries_retriever.py +51 -34
  128. cognee/modules/retrieval/temporal_retriever.py +59 -49
  129. cognee/modules/retrieval/triplet_retriever.py +31 -32
  130. cognee/modules/retrieval/utils/access_tracking.py +88 -0
  131. cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
  132. cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
  133. cognee/modules/search/methods/__init__.py +1 -0
  134. cognee/modules/search/methods/get_retriever_output.py +53 -0
  135. cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
  136. cognee/modules/search/methods/search.py +90 -215
  137. cognee/modules/search/models/SearchResultPayload.py +67 -0
  138. cognee/modules/search/types/SearchResult.py +1 -8
  139. cognee/modules/search/types/SearchType.py +1 -2
  140. cognee/modules/search/types/__init__.py +1 -1
  141. cognee/modules/search/utils/__init__.py +1 -2
  142. cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
  143. cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
  144. cognee/modules/users/authentication/default/default_transport.py +11 -1
  145. cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
  146. cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
  147. cognee/modules/users/methods/create_user.py +0 -9
  148. cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
  149. cognee/modules/visualization/cognee_network_visualization.py +1 -1
  150. cognee/run_migrations.py +48 -0
  151. cognee/shared/exceptions/__init__.py +1 -3
  152. cognee/shared/exceptions/exceptions.py +11 -1
  153. cognee/shared/usage_logger.py +332 -0
  154. cognee/shared/utils.py +12 -5
  155. cognee/tasks/chunks/__init__.py +9 -0
  156. cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
  157. cognee/tasks/graph/__init__.py +7 -0
  158. cognee/tasks/memify/__init__.py +8 -0
  159. cognee/tasks/memify/extract_usage_frequency.py +613 -0
  160. cognee/tasks/summarization/models.py +0 -2
  161. cognee/tasks/temporal_graph/__init__.py +0 -1
  162. cognee/tasks/translation/__init__.py +96 -0
  163. cognee/tasks/translation/config.py +110 -0
  164. cognee/tasks/translation/detect_language.py +190 -0
  165. cognee/tasks/translation/exceptions.py +62 -0
  166. cognee/tasks/translation/models.py +72 -0
  167. cognee/tasks/translation/providers/__init__.py +44 -0
  168. cognee/tasks/translation/providers/azure_provider.py +192 -0
  169. cognee/tasks/translation/providers/base.py +85 -0
  170. cognee/tasks/translation/providers/google_provider.py +158 -0
  171. cognee/tasks/translation/providers/llm_provider.py +143 -0
  172. cognee/tasks/translation/translate_content.py +282 -0
  173. cognee/tasks/web_scraper/default_url_crawler.py +6 -2
  174. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
  175. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
  176. cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
  177. cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
  178. cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
  179. cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
  180. cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
  181. cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
  182. cognee/tests/integration/retrieval/test_structured_output.py +62 -18
  183. cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
  184. cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
  185. cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
  186. cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
  187. cognee/tests/tasks/translation/README.md +147 -0
  188. cognee/tests/tasks/translation/__init__.py +1 -0
  189. cognee/tests/tasks/translation/config_test.py +93 -0
  190. cognee/tests/tasks/translation/detect_language_test.py +118 -0
  191. cognee/tests/tasks/translation/providers_test.py +151 -0
  192. cognee/tests/tasks/translation/translate_content_test.py +213 -0
  193. cognee/tests/test_chromadb.py +1 -1
  194. cognee/tests/test_cleanup_unused_data.py +165 -0
  195. cognee/tests/test_delete_by_id.py +6 -6
  196. cognee/tests/test_extract_usage_frequency.py +308 -0
  197. cognee/tests/test_kuzu.py +17 -7
  198. cognee/tests/test_lancedb.py +3 -1
  199. cognee/tests/test_library.py +1 -1
  200. cognee/tests/test_neo4j.py +17 -7
  201. cognee/tests/test_neptune_analytics_vector.py +3 -1
  202. cognee/tests/test_permissions.py +172 -187
  203. cognee/tests/test_pgvector.py +3 -1
  204. cognee/tests/test_relational_db_migration.py +15 -1
  205. cognee/tests/test_remote_kuzu.py +3 -1
  206. cognee/tests/test_s3_file_storage.py +1 -1
  207. cognee/tests/test_search_db.py +97 -110
  208. cognee/tests/test_usage_logger_e2e.py +268 -0
  209. cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
  210. cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
  211. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
  212. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
  213. cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
  214. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
  215. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
  216. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
  217. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
  218. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
  219. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
  220. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
  221. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
  222. cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
  223. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
  224. cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
  225. cognee/tests/unit/modules/search/test_search.py +176 -0
  226. cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
  227. cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
  228. cognee/tests/unit/shared/test_usage_logger.py +241 -0
  229. cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
  230. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/METADATA +22 -17
  231. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/RECORD +235 -147
  232. cognee/api/.env.example +0 -5
  233. cognee/modules/retrieval/base_graph_retriever.py +0 -24
  234. cognee/modules/search/methods/get_search_type_tools.py +0 -223
  235. cognee/modules/search/methods/no_access_control_search.py +0 -62
  236. cognee/modules/search/utils/prepare_search_result.py +0 -63
  237. cognee/tests/test_feedback_enrichment.py +0 -174
  238. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/WHEEL +0 -0
  239. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/entry_points.txt +0 -0
  240. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/LICENSE +0 -0
  241. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,282 @@
1
+ import asyncio
2
+ from typing import List, Optional
3
+ from uuid import uuid5
4
+
5
+ from cognee.modules.chunking.models import DocumentChunk
6
+ from cognee.shared.logging_utils import get_logger
7
+
8
+ from .config import get_translation_config, TranslationProviderType
9
+ from .detect_language import detect_language_async, LanguageDetectionResult
10
+ from .exceptions import TranslationError, LanguageDetectionError
11
+ from .models import TranslatedContent, LanguageMetadata
12
+ from .providers import get_translation_provider, TranslationResult
13
+
14
+ logger = get_logger(__name__)
15
+
16
+
17
async def translate_content(
    data_chunks: List[DocumentChunk],
    target_language: Optional[str] = None,
    translation_provider: Optional[TranslationProviderType] = None,
    confidence_threshold: Optional[float] = None,
    skip_if_target_language: bool = True,
    preserve_original: bool = True,
) -> List[DocumentChunk]:
    """
    Translate non-target-language content to the target language.

    This task detects the language of each document chunk and translates
    non-target-language content using the specified translation provider.
    Original text is preserved alongside translated versions.

    Args:
        data_chunks: List of DocumentChunk objects to process
        target_language: Target language code.
            If not provided (or empty), uses config default
        translation_provider: Translation service to use ("llm", "google", "azure")
            If not provided, uses config default
        confidence_threshold: Minimum confidence for language detection (0.0 to 1.0)
            If None, uses config default. An explicit 0.0 is honoured.
        skip_if_target_language: If True, skip chunks already in target language.
            If False, such chunks are still sent to the provider.
        preserve_original: If True, store original text in TranslatedContent

    Returns:
        List of DocumentChunk objects with translated content, in input order.
        Chunks that required translation will have TranslatedContent
        objects in their 'contains' list.

    Raises:
        TranslationError: If data_chunks is not a list.

    Note:
        This function mutates the input chunks in-place. Specifically:
        - chunk.text is replaced with the translated text
        - chunk.contains is updated with LanguageMetadata and TranslatedContent
        The original text is preserved in TranslatedContent.original_text
        if preserve_original=True.

        Failures are handled per chunk: if language detection or translation
        raises, the error is logged and the chunk is returned unmodified, so
        one bad chunk never aborts the whole batch.

    Example:
        ```python
        from cognee.tasks.translation import translate_content

        # Translate chunks using default settings
        translated_chunks = await translate_content(chunks)

        # Translate with specific provider
        translated_chunks = await translate_content(
            chunks,
            translation_provider="llm",
            confidence_threshold=0.9
        )
        ```
    """
    if not isinstance(data_chunks, list):
        raise TranslationError("data_chunks must be a list")

    if not data_chunks:
        return data_chunks

    # Get configuration
    config = get_translation_config()
    provider_name = translation_provider or config.translation_provider
    target_lang = target_language or config.target_language
    # Compare against None instead of using `or`: 0.0 is a valid threshold
    # but is falsy, so `or` would silently replace it with the config value.
    threshold = (
        config.confidence_threshold if confidence_threshold is None else confidence_threshold
    )

    logger.info(
        f"Starting translation task for {len(data_chunks)} chunks "
        f"using {provider_name} provider, target language: {target_lang}"
    )

    # Get the translation provider
    provider = get_translation_provider(provider_name)

    # Process chunks
    processed_chunks = []
    total_chunks = len(data_chunks)

    for chunk_index, chunk in enumerate(data_chunks):
        # Log progress for large batches
        if chunk_index > 0 and chunk_index % 100 == 0:
            logger.info(f"Translation progress: {chunk_index}/{total_chunks} chunks processed")

        # Nothing to detect or translate; pass the chunk through untouched.
        if not hasattr(chunk, "text") or not chunk.text:
            processed_chunks.append(chunk)
            continue

        try:
            # Detect language
            detection = await detect_language_async(chunk.text, target_lang, threshold)

            # Create language metadata (deterministic id derived from the chunk id)
            language_metadata = LanguageMetadata(
                id=uuid5(chunk.id, "LanguageMetadata"),
                content_id=chunk.id,
                detected_language=detection.language_code,
                language_confidence=detection.confidence,
                requires_translation=detection.requires_translation,
                character_count=detection.character_count,
                language_name=detection.language_name,
            )

            # Skip if already in target language
            if not detection.requires_translation:
                if skip_if_target_language:
                    logger.debug(
                        f"Skipping chunk {chunk.id}: already in target language "
                        f"({detection.language_code})"
                    )
                    # Add language metadata to chunk
                    _add_to_chunk_contains(chunk, language_metadata)
                    processed_chunks.append(chunk)
                    continue

            # Translate the content
            logger.debug(
                f"Translating chunk {chunk.id} from {detection.language_code} to {target_lang}"
            )

            translation_result = await provider.translate(
                text=chunk.text,
                target_language=target_lang,
                source_language=detection.language_code,
            )

            # Create TranslatedContent data point. This must happen before
            # chunk.text is overwritten below so original_text is captured.
            translated_content = TranslatedContent(
                id=uuid5(chunk.id, "TranslatedContent"),
                original_chunk_id=chunk.id,
                original_text=chunk.text if preserve_original else "",
                translated_text=translation_result.translated_text,
                source_language=translation_result.source_language,
                target_language=translation_result.target_language,
                translation_provider=translation_result.provider,
                confidence_score=translation_result.confidence_score,
                translated_from=chunk,
            )

            # Update chunk text with translated content
            chunk.text = translation_result.translated_text

            # Add metadata to chunk's contains list
            _add_to_chunk_contains(chunk, language_metadata)
            _add_to_chunk_contains(chunk, translated_content)

            processed_chunks.append(chunk)

            logger.debug(
                f"Successfully translated chunk {chunk.id}: "
                f"{detection.language_code} -> {target_lang}"
            )

        except LanguageDetectionError as e:
            logger.warning(f"Language detection failed for chunk {chunk.id}: {e}")
            processed_chunks.append(chunk)
        except TranslationError as e:
            logger.error(f"Translation failed for chunk {chunk.id}: {e}")
            processed_chunks.append(chunk)
        except Exception as e:
            # Deliberate best-effort boundary: keep the chunk and continue.
            logger.error(f"Unexpected error processing chunk {chunk.id}: {e}")
            processed_chunks.append(chunk)

    logger.info(f"Translation task completed for {len(processed_chunks)} chunks")
    return processed_chunks
180
+
181
+
182
def _add_to_chunk_contains(chunk: DocumentChunk, item) -> None:
    """
    Append ``item`` to ``chunk.contains``, creating the list on first use.

    Args:
        chunk: Chunk whose ``contains`` attribute is extended in place.
        item: Object to append (e.g. LanguageMetadata or TranslatedContent).
    """
    list_is_missing = chunk.contains is None
    if list_is_missing:
        # Initialise lazily so the append below always has a list to target.
        chunk.contains = []
    chunk.contains.append(item)
187
+
188
+
189
async def translate_text(
    text: str,
    target_language: str = None,
    translation_provider: TranslationProviderType = None,
    source_language: Optional[str] = None,
) -> TranslationResult:
    """
    Translate one text string without wrapping it in a DocumentChunk.

    Args:
        text: The text to translate
        target_language: Target language code.
            Falls back to the translation config when not provided
        translation_provider: Translation service to use.
            Falls back to the translation config when not provided
        source_language: Source language code; passed to the provider
            unchanged (may be None)

    Returns:
        The TranslationResult produced by the selected provider

    Example:
        ```python
        from cognee.tasks.translation import translate_text

        result = await translate_text(
            "Bonjour le monde!",
            target_language="en"
        )
        print(result.translated_text)  # "Hello world!"
        print(result.source_language)  # "fr"
        ```
    """
    settings = get_translation_config()

    # Explicit arguments win; anything falsy falls back to the configured value.
    if translation_provider:
        chosen_provider = translation_provider
    else:
        chosen_provider = settings.translation_provider

    if target_language:
        destination_language = target_language
    else:
        destination_language = settings.target_language

    translator = get_translation_provider(chosen_provider)
    outcome = await translator.translate(
        text=text,
        target_language=destination_language,
        source_language=source_language,
    )
    return outcome
235
+
236
+
237
async def batch_translate_texts(
    texts: List[str],
    target_language: Optional[str] = None,
    translation_provider: Optional[TranslationProviderType] = None,
    source_language: Optional[str] = None,
) -> List[TranslationResult]:
    """
    Translate multiple text strings in batch.

    Delegates to the provider's ``translate_batch`` so that providers with
    native batch operations can handle all texts in one call.

    Args:
        texts: List of texts to translate. An empty list/tuple yields an
            empty result without contacting the provider.
        target_language: Target language code.
            If not provided, uses config default
        translation_provider: Translation service to use
            If not provided, uses config default
        source_language: Source language code (optional)

    Returns:
        List of TranslationResult objects

    Example:
        ```python
        from cognee.tasks.translation import batch_translate_texts

        results = await batch_translate_texts(
            ["Hola", "¿Cómo estás?", "Adiós"],
            target_language="en"
        )
        for result in results:
            print(f"{result.source_language}: {result.translated_text}")
        ```
    """
    # Nothing to translate: skip config/provider setup, mirroring the
    # empty-input guard in translate_content.
    if isinstance(texts, (list, tuple)) and not texts:
        return []

    config = get_translation_config()
    provider_name = translation_provider or config.translation_provider
    target_lang = target_language or config.target_language

    provider = get_translation_provider(provider_name)

    return await provider.translate_batch(
        texts=texts,
        target_language=target_lang,
        source_language=source_language,
    )
@@ -73,7 +73,11 @@ class DefaultUrlCrawler:
73
73
  self.timeout = timeout
74
74
  self.max_retries = max_retries
75
75
  self.retry_delay_factor = retry_delay_factor
76
- self.headers = headers or {"User-Agent": "Cognee-Scraper/1.0"}
76
+ self.headers = headers or {
77
+ "User-Agent": "Cognee-Scraper/1.0 (hello@cognee.ai)",
78
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
79
+ "Accept-Language": "en-US,en;q=0.9",
80
+ }
77
81
  self.robots_cache_ttl = robots_cache_ttl
78
82
  self._last_request_time_per_domain: Dict[str, float] = {}
79
83
  self._robots_cache: Dict[str, RobotsTxtCache] = {}
@@ -288,7 +292,7 @@ class DefaultUrlCrawler:
288
292
  while True:
289
293
  try:
290
294
  await self._respect_rate_limit(url, crawl_delay)
291
- resp = await self._client.get(url)
295
+ resp = await self._client.get(url, headers=self.headers)
292
296
  resp.raise_for_status()
293
297
  logger.info(
294
298
  f"Successfully fetched {url} (status={resp.status_code}, size={len(resp.text)} bytes)"
@@ -238,6 +238,7 @@ class TestCognifyCommand:
238
238
  ontology_file_path=None,
239
239
  chunker=TextChunker,
240
240
  run_in_background=False,
241
+ chunks_per_batch=None,
241
242
  )
242
243
 
243
244
  @patch("cognee.cli.commands.cognify_command.asyncio.run")
@@ -262,6 +262,7 @@ class TestCognifyCommandEdgeCases:
262
262
  ontology_file_path=None,
263
263
  chunker=TextChunker,
264
264
  run_in_background=False,
265
+ chunks_per_batch=None,
265
266
  )
266
267
 
267
268
  @patch("cognee.cli.commands.cognify_command.asyncio.run", side_effect=_mock_run)
@@ -295,6 +296,7 @@ class TestCognifyCommandEdgeCases:
295
296
  ontology_file_path="/nonexistent/path/ontology.owl",
296
297
  chunker=TextChunker,
297
298
  run_in_background=False,
299
+ chunks_per_batch=None,
298
300
  )
299
301
 
300
302
  @patch("cognee.cli.commands.cognify_command.asyncio.run")
@@ -373,6 +375,7 @@ class TestCognifyCommandEdgeCases:
373
375
  ontology_file_path=None,
374
376
  chunker=TextChunker,
375
377
  run_in_background=False,
378
+ chunks_per_batch=None,
376
379
  )
377
380
 
378
381
 
@@ -0,0 +1,62 @@
1
+ import pathlib
2
+
3
+ import pytest
4
+ import pytest_asyncio
5
+ import cognee
6
+
7
+ from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
8
+ from cognee.modules.retrieval.utils.brute_force_triplet_search import brute_force_triplet_search
9
+
10
+
11
@pytest_asyncio.fixture
async def clean_environment():
    """Point cognee at test-specific storage and prune it before and after the test."""

    async def _prune_everything():
        # Same two-step wipe is needed for both setup and teardown.
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)

    root = pathlib.Path(__file__).parent.parent.parent.parent
    system_root = str(root / ".cognee_system/test_brute_force_triplet_search_e2e")
    data_root = str(root / ".data_storage/test_brute_force_triplet_search_e2e")

    cognee.config.system_root_directory(system_root)
    cognee.config.data_root_directory(data_root)

    await _prune_everything()

    yield

    # Teardown is best-effort: a cleanup failure must not fail the test.
    try:
        await _prune_everything()
    except Exception:
        pass
31
+
32
+
33
@pytest.mark.asyncio
async def test_brute_force_triplet_search_end_to_end(clean_environment):
    """Run add + cognify, then check single-query and batch triplet search results."""

    text = """
    Cognee is an open-source AI memory engine that structures data into searchable formats for use with AI agents.
    The company focuses on persistent memory systems using knowledge graphs and vector search.
    It is a Berlin-based startup building infrastructure for context-aware AI applications.
    NLP systems can use Cognee to store and retrieve structured information.
    """

    await cognee.add(text)
    await cognee.cognify()

    # Single query: expect a non-empty flat list of Edge objects.
    edges = await brute_force_triplet_search(
        query="What can NLP systems use Cognee for?",
        top_k=1,
    )
    assert isinstance(edges, list)
    assert edges
    for found_edge in edges:
        assert isinstance(found_edge, Edge)

    # Batch query: expect one non-empty list of Edge objects per query.
    batch_queries = ["What is Cognee?", "What is the company's focus?"]
    per_query_edges = await brute_force_triplet_search(query_batch=batch_queries, top_k=1)

    assert isinstance(per_query_edges, list)
    assert len(per_query_edges) == len(batch_queries)
    for query_edges in per_query_edges:
        assert isinstance(query_edges, list)
    for query_edges in per_query_edges:
        assert query_edges
    for query_edges in per_query_edges:
        for found_edge in query_edges:
            assert isinstance(found_edge, Edge)
@@ -10,7 +10,6 @@ from cognee.tasks.storage import add_data_points
10
10
  from cognee.infrastructure.databases.vector import get_vector_engine
11
11
  from cognee.modules.chunking.models import DocumentChunk
12
12
  from cognee.modules.data.processing.document_types import TextDocument
13
- from cognee.modules.retrieval.exceptions.exceptions import NoDataError
14
13
  from cognee.modules.retrieval.chunks_retriever import ChunksRetriever
15
14
  from cognee.infrastructure.engine import DataPoint
16
15
  from cognee.modules.data.processing.document_types import Document
@@ -40,6 +39,17 @@ async def setup_test_environment_with_chunks_simple():
40
39
 
41
40
  await cognee.prune.prune_data()
42
41
  await cognee.prune.prune_system(metadata=True)
42
+ from cognee.infrastructure.databases.graph.get_graph_engine import _create_graph_engine
43
+ from cognee.infrastructure.databases.vector.create_vector_engine import (
44
+ _create_vector_engine,
45
+ )
46
+ from cognee.infrastructure.databases.relational.create_relational_engine import (
47
+ create_relational_engine,
48
+ )
49
+
50
+ _create_graph_engine.cache_clear()
51
+ _create_vector_engine.cache_clear()
52
+ create_relational_engine.cache_clear()
43
53
  await setup()
44
54
 
45
55
  document = TextDocument(
@@ -83,6 +93,17 @@ async def setup_test_environment_with_chunks_simple():
83
93
  try:
84
94
  await cognee.prune.prune_data()
85
95
  await cognee.prune.prune_system(metadata=True)
96
+ from cognee.infrastructure.databases.graph.get_graph_engine import _create_graph_engine
97
+ from cognee.infrastructure.databases.vector.create_vector_engine import (
98
+ _create_vector_engine,
99
+ )
100
+ from cognee.infrastructure.databases.relational.create_relational_engine import (
101
+ create_relational_engine,
102
+ )
103
+
104
+ _create_graph_engine.cache_clear()
105
+ _create_vector_engine.cache_clear()
106
+ create_relational_engine.cache_clear()
86
107
  except Exception:
87
108
  pass
88
109
 
@@ -99,6 +120,17 @@ async def setup_test_environment_with_chunks_complex():
99
120
 
100
121
  await cognee.prune.prune_data()
101
122
  await cognee.prune.prune_system(metadata=True)
123
+ from cognee.infrastructure.databases.graph.get_graph_engine import _create_graph_engine
124
+ from cognee.infrastructure.databases.vector.create_vector_engine import (
125
+ _create_vector_engine,
126
+ )
127
+ from cognee.infrastructure.databases.relational.create_relational_engine import (
128
+ create_relational_engine,
129
+ )
130
+
131
+ _create_graph_engine.cache_clear()
132
+ _create_vector_engine.cache_clear()
133
+ create_relational_engine.cache_clear()
102
134
  await setup()
103
135
 
104
136
  document1 = TextDocument(
@@ -174,6 +206,17 @@ async def setup_test_environment_with_chunks_complex():
174
206
  try:
175
207
  await cognee.prune.prune_data()
176
208
  await cognee.prune.prune_system(metadata=True)
209
+ from cognee.infrastructure.databases.graph.get_graph_engine import _create_graph_engine
210
+ from cognee.infrastructure.databases.vector.create_vector_engine import (
211
+ _create_vector_engine,
212
+ )
213
+ from cognee.infrastructure.databases.relational.create_relational_engine import (
214
+ create_relational_engine,
215
+ )
216
+
217
+ _create_graph_engine.cache_clear()
218
+ _create_vector_engine.cache_clear()
219
+ create_relational_engine.cache_clear()
177
220
  except Exception:
178
221
  pass
179
222
 
@@ -190,26 +233,53 @@ async def setup_test_environment_empty():
190
233
 
191
234
  await cognee.prune.prune_data()
192
235
  await cognee.prune.prune_system(metadata=True)
236
+ from cognee.infrastructure.databases.graph.get_graph_engine import _create_graph_engine
237
+ from cognee.infrastructure.databases.vector.create_vector_engine import (
238
+ _create_vector_engine,
239
+ )
240
+ from cognee.infrastructure.databases.relational.create_relational_engine import (
241
+ create_relational_engine,
242
+ )
243
+
244
+ _create_graph_engine.cache_clear()
245
+ _create_vector_engine.cache_clear()
246
+ create_relational_engine.cache_clear()
193
247
 
194
248
  yield
195
249
 
196
250
  try:
197
251
  await cognee.prune.prune_data()
198
252
  await cognee.prune.prune_system(metadata=True)
253
+ from cognee.infrastructure.databases.graph.get_graph_engine import _create_graph_engine
254
+ from cognee.infrastructure.databases.vector.create_vector_engine import (
255
+ _create_vector_engine,
256
+ )
257
+ from cognee.infrastructure.databases.relational.create_relational_engine import (
258
+ create_relational_engine,
259
+ )
260
+
261
+ _create_graph_engine.cache_clear()
262
+ _create_vector_engine.cache_clear()
263
+ create_relational_engine.cache_clear()
199
264
  except Exception:
200
265
  pass
201
266
 
202
267
 
203
268
@pytest.mark.asyncio
async def test_chunks_retriever_multiple_chunks(setup_test_environment_with_chunks_simple):
    """Integration test: verify ChunksRetriever can retrieve multiple chunks.

    Runs the retriever's three stages (retrieval, context building,
    completion) for the query "Steve" and checks that the completion is a
    non-empty list that contains the "Steve Rodger" chunk.
    """
    retriever = ChunksRetriever()
    query = "Steve"

    # Every stage receives the same `query` variable so the retrieval query
    # cannot drift away from the one used for context and completion.
    chunks = await retriever.get_retrieved_objects(query)
    context = await retriever.get_context_from_objects(query=query, retrieved_objects=chunks)

    completion = await retriever.get_completion_from_context(
        query=query, retrieved_objects=chunks, context=context
    )

    assert isinstance(completion, list), "Retrieved objects should be a list"
    assert len(completion) > 0, "Retrieved objects list should not be empty"
    assert any(chunk["text"] == "Steve Rodger" for chunk in completion), (
        "Failed to get Steve Rodger chunk"
    )
215
285
 
@@ -218,35 +288,64 @@ async def test_chunks_retriever_context_multiple_chunks(setup_test_environment_w
218
288
async def test_chunks_retriever_top_k_limit(setup_test_environment_with_chunks_complex):
    """Integration test: verify ChunksRetriever respects top_k parameter.

    Builds a retriever with ``top_k=2``, runs retrieval, context building and
    completion for the query "Employee", and checks that the completion is a
    list with at most two entries.
    """
    retriever = ChunksRetriever(top_k=2)
    query = "Employee"

    # Retrieve with the same query that the context and completion stages
    # receive; a different literal here would make the stages disagree.
    chunks = await retriever.get_retrieved_objects(query)
    context = await retriever.get_context_from_objects(query=query, retrieved_objects=chunks)

    completion = await retriever.get_completion_from_context(
        query=query, retrieved_objects=chunks, context=context
    )

    assert isinstance(completion, list), "Context should be a list"
    assert len(completion) <= 2, "Should respect top_k limit"
226
302
 
227
303
 
228
304
@pytest.mark.asyncio
async def test_chunks_retriever_context_complex(setup_test_environment_with_chunks_complex):
    """Integration test: verify ChunksRetriever can retrieve chunk context (complex).

    Retrieves chunks for the query "Christina", builds the context from them,
    and checks that the context begins with "Christina Mayer".
    """
    retriever = ChunksRetriever(top_k=20)
    query = "Christina"

    chunks = await retriever.get_retrieved_objects(query)

    context = await retriever.get_context_from_objects(query=query, retrieved_objects=chunks)

    # NOTE(review): assumes `context` is a str (the empty-graph test compares
    # it to "") - confirm. A prefix check avoids a hard-coded slice length
    # that would have to be kept in sync with the expected name.
    assert context.startswith("Christina Mayer"), "Failed to get Christina Mayer"
236
315
 
237
316
 
238
317
@pytest.mark.asyncio
async def test_chunks_retriever_on_empty_graph(setup_test_environment_empty):
    """Integration test: verify ChunksRetriever handles empty graph correctly.

    Creates the "DocumentChunk_text" collection without adding any points,
    runs retrieval, context building and completion for "Christina Mayer",
    and expects the completion to be an empty list.
    """
    chunk_retriever = ChunksRetriever()
    question = "Christina Mayer"

    # Presumably the collection must exist for retrieval to run against it
    # rather than against a missing collection - verify against the retriever.
    engine = get_vector_engine()
    await engine.create_collection(
        "DocumentChunk_text",
        payload_schema=DocumentChunkWithEntities,
    )

    found = await chunk_retriever.get_retrieved_objects(question)
    built_context = await chunk_retriever.get_context_from_objects(
        query=question, retrieved_objects=found
    )
    completion = await chunk_retriever.get_completion_from_context(
        query=question,
        retrieved_objects=found,
        context=built_context,
    )

    assert isinstance(completion, list), "Retrieved objects should be a list"
    assert not completion, "Found chunks when none should exist"
336
+
337
+
338
@pytest.mark.asyncio
async def test_chunks_retriever_context_on_empty_graph(setup_test_environment_empty):
    """Integration test: verify ChunksRetriever context handles empty graph correctly.

    Creates the "DocumentChunk_text" collection without adding any points,
    then retrieves and builds context for "Christina Mayer" and expects the
    context to be the empty string.
    """
    chunk_retriever = ChunksRetriever()
    question = "Christina Mayer"

    # Presumably the collection must exist for retrieval to run against it
    # rather than against a missing collection - verify against the retriever.
    engine = get_vector_engine()
    await engine.create_collection(
        "DocumentChunk_text",
        payload_schema=DocumentChunkWithEntities,
    )

    found = await chunk_retriever.get_retrieved_objects(question)
    built_context = await chunk_retriever.get_context_from_objects(
        query=question, retrieved_objects=found
    )

    assert built_context == "", "Found chunks when none should exist"
@@ -6,7 +6,6 @@ from typing import Optional, Union
6
6
  import cognee
7
7
 
8
8
  from cognee.low_level import setup, DataPoint
9
- from cognee.modules.graph.utils import resolve_edges_to_text
10
9
  from cognee.tasks.storage import add_data_points
11
10
  from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
12
11
 
@@ -174,8 +173,11 @@ async def setup_test_environment_empty():
174
173
  async def test_graph_completion_context_simple(setup_test_environment_simple):
175
174
  """Integration test: verify GraphCompletionRetriever can retrieve context (simple)."""
176
175
  retriever = GraphCompletionRetriever()
176
+ query = "Who works at Canva?"
177
177
 
178
- context = await resolve_edges_to_text(await retriever.get_context("Who works at Canva?"))
178
+ triplets = await retriever.get_retrieved_objects(query)
179
+
180
+ context = await retriever.get_context_from_objects(query=query, retrieved_objects=triplets)
179
181
 
180
182
  # Ensure the top-level sections are present
181
183
  assert "Nodes:" in context, "Missing 'Nodes:' section in context"
@@ -240,8 +242,11 @@ async def test_graph_completion_context_simple(setup_test_environment_simple):
240
242
  async def test_graph_completion_context_complex(setup_test_environment_complex):
241
243
  """Integration test: verify GraphCompletionRetriever can retrieve context (complex)."""
242
244
  retriever = GraphCompletionRetriever(top_k=20)
245
+ query = "Who works at Figma?"
246
+
247
+ triplets = await retriever.get_retrieved_objects(query)
243
248
 
244
- context = await resolve_edges_to_text(await retriever.get_context("Who works at Figma?"))
249
+ context = await retriever.get_context_from_objects(query=query, retrieved_objects=triplets)
245
250
 
246
251
  assert "Mike Rodger --[works_for]--> Figma" in context, "Failed to get Mike Rodger"
247
252
  assert "Ike Loma --[works_for]--> Figma" in context, "Failed to get Ike Loma"
@@ -252,9 +257,12 @@ async def test_graph_completion_context_complex(setup_test_environment_complex):
252
257
async def test_get_graph_completion_context_on_empty_graph(setup_test_environment_empty):
    """Integration test: verify GraphCompletionRetriever handles empty graph correctly.

    Retrieves triplets for a question against the empty environment, builds
    the context from them, and expects the context to be the empty string.
    """
    graph_retriever = GraphCompletionRetriever()
    question = "Who works at Figma?"

    found_triplets = await graph_retriever.get_retrieved_objects(question)
    built_context = await graph_retriever.get_context_from_objects(
        query=question,
        retrieved_objects=found_triplets,
    )

    assert built_context == "", "Context should be empty on an empty graph"
258
266
 
259
267
 
260
268
  @pytest.mark.asyncio