cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. cognee/__init__.py +2 -0
  2. cognee/alembic/README +1 -0
  3. cognee/alembic/env.py +107 -0
  4. cognee/alembic/script.py.mako +26 -0
  5. cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
  6. cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
  7. cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
  8. cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
  9. cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
  10. cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
  11. cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
  12. cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
  13. cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
  14. cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
  15. cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
  16. cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
  17. cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
  18. cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
  19. cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
  20. cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
  21. cognee/alembic.ini +117 -0
  22. cognee/api/v1/add/routers/get_add_router.py +2 -0
  23. cognee/api/v1/cognify/cognify.py +11 -6
  24. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
  25. cognee/api/v1/config/config.py +60 -0
  26. cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
  27. cognee/api/v1/memify/routers/get_memify_router.py +2 -0
  28. cognee/api/v1/search/routers/get_search_router.py +21 -6
  29. cognee/api/v1/search/search.py +25 -5
  30. cognee/api/v1/sync/routers/get_sync_router.py +3 -3
  31. cognee/cli/commands/add_command.py +1 -1
  32. cognee/cli/commands/cognify_command.py +6 -0
  33. cognee/cli/commands/config_command.py +1 -1
  34. cognee/context_global_variables.py +5 -1
  35. cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
  36. cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
  37. cognee/infrastructure/databases/cache/config.py +6 -0
  38. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
  39. cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
  40. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
  41. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
  42. cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
  43. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
  44. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
  45. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
  46. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
  47. cognee/infrastructure/databases/vector/config.py +6 -0
  48. cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
  49. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
  50. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
  51. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
  52. cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
  53. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
  54. cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
  55. cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
  57. cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
  58. cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
  59. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
  60. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
  61. cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
  62. cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
  63. cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
  64. cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
  65. cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
  66. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
  67. cognee/infrastructure/llm/prompts/test.txt +1 -1
  68. cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
  71. cognee/modules/chunking/models/DocumentChunk.py +0 -1
  72. cognee/modules/cognify/config.py +2 -0
  73. cognee/modules/data/models/Data.py +1 -0
  74. cognee/modules/engine/models/Entity.py +0 -1
  75. cognee/modules/engine/operations/setup.py +6 -0
  76. cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
  77. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
  78. cognee/modules/graph/utils/__init__.py +1 -0
  79. cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
  80. cognee/modules/notebooks/methods/__init__.py +1 -0
  81. cognee/modules/notebooks/methods/create_notebook.py +0 -34
  82. cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
  83. cognee/modules/notebooks/methods/get_notebooks.py +12 -8
  84. cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
  85. cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
  86. cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
  87. cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
  88. cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
  89. cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
  90. cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
  91. cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
  92. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
  93. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
  94. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
  95. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
  96. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
  97. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
  98. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
  99. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
  100. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
  101. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
  102. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
  103. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
  104. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
  105. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
  106. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
  107. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
  108. cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
  109. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
  110. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
  111. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
  112. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
  113. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
  114. cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
  115. cognee/modules/retrieval/__init__.py +0 -1
  116. cognee/modules/retrieval/base_retriever.py +66 -10
  117. cognee/modules/retrieval/chunks_retriever.py +57 -49
  118. cognee/modules/retrieval/coding_rules_retriever.py +12 -5
  119. cognee/modules/retrieval/completion_retriever.py +29 -28
  120. cognee/modules/retrieval/cypher_search_retriever.py +25 -20
  121. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
  122. cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
  123. cognee/modules/retrieval/graph_completion_retriever.py +78 -63
  124. cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
  125. cognee/modules/retrieval/lexical_retriever.py +34 -12
  126. cognee/modules/retrieval/natural_language_retriever.py +18 -15
  127. cognee/modules/retrieval/summaries_retriever.py +51 -34
  128. cognee/modules/retrieval/temporal_retriever.py +59 -49
  129. cognee/modules/retrieval/triplet_retriever.py +31 -32
  130. cognee/modules/retrieval/utils/access_tracking.py +88 -0
  131. cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
  132. cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
  133. cognee/modules/search/methods/__init__.py +1 -0
  134. cognee/modules/search/methods/get_retriever_output.py +53 -0
  135. cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
  136. cognee/modules/search/methods/search.py +90 -215
  137. cognee/modules/search/models/SearchResultPayload.py +67 -0
  138. cognee/modules/search/types/SearchResult.py +1 -8
  139. cognee/modules/search/types/SearchType.py +1 -2
  140. cognee/modules/search/types/__init__.py +1 -1
  141. cognee/modules/search/utils/__init__.py +1 -2
  142. cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
  143. cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
  144. cognee/modules/users/authentication/default/default_transport.py +11 -1
  145. cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
  146. cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
  147. cognee/modules/users/methods/create_user.py +0 -9
  148. cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
  149. cognee/modules/visualization/cognee_network_visualization.py +1 -1
  150. cognee/run_migrations.py +48 -0
  151. cognee/shared/exceptions/__init__.py +1 -3
  152. cognee/shared/exceptions/exceptions.py +11 -1
  153. cognee/shared/usage_logger.py +332 -0
  154. cognee/shared/utils.py +12 -5
  155. cognee/tasks/chunks/__init__.py +9 -0
  156. cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
  157. cognee/tasks/graph/__init__.py +7 -0
  158. cognee/tasks/memify/__init__.py +8 -0
  159. cognee/tasks/memify/extract_usage_frequency.py +613 -0
  160. cognee/tasks/summarization/models.py +0 -2
  161. cognee/tasks/temporal_graph/__init__.py +0 -1
  162. cognee/tasks/translation/__init__.py +96 -0
  163. cognee/tasks/translation/config.py +110 -0
  164. cognee/tasks/translation/detect_language.py +190 -0
  165. cognee/tasks/translation/exceptions.py +62 -0
  166. cognee/tasks/translation/models.py +72 -0
  167. cognee/tasks/translation/providers/__init__.py +44 -0
  168. cognee/tasks/translation/providers/azure_provider.py +192 -0
  169. cognee/tasks/translation/providers/base.py +85 -0
  170. cognee/tasks/translation/providers/google_provider.py +158 -0
  171. cognee/tasks/translation/providers/llm_provider.py +143 -0
  172. cognee/tasks/translation/translate_content.py +282 -0
  173. cognee/tasks/web_scraper/default_url_crawler.py +6 -2
  174. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
  175. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
  176. cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
  177. cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
  178. cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
  179. cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
  180. cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
  181. cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
  182. cognee/tests/integration/retrieval/test_structured_output.py +62 -18
  183. cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
  184. cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
  185. cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
  186. cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
  187. cognee/tests/tasks/translation/README.md +147 -0
  188. cognee/tests/tasks/translation/__init__.py +1 -0
  189. cognee/tests/tasks/translation/config_test.py +93 -0
  190. cognee/tests/tasks/translation/detect_language_test.py +118 -0
  191. cognee/tests/tasks/translation/providers_test.py +151 -0
  192. cognee/tests/tasks/translation/translate_content_test.py +213 -0
  193. cognee/tests/test_chromadb.py +1 -1
  194. cognee/tests/test_cleanup_unused_data.py +165 -0
  195. cognee/tests/test_delete_by_id.py +6 -6
  196. cognee/tests/test_extract_usage_frequency.py +308 -0
  197. cognee/tests/test_kuzu.py +17 -7
  198. cognee/tests/test_lancedb.py +3 -1
  199. cognee/tests/test_library.py +1 -1
  200. cognee/tests/test_neo4j.py +17 -7
  201. cognee/tests/test_neptune_analytics_vector.py +3 -1
  202. cognee/tests/test_permissions.py +172 -187
  203. cognee/tests/test_pgvector.py +3 -1
  204. cognee/tests/test_relational_db_migration.py +15 -1
  205. cognee/tests/test_remote_kuzu.py +3 -1
  206. cognee/tests/test_s3_file_storage.py +1 -1
  207. cognee/tests/test_search_db.py +97 -110
  208. cognee/tests/test_usage_logger_e2e.py +268 -0
  209. cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
  210. cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
  211. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
  212. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
  213. cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
  214. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
  215. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
  216. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
  217. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
  218. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
  219. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
  220. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
  221. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
  222. cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
  223. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
  224. cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
  225. cognee/tests/unit/modules/search/test_search.py +176 -0
  226. cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
  227. cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
  228. cognee/tests/unit/shared/test_usage_logger.py +241 -0
  229. cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
  230. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/METADATA +22 -17
  231. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/RECORD +235 -147
  232. cognee/api/.env.example +0 -5
  233. cognee/modules/retrieval/base_graph_retriever.py +0 -24
  234. cognee/modules/search/methods/get_search_type_tools.py +0 -223
  235. cognee/modules/search/methods/no_access_control_search.py +0 -62
  236. cognee/modules/search/utils/prepare_search_result.py +0 -63
  237. cognee/tests/test_feedback_enrichment.py +0 -174
  238. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/WHEEL +0 -0
  239. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/entry_points.txt +0 -0
  240. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/LICENSE +0 -0
  241. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,192 @@
1
+ from typing import Optional
2
+
3
+ import aiohttp
4
+
5
+ from cognee.shared.logging_utils import get_logger
6
+
7
+ from .base import TranslationProvider, TranslationResult
8
+ from ..config import get_translation_config
9
+ from ..exceptions import TranslationProviderError
10
+
11
+ logger = get_logger(__name__)
12
+
13
+
14
class AzureTranslationProvider(TranslationProvider):
    """
    Translation provider using Azure Translator API.

    Requires:
    - AZURE_TRANSLATOR_KEY environment variable
    - AZURE_TRANSLATOR_REGION environment variable (optional)
    """

    # Azure supports up to 100 texts per request.
    _MAX_TEXTS_PER_REQUEST = 100

    def __init__(self):
        self._config = get_translation_config()

    @property
    def provider_name(self) -> str:
        """Return 'azure' as the provider name."""
        return "azure"

    def is_available(self) -> bool:
        """Check if Azure Translator is available (an API key is configured)."""
        return self._config.azure_translator_key is not None

    def _ensure_available(self) -> None:
        """Raise TranslationProviderError when no API key is configured."""
        if not self.is_available():
            raise TranslationProviderError(
                provider=self.provider_name,
                message="Azure Translator API key not configured. Set AZURE_TRANSLATOR_KEY environment variable.",
            )

    def _build_request(
        self,
        target_language: str,
        source_language: Optional[str],
    ) -> tuple[str, dict, dict]:
        """Return the (endpoint, query params, headers) shared by every translate call."""
        endpoint = f"{self._config.azure_translator_endpoint}/translate"

        params = {
            "api-version": "3.0",
            "to": target_language,
        }
        # Omitting "from" leaves the source language unspecified in the request.
        if source_language:
            params["from"] = source_language

        headers = {
            "Ocp-Apim-Subscription-Key": self._config.azure_translator_key,
            "Content-Type": "application/json",
        }
        if self._config.azure_translator_region:
            headers["Ocp-Apim-Subscription-Region"] = self._config.azure_translator_region

        return endpoint, params, headers

    async def _post_texts(
        self,
        session: "aiohttp.ClientSession",
        endpoint: str,
        params: dict,
        headers: dict,
        texts: list[str],
    ) -> list:
        """POST one request containing ``texts`` and return the decoded JSON payload."""
        async with session.post(
            endpoint,
            params=params,
            headers=headers,
            json=[{"text": text} for text in texts],
            timeout=aiohttp.ClientTimeout(total=self._config.timeout_seconds),
        ) as response:
            response.raise_for_status()
            return await response.json()

    def _to_result(
        self,
        item: dict,
        target_language: str,
        source_language: Optional[str],
    ) -> TranslationResult:
        """Convert one element of the response payload into a TranslationResult."""
        translation = item["translations"][0]
        detected_language = item.get("detectedLanguage", {})

        return TranslationResult(
            translated_text=translation["text"],
            # An explicitly requested source language wins over the detected one.
            source_language=source_language or detected_language.get("language", "unknown"),
            target_language=target_language,
            # Falls back to 0.9 when the response carries no detection score.
            confidence_score=detected_language.get("score", 0.9),
            provider=self.provider_name,
            raw_response=item,
        )

    async def translate(
        self,
        text: str,
        target_language: str = "en",
        source_language: Optional[str] = None,
    ) -> TranslationResult:
        """
        Translate text using Azure Translator API.

        Args:
            text: The text to translate
            target_language: Target language code (default: "en")
            source_language: Source language code (optional)

        Returns:
            TranslationResult with translated text and metadata

        Raises:
            TranslationProviderError: If the API key is not configured, or if the
                request or the parsing of its response fails (the original
                exception is chained as the cause).
        """
        self._ensure_available()
        endpoint, params, headers = self._build_request(target_language, source_language)

        try:
            async with aiohttp.ClientSession() as session:
                payload = await self._post_texts(session, endpoint, params, headers, [text])

            return self._to_result(payload[0], target_language, source_language)

        except Exception as e:
            logger.error(f"Azure translation failed: {e}")
            raise TranslationProviderError(
                provider=self.provider_name,
                message=f"Translation failed: {e}",
                original_error=e,
            ) from e

    async def translate_batch(
        self,
        texts: list[str],
        target_language: str = "en",
        source_language: Optional[str] = None,
    ) -> list[TranslationResult]:
        """
        Translate multiple texts using Azure Translator API.

        Azure Translator supports up to 100 texts per request, so the input is
        sent in consecutive slices of at most that size.

        Args:
            texts: List of texts to translate
            target_language: Target language code
            source_language: Source language code (optional)

        Returns:
            List of TranslationResult objects

        Raises:
            TranslationProviderError: If the API key is not configured, or if any
                request or the parsing of its response fails (the original
                exception is chained as the cause).
        """
        self._ensure_available()

        # Nothing to send: avoid opening an HTTP session at all.
        if not texts:
            return []

        endpoint, params, headers = self._build_request(target_language, source_language)

        # Clamp to [1, 100]: a configured size of 0 would make range() raise and a
        # negative size would silently produce no batches (and no results).
        batch_size = max(1, min(self._MAX_TEXTS_PER_REQUEST, self._config.batch_size))
        all_results = []

        try:
            async with aiohttp.ClientSession() as session:
                for start in range(0, len(texts), batch_size):
                    batch = texts[start : start + batch_size]
                    payload = await self._post_texts(session, endpoint, params, headers, batch)

                    all_results.extend(
                        self._to_result(item, target_language, source_language)
                        for item in payload
                    )

        except Exception as e:
            logger.error(f"Azure batch translation failed: {e}")
            raise TranslationProviderError(
                provider=self.provider_name,
                message=f"Batch translation failed: {e}",
                original_error=e,
            ) from e

        return all_results
@@ -0,0 +1,85 @@
1
+ """
2
+ Base classes for translation providers.
3
+
4
+ This module defines the abstract interface that all translation providers must implement.
5
+ Providers handle the actual translation of text using external services like OpenAI,
6
+ Google Translate, or Azure Translator.
7
+ """
8
+
9
+ from abc import ABC, abstractmethod
10
+ from dataclasses import dataclass
11
+ from typing import Optional
12
+
13
+
14
@dataclass
class TranslationResult:
    """Outcome of a single translation request, as reported by a provider."""

    # The text after translation.
    translated_text: str
    # Language code the text was translated from.
    source_language: str
    # Language code the text was translated into.
    target_language: str
    # Provider-reported confidence; None when the provider exposes no score
    # (e.g., Google Translate).
    confidence_score: Optional[float]
    # Name of the provider that produced this result.
    provider: str
    # Provider-specific response data kept for inspection; optional.
    raw_response: Optional[dict] = None
25
+
26
+
27
class TranslationProvider(ABC):
    """Interface every translation provider has to implement."""

    @property
    @abstractmethod
    def provider_name(self) -> str:
        """Name identifying this translation provider."""
        ...

    @abstractmethod
    async def translate(
        self,
        text: str,
        target_language: str = "en",
        source_language: Optional[str] = None,
    ) -> TranslationResult:
        """
        Translate a single text into the target language.

        Args:
            text: The text to translate
            target_language: Code of the language to translate into (default: "en")
            source_language: Code of the language of ``text``; optional, to be
                auto-detected when omitted

        Returns:
            A TranslationResult holding the translated text and its metadata
        """
        ...

    @abstractmethod
    async def translate_batch(
        self,
        texts: list[str],
        target_language: str = "en",
        source_language: Optional[str] = None,
    ) -> list[TranslationResult]:
        """
        Translate several texts into the target language.

        Args:
            texts: The texts to translate
            target_language: Code of the language to translate into (default: "en")
            source_language: Code of the language of the texts (optional)

        Returns:
            A list of TranslationResult objects
        """
        ...

    @abstractmethod
    def is_available(self) -> bool:
        """Report whether this provider can be used (has its required credentials).

        Every provider must implement this check for its own credentials.

        Returns:
            True if the provider has valid credentials and is ready to use.
        """
        ...
@@ -0,0 +1,158 @@
1
+ import asyncio
2
+ from typing import Optional
3
+
4
+ from cognee.shared.logging_utils import get_logger
5
+
6
+ from .base import TranslationProvider, TranslationResult
7
+ from ..config import get_translation_config
8
+
9
+ logger = get_logger(__name__)
10
+
11
+
12
class GoogleTranslationProvider(TranslationProvider):
    """
    Translation provider using Google Cloud Translation API.

    Requires:
    - google-cloud-translate package
    - GOOGLE_TRANSLATE_API_KEY or GOOGLE_PROJECT_ID environment variable
    """

    def __init__(self):
        # The client is created lazily on first use (see _get_client).
        self._client = None
        self._config = get_translation_config()

    @property
    def provider_name(self) -> str:
        """Return 'google' as the provider name."""
        return "google"

    def _get_client(self):
        """Lazy initialization of Google Translate client.

        Raises:
            ImportError: If the google-cloud-translate package is not installed.
            Exception: Any error raised while constructing the client is logged
                and re-raised unchanged.
        """
        if self._client is None:
            try:
                from google.cloud import translate_v2 as translate

                self._client = translate.Client()
            except ImportError as e:
                raise ImportError(
                    "google-cloud-translate is required for Google translation. "
                    "Install it with: pip install google-cloud-translate"
                ) from e
            except Exception as e:
                logger.error(f"Failed to initialize Google Translate client: {e}")
                raise
        return self._client

    def is_available(self) -> bool:
        """Check if Google Translate is available.

        Returns:
            True when the client can be created; False otherwise (the failure
            is logged at debug level).
        """
        try:
            self._get_client()
            return True
        except Exception as e:
            logger.debug(f"Google Translate not available: {e}")
            return False

    @staticmethod
    def _build_translate_kwargs(target_language: str, source_language: Optional[str]) -> dict:
        """Build the keyword arguments passed to ``client.translate``."""
        translate_kwargs = {
            "target_language": target_language,
            # The v2 API treats input as HTML unless told otherwise and then
            # returns HTML-escaped entities (e.g. "&#39;") in the translation.
            # The texts handled here are plain text, so request plain text.
            "format_": "text",
        }
        if source_language:
            translate_kwargs["source_language"] = source_language
        return translate_kwargs

    def _to_result(
        self,
        result: dict,
        target_language: str,
        source_language: Optional[str],
    ) -> TranslationResult:
        """Convert one response dict from the client into a TranslationResult."""
        detected_language = result.get("detectedSourceLanguage", source_language or "unknown")

        return TranslationResult(
            translated_text=result["translatedText"],
            source_language=detected_language,
            target_language=target_language,
            # Google Translate API does not provide confidence scores
            confidence_score=None,
            provider=self.provider_name,
            raw_response=result,
        )

    async def translate(
        self,
        text: str,
        target_language: str = "en",
        source_language: Optional[str] = None,
    ) -> TranslationResult:
        """
        Translate text using Google Translate API.

        Args:
            text: The text to translate
            target_language: Target language code (default: "en")
            source_language: Source language code (optional)

        Returns:
            TranslationResult with translated text and metadata

        Raises:
            Exception: Any failure is logged and re-raised unchanged.
        """
        try:
            client = self._get_client()

            # Run in thread pool since google-cloud-translate is synchronous
            loop = asyncio.get_running_loop()
            translate_kwargs = self._build_translate_kwargs(target_language, source_language)

            result = await loop.run_in_executor(
                None,
                lambda: client.translate(text, **translate_kwargs),
            )

            return self._to_result(result, target_language, source_language)

        except Exception as e:
            logger.error(f"Google translation failed: {e}")
            raise

    async def translate_batch(
        self,
        texts: list[str],
        target_language: str = "en",
        source_language: Optional[str] = None,
    ) -> list[TranslationResult]:
        """
        Translate multiple texts using Google Translate API.

        Google Translate supports batch translation natively, so all texts are
        passed to the client in a single call.

        Args:
            texts: List of texts to translate
            target_language: Target language code
            source_language: Source language code (optional)

        Returns:
            List of TranslationResult objects

        Raises:
            Exception: Any failure is logged and re-raised unchanged.
        """
        # Nothing to translate: skip client creation and the remote call.
        if not texts:
            return []

        try:
            client = self._get_client()
            loop = asyncio.get_running_loop()
            translate_kwargs = self._build_translate_kwargs(target_language, source_language)

            # Run in thread pool since google-cloud-translate is synchronous
            results = await loop.run_in_executor(
                None,
                lambda: client.translate(texts, **translate_kwargs),
            )

            return [
                self._to_result(result, target_language, source_language) for result in results
            ]

        except Exception as e:
            logger.error(f"Google batch translation failed: {e}")
            raise
@@ -0,0 +1,143 @@
1
+ import asyncio
2
+ from typing import Optional
3
+
4
+ from pydantic import BaseModel
5
+
6
+ from cognee.infrastructure.llm.LLMGateway import LLMGateway
7
+ from cognee.infrastructure.llm.config import get_llm_config
8
+ from cognee.infrastructure.llm.prompts import read_query_prompt
9
+ from cognee.shared.logging_utils import get_logger
10
+
11
+ from .base import TranslationProvider, TranslationResult
12
+
13
+ logger = get_logger(__name__)
14
+
15
+
16
class TranslationOutput(BaseModel):
    """Structured response schema the LLM is asked to fill in for a translation."""

    # The translated text.
    translated_text: str
    # Source language as reported by the LLM.
    detected_source_language: str
    # Optional free-form notes returned alongside the translation.
    translation_notes: Optional[str] = None
22
+
23
+
24
class LLMTranslationProvider(TranslationProvider):
    """
    Translation provider using the configured LLM for translation.

    This provider leverages the existing LLM infrastructure in Cognee
    to perform translations using any LLM configured via LLM_PROVIDER
    (OpenAI, Azure, Ollama, Anthropic, etc.).

    The LLM used is determined by the cognee LLM configuration settings:
    - LLM_PROVIDER: The LLM provider (openai, azure, ollama, etc.)
    - LLM_MODEL: The model to use
    - LLM_API_KEY: API key for the provider
    """

    @property
    def provider_name(self) -> str:
        """Return 'llm' as the provider name."""
        return "llm"

    async def translate(
        self,
        text: str,
        target_language: str = "en",
        source_language: Optional[str] = None,
    ) -> TranslationResult:
        """
        Translate text using the configured LLM.

        Args:
            text: The text to translate
            target_language: Target language code (default: "en")
            source_language: Source language code (optional)

        Returns:
            TranslationResult with translated text and metadata

        Raises:
            Exception: Any failure is logged and re-raised unchanged.
        """
        try:
            system_prompt = read_query_prompt("translate_content.txt")

            # Fall back to a built-in prompt when the prompt file could not be loaded.
            if system_prompt is None:
                logger.warning("translate_content.txt prompt file not found, using default prompt")
                system_prompt = (
                    "You are a professional translator. Translate the given text accurately "
                    "while preserving the original meaning, tone, and style. "
                    "Detect the source language if not provided."
                )

            # Build the input with context
            if source_language:
                input_text = (
                    f"Translate the following text from {source_language} to {target_language}.\n\n"
                    f"Text to translate:\n{text}"
                )
            else:
                input_text = (
                    f"Translate the following text to {target_language}. "
                    f"First detect the source language.\n\n"
                    f"Text to translate:\n{text}"
                )

            result = await LLMGateway.acreate_structured_output(
                text_input=input_text,
                system_prompt=system_prompt,
                response_model=TranslationOutput,
            )

            return TranslationResult(
                translated_text=result.translated_text,
                # An explicitly requested source language wins over the detected one.
                source_language=source_language or result.detected_source_language,
                target_language=target_language,
                # TODO: Consider deriving confidence from LLM response metadata
                # or making configurable via TranslationConfig
                confidence_score=0.95,  # LLM translations are generally high quality
                provider=self.provider_name,
                raw_response={"notes": result.translation_notes},
            )

        except Exception as e:
            logger.error(f"LLM translation failed: {e}")
            raise

    async def translate_batch(
        self,
        texts: list[str],
        target_language: str = "en",
        source_language: Optional[str] = None,
        max_concurrent: int = 5,
    ) -> list[TranslationResult]:
        """
        Translate multiple texts using the configured LLM.

        Uses a semaphore to limit concurrent requests and avoid API rate limits.

        Args:
            texts: List of texts to translate
            target_language: Target language code
            source_language: Source language code (optional)
            max_concurrent: Maximum concurrent translation requests (default: 5);
                must be at least 1

        Returns:
            List of TranslationResult objects, in the same order as ``texts``

        Raises:
            ValueError: If ``max_concurrent`` is less than 1.
            Exception: The first failure raised by an individual translation.
        """
        # A semaphore created with 0 permits would never let any task run, so the
        # call would hang forever; reject it the same way negative values already
        # are (asyncio.Semaphore raises ValueError for them).
        if max_concurrent < 1:
            raise ValueError(f"max_concurrent must be at least 1, got {max_concurrent}")

        semaphore = asyncio.Semaphore(max_concurrent)

        async def limited_translate(text: str) -> TranslationResult:
            async with semaphore:
                return await self.translate(text, target_language, source_language)

        tasks = [limited_translate(text) for text in texts]
        return await asyncio.gather(*tasks)

    def is_available(self) -> bool:
        """Check if LLM provider is available (has required credentials).

        Returns:
            True when an LLM API key is configured; False when it is missing or
            the LLM configuration cannot be loaded.
        """
        try:
            llm_config = get_llm_config()
            # Check if API key is configured (required for most providers)
            return bool(llm_config.llm_api_key)
        except Exception:
            return False