cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +2 -0
- cognee/alembic/README +1 -0
- cognee/alembic/env.py +107 -0
- cognee/alembic/script.py.mako +26 -0
- cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
- cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
- cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
- cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
- cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
- cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
- cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
- cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
- cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
- cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
- cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
- cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
- cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
- cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
- cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
- cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
- cognee/alembic.ini +117 -0
- cognee/api/v1/add/routers/get_add_router.py +2 -0
- cognee/api/v1/cognify/cognify.py +11 -6
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
- cognee/api/v1/config/config.py +60 -0
- cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
- cognee/api/v1/memify/routers/get_memify_router.py +2 -0
- cognee/api/v1/search/routers/get_search_router.py +21 -6
- cognee/api/v1/search/search.py +25 -5
- cognee/api/v1/sync/routers/get_sync_router.py +3 -3
- cognee/cli/commands/add_command.py +1 -1
- cognee/cli/commands/cognify_command.py +6 -0
- cognee/cli/commands/config_command.py +1 -1
- cognee/context_global_variables.py +5 -1
- cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
- cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
- cognee/infrastructure/databases/cache/config.py +6 -0
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
- cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/config.py +6 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
- cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
- cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
- cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
- cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
- cognee/infrastructure/llm/prompts/test.txt +1 -1
- cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
- cognee/modules/chunking/models/DocumentChunk.py +0 -1
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/models/Data.py +1 -0
- cognee/modules/engine/models/Entity.py +0 -1
- cognee/modules/engine/operations/setup.py +6 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
- cognee/modules/notebooks/methods/__init__.py +1 -0
- cognee/modules/notebooks/methods/create_notebook.py +0 -34
- cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
- cognee/modules/notebooks/methods/get_notebooks.py +12 -8
- cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
- cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
- cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
- cognee/modules/retrieval/__init__.py +0 -1
- cognee/modules/retrieval/base_retriever.py +66 -10
- cognee/modules/retrieval/chunks_retriever.py +57 -49
- cognee/modules/retrieval/coding_rules_retriever.py +12 -5
- cognee/modules/retrieval/completion_retriever.py +29 -28
- cognee/modules/retrieval/cypher_search_retriever.py +25 -20
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
- cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
- cognee/modules/retrieval/graph_completion_retriever.py +78 -63
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/lexical_retriever.py +34 -12
- cognee/modules/retrieval/natural_language_retriever.py +18 -15
- cognee/modules/retrieval/summaries_retriever.py +51 -34
- cognee/modules/retrieval/temporal_retriever.py +59 -49
- cognee/modules/retrieval/triplet_retriever.py +31 -32
- cognee/modules/retrieval/utils/access_tracking.py +88 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
- cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
- cognee/modules/search/methods/__init__.py +1 -0
- cognee/modules/search/methods/get_retriever_output.py +53 -0
- cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
- cognee/modules/search/methods/search.py +90 -215
- cognee/modules/search/models/SearchResultPayload.py +67 -0
- cognee/modules/search/types/SearchResult.py +1 -8
- cognee/modules/search/types/SearchType.py +1 -2
- cognee/modules/search/types/__init__.py +1 -1
- cognee/modules/search/utils/__init__.py +1 -2
- cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
- cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
- cognee/modules/users/authentication/default/default_transport.py +11 -1
- cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
- cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
- cognee/modules/users/methods/create_user.py +0 -9
- cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
- cognee/modules/visualization/cognee_network_visualization.py +1 -1
- cognee/run_migrations.py +48 -0
- cognee/shared/exceptions/__init__.py +1 -3
- cognee/shared/exceptions/exceptions.py +11 -1
- cognee/shared/usage_logger.py +332 -0
- cognee/shared/utils.py +12 -5
- cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
- cognee/tasks/memify/extract_usage_frequency.py +613 -0
- cognee/tasks/summarization/models.py +0 -2
- cognee/tasks/temporal_graph/__init__.py +0 -1
- cognee/tasks/translation/__init__.py +96 -0
- cognee/tasks/translation/config.py +110 -0
- cognee/tasks/translation/detect_language.py +190 -0
- cognee/tasks/translation/exceptions.py +62 -0
- cognee/tasks/translation/models.py +72 -0
- cognee/tasks/translation/providers/__init__.py +44 -0
- cognee/tasks/translation/providers/azure_provider.py +192 -0
- cognee/tasks/translation/providers/base.py +85 -0
- cognee/tasks/translation/providers/google_provider.py +158 -0
- cognee/tasks/translation/providers/llm_provider.py +143 -0
- cognee/tasks/translation/translate_content.py +282 -0
- cognee/tasks/web_scraper/default_url_crawler.py +6 -2
- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
- cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
- cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
- cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
- cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
- cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
- cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
- cognee/tests/integration/retrieval/test_structured_output.py +62 -18
- cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
- cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
- cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
- cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
- cognee/tests/tasks/translation/README.md +147 -0
- cognee/tests/tasks/translation/__init__.py +1 -0
- cognee/tests/tasks/translation/config_test.py +93 -0
- cognee/tests/tasks/translation/detect_language_test.py +118 -0
- cognee/tests/tasks/translation/providers_test.py +151 -0
- cognee/tests/tasks/translation/translate_content_test.py +213 -0
- cognee/tests/test_chromadb.py +1 -1
- cognee/tests/test_cleanup_unused_data.py +165 -0
- cognee/tests/test_delete_by_id.py +6 -6
- cognee/tests/test_extract_usage_frequency.py +308 -0
- cognee/tests/test_kuzu.py +17 -7
- cognee/tests/test_lancedb.py +3 -1
- cognee/tests/test_library.py +1 -1
- cognee/tests/test_neo4j.py +17 -7
- cognee/tests/test_neptune_analytics_vector.py +3 -1
- cognee/tests/test_permissions.py +172 -187
- cognee/tests/test_pgvector.py +3 -1
- cognee/tests/test_relational_db_migration.py +15 -1
- cognee/tests/test_remote_kuzu.py +3 -1
- cognee/tests/test_s3_file_storage.py +1 -1
- cognee/tests/test_search_db.py +97 -110
- cognee/tests/test_usage_logger_e2e.py +268 -0
- cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
- cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
- cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
- cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
- cognee/tests/unit/modules/search/test_search.py +176 -0
- cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
- cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
- cognee/tests/unit/shared/test_usage_logger.py +241 -0
- cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/METADATA +17 -10
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/RECORD +232 -144
- cognee/api/.env.example +0 -5
- cognee/modules/retrieval/base_graph_retriever.py +0 -24
- cognee/modules/search/methods/get_search_type_tools.py +0 -223
- cognee/modules/search/methods/no_access_control_search.py +0 -62
- cognee/modules/search/utils/prepare_search_result.py +0 -63
- cognee/tests/test_feedback_enrichment.py +0 -174
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/WHEEL +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
import aiohttp
|
|
4
|
+
|
|
5
|
+
from cognee.shared.logging_utils import get_logger
|
|
6
|
+
|
|
7
|
+
from .base import TranslationProvider, TranslationResult
|
|
8
|
+
from ..config import get_translation_config
|
|
9
|
+
from ..exceptions import TranslationProviderError
|
|
10
|
+
|
|
11
|
+
logger = get_logger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class AzureTranslationProvider(TranslationProvider):
    """
    Translation provider using Azure Translator API.

    Settings are read from the translation config returned by
    ``get_translation_config()``: ``azure_translator_key`` (required),
    ``azure_translator_region`` (optional), ``azure_translator_endpoint``,
    ``timeout_seconds`` and ``batch_size``.

    Requires:
    - AZURE_TRANSLATOR_KEY environment variable
    - AZURE_TRANSLATOR_REGION environment variable (optional)
    """

    # Azure supports up to 100 texts per request
    _MAX_TEXTS_PER_REQUEST = 100

    def __init__(self):
        self._config = get_translation_config()

    @property
    def provider_name(self) -> str:
        """Return 'azure' as the provider name."""
        return "azure"

    def is_available(self) -> bool:
        """Check if Azure Translator is available (an API key is configured)."""
        return self._config.azure_translator_key is not None

    def _ensure_available(self) -> None:
        """Raise TranslationProviderError when no API key is configured."""
        if not self.is_available():
            raise TranslationProviderError(
                provider=self.provider_name,
                message="Azure Translator API key not configured. Set AZURE_TRANSLATOR_KEY environment variable.",
            )

    def _build_request(
        self,
        target_language: str,
        source_language: Optional[str],
    ) -> tuple[str, dict, dict]:
        """Return the (endpoint, query params, headers) shared by every request."""
        # Strip a trailing slash so a configured "https://host/" does not
        # produce "https://host//translate".
        base_url = str(self._config.azure_translator_endpoint).rstrip("/")
        endpoint = f"{base_url}/translate"

        params = {
            "api-version": "3.0",
            "to": target_language,
        }
        if source_language:
            params["from"] = source_language

        headers = {
            "Ocp-Apim-Subscription-Key": self._config.azure_translator_key,
            "Content-Type": "application/json",
        }
        if self._config.azure_translator_region:
            headers["Ocp-Apim-Subscription-Region"] = self._config.azure_translator_region

        return endpoint, params, headers

    async def _post(self, session, endpoint: str, params: dict, headers: dict, body: list):
        """POST one request body and return the decoded JSON response."""
        async with session.post(
            endpoint,
            params=params,
            headers=headers,
            json=body,
            timeout=aiohttp.ClientTimeout(total=self._config.timeout_seconds),
        ) as response:
            response.raise_for_status()
            return await response.json()

    def _to_result(
        self,
        item: dict,
        target_language: str,
        source_language: Optional[str],
    ) -> TranslationResult:
        """Convert one element of the response array into a TranslationResult."""
        translation = item["translations"][0]
        detected_language = item.get("detectedLanguage", {})

        return TranslationResult(
            translated_text=translation["text"],
            # An explicitly requested source language wins over the detected one.
            source_language=source_language or detected_language.get("language", "unknown"),
            target_language=target_language,
            # Falls back to 0.9 when the response carries no detection score.
            confidence_score=detected_language.get("score", 0.9),
            provider=self.provider_name,
            raw_response=item,
        )

    async def translate(
        self,
        text: str,
        target_language: str = "en",
        source_language: Optional[str] = None,
    ) -> TranslationResult:
        """
        Translate text using Azure Translator API.

        Args:
            text: The text to translate
            target_language: Target language code (default: "en")
            source_language: Source language code (optional)

        Returns:
            TranslationResult with translated text and metadata

        Raises:
            TranslationProviderError: If no API key is configured, or if the
                request or the parsing of its response fails.
        """
        self._ensure_available()
        endpoint, params, headers = self._build_request(target_language, source_language)

        try:
            async with aiohttp.ClientSession() as session:
                payload = await self._post(session, endpoint, params, headers, [{"text": text}])

            return self._to_result(payload[0], target_language, source_language)

        except Exception as e:
            logger.error(f"Azure translation failed: {e}")
            raise TranslationProviderError(
                provider=self.provider_name,
                message=f"Translation failed: {e}",
                original_error=e,
            ) from e

    async def translate_batch(
        self,
        texts: list[str],
        target_language: str = "en",
        source_language: Optional[str] = None,
    ) -> list[TranslationResult]:
        """
        Translate multiple texts using Azure Translator API.

        Azure Translator supports up to 100 texts per request, so the input
        is sent in chunks of ``min(100, config.batch_size)`` texts.

        Args:
            texts: List of texts to translate
            target_language: Target language code
            source_language: Source language code (optional)

        Returns:
            List of TranslationResult objects, in the order the responses
            were received (empty when ``texts`` is empty)

        Raises:
            TranslationProviderError: If no API key is configured, or if any
                request or the parsing of its response fails.
        """
        self._ensure_available()

        # Nothing to translate: skip opening an HTTP session altogether.
        if not texts:
            return []

        endpoint, params, headers = self._build_request(target_language, source_language)

        # Clamp to at least 1 so a zero/negative configured batch size cannot
        # break range() or silently skip every text.
        batch_size = max(1, min(self._MAX_TEXTS_PER_REQUEST, self._config.batch_size))
        all_results = []

        try:
            async with aiohttp.ClientSession() as session:
                for start in range(0, len(texts), batch_size):
                    batch = texts[start : start + batch_size]
                    body = [{"text": text} for text in batch]

                    results = await self._post(session, endpoint, params, headers, body)

                    all_results.extend(
                        self._to_result(result, target_language, source_language)
                        for result in results
                    )

        except Exception as e:
            logger.error(f"Azure batch translation failed: {e}")
            raise TranslationProviderError(
                provider=self.provider_name,
                message=f"Batch translation failed: {e}",
                original_error=e,
            ) from e

        return all_results
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Base classes for translation providers.
|
|
3
|
+
|
|
4
|
+
This module defines the abstract interface that all translation providers must implement.
|
|
5
|
+
Providers handle the actual translation of text using external services like OpenAI,
|
|
6
|
+
Google Translate, or Azure Translator.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from abc import ABC, abstractmethod
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from typing import Optional
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class TranslationResult:
    """
    Outcome of a single translation call.

    Instances are plain value objects: providers fill them in and hand them
    back to the caller unchanged.
    """

    # Text after translation.
    translated_text: str
    # Language code of the input text.
    source_language: str
    # Language code the text was translated into.
    target_language: str
    # Confidence score from the provider, or None if not available (e.g., Google Translate)
    confidence_score: Optional[float]
    # Name of the provider that produced this result.
    provider: str
    # Provider response this result was built from, when the provider keeps it.
    raw_response: Optional[dict] = None
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class TranslationProvider(ABC):
    """
    Interface every translation provider implements.

    Concrete providers supply a name, single-text and batch translation
    coroutines, and a credential/availability check.
    """

    @property
    @abstractmethod
    def provider_name(self) -> str:
        """Name identifying this translation provider."""
        ...

    @abstractmethod
    async def translate(
        self,
        text: str,
        target_language: str = "en",
        source_language: Optional[str] = None,
    ) -> TranslationResult:
        """
        Translate one text into the target language.

        Args:
            text: The text to translate
            target_language: Target language code (default: "en")
            source_language: Source language code (optional, will be auto-detected if not provided)

        Returns:
            TranslationResult with translated text and metadata
        """
        ...

    @abstractmethod
    async def translate_batch(
        self,
        texts: list[str],
        target_language: str = "en",
        source_language: Optional[str] = None,
    ) -> list[TranslationResult]:
        """
        Translate several texts into the target language.

        Args:
            texts: List of texts to translate
            target_language: Target language code (default: "en")
            source_language: Source language code (optional)

        Returns:
            List of TranslationResult objects
        """
        ...

    @abstractmethod
    def is_available(self) -> bool:
        """
        Report whether this provider can be used (has required credentials).

        Every provider must implement this to validate its own credentials.

        Returns:
            True if the provider has valid credentials and is ready to use.
        """
        ...
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from cognee.shared.logging_utils import get_logger
|
|
5
|
+
|
|
6
|
+
from .base import TranslationProvider, TranslationResult
|
|
7
|
+
from ..config import get_translation_config
|
|
8
|
+
|
|
9
|
+
logger = get_logger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class GoogleTranslationProvider(TranslationProvider):
    """
    Translation provider using Google Cloud Translation API.

    Requires:
    - google-cloud-translate package
    - GOOGLE_TRANSLATE_API_KEY or GOOGLE_PROJECT_ID environment variable

    NOTE(review): the client is created as ``translate.Client()`` with no
    arguments, so this class never reads those variables itself; credential
    lookup is left to the google-cloud library. Confirm which variables it
    actually honours.
    """

    def __init__(self):
        # Created on first use by _get_client().
        self._client = None
        self._config = get_translation_config()

    @property
    def provider_name(self) -> str:
        """Return 'google' as the provider name."""
        return "google"

    def _get_client(self):
        """
        Lazy initialization of Google Translate client.

        Raises:
            ImportError: If google-cloud-translate is not installed.
            Exception: Whatever the client constructor raises, logged and
                re-raised unchanged.
        """
        if self._client is None:
            try:
                from google.cloud import translate_v2 as translate

                self._client = translate.Client()
            except ImportError as e:
                # Chain explicitly so the original import failure stays visible.
                raise ImportError(
                    "google-cloud-translate is required for Google translation. "
                    "Install it with: pip install google-cloud-translate"
                ) from e
            except Exception as e:
                logger.error(f"Failed to initialize Google Translate client: {e}")
                raise
        return self._client

    def is_available(self) -> bool:
        """Check if Google Translate is available (the client can be created)."""
        try:
            self._get_client()
            return True
        except Exception as e:
            logger.debug(f"Google Translate not available: {e}")
            return False

    @staticmethod
    def _translate_kwargs(target_language: str, source_language: Optional[str]) -> dict:
        """Build the keyword arguments passed to ``client.translate``."""
        translate_kwargs = {"target_language": target_language}
        # Only pass a source language when the caller supplied one.
        if source_language:
            translate_kwargs["source_language"] = source_language
        return translate_kwargs

    def _to_result(
        self,
        result: dict,
        target_language: str,
        source_language: Optional[str],
    ) -> TranslationResult:
        """Convert one response dict from the client into a TranslationResult."""
        detected_language = result.get("detectedSourceLanguage", source_language or "unknown")

        return TranslationResult(
            translated_text=result["translatedText"],
            source_language=detected_language,
            target_language=target_language,
            # Google Translate API does not provide confidence scores
            confidence_score=None,
            provider=self.provider_name,
            raw_response=result,
        )

    async def translate(
        self,
        text: str,
        target_language: str = "en",
        source_language: Optional[str] = None,
    ) -> TranslationResult:
        """
        Translate text using Google Translate API.

        Args:
            text: The text to translate
            target_language: Target language code (default: "en")
            source_language: Source language code (optional)

        Returns:
            TranslationResult with translated text and metadata

        Raises:
            Exception: Any error from client creation or the API call is
                logged and re-raised unchanged.
        """
        try:
            client = self._get_client()

            # Run in thread pool since google-cloud-translate is synchronous
            loop = asyncio.get_running_loop()
            translate_kwargs = self._translate_kwargs(target_language, source_language)

            result = await loop.run_in_executor(
                None,
                lambda: client.translate(text, **translate_kwargs),
            )

            return self._to_result(result, target_language, source_language)

        except Exception as e:
            logger.error(f"Google translation failed: {e}")
            raise

    async def translate_batch(
        self,
        texts: list[str],
        target_language: str = "en",
        source_language: Optional[str] = None,
    ) -> list[TranslationResult]:
        """
        Translate multiple texts using Google Translate API.

        The whole list is passed to the client in a single ``translate`` call.

        Args:
            texts: List of texts to translate
            target_language: Target language code
            source_language: Source language code (optional)

        Returns:
            List of TranslationResult objects (empty when ``texts`` is empty)

        Raises:
            Exception: Any error from client creation or the API call is
                logged and re-raised unchanged.
        """
        try:
            # Acquire the client first so a missing package or bad credentials
            # still surface even for an empty batch.
            client = self._get_client()

            # Nothing to translate: do not send an empty request to the API.
            if not texts:
                return []

            # Run in thread pool since google-cloud-translate is synchronous
            loop = asyncio.get_running_loop()
            translate_kwargs = self._translate_kwargs(target_language, source_language)

            results = await loop.run_in_executor(
                None,
                lambda: client.translate(texts, **translate_kwargs),
            )

            return [
                self._to_result(result, target_language, source_language) for result in results
            ]

        except Exception as e:
            logger.error(f"Google batch translation failed: {e}")
            raise
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
7
|
+
from cognee.infrastructure.llm.config import get_llm_config
|
|
8
|
+
from cognee.infrastructure.llm.prompts import read_query_prompt
|
|
9
|
+
from cognee.shared.logging_utils import get_logger
|
|
10
|
+
|
|
11
|
+
from .base import TranslationProvider, TranslationResult
|
|
12
|
+
|
|
13
|
+
# Module-level logger, named after this module, used by the LLM translation provider below.
logger = get_logger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class TranslationOutput(BaseModel):
    """Schema the LLM is asked to fill in when it returns a translation.

    Passed as ``response_model`` to the structured-output call, so the field
    names and their order are part of the contract with the model.
    """

    # The text rendered into the requested target language.
    translated_text: str
    # Language the LLM reports the input was written in.
    detected_source_language: str
    # Optional free-form remarks from the LLM; absent unless it supplies any.
    translation_notes: Optional[str] = None
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class LLMTranslationProvider(TranslationProvider):
    """
    Translation provider using the configured LLM for translation.

    This provider leverages the existing LLM infrastructure in Cognee
    to perform translations using any LLM configured via LLM_PROVIDER
    (OpenAI, Azure, Ollama, Anthropic, etc.).

    The LLM used is determined by the cognee LLM configuration settings:
    - LLM_PROVIDER: The LLM provider (openai, azure, ollama, etc.)
    - LLM_MODEL: The model to use
    - LLM_API_KEY: API key for the provider
    """

    @property
    def provider_name(self) -> str:
        """Return 'llm' as the provider name."""
        return "llm"

    async def translate(
        self,
        text: str,
        target_language: str = "en",
        source_language: Optional[str] = None,
    ) -> TranslationResult:
        """
        Translate text using the configured LLM.

        Args:
            text: The text to translate
            target_language: Target language code (default: "en")
            source_language: Source language code (optional). When omitted,
                the LLM is asked to detect it and its answer is reported
                in the result.

        Returns:
            TranslationResult with translated text and metadata

        Raises:
            Exception: Any error raised while loading the prompt or calling
                the LLM is logged and re-raised unchanged.
        """
        try:
            system_prompt = read_query_prompt("translate_content.txt")

            # Fall back to a built-in prompt when the prompt file could not be loaded.
            if system_prompt is None:
                logger.warning("translate_content.txt prompt file not found, using default prompt")
                system_prompt = (
                    "You are a professional translator. Translate the given text accurately "
                    "while preserving the original meaning, tone, and style. "
                    "Detect the source language if not provided."
                )

            # Build the user message; only ask for language detection when the
            # caller did not state the source language.
            if source_language:
                input_text = (
                    f"Translate the following text from {source_language} to {target_language}.\n\n"
                    f"Text to translate:\n{text}"
                )
            else:
                input_text = (
                    f"Translate the following text to {target_language}. "
                    f"First detect the source language.\n\n"
                    f"Text to translate:\n{text}"
                )

            result = await LLMGateway.acreate_structured_output(
                text_input=input_text,
                system_prompt=system_prompt,
                response_model=TranslationOutput,
            )

            return TranslationResult(
                translated_text=result.translated_text,
                # A caller-supplied source language takes precedence over the LLM's guess.
                source_language=source_language or result.detected_source_language,
                target_language=target_language,
                # TODO: Consider deriving confidence from LLM response metadata
                # or making configurable via TranslationConfig
                # NOTE(review): 0.95 is a fixed placeholder, not a measured score.
                confidence_score=0.95,  # LLM translations are generally high quality
                provider=self.provider_name,
                raw_response={"notes": result.translation_notes},
            )

        except Exception as e:
            logger.error(f"LLM translation failed: {e}")
            raise

    async def translate_batch(
        self,
        texts: list[str],
        target_language: str = "en",
        source_language: Optional[str] = None,
        max_concurrent: int = 5,
    ) -> list[TranslationResult]:
        """
        Translate multiple texts using the configured LLM.

        Uses a semaphore to limit concurrent requests and avoid API rate limits.
        Each text is translated with a separate call to ``translate``.

        Args:
            texts: List of texts to translate
            target_language: Target language code
            source_language: Source language code (optional)
            max_concurrent: Maximum concurrent translation requests (default: 5).
                Must be at least 1.

        Returns:
            List of TranslationResult objects, in the same order as ``texts``

        Raises:
            ValueError: If ``max_concurrent`` is less than 1.
            Exception: The first error raised by an individual translation.
        """
        # A semaphore created with 0 permits can never be acquired, so every
        # translation would wait forever; fail fast instead of hanging.
        if max_concurrent < 1:
            raise ValueError(f"max_concurrent must be >= 1, got {max_concurrent}")

        # Nothing to translate: skip creating the semaphore and tasks.
        if not texts:
            return []

        semaphore = asyncio.Semaphore(max_concurrent)

        async def limited_translate(text: str) -> TranslationResult:
            # Hold a permit for the duration of one LLM call.
            async with semaphore:
                return await self.translate(text, target_language, source_language)

        tasks = [limited_translate(text) for text in texts]
        # gather() returns results in the order the awaitables were passed in.
        return await asyncio.gather(*tasks)

    def is_available(self) -> bool:
        """
        Check if LLM provider is available (has required credentials).

        Returns:
            True when the LLM configuration has a non-empty API key; False when
            the key is missing or the configuration cannot be loaded.
        """
        try:
            llm_config = get_llm_config()
            # Check if API key is configured (required for most providers)
            # NOTE(review): providers that may run without an API key (e.g. a
            # local Ollama setup) would be reported as unavailable here — confirm
            # whether that is intended.
            return bool(llm_config.llm_api_key)
        except Exception:
            # Best-effort probe: any configuration failure means "not available".
            return False
|