cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238) hide show
  1. cognee/__init__.py +2 -0
  2. cognee/alembic/README +1 -0
  3. cognee/alembic/env.py +107 -0
  4. cognee/alembic/script.py.mako +26 -0
  5. cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
  6. cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
  7. cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
  8. cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
  9. cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
  10. cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
  11. cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
  12. cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
  13. cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
  14. cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
  15. cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
  16. cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
  17. cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
  18. cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
  19. cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
  20. cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
  21. cognee/alembic.ini +117 -0
  22. cognee/api/v1/add/routers/get_add_router.py +2 -0
  23. cognee/api/v1/cognify/cognify.py +11 -6
  24. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
  25. cognee/api/v1/config/config.py +60 -0
  26. cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
  27. cognee/api/v1/memify/routers/get_memify_router.py +2 -0
  28. cognee/api/v1/search/routers/get_search_router.py +21 -6
  29. cognee/api/v1/search/search.py +25 -5
  30. cognee/api/v1/sync/routers/get_sync_router.py +3 -3
  31. cognee/cli/commands/add_command.py +1 -1
  32. cognee/cli/commands/cognify_command.py +6 -0
  33. cognee/cli/commands/config_command.py +1 -1
  34. cognee/context_global_variables.py +5 -1
  35. cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
  36. cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
  37. cognee/infrastructure/databases/cache/config.py +6 -0
  38. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
  39. cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
  40. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
  41. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
  42. cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
  43. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
  44. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
  45. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
  46. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
  47. cognee/infrastructure/databases/vector/config.py +6 -0
  48. cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
  49. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
  50. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
  51. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
  52. cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
  53. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
  54. cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
  55. cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
  57. cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
  58. cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
  59. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
  60. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
  61. cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
  62. cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
  63. cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
  64. cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
  65. cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
  66. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
  67. cognee/infrastructure/llm/prompts/test.txt +1 -1
  68. cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
  71. cognee/modules/chunking/models/DocumentChunk.py +0 -1
  72. cognee/modules/cognify/config.py +2 -0
  73. cognee/modules/data/models/Data.py +1 -0
  74. cognee/modules/engine/models/Entity.py +0 -1
  75. cognee/modules/engine/operations/setup.py +6 -0
  76. cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
  77. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
  78. cognee/modules/graph/utils/__init__.py +1 -0
  79. cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
  80. cognee/modules/notebooks/methods/__init__.py +1 -0
  81. cognee/modules/notebooks/methods/create_notebook.py +0 -34
  82. cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
  83. cognee/modules/notebooks/methods/get_notebooks.py +12 -8
  84. cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
  85. cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
  86. cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
  87. cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
  88. cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
  89. cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
  90. cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
  91. cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
  92. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
  93. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
  94. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
  95. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
  96. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
  97. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
  98. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
  99. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
  100. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
  101. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
  102. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
  103. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
  104. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
  105. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
  106. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
  107. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
  108. cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
  109. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
  110. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
  111. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
  112. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
  113. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
  114. cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
  115. cognee/modules/retrieval/__init__.py +0 -1
  116. cognee/modules/retrieval/base_retriever.py +66 -10
  117. cognee/modules/retrieval/chunks_retriever.py +57 -49
  118. cognee/modules/retrieval/coding_rules_retriever.py +12 -5
  119. cognee/modules/retrieval/completion_retriever.py +29 -28
  120. cognee/modules/retrieval/cypher_search_retriever.py +25 -20
  121. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
  122. cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
  123. cognee/modules/retrieval/graph_completion_retriever.py +78 -63
  124. cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
  125. cognee/modules/retrieval/lexical_retriever.py +34 -12
  126. cognee/modules/retrieval/natural_language_retriever.py +18 -15
  127. cognee/modules/retrieval/summaries_retriever.py +51 -34
  128. cognee/modules/retrieval/temporal_retriever.py +59 -49
  129. cognee/modules/retrieval/triplet_retriever.py +31 -32
  130. cognee/modules/retrieval/utils/access_tracking.py +88 -0
  131. cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
  132. cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
  133. cognee/modules/search/methods/__init__.py +1 -0
  134. cognee/modules/search/methods/get_retriever_output.py +53 -0
  135. cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
  136. cognee/modules/search/methods/search.py +90 -215
  137. cognee/modules/search/models/SearchResultPayload.py +67 -0
  138. cognee/modules/search/types/SearchResult.py +1 -8
  139. cognee/modules/search/types/SearchType.py +1 -2
  140. cognee/modules/search/types/__init__.py +1 -1
  141. cognee/modules/search/utils/__init__.py +1 -2
  142. cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
  143. cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
  144. cognee/modules/users/authentication/default/default_transport.py +11 -1
  145. cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
  146. cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
  147. cognee/modules/users/methods/create_user.py +0 -9
  148. cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
  149. cognee/modules/visualization/cognee_network_visualization.py +1 -1
  150. cognee/run_migrations.py +48 -0
  151. cognee/shared/exceptions/__init__.py +1 -3
  152. cognee/shared/exceptions/exceptions.py +11 -1
  153. cognee/shared/usage_logger.py +332 -0
  154. cognee/shared/utils.py +12 -5
  155. cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
  156. cognee/tasks/memify/extract_usage_frequency.py +613 -0
  157. cognee/tasks/summarization/models.py +0 -2
  158. cognee/tasks/temporal_graph/__init__.py +0 -1
  159. cognee/tasks/translation/__init__.py +96 -0
  160. cognee/tasks/translation/config.py +110 -0
  161. cognee/tasks/translation/detect_language.py +190 -0
  162. cognee/tasks/translation/exceptions.py +62 -0
  163. cognee/tasks/translation/models.py +72 -0
  164. cognee/tasks/translation/providers/__init__.py +44 -0
  165. cognee/tasks/translation/providers/azure_provider.py +192 -0
  166. cognee/tasks/translation/providers/base.py +85 -0
  167. cognee/tasks/translation/providers/google_provider.py +158 -0
  168. cognee/tasks/translation/providers/llm_provider.py +143 -0
  169. cognee/tasks/translation/translate_content.py +282 -0
  170. cognee/tasks/web_scraper/default_url_crawler.py +6 -2
  171. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
  172. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
  173. cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
  174. cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
  175. cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
  176. cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
  177. cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
  178. cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
  179. cognee/tests/integration/retrieval/test_structured_output.py +62 -18
  180. cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
  181. cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
  182. cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
  183. cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
  184. cognee/tests/tasks/translation/README.md +147 -0
  185. cognee/tests/tasks/translation/__init__.py +1 -0
  186. cognee/tests/tasks/translation/config_test.py +93 -0
  187. cognee/tests/tasks/translation/detect_language_test.py +118 -0
  188. cognee/tests/tasks/translation/providers_test.py +151 -0
  189. cognee/tests/tasks/translation/translate_content_test.py +213 -0
  190. cognee/tests/test_chromadb.py +1 -1
  191. cognee/tests/test_cleanup_unused_data.py +165 -0
  192. cognee/tests/test_delete_by_id.py +6 -6
  193. cognee/tests/test_extract_usage_frequency.py +308 -0
  194. cognee/tests/test_kuzu.py +17 -7
  195. cognee/tests/test_lancedb.py +3 -1
  196. cognee/tests/test_library.py +1 -1
  197. cognee/tests/test_neo4j.py +17 -7
  198. cognee/tests/test_neptune_analytics_vector.py +3 -1
  199. cognee/tests/test_permissions.py +172 -187
  200. cognee/tests/test_pgvector.py +3 -1
  201. cognee/tests/test_relational_db_migration.py +15 -1
  202. cognee/tests/test_remote_kuzu.py +3 -1
  203. cognee/tests/test_s3_file_storage.py +1 -1
  204. cognee/tests/test_search_db.py +97 -110
  205. cognee/tests/test_usage_logger_e2e.py +268 -0
  206. cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
  207. cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
  208. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
  209. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
  210. cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
  211. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
  212. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
  213. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
  214. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
  215. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
  216. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
  217. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
  218. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
  219. cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
  220. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
  221. cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
  222. cognee/tests/unit/modules/search/test_search.py +176 -0
  223. cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
  224. cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
  225. cognee/tests/unit/shared/test_usage_logger.py +241 -0
  226. cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
  227. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/METADATA +17 -10
  228. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/RECORD +232 -144
  229. cognee/api/.env.example +0 -5
  230. cognee/modules/retrieval/base_graph_retriever.py +0 -24
  231. cognee/modules/search/methods/get_search_type_tools.py +0 -223
  232. cognee/modules/search/methods/no_access_control_search.py +0 -62
  233. cognee/modules/search/utils/prepare_search_result.py +0 -63
  234. cognee/tests/test_feedback_enrichment.py +0 -174
  235. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/WHEEL +0 -0
  236. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/entry_points.txt +0 -0
  237. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/licenses/LICENSE +0 -0
  238. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,151 @@
1
+ """
2
+ Unit tests for translation providers
3
+ """
4
+
5
+ import os
6
+
7
+ import pytest
8
+
9
+ from cognee.tasks.translation.providers import (
10
+ get_translation_provider,
11
+ LLMTranslationProvider,
12
+ TranslationResult,
13
+ )
14
+ from cognee.tasks.translation.exceptions import TranslationError
15
+
16
+
17
def has_llm_api_key():
    """Report whether a usable LLM API key is configured.

    Reads the ``LLM_API_KEY`` environment variable. A missing, empty, or
    whitespace-only value counts as "no key", so the tests guarded by this
    helper are skipped instead of running with an unusable credential.

    Returns:
        bool: True when ``LLM_API_KEY`` holds a non-blank value.
    """
    return bool(os.environ.get("LLM_API_KEY", "").strip())
20
+
21
+
22
@pytest.mark.asyncio
@pytest.mark.skipif(not has_llm_api_key(), reason="No LLM API key available")
async def test_llm_provider_basic_translation():
    """Translate a short Spanish phrase to English with the LLM provider.

    Verifies the result type, that translated text was produced, and that
    the language codes and provider name are reported on the result.
    """
    translator = LLMTranslationProvider()

    outcome = await translator.translate(
        text="Hola mundo",
        target_language="en",
        source_language="es",
    )

    assert isinstance(outcome, TranslationResult)

    translated = outcome.translated_text
    assert translated is not None
    assert len(translated) > 0

    assert outcome.source_language == "es"
    assert outcome.target_language == "en"
    assert outcome.provider == "llm"
36
+
37
+
38
@pytest.mark.asyncio
@pytest.mark.skipif(not has_llm_api_key(), reason="No LLM API key available")
async def test_llm_provider_auto_detect_source():
    """Translate French text to English without naming the source language."""
    translator = LLMTranslationProvider()

    # source_language is deliberately left out so the provider must detect it.
    request = {"text": "Bonjour le monde", "target_language": "en"}
    outcome = await translator.translate(**request)

    assert outcome.translated_text is not None
    assert outcome.target_language == "en"
52
+
53
+
54
@pytest.mark.asyncio
@pytest.mark.skipif(not has_llm_api_key(), reason="No LLM API key available")
async def test_llm_provider_long_text():
    """Translate a multi-sentence Spanish paragraph to English.

    Only checks that some text comes back and that the source language is
    reported as given.
    """
    spanish_paragraph = """
    La inteligencia artificial es una rama de la informática que se centra en
    crear sistemas capaces de realizar tareas que normalmente requieren inteligencia humana.
    Estos sistemas pueden aprender, razonar y resolver problemas complejos.
    """

    translator = LLMTranslationProvider()
    outcome = await translator.translate(
        text=spanish_paragraph,
        target_language="en",
        source_language="es",
    )

    assert len(outcome.translated_text) > 0
    assert outcome.source_language == "es"
70
+
71
+
72
def test_get_translation_provider_factory():
    """The factory returns an LLMTranslationProvider for the name "llm"."""
    assert isinstance(get_translation_provider("llm"), LLMTranslationProvider)
76
+
77
+
78
def test_get_translation_provider_invalid():
    """An unknown provider name must make the factory raise.

    Either ``TranslationError`` or ``ValueError`` is accepted.
    ``pytest.raises`` is used rather than a try / ``assert False`` guard
    because a bare ``assert`` is stripped under ``python -O``, which would
    let a factory that does not raise pass this test unnoticed.
    """
    with pytest.raises((TranslationError, ValueError)):
        get_translation_provider("invalid_provider")
85
+
86
+
87
@pytest.mark.asyncio
@pytest.mark.skipif(not has_llm_api_key(), reason="No LLM API key available")
async def test_llm_batch_translation():
    """Translate several Spanish phrases in one batch call.

    Expects one TranslationResult per input, each with translated text and
    the requested source/target language codes.
    """
    phrases = ["Hola", "¿Cómo estás?", "Adiós"]
    translator = LLMTranslationProvider()

    outcomes = await translator.translate_batch(
        texts=phrases,
        target_language="en",
        source_language="es",
    )

    assert len(outcomes) == len(phrases)
    for outcome in outcomes:
        assert isinstance(outcome, TranslationResult)
        assert outcome.translated_text is not None
        assert outcome.source_language == "es"
        assert outcome.target_language == "en"
105
+
106
+
107
@pytest.mark.asyncio
@pytest.mark.skipif(not has_llm_api_key(), reason="No LLM API key available")
async def test_translation_preserves_formatting():
    """Translate two Spanish lines separated by a newline.

    Newline placement in the output is not asserted (it may vary); the test
    only requires that non-empty translated text is returned.
    """
    two_line_text = "Primera línea.\nSegunda línea."
    translator = LLMTranslationProvider()

    outcome = await translator.translate(
        text=two_line_text,
        target_language="en",
        source_language="es",
    )

    translated = outcome.translated_text
    assert translated is not None
    assert len(translated) > 0
122
+
123
+
124
@pytest.mark.asyncio
@pytest.mark.skipif(not has_llm_api_key(), reason="No LLM API key available")
async def test_translation_special_characters():
    """Translate Spanish text containing inverted punctuation and accents."""
    punctuated_text = "¡Hola! ¿Cómo estás? Está bien."
    translator = LLMTranslationProvider()

    outcome = await translator.translate(
        text=punctuated_text,
        target_language="en",
        source_language="es",
    )

    translated = outcome.translated_text
    assert translated is not None
    assert len(translated) > 0
136
+
137
+
138
@pytest.mark.asyncio
@pytest.mark.skipif(not has_llm_api_key(), reason="No LLM API key available")
async def test_empty_text_translation():
    """Translating an empty string either raises or yields a result object.

    Two outcomes are accepted: a ``TranslationError``, or a
    ``TranslationResult`` (whose text may be empty).
    """
    translator = LLMTranslationProvider()

    try:
        outcome = await translator.translate(text="", target_language="en", source_language="es")
    except TranslationError:
        # Rejecting empty input is an accepted outcome.
        return

    # No error was raised, so a proper result object must have come back.
    assert isinstance(outcome, TranslationResult)
@@ -0,0 +1,213 @@
1
+ """
2
+ Unit tests for translate_content task
3
+ """
4
+
5
+ import os
6
+ from uuid import uuid4
7
+
8
+ import pytest
9
+
10
+ from cognee.modules.chunking.models import DocumentChunk
11
+ from cognee.modules.data.processing.document_types import TextDocument
12
+ from cognee.tasks.translation import translate_content
13
+ from cognee.tasks.translation.models import TranslatedContent, LanguageMetadata
14
+
15
+
16
def has_llm_api_key():
    """Report whether a usable LLM API key is configured.

    Reads the ``LLM_API_KEY`` environment variable. A missing, empty, or
    whitespace-only value counts as "no key", so the tests guarded by this
    helper are skipped instead of running with an unusable credential.

    Returns:
        bool: True when ``LLM_API_KEY`` holds a non-blank value.
    """
    return bool(os.environ.get("LLM_API_KEY", "").strip())
19
+
20
+
21
def create_test_chunk(text: str, chunk_index: int = 0):
    """Build a DocumentChunk wrapping ``text`` for use in these tests.

    The chunk is linked through ``is_part_of`` to a throwaway TextDocument
    with a random id and fixed name, path and mime type. ``chunk_size`` is
    the character length of ``text`` and ``cut_type`` is always "sentence".

    Args:
        text: Chunk content.
        chunk_index: Position of the chunk within its document.

    Returns:
        The newly created DocumentChunk.
    """
    # The parent document is created first; the chunk refers to it below.
    parent_document = TextDocument(
        id=uuid4(),
        name="test_doc",
        raw_data_location="/tmp/test.txt",
        external_metadata=None,
        mime_type="text/plain",
    )

    chunk_fields = {
        "id": uuid4(),
        "text": text,
        "chunk_index": chunk_index,
        "chunk_size": len(text),
        "cut_type": "sentence",
        "is_part_of": parent_document,
    }
    return DocumentChunk(**chunk_fields)
40
+
41
+
42
@pytest.mark.asyncio
@pytest.mark.skipif(not has_llm_api_key(), reason="No LLM API key available")
async def test_translate_content_basic():
    """Translate a single Spanish chunk to English via the "llm" provider.

    The returned chunk's text must differ from the Spanish input, and its
    ``contains`` list must hold a TranslatedContent entry.
    """
    spanish_text = "Hola mundo, esta es una prueba."

    translated_chunks = await translate_content(
        data_chunks=[create_test_chunk(spanish_text)],
        target_language="en",
        translation_provider="llm",
    )

    assert len(translated_chunks) == 1

    first_chunk = translated_chunks[0]
    # The chunk text is expected to have been replaced by the translation.
    assert first_chunk.text != spanish_text
    assert first_chunk.contains is not None

    assert any(isinstance(entry, TranslatedContent) for entry in first_chunk.contains)
62
+
63
+
64
@pytest.mark.asyncio
@pytest.mark.skipif(not has_llm_api_key(), reason="No LLM API key available")
async def test_translate_content_preserves_original():
    """With ``preserve_original=True`` the source text is kept alongside the translation."""
    french_text = "Bonjour le monde"

    translated_chunks = await translate_content(
        data_chunks=[create_test_chunk(french_text)],
        target_language="en",
        preserve_original=True,
    )

    # First TranslatedContent entry attached to the chunk, or None if absent.
    translated_content = next(
        (entry for entry in translated_chunks[0].contains if isinstance(entry, TranslatedContent)),
        None,
    )

    assert translated_content is not None
    assert translated_content.original_text == french_text
    assert translated_content.translated_text != french_text
85
+
86
+
87
@pytest.mark.asyncio
async def test_translate_content_skip_english():
    """English text is left untranslated when the target language is English.

    Runs without the API-key guard: per the original author's note, English
    input is skipped and so needs no API call. The chunk must keep its text
    and carry LanguageMetadata but no TranslatedContent.
    """
    original_text = "Hello world, this is a test."
    chunk = create_test_chunk(original_text)

    result = await translate_content(
        data_chunks=[chunk], target_language="en", skip_if_target_language=True
    )

    # Compare against the saved string rather than chunk.text: if
    # translate_content returns the same chunk objects it was given (not
    # visible from this file), `result[0].text == chunk.text` would hold
    # trivially even after an unwanted in-place translation.
    assert result[0].text == original_text

    contained_items = result[0].contains or []
    has_language_metadata = any(isinstance(item, LanguageMetadata) for item in contained_items)
    has_translated_content = any(isinstance(item, TranslatedContent) for item in contained_items)

    assert has_language_metadata
    assert not has_translated_content
110
+
111
+
112
@pytest.mark.asyncio
@pytest.mark.skipif(not has_llm_api_key(), reason="No LLM API key available")
async def test_translate_content_multiple_chunks():
    """Translate Spanish, French and Italian chunks in a single call.

    All three chunks must come back, and at least two of them must carry a
    TranslatedContent entry.
    """
    # Full sentences are used so that language detection is more reliable.
    spanish = "Hola mundo, esta es una prueba de traducción."
    french = "Bonjour le monde, ceci est un test de traduction."
    italian = "Ciao mondo, questo è un test di traduzione."

    chunks = []
    for position, sentence in enumerate([spanish, french, italian]):
        chunks.append(create_test_chunk(sentence, position))

    translated_chunks = await translate_content(data_chunks=chunks, target_language="en")

    assert len(translated_chunks) == 3

    def was_translated(candidate):
        return any(isinstance(entry, TranslatedContent) for entry in (candidate.contains or []))

    translated_count = len([c for c in translated_chunks if was_translated(c)])
    assert translated_count >= 2  # At least 2 chunks should be translated
134
+
135
+
136
@pytest.mark.asyncio
async def test_translate_content_empty_list():
    """An empty chunk list is returned as an empty list."""
    no_chunks = []
    assert await translate_content(data_chunks=no_chunks, target_language="en") == []
142
+
143
+
144
@pytest.mark.asyncio
async def test_translate_content_empty_text():
    """A chunk with empty text comes back as one chunk whose text is still empty."""
    blank_chunk = create_test_chunk("")

    processed = await translate_content(data_chunks=[blank_chunk], target_language="en")

    assert len(processed) == 1
    (only_chunk,) = processed
    assert only_chunk.text == ""
153
+
154
+
155
@pytest.mark.asyncio
@pytest.mark.skipif(not has_llm_api_key(), reason="No LLM API key available")
async def test_translate_content_language_metadata():
    """A translated Spanish chunk carries populated LanguageMetadata.

    The metadata must name a detected language, flag the chunk as requiring
    translation, and report a confidence above zero.
    """
    # A longer, clearly Spanish sentence is used to make detection reliable.
    spanish_sentence = "La inteligencia artificial está cambiando el mundo de manera significativa"

    translated_chunks = await translate_content(
        data_chunks=[create_test_chunk(spanish_sentence)],
        target_language="en",
    )

    # First LanguageMetadata entry attached to the chunk, or None if absent.
    language_metadata = next(
        (entry for entry in translated_chunks[0].contains if isinstance(entry, LanguageMetadata)),
        None,
    )

    assert language_metadata is not None
    # The exact language code is not pinned; any detected language is accepted.
    assert language_metadata.detected_language is not None
    assert language_metadata.requires_translation is True
    assert language_metadata.language_confidence > 0.0
178
+
179
+
180
@pytest.mark.asyncio
@pytest.mark.skipif(not has_llm_api_key(), reason="No LLM API key available")
async def test_translate_content_confidence_threshold():
    """Passing ``confidence_threshold=0.5`` still yields one chunk per input."""
    # A longer sentence is used to make language detection more reliable.
    spanish_sentence = "Hola mundo, esta es una frase más larga para mejor detección"

    processed = await translate_content(
        data_chunks=[create_test_chunk(spanish_sentence)],
        target_language="en",
        confidence_threshold=0.5,
    )

    assert len(processed) == 1
192
+
193
+
194
@pytest.mark.asyncio
@pytest.mark.skipif(not has_llm_api_key(), reason="No LLM API key available")
async def test_translate_content_no_preserve_original():
    """With ``preserve_original=False`` the TranslatedContent has an empty original_text."""
    # A longer sentence is used to make language detection more reliable.
    french_sentence = "Bonjour le monde, comment allez-vous aujourd'hui"

    translated_chunks = await translate_content(
        data_chunks=[create_test_chunk(french_sentence)],
        target_language="en",
        preserve_original=False,
    )

    # First TranslatedContent entry attached to the chunk, or None if absent.
    translated_content = next(
        (entry for entry in translated_chunks[0].contains if isinstance(entry, TranslatedContent)),
        None,
    )

    assert translated_content is not None
    assert translated_content.original_text == ""  # Should be empty
@@ -97,7 +97,7 @@ async def test_vector_engine_search_none_limit():
97
97
  query_vector = (await vector_engine.embedding_engine.embed_text([query_text]))[0]
98
98
 
99
99
  result = await vector_engine.search(
100
- collection_name=collection_name, query_vector=query_vector, limit=None
100
+ collection_name=collection_name, query_vector=query_vector, limit=None, include_payload=True
101
101
  )
102
102
 
103
103
  # Check that we did not accidentally use any default value for limit
@@ -0,0 +1,165 @@
1
+ import os
2
+ import pathlib
3
+ import cognee
4
+ from datetime import datetime, timezone, timedelta
5
+ from uuid import UUID
6
+ from sqlalchemy import select, update
7
+ from cognee.modules.data.models import Data, DatasetData
8
+ from cognee.infrastructure.databases.relational import get_relational_engine
9
+ from cognee.modules.users.methods import get_default_user
10
+ from cognee.shared.logging_utils import get_logger
11
+ from cognee.modules.search.types import SearchType
12
+
13
+ logger = get_logger()
14
+
15
+
16
async def test_textdocument_cleanup_with_sql():
    """
    End-to-end test for TextDocument cleanup based on last_accessed timestamps.

    Steps:
        1. Add and cognify a small text document.
        2. Run a search so that ``last_accessed`` gets populated.
        3. Read the ``Data`` row back and check ``last_accessed`` is set.
        4. Overwrite ``last_accessed`` with a timestamp older than the threshold.
        5. Run ``cleanup_unused_data`` as a dry run, then for real.
        6. Check that the ``Data`` row no longer exists.

    Returns:
        True when every step passed; any failure surfaces as an AssertionError.
    """
    # Enable last accessed tracking BEFORE any cognee operations
    # NOTE(review): this environment variable is not restored afterwards.
    os.environ["ENABLE_LAST_ACCESSED"] = "true"

    # Setup test directories
    test_root = pathlib.Path(__file__).parent
    data_directory_path = str((test_root / ".data_storage/test_cleanup").resolve())
    cognee_directory_path = str((test_root / ".cognee_system/test_cleanup").resolve())

    cognee.config.data_root_directory(data_directory_path)
    cognee.config.system_root_directory(cognee_directory_path)

    # Initialize database
    from cognee.modules.engine.operations.setup import setup

    # Clean slate
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    logger.info("🧪 Testing TextDocument cleanup based on last_accessed")

    # Step 1: Add and cognify a test document
    dataset_name = "test_cleanup_dataset"
    test_text = """
    Machine learning is a subset of artificial intelligence that enables systems to learn
    and improve from experience without being explicitly programmed. Deep learning uses
    neural networks with multiple layers to process data.
    """

    await setup()
    user = await get_default_user()
    await cognee.add([test_text], dataset_name=dataset_name, user=user)

    cognify_result = await cognee.cognify([dataset_name], user=user)

    # Extract dataset_id from cognify result: the first key, or None when empty
    dataset_id = next(iter(cognify_result), None)

    assert dataset_id is not None, "Failed to get dataset_id from cognify result"
    logger.info(f"✅ Document added and cognified. Dataset ID: {dataset_id}")

    # Step 2: Perform search to trigger last_accessed update
    logger.info("Triggering search to update last_accessed...")
    search_results = await cognee.search(
        query_type=SearchType.CHUNKS,
        query_text="machine learning",
        datasets=[dataset_name],
        user=user,
    )
    logger.info(f"✅ Search completed, found {len(search_results)} results")
    assert len(search_results) > 0, "Search should return results"

    # Step 3: Verify last_accessed was set and get data_id
    db_engine = get_relational_engine()
    async with db_engine.get_async_session() as session:
        result = await session.execute(
            select(Data, DatasetData)
            .join(DatasetData, Data.id == DatasetData.data_id)
            .where(DatasetData.dataset_id == dataset_id)
        )
        data_records = result.all()
        assert len(data_records) > 0, "No Data records found for the dataset"
        # Each row is a (Data, DatasetData) pair; only the Data entity is needed.
        data_record = data_records[0][0]
        data_id = data_record.id

        # Verify last_accessed is set
        assert data_record.last_accessed is not None, (
            "last_accessed should be set after search operation"
        )

        original_last_accessed = data_record.last_accessed
        logger.info(f"✅ last_accessed verified: {original_last_accessed}")

    # Step 4: Manually age the timestamp to 10 minutes past the cleanup threshold
    minutes_threshold = 30
    aged_timestamp = datetime.now(timezone.utc) - timedelta(minutes=minutes_threshold + 10)

    async with db_engine.get_async_session() as session:
        stmt = update(Data).where(Data.id == data_id).values(last_accessed=aged_timestamp)
        await session.execute(stmt)
        await session.commit()

    # Verify timestamp was updated
    async with db_engine.get_async_session() as session:
        result = await session.execute(select(Data).where(Data.id == data_id))
        updated_data = result.scalar_one_or_none()
        assert updated_data is not None, "Data record should exist"
        retrieved_timestamp = updated_data.last_accessed
        # A naive value read back is treated as UTC so it can be compared with
        # the timezone-aware timestamp written above.
        if retrieved_timestamp.tzinfo is None:
            retrieved_timestamp = retrieved_timestamp.replace(tzinfo=timezone.utc)
        assert retrieved_timestamp == aged_timestamp, "Timestamp should be updated to aged value"

    # Step 5: Test cleanup (document-level is now the default)
    from cognee.tasks.cleanup.cleanup_unused_data import cleanup_unused_data

    # First do a dry run (10-minute threshold; the aged timestamp exceeds it)
    logger.info("Testing dry run...")
    dry_run_result = await cleanup_unused_data(minutes_threshold=10, dry_run=True, user_id=user.id)

    # Debug: Print the actual result
    logger.info(f"Dry run result: {dry_run_result}")

    assert dry_run_result["status"] == "dry_run", (
        f"Status should be 'dry_run', got: {dry_run_result['status']}"
    )
    assert dry_run_result["unused_count"] > 0, "Should find at least one unused document"
    logger.info(f"✅ Dry run found {dry_run_result['unused_count']} unused documents")

    # Now run actual cleanup with the same threshold the timestamp was aged against
    logger.info("Executing cleanup...")
    cleanup_result = await cleanup_unused_data(
        minutes_threshold=minutes_threshold, dry_run=False, user_id=user.id
    )

    assert cleanup_result["status"] == "completed", "Cleanup should complete successfully"
    assert cleanup_result["deleted_count"]["documents"] > 0, (
        "At least one document should be deleted"
    )
    logger.info(
        f"✅ Cleanup completed. Deleted {cleanup_result['deleted_count']['documents']} documents"
    )

    # Step 6: Verify deletion
    async with db_engine.get_async_session() as session:
        deleted_data = (
            await session.execute(select(Data).where(Data.id == data_id))
        ).scalar_one_or_none()
        assert deleted_data is None, "Data record should be deleted"
        logger.info("✅ Confirmed: Data record was deleted")

    logger.info("🎉 All cleanup tests passed!")
    return True
159
+
160
+
161
if __name__ == "__main__":
    import asyncio
    import sys

    # Run the end-to-end test as a script and map its boolean result to the
    # process exit code. sys.exit is used rather than the site-provided exit()
    # helper, which is intended for the interactive shell and is not available
    # when the site module is disabled.
    success = asyncio.run(test_textdocument_cleanup_with_sql())
    sys.exit(0 if success else 1)
@@ -47,20 +47,20 @@ async def main():
47
47
 
48
48
  # Test data
49
49
  text_1 = """
50
- Apple Inc. is an American multinational technology company that specializes in consumer electronics,
51
- software, and online services. Apple is the world's largest technology company by revenue and,
50
+ Apple Inc. is an American multinational technology company that specializes in consumer electronics,
51
+ software, and online services. Apple is the world's largest technology company by revenue and,
52
52
  since January 2021, the world's most valuable company.
53
53
  """
54
54
 
55
55
  text_2 = """
56
- Microsoft Corporation is an American multinational technology corporation which produces computer software,
57
- consumer electronics, personal computers, and related services. Its best known software products are the
56
+ Microsoft Corporation is an American multinational technology corporation which produces computer software,
57
+ consumer electronics, personal computers, and related services. Its best known software products are the
58
58
  Microsoft Windows line of operating systems and the Microsoft Office suite.
59
59
  """
60
60
 
61
61
  text_3 = """
62
- Google LLC is an American multinational technology company that specializes in Internet-related services and products,
63
- which include online advertising technologies, search engine, cloud computing, software, and hardware. Google has been
62
+ Google LLC is an American multinational technology company that specializes in Internet-related services and products,
63
+ which include online advertising technologies, search engine, cloud computing, software, and hardware. Google has been
64
64
  referred to as the most powerful company in the world and one of the world's most valuable brands.
65
65
  """
66
66