cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +2 -0
- cognee/alembic/README +1 -0
- cognee/alembic/env.py +107 -0
- cognee/alembic/script.py.mako +26 -0
- cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
- cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
- cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
- cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
- cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
- cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
- cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
- cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
- cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
- cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
- cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
- cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
- cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
- cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
- cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
- cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
- cognee/alembic.ini +117 -0
- cognee/api/v1/add/routers/get_add_router.py +2 -0
- cognee/api/v1/cognify/cognify.py +11 -6
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
- cognee/api/v1/config/config.py +60 -0
- cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
- cognee/api/v1/memify/routers/get_memify_router.py +2 -0
- cognee/api/v1/search/routers/get_search_router.py +21 -6
- cognee/api/v1/search/search.py +25 -5
- cognee/api/v1/sync/routers/get_sync_router.py +3 -3
- cognee/cli/commands/add_command.py +1 -1
- cognee/cli/commands/cognify_command.py +6 -0
- cognee/cli/commands/config_command.py +1 -1
- cognee/context_global_variables.py +5 -1
- cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
- cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
- cognee/infrastructure/databases/cache/config.py +6 -0
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
- cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/config.py +6 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
- cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
- cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
- cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
- cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
- cognee/infrastructure/llm/prompts/test.txt +1 -1
- cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
- cognee/modules/chunking/models/DocumentChunk.py +0 -1
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/models/Data.py +1 -0
- cognee/modules/engine/models/Entity.py +0 -1
- cognee/modules/engine/operations/setup.py +6 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
- cognee/modules/notebooks/methods/__init__.py +1 -0
- cognee/modules/notebooks/methods/create_notebook.py +0 -34
- cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
- cognee/modules/notebooks/methods/get_notebooks.py +12 -8
- cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
- cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
- cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
- cognee/modules/retrieval/__init__.py +0 -1
- cognee/modules/retrieval/base_retriever.py +66 -10
- cognee/modules/retrieval/chunks_retriever.py +57 -49
- cognee/modules/retrieval/coding_rules_retriever.py +12 -5
- cognee/modules/retrieval/completion_retriever.py +29 -28
- cognee/modules/retrieval/cypher_search_retriever.py +25 -20
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
- cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
- cognee/modules/retrieval/graph_completion_retriever.py +78 -63
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/lexical_retriever.py +34 -12
- cognee/modules/retrieval/natural_language_retriever.py +18 -15
- cognee/modules/retrieval/summaries_retriever.py +51 -34
- cognee/modules/retrieval/temporal_retriever.py +59 -49
- cognee/modules/retrieval/triplet_retriever.py +31 -32
- cognee/modules/retrieval/utils/access_tracking.py +88 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
- cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
- cognee/modules/search/methods/__init__.py +1 -0
- cognee/modules/search/methods/get_retriever_output.py +53 -0
- cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
- cognee/modules/search/methods/search.py +90 -215
- cognee/modules/search/models/SearchResultPayload.py +67 -0
- cognee/modules/search/types/SearchResult.py +1 -8
- cognee/modules/search/types/SearchType.py +1 -2
- cognee/modules/search/types/__init__.py +1 -1
- cognee/modules/search/utils/__init__.py +1 -2
- cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
- cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
- cognee/modules/users/authentication/default/default_transport.py +11 -1
- cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
- cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
- cognee/modules/users/methods/create_user.py +0 -9
- cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
- cognee/modules/visualization/cognee_network_visualization.py +1 -1
- cognee/run_migrations.py +48 -0
- cognee/shared/exceptions/__init__.py +1 -3
- cognee/shared/exceptions/exceptions.py +11 -1
- cognee/shared/usage_logger.py +332 -0
- cognee/shared/utils.py +12 -5
- cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
- cognee/tasks/memify/extract_usage_frequency.py +613 -0
- cognee/tasks/summarization/models.py +0 -2
- cognee/tasks/temporal_graph/__init__.py +0 -1
- cognee/tasks/translation/__init__.py +96 -0
- cognee/tasks/translation/config.py +110 -0
- cognee/tasks/translation/detect_language.py +190 -0
- cognee/tasks/translation/exceptions.py +62 -0
- cognee/tasks/translation/models.py +72 -0
- cognee/tasks/translation/providers/__init__.py +44 -0
- cognee/tasks/translation/providers/azure_provider.py +192 -0
- cognee/tasks/translation/providers/base.py +85 -0
- cognee/tasks/translation/providers/google_provider.py +158 -0
- cognee/tasks/translation/providers/llm_provider.py +143 -0
- cognee/tasks/translation/translate_content.py +282 -0
- cognee/tasks/web_scraper/default_url_crawler.py +6 -2
- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
- cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
- cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
- cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
- cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
- cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
- cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
- cognee/tests/integration/retrieval/test_structured_output.py +62 -18
- cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
- cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
- cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
- cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
- cognee/tests/tasks/translation/README.md +147 -0
- cognee/tests/tasks/translation/__init__.py +1 -0
- cognee/tests/tasks/translation/config_test.py +93 -0
- cognee/tests/tasks/translation/detect_language_test.py +118 -0
- cognee/tests/tasks/translation/providers_test.py +151 -0
- cognee/tests/tasks/translation/translate_content_test.py +213 -0
- cognee/tests/test_chromadb.py +1 -1
- cognee/tests/test_cleanup_unused_data.py +165 -0
- cognee/tests/test_delete_by_id.py +6 -6
- cognee/tests/test_extract_usage_frequency.py +308 -0
- cognee/tests/test_kuzu.py +17 -7
- cognee/tests/test_lancedb.py +3 -1
- cognee/tests/test_library.py +1 -1
- cognee/tests/test_neo4j.py +17 -7
- cognee/tests/test_neptune_analytics_vector.py +3 -1
- cognee/tests/test_permissions.py +172 -187
- cognee/tests/test_pgvector.py +3 -1
- cognee/tests/test_relational_db_migration.py +15 -1
- cognee/tests/test_remote_kuzu.py +3 -1
- cognee/tests/test_s3_file_storage.py +1 -1
- cognee/tests/test_search_db.py +97 -110
- cognee/tests/test_usage_logger_e2e.py +268 -0
- cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
- cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
- cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
- cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
- cognee/tests/unit/modules/search/test_search.py +176 -0
- cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
- cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
- cognee/tests/unit/shared/test_usage_logger.py +241 -0
- cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/METADATA +17 -10
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/RECORD +232 -144
- cognee/api/.env.example +0 -5
- cognee/modules/retrieval/base_graph_retriever.py +0 -24
- cognee/modules/search/methods/get_search_type_tools.py +0 -223
- cognee/modules/search/methods/no_access_control_search.py +0 -62
- cognee/modules/search/utils/prepare_search_result.py +0 -63
- cognee/tests/test_feedback_enrichment.py +0 -174
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/WHEEL +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from typing import List, Optional
|
|
3
|
+
from uuid import uuid5
|
|
4
|
+
|
|
5
|
+
from cognee.modules.chunking.models import DocumentChunk
|
|
6
|
+
from cognee.shared.logging_utils import get_logger
|
|
7
|
+
|
|
8
|
+
from .config import get_translation_config, TranslationProviderType
|
|
9
|
+
from .detect_language import detect_language_async, LanguageDetectionResult
|
|
10
|
+
from .exceptions import TranslationError, LanguageDetectionError
|
|
11
|
+
from .models import TranslatedContent, LanguageMetadata
|
|
12
|
+
from .providers import get_translation_provider, TranslationResult
|
|
13
|
+
|
|
14
|
+
logger = get_logger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
async def translate_content(
    data_chunks: List[DocumentChunk],
    target_language: Optional[str] = None,
    translation_provider: Optional[TranslationProviderType] = None,
    confidence_threshold: Optional[float] = None,
    skip_if_target_language: bool = True,
    preserve_original: bool = True,
) -> List[DocumentChunk]:
    """
    Translate chunks that are not in the target language.

    For every chunk with non-empty text, the language is detected, a
    LanguageMetadata record is built, and (when needed) the text is translated
    with the selected provider. Failures on a single chunk are logged and the
    chunk is still returned, so one bad chunk never aborts the whole batch.

    Args:
        data_chunks: List of DocumentChunk objects to process.
        target_language: Target language code. Falls back to the translation
            config when not provided (or empty).
        translation_provider: Translation provider identifier. Falls back to
            the translation config when not provided.
        confidence_threshold: Minimum confidence for language detection.
            Falls back to the translation config only when it is None, so an
            explicit 0.0 is honoured.
        skip_if_target_language: If True, chunks that detection reports as not
            requiring translation are returned with only LanguageMetadata
            attached. If False, those chunks are sent to the provider as well.
        preserve_original: If True, the original text is stored in
            TranslatedContent.original_text; otherwise an empty string is stored.

    Returns:
        The processed chunks, in input order. Translated chunks carry a
        LanguageMetadata and a TranslatedContent item in their 'contains' list.

    Raises:
        TranslationError: If data_chunks is not a list.

    Note:
        This function mutates the input chunks in-place:
        - chunk.text is replaced with the translated text
        - chunk.contains is extended with LanguageMetadata and TranslatedContent

    Example:
        ```python
        from cognee.tasks.translation import translate_content

        # Translate chunks using default settings
        translated_chunks = await translate_content(chunks)

        # Translate with specific provider
        translated_chunks = await translate_content(
            chunks,
            translation_provider="llm",
            confidence_threshold=0.9
        )
        ```
    """
    if not isinstance(data_chunks, list):
        raise TranslationError("data_chunks must be a list")

    if not data_chunks:
        return data_chunks

    # Resolve effective settings: explicit arguments win over configuration.
    config = get_translation_config()
    provider_name = translation_provider or config.translation_provider
    target_lang = target_language or config.target_language
    # `or` would treat a legitimate threshold of 0.0 as "not provided".
    threshold = (
        config.confidence_threshold if confidence_threshold is None else confidence_threshold
    )

    logger.info(
        f"Starting translation task for {len(data_chunks)} chunks "
        f"using {provider_name} provider, target language: {target_lang}"
    )

    provider = get_translation_provider(provider_name)

    processed_chunks = []
    total_chunks = len(data_chunks)

    for chunk_index, chunk in enumerate(data_chunks):
        # Log progress for large batches
        if chunk_index > 0 and chunk_index % 100 == 0:
            logger.info(f"Translation progress: {chunk_index}/{total_chunks} chunks processed")

        # Nothing to detect or translate: pass the chunk through untouched.
        if not hasattr(chunk, "text") or not chunk.text:
            processed_chunks.append(chunk)
            continue

        try:
            detection = await detect_language_async(chunk.text, target_lang, threshold)

            # IDs are derived from the chunk ID, so re-running yields the same IDs.
            language_metadata = LanguageMetadata(
                id=uuid5(chunk.id, "LanguageMetadata"),
                content_id=chunk.id,
                detected_language=detection.language_code,
                language_confidence=detection.confidence,
                requires_translation=detection.requires_translation,
                character_count=detection.character_count,
                language_name=detection.language_name,
            )

            # Skip only when the caller allows it; otherwise fall through and translate.
            if not detection.requires_translation and skip_if_target_language:
                logger.debug(
                    f"Skipping chunk {chunk.id}: already in target language "
                    f"({detection.language_code})"
                )
                _add_to_chunk_contains(chunk, language_metadata)
                processed_chunks.append(chunk)
                continue

            logger.debug(
                f"Translating chunk {chunk.id} from {detection.language_code} to {target_lang}"
            )

            translation_result = await provider.translate(
                text=chunk.text,
                target_language=target_lang,
                source_language=detection.language_code,
            )

            # Built before chunk.text is overwritten so original_text is the source text.
            translated_content = TranslatedContent(
                id=uuid5(chunk.id, "TranslatedContent"),
                original_chunk_id=chunk.id,
                original_text=chunk.text if preserve_original else "",
                translated_text=translation_result.translated_text,
                source_language=translation_result.source_language,
                target_language=translation_result.target_language,
                translation_provider=translation_result.provider,
                confidence_score=translation_result.confidence_score,
                translated_from=chunk,
            )

            chunk.text = translation_result.translated_text

            _add_to_chunk_contains(chunk, language_metadata)
            _add_to_chunk_contains(chunk, translated_content)

            processed_chunks.append(chunk)

            logger.debug(
                f"Successfully translated chunk {chunk.id}: "
                f"{detection.language_code} -> {target_lang}"
            )

        except LanguageDetectionError as e:
            logger.warning(f"Language detection failed for chunk {chunk.id}: {e}")
            processed_chunks.append(chunk)
        except TranslationError as e:
            logger.error(f"Translation failed for chunk {chunk.id}: {e}")
            processed_chunks.append(chunk)
        except Exception as e:
            # Deliberate catch-all: keep the batch going and return the chunk as-is.
            logger.error(f"Unexpected error processing chunk {chunk.id}: {e}")
            processed_chunks.append(chunk)

    logger.info(f"Translation task completed for {len(processed_chunks)} chunks")
    return processed_chunks
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _add_to_chunk_contains(chunk: DocumentChunk, item) -> None:
    """Append ``item`` to ``chunk.contains``, creating the list first when it is None."""
    if chunk.contains is not None:
        chunk.contains.append(item)
        return

    # No list yet: start one on the chunk, then append through the attribute.
    chunk.contains = []
    chunk.contains.append(item)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
async def translate_text(
    text: str,
    target_language: str = None,
    translation_provider: TranslationProviderType = None,
    source_language: Optional[str] = None,
) -> TranslationResult:
    """
    Translate one text string with the selected provider.

    A convenience wrapper for callers that have plain text rather than
    DocumentChunk objects.

    Args:
        text: The text to translate.
        target_language: Target language code; the translation config value
            is used when this is not provided.
        translation_provider: Translation service to use; the translation
            config value is used when this is not provided.
        source_language: Source language code, forwarded to the provider
            unchanged (may be None).

    Returns:
        The TranslationResult produced by the provider's ``translate`` call.

    Example:
        ```python
        from cognee.tasks.translation import translate_text

        result = await translate_text(
            "Bonjour le monde!",
            target_language="en"
        )
        print(result.translated_text)  # "Hello world!"
        print(result.source_language)  # "fr"
        ```
    """
    settings = get_translation_config()

    # Explicit arguments take precedence over configured defaults.
    if translation_provider:
        chosen_provider = translation_provider
    else:
        chosen_provider = settings.translation_provider

    if target_language:
        chosen_target = target_language
    else:
        chosen_target = settings.target_language

    translator = get_translation_provider(chosen_provider)

    outcome = await translator.translate(
        text=text,
        target_language=chosen_target,
        source_language=source_language,
    )
    return outcome
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
async def batch_translate_texts(
    texts: List[str],
    target_language: str = None,
    translation_provider: TranslationProviderType = None,
    source_language: Optional[str] = None,
) -> List[TranslationResult]:
    """
    Translate several text strings through the provider's batch entry point.

    The whole list is handed to the provider's ``translate_batch`` in a single
    call; how the batch is executed is up to the provider.

    Args:
        texts: List of texts to translate.
        target_language: Target language code; the translation config value
            is used when this is not provided.
        translation_provider: Translation service to use; the translation
            config value is used when this is not provided.
        source_language: Source language code, forwarded to the provider
            unchanged (may be None).

    Returns:
        The list of TranslationResult objects returned by the provider.

    Example:
        ```python
        from cognee.tasks.translation import batch_translate_texts

        results = await batch_translate_texts(
            ["Hola", "¿Cómo estás?", "Adiós"],
            target_language="en"
        )
        for result in results:
            print(f"{result.source_language}: {result.translated_text}")
        ```
    """
    settings = get_translation_config()

    # Explicit arguments take precedence over configured defaults.
    if translation_provider:
        chosen_provider = translation_provider
    else:
        chosen_provider = settings.translation_provider

    if target_language:
        chosen_target = target_language
    else:
        chosen_target = settings.target_language

    translator = get_translation_provider(chosen_provider)

    outcomes = await translator.translate_batch(
        texts=texts,
        target_language=chosen_target,
        source_language=source_language,
    )
    return outcomes
|
|
@@ -73,7 +73,11 @@ class DefaultUrlCrawler:
|
|
|
73
73
|
self.timeout = timeout
|
|
74
74
|
self.max_retries = max_retries
|
|
75
75
|
self.retry_delay_factor = retry_delay_factor
|
|
76
|
-
self.headers = headers or {
|
|
76
|
+
self.headers = headers or {
|
|
77
|
+
"User-Agent": "Cognee-Scraper/1.0 (hello@cognee.ai)",
|
|
78
|
+
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
|
79
|
+
"Accept-Language": "en-US,en;q=0.9",
|
|
80
|
+
}
|
|
77
81
|
self.robots_cache_ttl = robots_cache_ttl
|
|
78
82
|
self._last_request_time_per_domain: Dict[str, float] = {}
|
|
79
83
|
self._robots_cache: Dict[str, RobotsTxtCache] = {}
|
|
@@ -288,7 +292,7 @@ class DefaultUrlCrawler:
|
|
|
288
292
|
while True:
|
|
289
293
|
try:
|
|
290
294
|
await self._respect_rate_limit(url, crawl_delay)
|
|
291
|
-
resp = await self._client.get(url)
|
|
295
|
+
resp = await self._client.get(url, headers=self.headers)
|
|
292
296
|
resp.raise_for_status()
|
|
293
297
|
logger.info(
|
|
294
298
|
f"Successfully fetched {url} (status={resp.status_code}, size={len(resp.text)} bytes)"
|
|
@@ -262,6 +262,7 @@ class TestCognifyCommandEdgeCases:
|
|
|
262
262
|
ontology_file_path=None,
|
|
263
263
|
chunker=TextChunker,
|
|
264
264
|
run_in_background=False,
|
|
265
|
+
chunks_per_batch=None,
|
|
265
266
|
)
|
|
266
267
|
|
|
267
268
|
@patch("cognee.cli.commands.cognify_command.asyncio.run", side_effect=_mock_run)
|
|
@@ -295,6 +296,7 @@ class TestCognifyCommandEdgeCases:
|
|
|
295
296
|
ontology_file_path="/nonexistent/path/ontology.owl",
|
|
296
297
|
chunker=TextChunker,
|
|
297
298
|
run_in_background=False,
|
|
299
|
+
chunks_per_batch=None,
|
|
298
300
|
)
|
|
299
301
|
|
|
300
302
|
@patch("cognee.cli.commands.cognify_command.asyncio.run")
|
|
@@ -373,6 +375,7 @@ class TestCognifyCommandEdgeCases:
|
|
|
373
375
|
ontology_file_path=None,
|
|
374
376
|
chunker=TextChunker,
|
|
375
377
|
run_in_background=False,
|
|
378
|
+
chunks_per_batch=None,
|
|
376
379
|
)
|
|
377
380
|
|
|
378
381
|
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import pathlib
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
import pytest_asyncio
|
|
5
|
+
import cognee
|
|
6
|
+
|
|
7
|
+
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
|
|
8
|
+
from cognee.modules.retrieval.utils.brute_force_triplet_search import brute_force_triplet_search
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@pytest_asyncio.fixture
async def clean_environment():
    """Point cognee at isolated storage directories and prune before and after the test.

    Setup pruning is allowed to fail the test. Teardown pruning is best-effort:
    a failure there is logged as a warning instead of being raised, so it does
    not mask the test's own outcome.
    """
    # Local import keeps this fixture self-contained.
    import logging

    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
    system_directory_path = str(base_dir / ".cognee_system/test_brute_force_triplet_search_e2e")
    data_directory_path = str(base_dir / ".data_storage/test_brute_force_triplet_search_e2e")

    cognee.config.system_root_directory(system_directory_path)
    cognee.config.data_root_directory(data_directory_path)

    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    yield

    try:
        await cognee.prune.prune_data()
        await cognee.prune.prune_system(metadata=True)
    except Exception as error:
        # Previously swallowed silently; surface it so leftover state is diagnosable.
        logging.getLogger(__name__).warning(
            "Cleanup after brute force triplet search test failed: %s", error
        )
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@pytest.mark.asyncio
async def test_brute_force_triplet_search_end_to_end(clean_environment):
    """Run add + cognify, then check single-query and batch-query triplet search results."""

    text = """
Cognee is an open-source AI memory engine that structures data into searchable formats for use with AI agents.
The company focuses on persistent memory systems using knowledge graphs and vector search.
It is a Berlin-based startup building infrastructure for context-aware AI applications.
NLP systems can use Cognee to store and retrieve structured information.
"""

    await cognee.add(text)
    await cognee.cognify()

    # Single query: expect a non-empty list of Edge objects.
    single_result = await brute_force_triplet_search(
        query="What can NLP systems use Cognee for?",
        top_k=1,
    )
    assert isinstance(single_result, list)
    assert single_result
    for found_edge in single_result:
        assert isinstance(found_edge, Edge)

    # Batch query: expect one non-empty list of Edge objects per query.
    batch_queries = ["What is Cognee?", "What is the company's focus?"]
    batch_result = await brute_force_triplet_search(query_batch=batch_queries, top_k=1)

    assert isinstance(batch_result, list)
    assert len(batch_result) == len(batch_queries)
    for query_edges in batch_result:
        assert isinstance(query_edges, list)
    for query_edges in batch_result:
        assert query_edges
    for query_edges in batch_result:
        for found_edge in query_edges:
            assert isinstance(found_edge, Edge)
|
|
@@ -10,7 +10,6 @@ from cognee.tasks.storage import add_data_points
|
|
|
10
10
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
|
11
11
|
from cognee.modules.chunking.models import DocumentChunk
|
|
12
12
|
from cognee.modules.data.processing.document_types import TextDocument
|
|
13
|
-
from cognee.modules.retrieval.exceptions.exceptions import NoDataError
|
|
14
13
|
from cognee.modules.retrieval.chunks_retriever import ChunksRetriever
|
|
15
14
|
from cognee.infrastructure.engine import DataPoint
|
|
16
15
|
from cognee.modules.data.processing.document_types import Document
|
|
@@ -40,6 +39,17 @@ async def setup_test_environment_with_chunks_simple():
|
|
|
40
39
|
|
|
41
40
|
await cognee.prune.prune_data()
|
|
42
41
|
await cognee.prune.prune_system(metadata=True)
|
|
42
|
+
from cognee.infrastructure.databases.graph.get_graph_engine import _create_graph_engine
|
|
43
|
+
from cognee.infrastructure.databases.vector.create_vector_engine import (
|
|
44
|
+
_create_vector_engine,
|
|
45
|
+
)
|
|
46
|
+
from cognee.infrastructure.databases.relational.create_relational_engine import (
|
|
47
|
+
create_relational_engine,
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
_create_graph_engine.cache_clear()
|
|
51
|
+
_create_vector_engine.cache_clear()
|
|
52
|
+
create_relational_engine.cache_clear()
|
|
43
53
|
await setup()
|
|
44
54
|
|
|
45
55
|
document = TextDocument(
|
|
@@ -83,6 +93,17 @@ async def setup_test_environment_with_chunks_simple():
|
|
|
83
93
|
try:
|
|
84
94
|
await cognee.prune.prune_data()
|
|
85
95
|
await cognee.prune.prune_system(metadata=True)
|
|
96
|
+
from cognee.infrastructure.databases.graph.get_graph_engine import _create_graph_engine
|
|
97
|
+
from cognee.infrastructure.databases.vector.create_vector_engine import (
|
|
98
|
+
_create_vector_engine,
|
|
99
|
+
)
|
|
100
|
+
from cognee.infrastructure.databases.relational.create_relational_engine import (
|
|
101
|
+
create_relational_engine,
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
_create_graph_engine.cache_clear()
|
|
105
|
+
_create_vector_engine.cache_clear()
|
|
106
|
+
create_relational_engine.cache_clear()
|
|
86
107
|
except Exception:
|
|
87
108
|
pass
|
|
88
109
|
|
|
@@ -99,6 +120,17 @@ async def setup_test_environment_with_chunks_complex():
|
|
|
99
120
|
|
|
100
121
|
await cognee.prune.prune_data()
|
|
101
122
|
await cognee.prune.prune_system(metadata=True)
|
|
123
|
+
from cognee.infrastructure.databases.graph.get_graph_engine import _create_graph_engine
|
|
124
|
+
from cognee.infrastructure.databases.vector.create_vector_engine import (
|
|
125
|
+
_create_vector_engine,
|
|
126
|
+
)
|
|
127
|
+
from cognee.infrastructure.databases.relational.create_relational_engine import (
|
|
128
|
+
create_relational_engine,
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
_create_graph_engine.cache_clear()
|
|
132
|
+
_create_vector_engine.cache_clear()
|
|
133
|
+
create_relational_engine.cache_clear()
|
|
102
134
|
await setup()
|
|
103
135
|
|
|
104
136
|
document1 = TextDocument(
|
|
@@ -174,6 +206,17 @@ async def setup_test_environment_with_chunks_complex():
|
|
|
174
206
|
try:
|
|
175
207
|
await cognee.prune.prune_data()
|
|
176
208
|
await cognee.prune.prune_system(metadata=True)
|
|
209
|
+
from cognee.infrastructure.databases.graph.get_graph_engine import _create_graph_engine
|
|
210
|
+
from cognee.infrastructure.databases.vector.create_vector_engine import (
|
|
211
|
+
_create_vector_engine,
|
|
212
|
+
)
|
|
213
|
+
from cognee.infrastructure.databases.relational.create_relational_engine import (
|
|
214
|
+
create_relational_engine,
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
_create_graph_engine.cache_clear()
|
|
218
|
+
_create_vector_engine.cache_clear()
|
|
219
|
+
create_relational_engine.cache_clear()
|
|
177
220
|
except Exception:
|
|
178
221
|
pass
|
|
179
222
|
|
|
@@ -190,26 +233,53 @@ async def setup_test_environment_empty():
|
|
|
190
233
|
|
|
191
234
|
await cognee.prune.prune_data()
|
|
192
235
|
await cognee.prune.prune_system(metadata=True)
|
|
236
|
+
from cognee.infrastructure.databases.graph.get_graph_engine import _create_graph_engine
|
|
237
|
+
from cognee.infrastructure.databases.vector.create_vector_engine import (
|
|
238
|
+
_create_vector_engine,
|
|
239
|
+
)
|
|
240
|
+
from cognee.infrastructure.databases.relational.create_relational_engine import (
|
|
241
|
+
create_relational_engine,
|
|
242
|
+
)
|
|
243
|
+
|
|
244
|
+
_create_graph_engine.cache_clear()
|
|
245
|
+
_create_vector_engine.cache_clear()
|
|
246
|
+
create_relational_engine.cache_clear()
|
|
193
247
|
|
|
194
248
|
yield
|
|
195
249
|
|
|
196
250
|
try:
|
|
197
251
|
await cognee.prune.prune_data()
|
|
198
252
|
await cognee.prune.prune_system(metadata=True)
|
|
253
|
+
from cognee.infrastructure.databases.graph.get_graph_engine import _create_graph_engine
|
|
254
|
+
from cognee.infrastructure.databases.vector.create_vector_engine import (
|
|
255
|
+
_create_vector_engine,
|
|
256
|
+
)
|
|
257
|
+
from cognee.infrastructure.databases.relational.create_relational_engine import (
|
|
258
|
+
create_relational_engine,
|
|
259
|
+
)
|
|
260
|
+
|
|
261
|
+
_create_graph_engine.cache_clear()
|
|
262
|
+
_create_vector_engine.cache_clear()
|
|
263
|
+
create_relational_engine.cache_clear()
|
|
199
264
|
except Exception:
|
|
200
265
|
pass
|
|
201
266
|
|
|
202
267
|
|
|
203
268
|
@pytest.mark.asyncio
async def test_chunks_retriever_multiple_chunks(setup_test_environment_with_chunks_simple):
    """Integration test: a "Steve" query must yield the "Steve Rodger" chunk.

    Runs the three retriever stages in order (retrieved objects, context,
    completion) and checks the completion output.
    """
    query = "Steve"
    retriever = ChunksRetriever()

    chunks = await retriever.get_retrieved_objects(query)
    context = await retriever.get_context_from_objects(query=query, retrieved_objects=chunks)
    completion = await retriever.get_completion_from_context(
        query=query,
        retrieved_objects=chunks,
        context=context,
    )

    assert isinstance(completion, list), "Retrieved objects should be a list"
    assert len(completion) > 0, "Retrieved objects list should not be empty"

    found_steve = any(chunk["text"] == "Steve Rodger" for chunk in completion)
    assert found_steve, "Failed to get Steve Rodger chunk"
|
|
215
285
|
|
|
@@ -218,35 +288,64 @@ async def test_chunks_retriever_context_multiple_chunks(setup_test_environment_w
|
|
|
218
288
|
async def test_chunks_retriever_top_k_limit(setup_test_environment_with_chunks_complex):
    """Integration test: verify ChunksRetriever respects top_k parameter.

    The retriever is built with top_k=2 and the completion produced for the
    query must contain at most two entries.
    """
    retriever = ChunksRetriever(top_k=2)
    query = "Employee"

    # Retrieve with the same query that is passed to the context and
    # completion stages; a hard-coded "Steve" here made the stages disagree.
    chunks = await retriever.get_retrieved_objects(query)
    context = await retriever.get_context_from_objects(query=query, retrieved_objects=chunks)

    completion = await retriever.get_completion_from_context(
        query=query, retrieved_objects=chunks, context=context
    )

    assert isinstance(completion, list), "Context should be a list"
    assert len(completion) <= 2, "Should respect top_k limit"
|
|
226
302
|
|
|
227
303
|
|
|
228
304
|
@pytest.mark.asyncio
async def test_chunks_retriever_context_complex(setup_test_environment_with_chunks_complex):
    """Integration test: the context built for "Christina" must start with "Christina Mayer"."""
    query = "Christina"
    retriever = ChunksRetriever(top_k=20)

    chunks = await retriever.get_retrieved_objects(query)
    context = await retriever.get_context_from_objects(
        query=query,
        retrieved_objects=chunks,
    )

    # Only the first 15 characters are compared, i.e. the length of the name.
    leading_text = context[0:15]
    assert leading_text == "Christina Mayer", "Failed to get Christina Mayer"
|
|
236
315
|
|
|
237
316
|
|
|
238
317
|
@pytest.mark.asyncio
async def test_chunks_retriever_on_empty_graph(setup_test_environment_empty):
    """Integration test: with no chunks stored, the completion must be an empty list."""
    retriever = ChunksRetriever()
    query = "Christina Mayer"

    # Create the chunk collection without adding any points to it.
    # NOTE(review): presumably needed so retrieval hits an existing but empty
    # collection rather than a missing one — confirm against the retriever.
    engine = get_vector_engine()
    await engine.create_collection(
        "DocumentChunk_text",
        payload_schema=DocumentChunkWithEntities,
    )

    chunks = await retriever.get_retrieved_objects(query)
    context = await retriever.get_context_from_objects(query=query, retrieved_objects=chunks)
    completion = await retriever.get_completion_from_context(
        query=query,
        retrieved_objects=chunks,
        context=context,
    )

    assert isinstance(completion, list), "Retrieved objects should be a list"
    assert not completion, "Found chunks when none should exist"
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
@pytest.mark.asyncio
async def test_chunks_retriever_context_on_empty_graph(setup_test_environment_empty):
    """Integration test: with no chunks stored, the built context must be an empty string."""
    retriever = ChunksRetriever()
    query = "Christina Mayer"

    # Create the chunk collection without adding any points to it.
    engine = get_vector_engine()
    await engine.create_collection(
        "DocumentChunk_text",
        payload_schema=DocumentChunkWithEntities,
    )

    chunks = await retriever.get_retrieved_objects(query)
    context = await retriever.get_context_from_objects(
        query=query,
        retrieved_objects=chunks,
    )

    assert context == "", "Found chunks when none should exist"
|
|
@@ -6,7 +6,6 @@ from typing import Optional, Union
|
|
|
6
6
|
import cognee
|
|
7
7
|
|
|
8
8
|
from cognee.low_level import setup, DataPoint
|
|
9
|
-
from cognee.modules.graph.utils import resolve_edges_to_text
|
|
10
9
|
from cognee.tasks.storage import add_data_points
|
|
11
10
|
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
|
|
12
11
|
|
|
@@ -174,8 +173,11 @@ async def setup_test_environment_empty():
|
|
|
174
173
|
async def test_graph_completion_context_simple(setup_test_environment_simple):
|
|
175
174
|
"""Integration test: verify GraphCompletionRetriever can retrieve context (simple)."""
|
|
176
175
|
retriever = GraphCompletionRetriever()
|
|
176
|
+
query = "Who works at Canva?"
|
|
177
177
|
|
|
178
|
-
|
|
178
|
+
triplets = await retriever.get_retrieved_objects(query)
|
|
179
|
+
|
|
180
|
+
context = await retriever.get_context_from_objects(query=query, retrieved_objects=triplets)
|
|
179
181
|
|
|
180
182
|
# Ensure the top-level sections are present
|
|
181
183
|
assert "Nodes:" in context, "Missing 'Nodes:' section in context"
|
|
@@ -240,8 +242,11 @@ async def test_graph_completion_context_simple(setup_test_environment_simple):
|
|
|
240
242
|
async def test_graph_completion_context_complex(setup_test_environment_complex):
|
|
241
243
|
"""Integration test: verify GraphCompletionRetriever can retrieve context (complex)."""
|
|
242
244
|
retriever = GraphCompletionRetriever(top_k=20)
|
|
245
|
+
query = "Who works at Figma?"
|
|
246
|
+
|
|
247
|
+
triplets = await retriever.get_retrieved_objects(query)
|
|
243
248
|
|
|
244
|
-
context = await
|
|
249
|
+
context = await retriever.get_context_from_objects(query=query, retrieved_objects=triplets)
|
|
245
250
|
|
|
246
251
|
assert "Mike Rodger --[works_for]--> Figma" in context, "Failed to get Mike Rodger"
|
|
247
252
|
assert "Ike Loma --[works_for]--> Figma" in context, "Failed to get Ike Loma"
|
|
@@ -252,9 +257,12 @@ async def test_graph_completion_context_complex(setup_test_environment_complex):
|
|
|
252
257
|
async def test_get_graph_completion_context_on_empty_graph(setup_test_environment_empty):
    """Integration test: on an empty graph the retriever's context must be an empty string."""
    query = "Who works at Figma?"
    retriever = GraphCompletionRetriever()

    retrieved = await retriever.get_retrieved_objects(query)
    context = await retriever.get_context_from_objects(
        query=query,
        retrieved_objects=retrieved,
    )

    assert context == "", "Context should be empty on an empty graph"
|
|
258
266
|
|
|
259
267
|
|
|
260
268
|
@pytest.mark.asyncio
|