cognee 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +1 -0
- cognee/api/client.py +9 -5
- cognee/api/v1/add/add.py +2 -1
- cognee/api/v1/add/routers/get_add_router.py +3 -1
- cognee/api/v1/cognify/cognify.py +24 -16
- cognee/api/v1/cognify/routers/__init__.py +0 -1
- cognee/api/v1/cognify/routers/get_cognify_router.py +30 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
- cognee/api/v1/ontologies/__init__.py +4 -0
- cognee/api/v1/ontologies/ontologies.py +158 -0
- cognee/api/v1/ontologies/routers/__init__.py +0 -0
- cognee/api/v1/ontologies/routers/get_ontology_router.py +109 -0
- cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
- cognee/api/v1/search/search.py +4 -0
- cognee/api/v1/ui/node_setup.py +360 -0
- cognee/api/v1/ui/npm_utils.py +50 -0
- cognee/api/v1/ui/ui.py +38 -68
- cognee/cli/commands/cognify_command.py +8 -1
- cognee/cli/config.py +1 -1
- cognee/context_global_variables.py +86 -9
- cognee/eval_framework/Dockerfile +29 -0
- cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
- cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
- cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
- cognee/eval_framework/eval_config.py +2 -2
- cognee/eval_framework/modal_run_eval.py +16 -28
- cognee/infrastructure/databases/cache/config.py +3 -1
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
- cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
- cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
- cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
- cognee/infrastructure/databases/graph/config.py +7 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +3 -0
- cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
- cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
- cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
- cognee/infrastructure/databases/utils/__init__.py +3 -0
- cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +66 -18
- cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
- cognee/infrastructure/databases/vector/config.py +5 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +6 -1
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -13
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
- cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
- cognee/infrastructure/engine/models/Edge.py +13 -1
- cognee/infrastructure/files/storage/s3_config.py +2 -0
- cognee/infrastructure/files/utils/guess_file_type.py +4 -0
- cognee/infrastructure/llm/LLMGateway.py +5 -2
- cognee/infrastructure/llm/config.py +37 -0
- cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +22 -18
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +47 -38
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +46 -37
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +20 -10
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +23 -11
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +36 -23
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +47 -36
- cognee/infrastructure/loaders/LoaderEngine.py +1 -0
- cognee/infrastructure/loaders/core/__init__.py +2 -1
- cognee/infrastructure/loaders/core/csv_loader.py +93 -0
- cognee/infrastructure/loaders/core/text_loader.py +1 -2
- cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
- cognee/infrastructure/loaders/supported_loaders.py +2 -1
- cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
- cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
- cognee/modules/chunking/CsvChunker.py +35 -0
- cognee/modules/chunking/models/DocumentChunk.py +2 -1
- cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/deletion/prune_system.py +52 -2
- cognee/modules/data/methods/__init__.py +1 -0
- cognee/modules/data/methods/create_dataset.py +4 -2
- cognee/modules/data/methods/delete_dataset.py +26 -0
- cognee/modules/data/methods/get_dataset_ids.py +5 -1
- cognee/modules/data/methods/get_unique_data_id.py +68 -0
- cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
- cognee/modules/data/models/Dataset.py +2 -0
- cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
- cognee/modules/data/processing/document_types/__init__.py +1 -0
- cognee/modules/engine/models/Triplet.py +9 -0
- cognee/modules/engine/models/__init__.py +1 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +89 -39
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
- cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
- cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
- cognee/modules/ingestion/identify.py +4 -4
- cognee/modules/memify/memify.py +1 -7
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
- cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
- cognee/modules/pipelines/operations/pipeline.py +18 -2
- cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
- cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
- cognee/modules/retrieval/__init__.py +1 -1
- cognee/modules/retrieval/base_graph_retriever.py +7 -3
- cognee/modules/retrieval/base_retriever.py +7 -3
- cognee/modules/retrieval/completion_retriever.py +11 -4
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +10 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +18 -51
- cognee/modules/retrieval/graph_completion_retriever.py +14 -1
- cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
- cognee/modules/retrieval/register_retriever.py +10 -0
- cognee/modules/retrieval/registered_community_retrievers.py +1 -0
- cognee/modules/retrieval/temporal_retriever.py +13 -2
- cognee/modules/retrieval/triplet_retriever.py +182 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +43 -11
- cognee/modules/retrieval/utils/completion.py +2 -22
- cognee/modules/run_custom_pipeline/__init__.py +1 -0
- cognee/modules/run_custom_pipeline/run_custom_pipeline.py +76 -0
- cognee/modules/search/methods/get_search_type_tools.py +54 -8
- cognee/modules/search/methods/no_access_control_search.py +4 -0
- cognee/modules/search/methods/search.py +26 -3
- cognee/modules/search/types/SearchType.py +1 -1
- cognee/modules/settings/get_settings.py +19 -0
- cognee/modules/users/methods/create_user.py +12 -27
- cognee/modules/users/methods/get_authenticated_user.py +3 -2
- cognee/modules/users/methods/get_default_user.py +4 -2
- cognee/modules/users/methods/get_user.py +1 -1
- cognee/modules/users/methods/get_user_by_email.py +1 -1
- cognee/modules/users/models/DatasetDatabase.py +24 -3
- cognee/modules/users/models/Tenant.py +6 -7
- cognee/modules/users/models/User.py +6 -5
- cognee/modules/users/models/UserTenant.py +12 -0
- cognee/modules/users/models/__init__.py +1 -0
- cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
- cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
- cognee/modules/users/tenants/methods/__init__.py +1 -0
- cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
- cognee/modules/users/tenants/methods/create_tenant.py +22 -8
- cognee/modules/users/tenants/methods/select_tenant.py +62 -0
- cognee/shared/logging_utils.py +6 -0
- cognee/shared/rate_limiting.py +30 -0
- cognee/tasks/chunks/__init__.py +1 -0
- cognee/tasks/chunks/chunk_by_row.py +94 -0
- cognee/tasks/documents/__init__.py +0 -1
- cognee/tasks/documents/classify_documents.py +2 -0
- cognee/tasks/feedback/generate_improved_answers.py +3 -3
- cognee/tasks/graph/extract_graph_from_data.py +9 -10
- cognee/tasks/ingestion/ingest_data.py +1 -1
- cognee/tasks/memify/__init__.py +2 -0
- cognee/tasks/memify/cognify_session.py +41 -0
- cognee/tasks/memify/extract_user_sessions.py +73 -0
- cognee/tasks/memify/get_triplet_datapoints.py +289 -0
- cognee/tasks/storage/add_data_points.py +142 -2
- cognee/tasks/storage/index_data_points.py +33 -22
- cognee/tasks/storage/index_graph_edges.py +37 -57
- cognee/tests/integration/documents/CsvDocument_test.py +70 -0
- cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
- cognee/tests/integration/tasks/test_add_data_points.py +139 -0
- cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
- cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
- cognee/tests/test_add_docling_document.py +2 -2
- cognee/tests/test_cognee_server_start.py +84 -3
- cognee/tests/test_conversation_history.py +68 -5
- cognee/tests/test_data/example_with_header.csv +3 -0
- cognee/tests/test_dataset_database_handler.py +137 -0
- cognee/tests/test_dataset_delete.py +76 -0
- cognee/tests/test_edge_centered_payload.py +170 -0
- cognee/tests/test_edge_ingestion.py +27 -0
- cognee/tests/test_feedback_enrichment.py +1 -1
- cognee/tests/test_library.py +6 -4
- cognee/tests/test_load.py +62 -0
- cognee/tests/test_multi_tenancy.py +165 -0
- cognee/tests/test_parallel_databases.py +2 -0
- cognee/tests/test_pipeline_cache.py +164 -0
- cognee/tests/test_relational_db_migration.py +54 -2
- cognee/tests/test_search_db.py +44 -2
- cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
- cognee/tests/unit/api/test_ontology_endpoint.py +252 -0
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
- cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
- cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
- cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
- cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
- cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
- cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
- cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
- cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
- cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
- cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
- cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
- cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/METADATA +11 -7
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/RECORD +212 -160
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/entry_points.txt +0 -1
- cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
- cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
- cognee/modules/retrieval/code_retriever.py +0 -232
- cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
- cognee/tasks/code/get_local_dependencies_checker.py +0 -20
- cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
- cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
- cognee/tasks/repo_processor/__init__.py +0 -2
- cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
- cognee/tasks/repo_processor/get_non_code_files.py +0 -158
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/WHEEL +0 -0
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -107,29 +107,10 @@ class TestConditionalAuthenticationIntegration:
|
|
|
107
107
|
# REQUIRE_AUTHENTICATION should be a boolean
|
|
108
108
|
assert isinstance(REQUIRE_AUTHENTICATION, bool)
|
|
109
109
|
|
|
110
|
-
# Currently should be False (optional authentication)
|
|
111
|
-
assert not REQUIRE_AUTHENTICATION
|
|
112
|
-
|
|
113
110
|
|
|
114
111
|
class TestConditionalAuthenticationEnvironmentVariables:
|
|
115
112
|
"""Test environment variable handling."""
|
|
116
113
|
|
|
117
|
-
def test_require_authentication_default_false(self):
|
|
118
|
-
"""Test that REQUIRE_AUTHENTICATION defaults to false when imported with no env vars."""
|
|
119
|
-
with patch.dict(os.environ, {}, clear=True):
|
|
120
|
-
# Remove module from cache to force fresh import
|
|
121
|
-
module_name = "cognee.modules.users.methods.get_authenticated_user"
|
|
122
|
-
if module_name in sys.modules:
|
|
123
|
-
del sys.modules[module_name]
|
|
124
|
-
|
|
125
|
-
# Import after patching environment - module will see empty environment
|
|
126
|
-
from cognee.modules.users.methods.get_authenticated_user import (
|
|
127
|
-
REQUIRE_AUTHENTICATION,
|
|
128
|
-
)
|
|
129
|
-
|
|
130
|
-
importlib.invalidate_caches()
|
|
131
|
-
assert not REQUIRE_AUTHENTICATION
|
|
132
|
-
|
|
133
114
|
def test_require_authentication_true(self):
|
|
134
115
|
"""Test that REQUIRE_AUTHENTICATION=true is parsed correctly when imported."""
|
|
135
116
|
with patch.dict(os.environ, {"REQUIRE_AUTHENTICATION": "true"}):
|
|
@@ -145,50 +126,6 @@ class TestConditionalAuthenticationEnvironmentVariables:
|
|
|
145
126
|
|
|
146
127
|
assert REQUIRE_AUTHENTICATION
|
|
147
128
|
|
|
148
|
-
def test_require_authentication_false_explicit(self):
|
|
149
|
-
"""Test that REQUIRE_AUTHENTICATION=false is parsed correctly when imported."""
|
|
150
|
-
with patch.dict(os.environ, {"REQUIRE_AUTHENTICATION": "false"}):
|
|
151
|
-
# Remove module from cache to force fresh import
|
|
152
|
-
module_name = "cognee.modules.users.methods.get_authenticated_user"
|
|
153
|
-
if module_name in sys.modules:
|
|
154
|
-
del sys.modules[module_name]
|
|
155
|
-
|
|
156
|
-
# Import after patching environment - module will see REQUIRE_AUTHENTICATION=false
|
|
157
|
-
from cognee.modules.users.methods.get_authenticated_user import (
|
|
158
|
-
REQUIRE_AUTHENTICATION,
|
|
159
|
-
)
|
|
160
|
-
|
|
161
|
-
assert not REQUIRE_AUTHENTICATION
|
|
162
|
-
|
|
163
|
-
def test_require_authentication_case_insensitive(self):
|
|
164
|
-
"""Test that environment variable parsing is case insensitive when imported."""
|
|
165
|
-
test_cases = ["TRUE", "True", "tRuE", "FALSE", "False", "fAlSe"]
|
|
166
|
-
|
|
167
|
-
for case in test_cases:
|
|
168
|
-
with patch.dict(os.environ, {"REQUIRE_AUTHENTICATION": case}):
|
|
169
|
-
# Remove module from cache to force fresh import
|
|
170
|
-
module_name = "cognee.modules.users.methods.get_authenticated_user"
|
|
171
|
-
if module_name in sys.modules:
|
|
172
|
-
del sys.modules[module_name]
|
|
173
|
-
|
|
174
|
-
# Import after patching environment
|
|
175
|
-
from cognee.modules.users.methods.get_authenticated_user import (
|
|
176
|
-
REQUIRE_AUTHENTICATION,
|
|
177
|
-
)
|
|
178
|
-
|
|
179
|
-
expected = case.lower() == "true"
|
|
180
|
-
assert REQUIRE_AUTHENTICATION == expected, f"Failed for case: {case}"
|
|
181
|
-
|
|
182
|
-
def test_current_require_authentication_value(self):
|
|
183
|
-
"""Test that the current REQUIRE_AUTHENTICATION module value is as expected."""
|
|
184
|
-
from cognee.modules.users.methods.get_authenticated_user import (
|
|
185
|
-
REQUIRE_AUTHENTICATION,
|
|
186
|
-
)
|
|
187
|
-
|
|
188
|
-
# The module-level variable should currently be False (set at import time)
|
|
189
|
-
assert isinstance(REQUIRE_AUTHENTICATION, bool)
|
|
190
|
-
assert not REQUIRE_AUTHENTICATION
|
|
191
|
-
|
|
192
129
|
|
|
193
130
|
class TestConditionalAuthenticationEdgeCases:
|
|
194
131
|
"""Test edge cases and error scenarios."""
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from itertools import product
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from cognee.infrastructure.databases.vector.embeddings import get_embedding_engine
|
|
7
|
+
from cognee.tasks.chunks import chunk_by_row
|
|
8
|
+
|
|
9
|
+
INPUT_TEXTS = "name: John, age: 30, city: New York, country: USA"
|
|
10
|
+
max_chunk_size_vals = [8, 32]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@pytest.mark.parametrize(
|
|
14
|
+
"input_text,max_chunk_size",
|
|
15
|
+
list(product([INPUT_TEXTS], max_chunk_size_vals)),
|
|
16
|
+
)
|
|
17
|
+
def test_chunk_by_row_isomorphism(input_text, max_chunk_size):
|
|
18
|
+
chunks = chunk_by_row(input_text, max_chunk_size)
|
|
19
|
+
reconstructed_text = ", ".join([chunk["text"] for chunk in chunks])
|
|
20
|
+
assert reconstructed_text == input_text, (
|
|
21
|
+
f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@pytest.mark.parametrize(
|
|
26
|
+
"input_text,max_chunk_size",
|
|
27
|
+
list(product([INPUT_TEXTS], max_chunk_size_vals)),
|
|
28
|
+
)
|
|
29
|
+
def test_row_chunk_length(input_text, max_chunk_size):
|
|
30
|
+
chunks = list(chunk_by_row(data=input_text, max_chunk_size=max_chunk_size))
|
|
31
|
+
embedding_engine = get_embedding_engine()
|
|
32
|
+
|
|
33
|
+
chunk_lengths = np.array(
|
|
34
|
+
[embedding_engine.tokenizer.count_tokens(chunk["text"]) for chunk in chunks]
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
larger_chunks = chunk_lengths[chunk_lengths > max_chunk_size]
|
|
38
|
+
assert np.all(chunk_lengths <= max_chunk_size), (
|
|
39
|
+
f"{max_chunk_size = }: {larger_chunks} are too large"
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@pytest.mark.parametrize(
|
|
44
|
+
"input_text,max_chunk_size",
|
|
45
|
+
list(product([INPUT_TEXTS], max_chunk_size_vals)),
|
|
46
|
+
)
|
|
47
|
+
def test_chunk_by_row_chunk_numbering(input_text, max_chunk_size):
|
|
48
|
+
chunks = chunk_by_row(data=input_text, max_chunk_size=max_chunk_size)
|
|
49
|
+
chunk_indices = np.array([chunk["chunk_index"] for chunk in chunks])
|
|
50
|
+
assert np.all(chunk_indices == np.arange(len(chunk_indices))), (
|
|
51
|
+
f"{chunk_indices = } are not monotonically increasing"
|
|
52
|
+
)
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from unittest.mock import AsyncMock, patch
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
from cognee.infrastructure.engine import DataPoint
|
|
6
|
+
from cognee.modules.engine.models import Triplet
|
|
7
|
+
from cognee.tasks.storage.add_data_points import (
|
|
8
|
+
add_data_points,
|
|
9
|
+
InvalidDataPointsInAddDataPointsError,
|
|
10
|
+
_extract_embeddable_text_from_datapoint,
|
|
11
|
+
_create_triplets_from_graph,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
adp_module = sys.modules["cognee.tasks.storage.add_data_points"]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class SimplePoint(DataPoint):
|
|
18
|
+
text: str
|
|
19
|
+
metadata: dict = {"index_fields": ["text"]}
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@pytest.mark.asyncio
|
|
23
|
+
@pytest.mark.parametrize("bad_input", [None, ["not_datapoint"]])
|
|
24
|
+
async def test_add_data_points_validates_inputs(bad_input):
|
|
25
|
+
with pytest.raises(InvalidDataPointsInAddDataPointsError):
|
|
26
|
+
await add_data_points(bad_input)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@pytest.mark.asyncio
|
|
30
|
+
@patch.object(adp_module, "index_graph_edges")
|
|
31
|
+
@patch.object(adp_module, "index_data_points")
|
|
32
|
+
@patch.object(adp_module, "get_graph_engine")
|
|
33
|
+
@patch.object(adp_module, "deduplicate_nodes_and_edges")
|
|
34
|
+
@patch.object(adp_module, "get_graph_from_model")
|
|
35
|
+
async def test_add_data_points_indexes_nodes_and_edges(
|
|
36
|
+
mock_get_graph, mock_dedup, mock_get_engine, mock_index_nodes, mock_index_edges
|
|
37
|
+
):
|
|
38
|
+
dp1 = SimplePoint(text="first")
|
|
39
|
+
dp2 = SimplePoint(text="second")
|
|
40
|
+
|
|
41
|
+
edge1 = (str(dp1.id), str(dp2.id), "related_to", {"edge_text": "connects"})
|
|
42
|
+
custom_edges = [(str(dp2.id), str(dp1.id), "custom_edge", {})]
|
|
43
|
+
|
|
44
|
+
mock_get_graph.side_effect = [([dp1], [edge1]), ([dp2], [])]
|
|
45
|
+
mock_dedup.side_effect = lambda n, e: (n, e)
|
|
46
|
+
graph_engine = AsyncMock()
|
|
47
|
+
mock_get_engine.return_value = graph_engine
|
|
48
|
+
|
|
49
|
+
result = await add_data_points([dp1, dp2], custom_edges=custom_edges)
|
|
50
|
+
|
|
51
|
+
assert result == [dp1, dp2]
|
|
52
|
+
graph_engine.add_nodes.assert_awaited_once()
|
|
53
|
+
mock_index_nodes.assert_awaited_once()
|
|
54
|
+
assert graph_engine.add_edges.await_count == 2
|
|
55
|
+
assert edge1 in graph_engine.add_edges.await_args_list[0].args[0]
|
|
56
|
+
assert graph_engine.add_edges.await_args_list[1].args[0] == custom_edges
|
|
57
|
+
assert mock_index_edges.await_count == 2
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@pytest.mark.asyncio
|
|
61
|
+
@patch.object(adp_module, "index_graph_edges")
|
|
62
|
+
@patch.object(adp_module, "index_data_points")
|
|
63
|
+
@patch.object(adp_module, "get_graph_engine")
|
|
64
|
+
@patch.object(adp_module, "deduplicate_nodes_and_edges")
|
|
65
|
+
@patch.object(adp_module, "get_graph_from_model")
|
|
66
|
+
async def test_add_data_points_indexes_triplets_when_enabled(
|
|
67
|
+
mock_get_graph, mock_dedup, mock_get_engine, mock_index_nodes, mock_index_edges
|
|
68
|
+
):
|
|
69
|
+
dp1 = SimplePoint(text="source")
|
|
70
|
+
dp2 = SimplePoint(text="target")
|
|
71
|
+
|
|
72
|
+
edge1 = (str(dp1.id), str(dp2.id), "relates", {"edge_text": "describes"})
|
|
73
|
+
|
|
74
|
+
mock_get_graph.side_effect = [([dp1], [edge1]), ([dp2], [])]
|
|
75
|
+
mock_dedup.side_effect = lambda n, e: (n, e)
|
|
76
|
+
graph_engine = AsyncMock()
|
|
77
|
+
mock_get_engine.return_value = graph_engine
|
|
78
|
+
|
|
79
|
+
await add_data_points([dp1, dp2], embed_triplets=True)
|
|
80
|
+
|
|
81
|
+
assert mock_index_nodes.await_count == 2
|
|
82
|
+
nodes_arg = mock_index_nodes.await_args_list[0].args[0]
|
|
83
|
+
triplets_arg = mock_index_nodes.await_args_list[1].args[0]
|
|
84
|
+
assert nodes_arg == [dp1, dp2]
|
|
85
|
+
assert len(triplets_arg) == 1
|
|
86
|
+
assert isinstance(triplets_arg[0], Triplet)
|
|
87
|
+
mock_index_edges.assert_awaited_once()
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@pytest.mark.asyncio
|
|
91
|
+
@patch.object(adp_module, "index_graph_edges")
|
|
92
|
+
@patch.object(adp_module, "index_data_points")
|
|
93
|
+
@patch.object(adp_module, "get_graph_engine")
|
|
94
|
+
@patch.object(adp_module, "deduplicate_nodes_and_edges")
|
|
95
|
+
@patch.object(adp_module, "get_graph_from_model")
|
|
96
|
+
async def test_add_data_points_with_empty_list(
|
|
97
|
+
mock_get_graph, mock_dedup, mock_get_engine, mock_index_nodes, mock_index_edges
|
|
98
|
+
):
|
|
99
|
+
mock_dedup.side_effect = lambda n, e: (n, e)
|
|
100
|
+
graph_engine = AsyncMock()
|
|
101
|
+
mock_get_engine.return_value = graph_engine
|
|
102
|
+
|
|
103
|
+
result = await add_data_points([])
|
|
104
|
+
|
|
105
|
+
assert result == []
|
|
106
|
+
mock_get_graph.assert_not_called()
|
|
107
|
+
graph_engine.add_nodes.assert_awaited_once_with([])
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
@pytest.mark.asyncio
|
|
111
|
+
@patch.object(adp_module, "index_graph_edges")
|
|
112
|
+
@patch.object(adp_module, "index_data_points")
|
|
113
|
+
@patch.object(adp_module, "get_graph_engine")
|
|
114
|
+
@patch.object(adp_module, "deduplicate_nodes_and_edges")
|
|
115
|
+
@patch.object(adp_module, "get_graph_from_model")
|
|
116
|
+
async def test_add_data_points_with_single_datapoint(
|
|
117
|
+
mock_get_graph, mock_dedup, mock_get_engine, mock_index_nodes, mock_index_edges
|
|
118
|
+
):
|
|
119
|
+
dp = SimplePoint(text="single")
|
|
120
|
+
mock_get_graph.side_effect = [([dp], [])]
|
|
121
|
+
mock_dedup.side_effect = lambda n, e: (n, e)
|
|
122
|
+
graph_engine = AsyncMock()
|
|
123
|
+
mock_get_engine.return_value = graph_engine
|
|
124
|
+
|
|
125
|
+
result = await add_data_points([dp])
|
|
126
|
+
|
|
127
|
+
assert result == [dp]
|
|
128
|
+
mock_get_graph.assert_called_once()
|
|
129
|
+
mock_index_nodes.assert_awaited_once()
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def test_extract_embeddable_text_from_datapoint():
|
|
133
|
+
dp = SimplePoint(text="hello world")
|
|
134
|
+
text = _extract_embeddable_text_from_datapoint(dp)
|
|
135
|
+
assert text == "hello world"
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def test_extract_embeddable_text_with_multiple_fields():
|
|
139
|
+
class MultiField(DataPoint):
|
|
140
|
+
title: str
|
|
141
|
+
description: str
|
|
142
|
+
metadata: dict = {"index_fields": ["title", "description"]}
|
|
143
|
+
|
|
144
|
+
dp = MultiField(title="Test", description="Description")
|
|
145
|
+
text = _extract_embeddable_text_from_datapoint(dp)
|
|
146
|
+
assert text == "Test Description"
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def test_extract_embeddable_text_with_no_index_fields():
|
|
150
|
+
class NoIndex(DataPoint):
|
|
151
|
+
text: str
|
|
152
|
+
metadata: dict = {"index_fields": []}
|
|
153
|
+
|
|
154
|
+
dp = NoIndex(text="ignored")
|
|
155
|
+
text = _extract_embeddable_text_from_datapoint(dp)
|
|
156
|
+
assert text == ""
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def test_create_triplets_from_graph():
|
|
160
|
+
dp1 = SimplePoint(text="source node")
|
|
161
|
+
dp2 = SimplePoint(text="target node")
|
|
162
|
+
edge = (str(dp1.id), str(dp2.id), "connects_to", {"edge_text": "links"})
|
|
163
|
+
|
|
164
|
+
triplets = _create_triplets_from_graph([dp1, dp2], [edge])
|
|
165
|
+
|
|
166
|
+
assert len(triplets) == 1
|
|
167
|
+
assert isinstance(triplets[0], Triplet)
|
|
168
|
+
assert triplets[0].from_node_id == str(dp1.id)
|
|
169
|
+
assert triplets[0].to_node_id == str(dp2.id)
|
|
170
|
+
assert "source node" in triplets[0].text
|
|
171
|
+
assert "target node" in triplets[0].text
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def test_extract_embeddable_text_with_none_datapoint():
|
|
175
|
+
text = _extract_embeddable_text_from_datapoint(None)
|
|
176
|
+
assert text == ""
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def test_extract_embeddable_text_without_metadata():
|
|
180
|
+
class NoMetadata(DataPoint):
|
|
181
|
+
text: str
|
|
182
|
+
|
|
183
|
+
dp = NoMetadata(text="test")
|
|
184
|
+
delattr(dp, "metadata")
|
|
185
|
+
text = _extract_embeddable_text_from_datapoint(dp)
|
|
186
|
+
assert text == ""
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def test_extract_embeddable_text_with_whitespace_only():
|
|
190
|
+
class WhitespaceField(DataPoint):
|
|
191
|
+
text: str
|
|
192
|
+
metadata: dict = {"index_fields": ["text"]}
|
|
193
|
+
|
|
194
|
+
dp = WhitespaceField(text=" ")
|
|
195
|
+
text = _extract_embeddable_text_from_datapoint(dp)
|
|
196
|
+
assert text == ""
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def test_create_triplets_skips_short_edge_tuples():
|
|
200
|
+
dp = SimplePoint(text="node")
|
|
201
|
+
incomplete_edge = (str(dp.id), str(dp.id))
|
|
202
|
+
|
|
203
|
+
triplets = _create_triplets_from_graph([dp], [incomplete_edge])
|
|
204
|
+
|
|
205
|
+
assert len(triplets) == 0
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def test_create_triplets_skips_missing_source_node():
|
|
209
|
+
dp1 = SimplePoint(text="target")
|
|
210
|
+
edge = ("missing_id", str(dp1.id), "relates", {})
|
|
211
|
+
|
|
212
|
+
triplets = _create_triplets_from_graph([dp1], [edge])
|
|
213
|
+
|
|
214
|
+
assert len(triplets) == 0
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def test_create_triplets_skips_missing_target_node():
|
|
218
|
+
dp1 = SimplePoint(text="source")
|
|
219
|
+
edge = (str(dp1.id), "missing_id", "relates", {})
|
|
220
|
+
|
|
221
|
+
triplets = _create_triplets_from_graph([dp1], [edge])
|
|
222
|
+
|
|
223
|
+
assert len(triplets) == 0
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def test_create_triplets_skips_none_relationship():
|
|
227
|
+
dp1 = SimplePoint(text="source")
|
|
228
|
+
dp2 = SimplePoint(text="target")
|
|
229
|
+
edge = (str(dp1.id), str(dp2.id), None, {})
|
|
230
|
+
|
|
231
|
+
triplets = _create_triplets_from_graph([dp1, dp2], [edge])
|
|
232
|
+
|
|
233
|
+
assert len(triplets) == 0
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def test_create_triplets_uses_relationship_name_when_no_edge_text():
|
|
237
|
+
dp1 = SimplePoint(text="source")
|
|
238
|
+
dp2 = SimplePoint(text="target")
|
|
239
|
+
edge = (str(dp1.id), str(dp2.id), "connects_to", {})
|
|
240
|
+
|
|
241
|
+
triplets = _create_triplets_from_graph([dp1, dp2], [edge])
|
|
242
|
+
|
|
243
|
+
assert len(triplets) == 1
|
|
244
|
+
assert "connects_to" in triplets[0].text
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def test_create_triplets_prevents_duplicates():
|
|
248
|
+
dp1 = SimplePoint(text="source")
|
|
249
|
+
dp2 = SimplePoint(text="target")
|
|
250
|
+
edge = (str(dp1.id), str(dp2.id), "relates", {"edge_text": "links"})
|
|
251
|
+
|
|
252
|
+
triplets = _create_triplets_from_graph([dp1, dp2], [edge, edge])
|
|
253
|
+
|
|
254
|
+
assert len(triplets) == 1
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def test_create_triplets_skips_nodes_without_id():
|
|
258
|
+
class NodeNoId:
|
|
259
|
+
pass
|
|
260
|
+
|
|
261
|
+
dp = SimplePoint(text="valid")
|
|
262
|
+
node_no_id = NodeNoId()
|
|
263
|
+
edge = (str(dp.id), "some_id", "relates", {})
|
|
264
|
+
|
|
265
|
+
triplets = _create_triplets_from_graph([dp, node_no_id], [edge])
|
|
266
|
+
|
|
267
|
+
assert len(triplets) == 0
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
@pytest.mark.asyncio
@patch.object(adp_module, "index_graph_edges")
@patch.object(adp_module, "index_data_points")
@patch.object(adp_module, "get_graph_engine")
@patch.object(adp_module, "deduplicate_nodes_and_edges")
@patch.object(adp_module, "get_graph_from_model")
async def test_add_data_points_with_empty_custom_edges(
    mock_get_graph, mock_dedup, mock_get_engine, mock_index_nodes, mock_index_edges
):
    """Call ``add_data_points`` with ``custom_edges=[]`` and check its output.

    The input points must come back unchanged, and the graph engine's
    ``add_edges`` must be awaited exactly once.
    """
    point = SimplePoint(text="test")

    # Stand-in graph engine whose awaited calls can be counted.
    engine_stub = AsyncMock()
    mock_get_engine.return_value = engine_stub

    # Graph extraction returns the single point with no edges;
    # deduplication passes its inputs straight through.
    mock_get_graph.side_effect = [([point], [])]
    mock_dedup.side_effect = lambda n, e: (n, e)

    returned = await add_data_points([point], custom_edges=[])

    assert returned == [point]
    assert engine_stub.add_edges.await_count == 1
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cognee
|
|
3
|
-
Version: 0.4.1
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning.
|
|
5
5
|
Project-URL: Homepage, https://www.cognee.ai
|
|
6
6
|
Project-URL: Repository, https://github.com/topoteretes/cognee
|
|
@@ -16,10 +16,13 @@ Classifier: Operating System :: Microsoft :: Windows
|
|
|
16
16
|
Classifier: Operating System :: POSIX :: Linux
|
|
17
17
|
Classifier: Topic :: Software Development :: Libraries
|
|
18
18
|
Requires-Python: <3.14,>=3.10
|
|
19
|
-
Requires-Dist: aiofiles
|
|
19
|
+
Requires-Dist: aiofiles>=23.2.1
|
|
20
20
|
Requires-Dist: aiohttp<4.0.0,>=3.11.14
|
|
21
|
+
Requires-Dist: aiolimiter>=1.2.1
|
|
21
22
|
Requires-Dist: aiosqlite<1.0.0,>=0.20.0
|
|
22
23
|
Requires-Dist: alembic<2,>=1.13.3
|
|
24
|
+
Requires-Dist: diskcache>=5.6.3
|
|
25
|
+
Requires-Dist: fakeredis[lua]>=2.32.0
|
|
23
26
|
Requires-Dist: fastapi-users[sqlalchemy]<15.0.0,>=14.0.1
|
|
24
27
|
Requires-Dist: fastapi<1.0.0,>=0.116.2
|
|
25
28
|
Requires-Dist: fastembed<=0.6.0
|
|
@@ -28,8 +31,7 @@ Requires-Dist: gunicorn<24,>=20.1.0
|
|
|
28
31
|
Requires-Dist: instructor<2.0.0,>=1.9.1
|
|
29
32
|
Requires-Dist: jinja2<4,>=3.1.3
|
|
30
33
|
Requires-Dist: kuzu==0.11.3
|
|
31
|
-
Requires-Dist:
|
|
32
|
-
Requires-Dist: lancedb<=0.25.3,>=0.24.0
|
|
34
|
+
Requires-Dist: lancedb<1.0.0,>=0.24.0
|
|
33
35
|
Requires-Dist: limits<5,>=4.4.1
|
|
34
36
|
Requires-Dist: litellm>=1.76.0
|
|
35
37
|
Requires-Dist: mistralai>=1.9.10
|
|
@@ -39,7 +41,7 @@ Requires-Dist: numpy<=4.0.0,>=1.26.4
|
|
|
39
41
|
Requires-Dist: onnxruntime<=1.22.1
|
|
40
42
|
Requires-Dist: openai>=1.80.1
|
|
41
43
|
Requires-Dist: pydantic-settings<3,>=2.2.1
|
|
42
|
-
Requires-Dist: pydantic<
|
|
44
|
+
Requires-Dist: pydantic<2.12.0,>=2.10.5
|
|
43
45
|
Requires-Dist: pylance<=0.36.0,>=0.22.0
|
|
44
46
|
Requires-Dist: pympler<2.0.0,>=1.1
|
|
45
47
|
Requires-Dist: pypdf<7.0.0,>=4.1.0
|
|
@@ -97,7 +99,8 @@ Provides-Extra: docling
|
|
|
97
99
|
Requires-Dist: docling>=2.54; extra == 'docling'
|
|
98
100
|
Requires-Dist: transformers>=4.55; extra == 'docling'
|
|
99
101
|
Provides-Extra: docs
|
|
100
|
-
Requires-Dist: lxml<
|
|
102
|
+
Requires-Dist: lxml<5,>=4.9.3; (python_version < '3.13') and extra == 'docs'
|
|
103
|
+
Requires-Dist: lxml<6,>=5; (python_version >= '3.13') and extra == 'docs'
|
|
101
104
|
Requires-Dist: unstructured[csv,doc,docx,epub,md,odt,org,pdf,ppt,pptx,rst,rtf,tsv,xlsx]<19,>=0.18.1; extra == 'docs'
|
|
102
105
|
Provides-Extra: evals
|
|
103
106
|
Requires-Dist: gdown<6,>=5.2.0; extra == 'evals'
|
|
@@ -144,7 +147,8 @@ Requires-Dist: redis<6.0.0,>=5.0.3; extra == 'redis'
|
|
|
144
147
|
Provides-Extra: scraping
|
|
145
148
|
Requires-Dist: apscheduler<=3.11.0,>=3.10.0; extra == 'scraping'
|
|
146
149
|
Requires-Dist: beautifulsoup4>=4.13.1; extra == 'scraping'
|
|
147
|
-
Requires-Dist: lxml
|
|
150
|
+
Requires-Dist: lxml<5,>=4.9.3; (python_version < '3.13') and extra == 'scraping'
|
|
151
|
+
Requires-Dist: lxml<6,>=5; (python_version >= '3.13') and extra == 'scraping'
|
|
148
152
|
Requires-Dist: playwright>=1.9.0; extra == 'scraping'
|
|
149
153
|
Requires-Dist: protego>=0.1; extra == 'scraping'
|
|
150
154
|
Requires-Dist: tavily-python>=0.7.12; extra == 'scraping'
|