cognee 0.3.4.dev4__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- cognee/api/client.py +16 -7
- cognee/api/health.py +5 -9
- cognee/api/v1/add/add.py +3 -1
- cognee/api/v1/cognify/cognify.py +44 -7
- cognee/api/v1/permissions/routers/get_permissions_router.py +8 -4
- cognee/api/v1/search/search.py +3 -0
- cognee/api/v1/ui/__init__.py +1 -1
- cognee/api/v1/ui/ui.py +215 -150
- cognee/api/v1/update/__init__.py +1 -0
- cognee/api/v1/update/routers/__init__.py +1 -0
- cognee/api/v1/update/routers/get_update_router.py +90 -0
- cognee/api/v1/update/update.py +100 -0
- cognee/base_config.py +5 -2
- cognee/cli/_cognee.py +28 -10
- cognee/cli/commands/delete_command.py +34 -2
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
- cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +3 -2
- cognee/eval_framework/modal_eval_dashboard.py +9 -1
- cognee/infrastructure/databases/graph/config.py +9 -9
- cognee/infrastructure/databases/graph/get_graph_engine.py +4 -21
- cognee/infrastructure/databases/graph/kuzu/adapter.py +60 -9
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +3 -3
- cognee/infrastructure/databases/relational/config.py +4 -4
- cognee/infrastructure/databases/relational/create_relational_engine.py +11 -3
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +7 -3
- cognee/infrastructure/databases/vector/config.py +7 -7
- cognee/infrastructure/databases/vector/create_vector_engine.py +7 -15
- cognee/infrastructure/databases/vector/embeddings/EmbeddingEngine.py +9 -0
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +11 -0
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +19 -2
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -0
- cognee/infrastructure/databases/vector/embeddings/config.py +8 -0
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +5 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +11 -10
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +48 -38
- cognee/infrastructure/databases/vector/vector_db_interface.py +8 -4
- cognee/infrastructure/files/storage/S3FileStorage.py +15 -5
- cognee/infrastructure/files/storage/s3_config.py +1 -0
- cognee/infrastructure/files/utils/open_data_file.py +7 -14
- cognee/infrastructure/llm/LLMGateway.py +19 -117
- cognee/infrastructure/llm/config.py +28 -13
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_categories.py +2 -1
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_event_entities.py +3 -2
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_summary.py +3 -2
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_content_graph.py +2 -1
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_event_graph.py +3 -2
- cognee/infrastructure/llm/prompts/read_query_prompt.py +3 -2
- cognee/infrastructure/llm/prompts/show_prompt.py +35 -0
- cognee/infrastructure/llm/prompts/test.txt +1 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +50 -397
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +2 -3
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +8 -88
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +78 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +2 -99
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +49 -401
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +19 -882
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +2 -34
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +2 -107
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/acreate_structured_output.baml +26 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/__init__.py +1 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +76 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/create_dynamic_baml_type.py +122 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +3 -3
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +0 -32
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +107 -98
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +5 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +5 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +0 -26
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +17 -67
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +8 -7
- cognee/infrastructure/llm/utils.py +4 -4
- cognee/infrastructure/loaders/LoaderEngine.py +5 -2
- cognee/infrastructure/loaders/external/__init__.py +7 -0
- cognee/infrastructure/loaders/external/advanced_pdf_loader.py +244 -0
- cognee/infrastructure/loaders/supported_loaders.py +7 -0
- cognee/modules/data/methods/create_authorized_dataset.py +9 -0
- cognee/modules/data/methods/get_authorized_dataset.py +1 -1
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
- cognee/modules/data/methods/get_deletion_counts.py +92 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +1 -1
- cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
- cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
- cognee/modules/ingestion/data_types/TextData.py +0 -1
- cognee/modules/observability/get_observe.py +14 -0
- cognee/modules/observability/observers.py +1 -0
- cognee/modules/ontology/base_ontology_resolver.py +42 -0
- cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
- cognee/modules/ontology/matching_strategies.py +53 -0
- cognee/modules/ontology/models.py +20 -0
- cognee/modules/ontology/ontology_config.py +24 -0
- cognee/modules/ontology/ontology_env_config.py +45 -0
- cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +21 -24
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +3 -3
- cognee/modules/retrieval/code_retriever.py +2 -1
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -4
- cognee/modules/retrieval/graph_completion_cot_retriever.py +6 -5
- cognee/modules/retrieval/graph_completion_retriever.py +0 -3
- cognee/modules/retrieval/insights_retriever.py +1 -1
- cognee/modules/retrieval/jaccard_retrival.py +60 -0
- cognee/modules/retrieval/lexical_retriever.py +123 -0
- cognee/modules/retrieval/natural_language_retriever.py +2 -1
- cognee/modules/retrieval/temporal_retriever.py +3 -2
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +2 -12
- cognee/modules/retrieval/utils/completion.py +4 -7
- cognee/modules/search/methods/get_search_type_tools.py +7 -0
- cognee/modules/search/methods/no_access_control_search.py +1 -1
- cognee/modules/search/methods/search.py +32 -13
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
- cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
- cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +10 -0
- cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
- cognee/modules/users/permissions/methods/get_principal.py +9 -0
- cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
- cognee/modules/users/permissions/methods/get_role.py +10 -0
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
- cognee/modules/users/permissions/methods/get_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
- cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
- cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
- cognee/modules/users/roles/methods/create_role.py +12 -1
- cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
- cognee/modules/users/tenants/methods/create_tenant.py +12 -1
- cognee/modules/visualization/cognee_network_visualization.py +13 -9
- cognee/shared/data_models.py +0 -1
- cognee/shared/utils.py +0 -32
- cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
- cognee/tasks/codingagents/coding_rule_associations.py +3 -2
- cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +3 -2
- cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +3 -2
- cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +3 -2
- cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +3 -2
- cognee/tasks/graph/extract_graph_from_code.py +2 -2
- cognee/tasks/graph/extract_graph_from_data.py +55 -12
- cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
- cognee/tasks/ingestion/migrate_relational_database.py +132 -41
- cognee/tasks/ingestion/resolve_data_directories.py +4 -1
- cognee/tasks/schema/ingest_database_schema.py +134 -0
- cognee/tasks/schema/models.py +40 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/storage/index_graph_edges.py +3 -1
- cognee/tasks/summarization/summarize_code.py +2 -2
- cognee/tasks/summarization/summarize_text.py +2 -2
- cognee/tasks/temporal_graph/enrich_events.py +2 -2
- cognee/tasks/temporal_graph/extract_events_and_entities.py +2 -2
- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +13 -4
- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +13 -3
- cognee/tests/test_advanced_pdf_loader.py +141 -0
- cognee/tests/test_chromadb.py +40 -0
- cognee/tests/test_cognee_server_start.py +6 -1
- cognee/tests/test_data/Quantum_computers.txt +9 -0
- cognee/tests/test_lancedb.py +211 -0
- cognee/tests/test_pgvector.py +40 -0
- cognee/tests/test_relational_db_migration.py +76 -0
- cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +2 -1
- cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +0 -4
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -4
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +0 -4
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/METADATA +92 -96
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/RECORD +173 -159
- distributed/pyproject.toml +0 -1
- cognee/infrastructure/data/utils/extract_keywords.py +0 -48
- cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +0 -1227
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +0 -109
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +0 -343
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_categories.py +0 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +0 -89
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/__init__.py +0 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +0 -44
- cognee/tasks/graph/infer_data_ontology.py +0 -309
- cognee/tests/test_falkordb.py +0 -174
- /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/__init__.py +0 -0
- /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/__init__.py +0 -0
- /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/texts.json +0 -0
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/WHEEL +0 -0
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/entry_points.txt +0 -0
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/licenses/NOTICE.md +0 -0
cognee/tasks/storage/index_graph_edges.py CHANGED
```diff
@@ -9,7 +9,7 @@ from cognee.modules.graph.models.EdgeType import EdgeType
 logger = get_logger(level=ERROR)
 
 
-async def index_graph_edges(batch_size: int = 1024):
+async def index_graph_edges():
     """
     Indexes graph edges by creating and managing vector indexes for relationship types.
 
@@ -72,6 +72,8 @@ async def index_graph_edges(batch_size: int = 1024):
     for index_name, indexable_points in index_points.items():
         index_name, field_name = index_name.split(".")
 
+        # Get maximum batch size for embedding model
+        batch_size = vector_engine.embedding_engine.get_batch_size()
         # We save the data in batches of {batch_size} to not put a lot of pressure on the database
         for start in range(0, len(indexable_points), batch_size):
            batch = indexable_points[start : start + batch_size]
```
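The hunk above drops the `batch_size` parameter and instead derives it from the embedding engine (`get_batch_size()` is also added to the embedding engines elsewhere in this release). A minimal, self-contained sketch of the pattern, with a stand-in engine and stand-in data:

```python
# Stand-in engine: in cognee the value comes from
# vector_engine.embedding_engine.get_batch_size().
class FakeEmbeddingEngine:
    def get_batch_size(self) -> int:
        return 1024  # illustrative value only


embedding_engine = FakeEmbeddingEngine()
indexable_points = list(range(3000))  # stand-in for relationship datapoints

# Batch size now tracks the embedding model's limit instead of a caller-supplied default.
batch_size = embedding_engine.get_batch_size()
for start in range(0, len(indexable_points), batch_size):
    batch = indexable_points[start : start + batch_size]
    print(f"indexing {len(batch)} points starting at {start}")
```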
cognee/tasks/summarization/summarize_code.py CHANGED
```diff
@@ -3,7 +3,7 @@ from typing import AsyncGenerator, Union
 from uuid import uuid5
 
 from cognee.infrastructure.engine import DataPoint
-from cognee.infrastructure.llm.
+from cognee.infrastructure.llm.extraction import extract_code_summary
 from .models import CodeSummary
 
 
@@ -16,7 +16,7 @@ async def summarize_code(
     code_data_points = [file for file in code_graph_nodes if hasattr(file, "source_code")]
 
     file_summaries = await asyncio.gather(
-        *[
+        *[extract_code_summary(file.source_code) for file in code_data_points]
     )
 
     file_summaries_map = {
```
cognee/tasks/summarization/summarize_text.py CHANGED
```diff
@@ -5,7 +5,7 @@ from pydantic import BaseModel
 
 from cognee.tasks.summarization.exceptions import InvalidSummaryInputsError
 from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
-from cognee.infrastructure.llm.
+from cognee.infrastructure.llm.extraction import extract_summary
 from cognee.modules.cognify.config import get_cognify_config
 from cognee.tasks.summarization.models import TextSummary
 
@@ -50,7 +50,7 @@ async def summarize_text(
     summarization_model = cognee_config.summarization_model
 
     chunk_summaries = await asyncio.gather(
-        *[
+        *[extract_summary(chunk.text, summarization_model) for chunk in data_chunks]
     )
 
     summaries = [
```
cognee/tasks/temporal_graph/enrich_events.py CHANGED
```diff
@@ -1,6 +1,6 @@
 from typing import List
 
-from cognee.infrastructure.llm import
+from cognee.infrastructure.llm.extraction import extract_event_entities
 from cognee.modules.engine.models import Event
 from cognee.tasks.temporal_graph.models import EventWithEntities, EventEntityList
 
@@ -29,6 +29,6 @@ async def enrich_events(events: List[Event]) -> List[EventWithEntities]:
     events_json_str = json.dumps(events_json)
 
     # Extract entities from events
-    entity_result = await
+    entity_result = await extract_event_entities(events_json_str, EventEntityList)
 
     return entity_result.events
```
cognee/tasks/temporal_graph/extract_events_and_entities.py CHANGED
```diff
@@ -1,6 +1,6 @@
 import asyncio
 from typing import Type, List
-from cognee.infrastructure.llm.
+from cognee.infrastructure.llm.extraction import extract_event_graph
 from cognee.modules.chunking.models import DocumentChunk
 from cognee.tasks.temporal_graph.models import EventList
 from cognee.modules.engine.utils.generate_event_datapoint import generate_event_datapoint
@@ -21,7 +21,7 @@ async def extract_events_and_timestamps(data_chunks: List[DocumentChunk]) -> Lis
         List[DocumentChunk]: The same list of document chunks, enriched with extracted Event datapoints.
     """
     events = await asyncio.gather(
-        *[
+        *[extract_event_graph(chunk.text, EventList) for chunk in data_chunks]
     )
 
     for data_chunk, event_list in zip(data_chunks, events):
```
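The four hunks above all follow the same module move: the extraction helpers now live under `cognee.infrastructure.llm.extraction` (see the renames in the file list, formerly `structured_output_framework/litellm_instructor/extraction`). With cognee 0.3.5 installed, the relocated helpers import as:

```python
from cognee.infrastructure.llm.extraction import (
    extract_code_summary,
    extract_event_entities,
    extract_event_graph,
    extract_summary,
)
```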
cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py CHANGED
```diff
@@ -12,7 +12,8 @@ from cognee.cli.commands.search_command import SearchCommand
 from cognee.cli.commands.cognify_command import CognifyCommand
 from cognee.cli.commands.delete_command import DeleteCommand
 from cognee.cli.commands.config_command import ConfigCommand
-from cognee.cli.exceptions import CliCommandException
+from cognee.cli.exceptions import CliCommandException
+from cognee.modules.data.methods.get_deletion_counts import DeletionCountsPreview
 
 
 # Mock asyncio.run to properly handle coroutines
@@ -282,13 +283,18 @@ class TestDeleteCommand:
         assert "all" in actions
         assert "force" in actions
 
+    @patch("cognee.cli.commands.delete_command.get_deletion_counts")
     @patch("cognee.cli.commands.delete_command.fmt.confirm")
     @patch("cognee.cli.commands.delete_command.asyncio.run", side_effect=_mock_run)
-    def test_execute_delete_dataset_with_confirmation(
+    def test_execute_delete_dataset_with_confirmation(
+        self, mock_asyncio_run, mock_confirm, mock_get_deletion_counts
+    ):
         """Test execute delete dataset with user confirmation"""
         # Mock the cognee module
         mock_cognee = MagicMock()
         mock_cognee.delete = AsyncMock()
+        mock_get_deletion_counts = AsyncMock()
+        mock_get_deletion_counts.return_value = DeletionCountsPreview()
 
         with patch.dict(sys.modules, {"cognee": mock_cognee}):
             command = DeleteCommand()
@@ -301,13 +307,16 @@ class TestDeleteCommand:
             command.execute(args)
 
             mock_confirm.assert_called_once_with(f"Delete dataset '{args.dataset_name}'?")
-            mock_asyncio_run.
+            assert mock_asyncio_run.call_count == 2
             assert asyncio.iscoroutine(mock_asyncio_run.call_args[0][0])
             mock_cognee.delete.assert_awaited_once_with(dataset_name="test_dataset", user_id=None)
 
+    @patch("cognee.cli.commands.delete_command.get_deletion_counts")
     @patch("cognee.cli.commands.delete_command.fmt.confirm")
-    def test_execute_delete_cancelled(self, mock_confirm):
+    def test_execute_delete_cancelled(self, mock_confirm, mock_get_deletion_counts):
         """Test execute when user cancels deletion"""
+        mock_get_deletion_counts = AsyncMock()
+        mock_get_deletion_counts.return_value = DeletionCountsPreview()
         command = DeleteCommand()
         args = argparse.Namespace(dataset_name="test_dataset", user_id=None, all=False, force=False)
 
```
cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py CHANGED
```diff
@@ -13,6 +13,7 @@ from cognee.cli.commands.cognify_command import CognifyCommand
 from cognee.cli.commands.delete_command import DeleteCommand
 from cognee.cli.commands.config_command import ConfigCommand
 from cognee.cli.exceptions import CliCommandException, CliCommandInnerException
+from cognee.modules.data.methods.get_deletion_counts import DeletionCountsPreview
 
 
 # Mock asyncio.run to properly handle coroutines
@@ -378,13 +379,18 @@ class TestCognifyCommandEdgeCases:
 class TestDeleteCommandEdgeCases:
     """Test edge cases for DeleteCommand"""
 
+    @patch("cognee.cli.commands.delete_command.get_deletion_counts")
     @patch("cognee.cli.commands.delete_command.fmt.confirm")
     @patch("cognee.cli.commands.delete_command.asyncio.run", side_effect=_mock_run)
-    def test_delete_all_with_user_id(
+    def test_delete_all_with_user_id(
+        self, mock_asyncio_run, mock_confirm, mock_get_deletion_counts
+    ):
         """Test delete command with both --all and --user-id"""
         # Mock the cognee module
         mock_cognee = MagicMock()
         mock_cognee.delete = AsyncMock()
+        mock_get_deletion_counts = AsyncMock()
+        mock_get_deletion_counts.return_value = DeletionCountsPreview()
 
         with patch.dict(sys.modules, {"cognee": mock_cognee}):
             command = DeleteCommand()
@@ -396,13 +402,17 @@ class TestDeleteCommandEdgeCases:
             command.execute(args)
 
             mock_confirm.assert_called_once_with("Delete ALL data from cognee?")
-            mock_asyncio_run.
+            assert mock_asyncio_run.call_count == 2
             assert asyncio.iscoroutine(mock_asyncio_run.call_args[0][0])
             mock_cognee.delete.assert_awaited_once_with(dataset_name=None, user_id="test_user")
 
+    @patch("cognee.cli.commands.delete_command.get_deletion_counts")
     @patch("cognee.cli.commands.delete_command.fmt.confirm")
-    def test_delete_confirmation_keyboard_interrupt(self, mock_confirm):
+    def test_delete_confirmation_keyboard_interrupt(self, mock_confirm, mock_get_deletion_counts):
         """Test delete command when user interrupts confirmation"""
+        mock_get_deletion_counts = AsyncMock()
+        mock_get_deletion_counts.return_value = DeletionCountsPreview()
+
         command = DeleteCommand()
         args = argparse.Namespace(dataset_name="test_dataset", user_id=None, all=False, force=False)
 
```
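Both test modules stub the same new dependency: DeleteCommand now previews deletion counts before confirming. A minimal sketch of that setup for downstream test suites, assuming only what the hunks show (`DeletionCountsPreview` constructs with defaults):

```python
from unittest.mock import AsyncMock, patch

from cognee.modules.data.methods.get_deletion_counts import DeletionCountsPreview

with patch(
    "cognee.cli.commands.delete_command.get_deletion_counts",
    new_callable=AsyncMock,
) as mock_counts:
    mock_counts.return_value = DeletionCountsPreview()
    # ... drive DeleteCommand().execute(args) here, as in the tests above
```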
cognee/tests/test_advanced_pdf_loader.py ADDED
```diff
@@ -0,0 +1,141 @@
+import sys
+from unittest.mock import patch, MagicMock, AsyncMock, mock_open
+import pytest
+
+from cognee.infrastructure.loaders.external.advanced_pdf_loader import AdvancedPdfLoader
+
+advanced_pdf_loader_module = sys.modules.get(
+    "cognee.infrastructure.loaders.external.advanced_pdf_loader"
+)
+
+
+class MockElement:
+    def __init__(self, category, text, metadata):
+        self.category = category
+        self.text = text
+        self.metadata = metadata
+
+    def to_dict(self):
+        return {
+            "type": self.category,
+            "text": self.text,
+            "metadata": self.metadata,
+        }
+
+
+@pytest.fixture
+def loader():
+    return AdvancedPdfLoader()
+
+
+@pytest.mark.parametrize(
+    "extension, mime_type, expected",
+    [
+        ("pdf", "application/pdf", True),
+        ("txt", "text/plain", False),
+        ("pdf", "text/plain", False),
+        ("doc", "application/pdf", False),
+    ],
+)
+def test_can_handle(loader, extension, mime_type, expected):
+    """Test can_handle method can correctly identify PDF files"""
+    assert loader.can_handle(extension, mime_type) == expected
+
+
+@pytest.mark.asyncio
+@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.open", new_callable=mock_open)
+@patch(
+    "cognee.infrastructure.loaders.external.advanced_pdf_loader.get_file_metadata",
+    new_callable=AsyncMock,
+)
+@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.get_storage_config")
+@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.get_file_storage")
+@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.PyPdfLoader")
+@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.partition_pdf")
+async def test_load_success_with_unstructured(
+    mock_partition_pdf,
+    mock_pypdf_loader,
+    mock_get_file_storage,
+    mock_get_storage_config,
+    mock_get_file_metadata,
+    mock_open,
+    loader,
+):
+    """Test the main flow of using unstructured to successfully process PDF"""
+    # Prepare Mock data and objects
+    mock_elements = [
+        MockElement(
+            category="Title", text="Attention Is All You Need", metadata={"page_number": 1}
+        ),
+        MockElement(
+            category="NarrativeText",
+            text="The dominant sequence transduction models are based on complex recurrent or convolutional neural networks.",
+            metadata={"page_number": 1},
+        ),
+        MockElement(
+            category="Table",
+            text="This is a table.",
+            metadata={"page_number": 2, "text_as_html": "<table><tr><td>Data</td></tr></table>"},
+        ),
+    ]
+    mock_pypdf_loader.return_value.load = AsyncMock(return_value="/fake/path/fallback.txt")
+    mock_partition_pdf.return_value = mock_elements
+    mock_get_file_metadata.return_value = {"content_hash": "abc123def456"}
+
+    mock_storage_instance = MagicMock()
+    mock_storage_instance.store = AsyncMock(return_value="/stored/text_abc123def456.txt")
+    mock_get_file_storage.return_value = mock_storage_instance
+
+    mock_get_storage_config.return_value = {"data_root_directory": "/fake/data/root"}
+    test_file_path = "/fake/path/document.pdf"
+
+    # Run
+
+    result_path = await loader.load(test_file_path)
+
+    # Assert
+    assert result_path == "/stored/text_abc123def456.txt"
+
+    # Verify partition_pdf is called with the correct parameters
+    mock_partition_pdf.assert_called_once()
+    call_args, call_kwargs = mock_partition_pdf.call_args
+    assert call_kwargs.get("filename") == test_file_path
+    assert call_kwargs.get("strategy") == "auto"  # Default strategy
+
+    # Verify the stored content is correct
+    expected_content = "Page 1:\nAttention Is All You Need\n\nThe dominant sequence transduction models are based on complex recurrent or convolutional neural networks.\n\nPage 2:\n<table><tr><td>Data</td></tr></table>\n"
+    mock_storage_instance.store.assert_awaited_once_with("text_abc123def456.txt", expected_content)
+
+    # Verify fallback is not called
+    mock_pypdf_loader.assert_not_called()
+
+
+@pytest.mark.asyncio
+@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.open", new_callable=mock_open)
+@patch(
+    "cognee.infrastructure.loaders.external.advanced_pdf_loader.get_file_metadata",
+    new_callable=AsyncMock,
+)
+@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.PyPdfLoader")
+@patch(
+    "cognee.infrastructure.loaders.external.advanced_pdf_loader.partition_pdf",
+    side_effect=Exception("Unstructured failed!"),
+)
+async def test_load_fallback_on_unstructured_exception(
+    mock_partition_pdf, mock_pypdf_loader, mock_get_file_metadata, mock_open, loader
+):
+    """Test fallback to PyPdfLoader when unstructured throws an exception"""
+    # Prepare Mock
+    mock_fallback_instance = MagicMock()
+    mock_fallback_instance.load = AsyncMock(return_value="/fake/path/fallback.txt")
+    mock_pypdf_loader.return_value = mock_fallback_instance
+    mock_get_file_metadata.return_value = {"content_hash": "anyhash"}
+    test_file_path = "/fake/path/document.pdf"
+
+    # Run
+    result_path = await loader.load(test_file_path)
+
+    # Assert
+    assert result_path == "/fake/path/fallback.txt"
+    mock_partition_pdf.assert_called_once()  # Verify partition_pdf is called
+    mock_fallback_instance.load.assert_awaited_once_with(test_file_path)
```
cognee/tests/test_chromadb.py CHANGED
```diff
@@ -67,6 +67,44 @@ async def test_getting_of_documents(dataset_name_1):
     )
 
 
+async def test_vector_engine_search_none_limit():
+    file_path_quantum = os.path.join(
+        pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
+    )
+
+    file_path_nlp = os.path.join(
+        pathlib.Path(__file__).parent,
+        "test_data/Natural_language_processing.txt",
+    )
+
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    await cognee.add(file_path_quantum)
+
+    await cognee.add(file_path_nlp)
+
+    await cognee.cognify()
+
+    query_text = "Tell me about Quantum computers"
+
+    from cognee.infrastructure.databases.vector import get_vector_engine
+
+    vector_engine = get_vector_engine()
+
+    collection_name = "Entity_name"
+
+    query_vector = (await vector_engine.embedding_engine.embed_text([query_text]))[0]
+
+    result = await vector_engine.search(
+        collection_name=collection_name, query_vector=query_vector, limit=None
+    )
+
+    # Check that we did not accidentally use any default value for limit
+    # in vector search along the way (like 5, 10, or 15)
+    assert len(result) > 15
+
+
 async def main():
     cognee.config.set_vector_db_config(
         {
@@ -165,6 +203,8 @@ async def main():
     tables_in_database = await vector_engine.get_collection_names()
     assert len(tables_in_database) == 0, "ChromaDB database is not empty"
 
+    await test_vector_engine_search_none_limit()
+
 
 if __name__ == "__main__":
     import asyncio
```
cognee/tests/test_cognee_server_start.py CHANGED
```diff
@@ -41,7 +41,12 @@ class TestCogneeServerStart(unittest.TestCase):
     def tearDownClass(cls):
         # Terminate the server process
         if hasattr(cls, "server_process") and cls.server_process:
-
+            if hasattr(os, "killpg"):
+                # Unix-like systems: Use process groups
+                os.killpg(os.getpgid(cls.server_process.pid), signal.SIGTERM)
+            else:
+                # Windows: Just terminate the main process
+                cls.server_process.terminate()
             cls.server_process.wait()
 
     def test_server_is_running(self):
```
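os.killpg only reaches the whole server tree when the child was started in its own process group. setUpClass is not part of this hunk, so the pairing below is an assumed sketch with a stand-in child process, not the test's actual startup code:

```python
import os
import signal
import subprocess
import sys

posix = hasattr(os, "killpg")
proc = subprocess.Popen(
    [sys.executable, "-c", "import time; time.sleep(60)"],  # stand-in server
    start_new_session=posix,  # POSIX: place the child in its own process group
)
if posix:
    os.killpg(os.getpgid(proc.pid), signal.SIGTERM)  # signal the whole group
else:
    proc.terminate()  # Windows: terminate the main process only
proc.wait()
```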
cognee/tests/test_data/Quantum_computers.txt ADDED
```diff
@@ -0,0 +1,9 @@
+A quantum computer is a computer that takes advantage of quantum mechanical phenomena.
+At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states.
+Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible.
+The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly.
+Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate.
+In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited.
+Emerging error-correcting codes aim to mitigate decoherence effects and are expected to pave the way for fault-tolerant quantum processors. Laboratories across the globe are investigating diverse qubit implementations, such as superconducting circuits, trapped ions, neutral atoms, and photonic systems. Significant government funding and private investment have created an ecosystem of startups and consortia focused on accelerating quantum hardware and software development. Universities are meanwhile launching interdisciplinary programs that teach physics, computer science, and engineering concepts necessary for tomorrow's quantum workforce. Establishing reliable benchmarking standards will be essential for objectively comparing devices and charting realistic milestones toward practical quantum advantage.
+Industry roadmaps anticipate that achieving error rates below the threshold for surface codes will require millions of physical qubits per logical qubit, highlighting daunting scale challenges. Researchers are therefore exploring hardware–software co-design strategies, where algorithmic breakthroughs and device engineering progress hand in hand to minimize overhead. Hybrid quantum-classical workflows, exemplified by variational algorithms running on near-term devices, offer a pragmatic path to extracting value before full fault tolerance arrives. Meanwhile, cryptographers are advancing post-quantum encryption schemes to safeguard information in a future where Shor’s algorithm becomes practical. The interplay between theoretical advances, experimental ingenuity, and policy considerations will ultimately determine how transformative quantum computing becomes for science, industry, and society.
+Collaborative open-source toolkits are lowering the barrier to entry for developers eager to prototype quantum algorithms and simulate small devices on classical hardware. As these software frameworks mature, they will foster standardization of gate libraries, circuit optimization passes, and error-mitigation techniques. At the same time, advances in cryogenic engineering, vacuum systems, and photonics are steadily improving the stability and manufacturability of next-generation qubit platforms. Policymakers are beginning to craft export controls and ethical guidelines aimed at preventing misuse while encouraging international collaboration in fundamental research. Ultimately, the success of quantum technology will hinge on integrating robust hardware, intelligent software, and a skilled workforce within an environment of responsible governance.
```
cognee/tests/test_lancedb.py ADDED
```diff
@@ -0,0 +1,211 @@
+import os
+import pathlib
+
+import cognee
+from cognee.shared.logging_utils import get_logger
+from cognee.infrastructure.files.storage import get_storage_config
+from cognee.modules.data.models import Data
+from cognee.modules.users.methods import get_default_user
+from cognee.modules.search.types import SearchType
+from cognee.modules.search.operations import get_history
+
+logger = get_logger()
+
+
+async def test_local_file_deletion(data_text, file_location):
+    from sqlalchemy import select
+    import hashlib
+    from cognee.infrastructure.databases.relational import get_relational_engine
+
+    engine = get_relational_engine()
+
+    async with engine.get_async_session() as session:
+        # Get hash of data contents
+        encoded_text = data_text.encode("utf-8")
+        data_hash = hashlib.md5(encoded_text).hexdigest()
+        # Get data entry from database based on hash contents
+        data = (await session.scalars(select(Data).where(Data.content_hash == data_hash))).one()
+        assert os.path.isfile(data.raw_data_location.replace("file://", "")), (
+            f"Data location doesn't exist: {data.raw_data_location}"
+        )
+        # Test deletion of data along with local files created by cognee
+        await engine.delete_data_entity(data.id)
+        assert not os.path.exists(data.raw_data_location.replace("file://", "")), (
+            f"Data location still exists after deletion: {data.raw_data_location}"
+        )
+
+    async with engine.get_async_session() as session:
+        # Get data entry from database based on file path
+        data = (
+            await session.scalars(select(Data).where(Data.raw_data_location == file_location))
+        ).one()
+        assert os.path.isfile(data.raw_data_location.replace("file://", "")), (
+            f"Data location doesn't exist: {data.raw_data_location}"
+        )
+        # Test local files not created by cognee won't get deleted
+        await engine.delete_data_entity(data.id)
+        assert os.path.exists(data.raw_data_location.replace("file://", "")), (
+            f"Data location doesn't exists: {data.raw_data_location}"
+        )
+
+
+async def test_getting_of_documents(dataset_name_1):
+    # Test getting of documents for search per dataset
+    from cognee.modules.users.permissions.methods import get_document_ids_for_user
+
+    user = await get_default_user()
+    document_ids = await get_document_ids_for_user(user.id, [dataset_name_1])
+    assert len(document_ids) == 1, (
+        f"Number of expected documents doesn't match {len(document_ids)} != 1"
+    )
+
+    # Test getting of documents for search when no dataset is provided
+    user = await get_default_user()
+    document_ids = await get_document_ids_for_user(user.id)
+    assert len(document_ids) == 2, (
+        f"Number of expected documents doesn't match {len(document_ids)} != 2"
+    )
+
+
+async def test_vector_engine_search_none_limit():
+    file_path_quantum = os.path.join(
+        pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
+    )
+
+    file_path_nlp = os.path.join(
+        pathlib.Path(__file__).parent,
+        "test_data/Natural_language_processing.txt",
+    )
+
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    await cognee.add(file_path_quantum)
+
+    await cognee.add(file_path_nlp)
+
+    await cognee.cognify()
+
+    query_text = "Tell me about Quantum computers"
+
+    from cognee.infrastructure.databases.vector import get_vector_engine
+
+    vector_engine = get_vector_engine()
+
+    collection_name = "Entity_name"
+
+    query_vector = (await vector_engine.embedding_engine.embed_text([query_text]))[0]
+
+    result = await vector_engine.search(
+        collection_name=collection_name, query_vector=query_vector, limit=None
+    )
+
+    # Check that we did not accidentally use any default value for limit
+    # in vector search along the way (like 5, 10, or 15)
+    assert len(result) > 15
+
+
+async def main():
+    cognee.config.set_vector_db_config(
+        {
+            "vector_db_provider": "lancedb",
+        }
+    )
+
+    data_directory_path = str(
+        pathlib.Path(
+            os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_lancedb")
+        ).resolve()
+    )
+    cognee.config.data_root_directory(data_directory_path)
+    cognee_directory_path = str(
+        pathlib.Path(
+            os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_lancedb")
+        ).resolve()
+    )
+    cognee.config.system_root_directory(cognee_directory_path)
+
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    dataset_name_1 = "natural_language"
+    dataset_name_2 = "quantum"
+
+    explanation_file_path = os.path.join(
+        pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
+    )
+    await cognee.add([explanation_file_path], dataset_name_1)
+
+    text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena.
+    At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states.
+    Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible.
+    The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly.
+    Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate.
+    In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited.
+    """
+
+    await cognee.add([text], dataset_name_2)
+
+    await cognee.cognify([dataset_name_2, dataset_name_1])
+
+    from cognee.infrastructure.databases.vector import get_vector_engine
+
+    await test_getting_of_documents(dataset_name_1)
+
+    vector_engine = get_vector_engine()
+    random_node = (await vector_engine.search("Entity_name", "Quantum computer"))[0]
+    random_node_name = random_node.payload["text"]
+
+    search_results = await cognee.search(
+        query_type=SearchType.INSIGHTS, query_text=random_node_name
+    )
+    assert len(search_results) != 0, "The search results list is empty."
+    print("\n\nExtracted sentences are:\n")
+    for result in search_results:
+        print(f"{result}\n")
+
+    search_results = await cognee.search(
+        query_type=SearchType.CHUNKS, query_text=random_node_name, datasets=[dataset_name_2]
+    )
+    assert len(search_results) != 0, "The search results list is empty."
+    print("\n\nExtracted chunks are:\n")
+    for result in search_results:
+        print(f"{result}\n")
+
+    graph_completion = await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION,
+        query_text=random_node_name,
+        datasets=[dataset_name_2],
+    )
+    assert len(graph_completion) != 0, "Completion result is empty."
+    print("Completion result is:")
+    print(graph_completion)
+
+    search_results = await cognee.search(
+        query_type=SearchType.SUMMARIES, query_text=random_node_name
+    )
+    assert len(search_results) != 0, "Query related summaries don't exist."
+    print("\n\nExtracted summaries are:\n")
+    for result in search_results:
+        print(f"{result}\n")
+
+    user = await get_default_user()
+    history = await get_history(user.id)
+    assert len(history) == 8, "Search history is not correct."
+
+    await cognee.prune.prune_data()
+    data_root_directory = get_storage_config()["data_root_directory"]
+    assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
+
+    await cognee.prune.prune_system(metadata=True)
+    connection = await vector_engine.get_connection()
+    tables_in_database = await connection.table_names()
+    assert len(tables_in_database) == 0, "LanceDB database is not empty"
+
+    await test_vector_engine_search_none_limit()
+
+
+if __name__ == "__main__":
+    import asyncio
+
+    asyncio.run(main())
```
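The test_vector_engine_search_none_limit function added to both test_chromadb.py and test_lancedb.py pins down one contract for every adapter: limit=None must mean "return everything", never a silent default. Restated as a toy sketch of those semantics (clamp_results is illustrative, not a cognee API):

```python
from typing import Optional


def clamp_results(results: list, limit: Optional[int]) -> list:
    # None passes everything through; only an explicit int truncates.
    return results if limit is None else results[:limit]


assert len(clamp_results(list(range(20)), None)) == 20
assert len(clamp_results(list(range(20)), 5)) == 5
```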