cognee 0.3.4.dev3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186)
  1. cognee/api/client.py +16 -7
  2. cognee/api/health.py +5 -9
  3. cognee/api/v1/add/add.py +3 -1
  4. cognee/api/v1/cognify/cognify.py +44 -7
  5. cognee/api/v1/permissions/routers/get_permissions_router.py +8 -4
  6. cognee/api/v1/search/search.py +3 -0
  7. cognee/api/v1/ui/__init__.py +1 -1
  8. cognee/api/v1/ui/ui.py +215 -150
  9. cognee/api/v1/update/__init__.py +1 -0
  10. cognee/api/v1/update/routers/__init__.py +1 -0
  11. cognee/api/v1/update/routers/get_update_router.py +90 -0
  12. cognee/api/v1/update/update.py +100 -0
  13. cognee/base_config.py +5 -2
  14. cognee/cli/_cognee.py +28 -10
  15. cognee/cli/commands/delete_command.py +34 -2
  16. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
  17. cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +3 -2
  18. cognee/eval_framework/modal_eval_dashboard.py +9 -1
  19. cognee/infrastructure/databases/graph/config.py +9 -9
  20. cognee/infrastructure/databases/graph/get_graph_engine.py +4 -21
  21. cognee/infrastructure/databases/graph/kuzu/adapter.py +60 -9
  22. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +3 -3
  23. cognee/infrastructure/databases/relational/config.py +4 -4
  24. cognee/infrastructure/databases/relational/create_relational_engine.py +11 -3
  25. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +7 -3
  26. cognee/infrastructure/databases/vector/config.py +7 -7
  27. cognee/infrastructure/databases/vector/create_vector_engine.py +7 -15
  28. cognee/infrastructure/databases/vector/embeddings/EmbeddingEngine.py +9 -0
  29. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +11 -0
  30. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +19 -2
  31. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -0
  32. cognee/infrastructure/databases/vector/embeddings/config.py +8 -0
  33. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +5 -0
  34. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +11 -10
  35. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +48 -38
  36. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -4
  37. cognee/infrastructure/files/storage/S3FileStorage.py +15 -5
  38. cognee/infrastructure/files/storage/s3_config.py +1 -0
  39. cognee/infrastructure/files/utils/open_data_file.py +7 -14
  40. cognee/infrastructure/llm/LLMGateway.py +19 -117
  41. cognee/infrastructure/llm/config.py +28 -13
  42. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_categories.py +2 -1
  43. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_event_entities.py +3 -2
  44. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_summary.py +3 -2
  45. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_content_graph.py +2 -1
  46. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_event_graph.py +3 -2
  47. cognee/infrastructure/llm/prompts/read_query_prompt.py +3 -2
  48. cognee/infrastructure/llm/prompts/show_prompt.py +35 -0
  49. cognee/infrastructure/llm/prompts/test.txt +1 -0
  50. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +2 -2
  51. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +50 -397
  52. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +2 -3
  53. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +8 -88
  54. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +78 -0
  55. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +2 -99
  56. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +49 -401
  57. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +19 -882
  58. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +2 -34
  59. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +2 -107
  60. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/acreate_structured_output.baml +26 -0
  61. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/__init__.py +1 -2
  62. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +76 -0
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/create_dynamic_baml_type.py +122 -0
  64. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +3 -3
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +0 -32
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +107 -98
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +5 -6
  68. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +5 -6
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +0 -26
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +17 -67
  71. cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +8 -7
  72. cognee/infrastructure/llm/utils.py +4 -4
  73. cognee/infrastructure/loaders/LoaderEngine.py +5 -2
  74. cognee/infrastructure/loaders/external/__init__.py +7 -0
  75. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +244 -0
  76. cognee/infrastructure/loaders/supported_loaders.py +7 -0
  77. cognee/modules/data/methods/create_authorized_dataset.py +9 -0
  78. cognee/modules/data/methods/get_authorized_dataset.py +1 -1
  79. cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
  80. cognee/modules/data/methods/get_deletion_counts.py +92 -0
  81. cognee/modules/graph/cognee_graph/CogneeGraph.py +1 -1
  82. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
  83. cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
  84. cognee/modules/ingestion/data_types/TextData.py +0 -1
  85. cognee/modules/notebooks/methods/create_notebook.py +3 -1
  86. cognee/modules/notebooks/methods/get_notebooks.py +27 -1
  87. cognee/modules/observability/get_observe.py +14 -0
  88. cognee/modules/observability/observers.py +1 -0
  89. cognee/modules/ontology/base_ontology_resolver.py +42 -0
  90. cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
  91. cognee/modules/ontology/matching_strategies.py +53 -0
  92. cognee/modules/ontology/models.py +20 -0
  93. cognee/modules/ontology/ontology_config.py +24 -0
  94. cognee/modules/ontology/ontology_env_config.py +45 -0
  95. cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
  96. cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +21 -24
  97. cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +3 -3
  98. cognee/modules/retrieval/code_retriever.py +2 -1
  99. cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -4
  100. cognee/modules/retrieval/graph_completion_cot_retriever.py +6 -5
  101. cognee/modules/retrieval/graph_completion_retriever.py +0 -3
  102. cognee/modules/retrieval/insights_retriever.py +1 -1
  103. cognee/modules/retrieval/jaccard_retrival.py +60 -0
  104. cognee/modules/retrieval/lexical_retriever.py +123 -0
  105. cognee/modules/retrieval/natural_language_retriever.py +2 -1
  106. cognee/modules/retrieval/temporal_retriever.py +3 -2
  107. cognee/modules/retrieval/utils/brute_force_triplet_search.py +2 -12
  108. cognee/modules/retrieval/utils/completion.py +4 -7
  109. cognee/modules/search/methods/get_search_type_tools.py +7 -0
  110. cognee/modules/search/methods/no_access_control_search.py +1 -1
  111. cognee/modules/search/methods/search.py +32 -13
  112. cognee/modules/search/types/SearchType.py +1 -0
  113. cognee/modules/users/methods/create_user.py +0 -2
  114. cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
  115. cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
  116. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +10 -0
  117. cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
  118. cognee/modules/users/permissions/methods/get_principal.py +9 -0
  119. cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
  120. cognee/modules/users/permissions/methods/get_role.py +10 -0
  121. cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
  122. cognee/modules/users/permissions/methods/get_tenant.py +9 -0
  123. cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
  124. cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
  125. cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
  126. cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
  127. cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
  128. cognee/modules/users/roles/methods/create_role.py +12 -1
  129. cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
  130. cognee/modules/users/tenants/methods/create_tenant.py +12 -1
  131. cognee/modules/visualization/cognee_network_visualization.py +13 -9
  132. cognee/shared/data_models.py +0 -1
  133. cognee/shared/utils.py +0 -32
  134. cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
  135. cognee/tasks/codingagents/coding_rule_associations.py +3 -2
  136. cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +3 -2
  137. cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +3 -2
  138. cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +3 -2
  139. cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +3 -2
  140. cognee/tasks/graph/extract_graph_from_code.py +2 -2
  141. cognee/tasks/graph/extract_graph_from_data.py +55 -12
  142. cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
  143. cognee/tasks/ingestion/migrate_relational_database.py +132 -41
  144. cognee/tasks/ingestion/resolve_data_directories.py +4 -1
  145. cognee/tasks/schema/ingest_database_schema.py +134 -0
  146. cognee/tasks/schema/models.py +40 -0
  147. cognee/tasks/storage/index_data_points.py +1 -1
  148. cognee/tasks/storage/index_graph_edges.py +3 -1
  149. cognee/tasks/summarization/summarize_code.py +2 -2
  150. cognee/tasks/summarization/summarize_text.py +2 -2
  151. cognee/tasks/temporal_graph/enrich_events.py +2 -2
  152. cognee/tasks/temporal_graph/extract_events_and_entities.py +2 -2
  153. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +13 -4
  154. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +13 -3
  155. cognee/tests/test_advanced_pdf_loader.py +141 -0
  156. cognee/tests/test_chromadb.py +40 -0
  157. cognee/tests/test_cognee_server_start.py +6 -1
  158. cognee/tests/test_data/Quantum_computers.txt +9 -0
  159. cognee/tests/test_lancedb.py +211 -0
  160. cognee/tests/test_pgvector.py +40 -0
  161. cognee/tests/test_relational_db_migration.py +76 -0
  162. cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +2 -1
  163. cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
  164. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +0 -4
  165. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -4
  166. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +0 -4
  167. {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/METADATA +92 -96
  168. {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/RECORD +176 -162
  169. distributed/pyproject.toml +0 -1
  170. cognee/infrastructure/data/utils/extract_keywords.py +0 -48
  171. cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +0 -1227
  172. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +0 -109
  173. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +0 -343
  174. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_categories.py +0 -0
  175. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +0 -89
  176. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/__init__.py +0 -0
  177. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +0 -44
  178. cognee/tasks/graph/infer_data_ontology.py +0 -309
  179. cognee/tests/test_falkordb.py +0 -174
  180. /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/__init__.py +0 -0
  181. /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/__init__.py +0 -0
  182. /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/texts.json +0 -0
  183. {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/WHEEL +0 -0
  184. {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/entry_points.txt +0 -0
  185. {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/licenses/LICENSE +0 -0
  186. {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/licenses/NOTICE.md +0 -0
cognee/tests/test_lancedb.py (new file)
@@ -0,0 +1,211 @@
+ import os
+ import pathlib
+
+ import cognee
+ from cognee.shared.logging_utils import get_logger
+ from cognee.infrastructure.files.storage import get_storage_config
+ from cognee.modules.data.models import Data
+ from cognee.modules.users.methods import get_default_user
+ from cognee.modules.search.types import SearchType
+ from cognee.modules.search.operations import get_history
+
+ logger = get_logger()
+
+
+ async def test_local_file_deletion(data_text, file_location):
+     from sqlalchemy import select
+     import hashlib
+     from cognee.infrastructure.databases.relational import get_relational_engine
+
+     engine = get_relational_engine()
+
+     async with engine.get_async_session() as session:
+         # Get hash of data contents
+         encoded_text = data_text.encode("utf-8")
+         data_hash = hashlib.md5(encoded_text).hexdigest()
+         # Get data entry from database based on hash contents
+         data = (await session.scalars(select(Data).where(Data.content_hash == data_hash))).one()
+         assert os.path.isfile(data.raw_data_location.replace("file://", "")), (
+             f"Data location doesn't exist: {data.raw_data_location}"
+         )
+         # Test deletion of data along with local files created by cognee
+         await engine.delete_data_entity(data.id)
+         assert not os.path.exists(data.raw_data_location.replace("file://", "")), (
+             f"Data location still exists after deletion: {data.raw_data_location}"
+         )
+
+     async with engine.get_async_session() as session:
+         # Get data entry from database based on file path
+         data = (
+             await session.scalars(select(Data).where(Data.raw_data_location == file_location))
+         ).one()
+         assert os.path.isfile(data.raw_data_location.replace("file://", "")), (
+             f"Data location doesn't exist: {data.raw_data_location}"
+         )
+         # Test local files not created by cognee won't get deleted
+         await engine.delete_data_entity(data.id)
+         assert os.path.exists(data.raw_data_location.replace("file://", "")), (
+             f"Data location doesn't exists: {data.raw_data_location}"
+         )
+
+
+ async def test_getting_of_documents(dataset_name_1):
+     # Test getting of documents for search per dataset
+     from cognee.modules.users.permissions.methods import get_document_ids_for_user
+
+     user = await get_default_user()
+     document_ids = await get_document_ids_for_user(user.id, [dataset_name_1])
+     assert len(document_ids) == 1, (
+         f"Number of expected documents doesn't match {len(document_ids)} != 1"
+     )
+
+     # Test getting of documents for search when no dataset is provided
+     user = await get_default_user()
+     document_ids = await get_document_ids_for_user(user.id)
+     assert len(document_ids) == 2, (
+         f"Number of expected documents doesn't match {len(document_ids)} != 2"
+     )
+
+
+ async def test_vector_engine_search_none_limit():
+     file_path_quantum = os.path.join(
+         pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
+     )
+
+     file_path_nlp = os.path.join(
+         pathlib.Path(__file__).parent,
+         "test_data/Natural_language_processing.txt",
+     )
+
+     await cognee.prune.prune_data()
+     await cognee.prune.prune_system(metadata=True)
+
+     await cognee.add(file_path_quantum)
+
+     await cognee.add(file_path_nlp)
+
+     await cognee.cognify()
+
+     query_text = "Tell me about Quantum computers"
+
+     from cognee.infrastructure.databases.vector import get_vector_engine
+
+     vector_engine = get_vector_engine()
+
+     collection_name = "Entity_name"
+
+     query_vector = (await vector_engine.embedding_engine.embed_text([query_text]))[0]
+
+     result = await vector_engine.search(
+         collection_name=collection_name, query_vector=query_vector, limit=None
+     )
+
+     # Check that we did not accidentally use any default value for limit
+     # in vector search along the way (like 5, 10, or 15)
+     assert len(result) > 15
+
+
+ async def main():
+     cognee.config.set_vector_db_config(
+         {
+             "vector_db_provider": "lancedb",
+         }
+     )
+
+     data_directory_path = str(
+         pathlib.Path(
+             os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_lancedb")
+         ).resolve()
+     )
+     cognee.config.data_root_directory(data_directory_path)
+     cognee_directory_path = str(
+         pathlib.Path(
+             os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_lancedb")
+         ).resolve()
+     )
+     cognee.config.system_root_directory(cognee_directory_path)
+
+     await cognee.prune.prune_data()
+     await cognee.prune.prune_system(metadata=True)
+
+     dataset_name_1 = "natural_language"
+     dataset_name_2 = "quantum"
+
+     explanation_file_path = os.path.join(
+         pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
+     )
+     await cognee.add([explanation_file_path], dataset_name_1)
+
+     text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena.
+     At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states.
+     Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible.
+     The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly.
+     Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate.
+     In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited.
+     """
+
+     await cognee.add([text], dataset_name_2)
+
+     await cognee.cognify([dataset_name_2, dataset_name_1])
+
+     from cognee.infrastructure.databases.vector import get_vector_engine
+
+     await test_getting_of_documents(dataset_name_1)
+
+     vector_engine = get_vector_engine()
+     random_node = (await vector_engine.search("Entity_name", "Quantum computer"))[0]
+     random_node_name = random_node.payload["text"]
+
+     search_results = await cognee.search(
+         query_type=SearchType.INSIGHTS, query_text=random_node_name
+     )
+     assert len(search_results) != 0, "The search results list is empty."
+     print("\n\nExtracted sentences are:\n")
+     for result in search_results:
+         print(f"{result}\n")
+
+     search_results = await cognee.search(
+         query_type=SearchType.CHUNKS, query_text=random_node_name, datasets=[dataset_name_2]
+     )
+     assert len(search_results) != 0, "The search results list is empty."
+     print("\n\nExtracted chunks are:\n")
+     for result in search_results:
+         print(f"{result}\n")
+
+     graph_completion = await cognee.search(
+         query_type=SearchType.GRAPH_COMPLETION,
+         query_text=random_node_name,
+         datasets=[dataset_name_2],
+     )
+     assert len(graph_completion) != 0, "Completion result is empty."
+     print("Completion result is:")
+     print(graph_completion)
+
+     search_results = await cognee.search(
+         query_type=SearchType.SUMMARIES, query_text=random_node_name
+     )
+     assert len(search_results) != 0, "Query related summaries don't exist."
+     print("\n\nExtracted summaries are:\n")
+     for result in search_results:
+         print(f"{result}\n")
+
+     user = await get_default_user()
+     history = await get_history(user.id)
+     assert len(history) == 8, "Search history is not correct."
+
+     await cognee.prune.prune_data()
+     data_root_directory = get_storage_config()["data_root_directory"]
+     assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
+
+     await cognee.prune.prune_system(metadata=True)
+     connection = await vector_engine.get_connection()
+     tables_in_database = await connection.table_names()
+     assert len(tables_in_database) == 0, "LanceDB database is not empty"
+
+     await test_vector_engine_search_none_limit()
+
+
+ if __name__ == "__main__":
+     import asyncio
+
+     asyncio.run(main())
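The new LanceDB test above exercises dataset-scoped retrieval via the datasets argument of cognee.search. A minimal sketch of that usage pattern, assuming data has already been added and cognified under a dataset named "quantum" (the dataset name and query text here are illustrative, not part of the package):

# Sketch only: dataset-scoped search, mirroring the calls in the test above.
# Assumes cognee is configured and the "quantum" dataset has been cognified.
import asyncio

import cognee
from cognee.modules.search.types import SearchType


async def scoped_search():
    results = await cognee.search(
        query_type=SearchType.CHUNKS,
        query_text="qubit",
        datasets=["quantum"],  # restrict retrieval to a single dataset
    )
    for result in results:
        print(result)


if __name__ == "__main__":
    asyncio.run(scoped_search())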
cognee/tests/test_pgvector.py
@@ -68,6 +68,44 @@ async def test_getting_of_documents(dataset_name_1):
      )


+ async def test_vector_engine_search_none_limit():
+     file_path_quantum = os.path.join(
+         pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
+     )
+
+     file_path_nlp = os.path.join(
+         pathlib.Path(__file__).parent,
+         "test_data/Natural_language_processing.txt",
+     )
+
+     await cognee.prune.prune_data()
+     await cognee.prune.prune_system(metadata=True)
+
+     await cognee.add(file_path_quantum)
+
+     await cognee.add(file_path_nlp)
+
+     await cognee.cognify()
+
+     query_text = "Tell me about Quantum computers"
+
+     from cognee.infrastructure.databases.vector import get_vector_engine
+
+     vector_engine = get_vector_engine()
+
+     collection_name = "Entity_name"
+
+     query_vector = (await vector_engine.embedding_engine.embed_text([query_text]))[0]
+
+     result = await vector_engine.search(
+         collection_name=collection_name, query_vector=query_vector, limit=None
+     )
+
+     # Check that we did not accidentally use any default value for limit
+     # in vector search along the way (like 5, 10, or 15)
+     assert len(result) > 15
+
+
  async def main():
      cognee.config.set_vector_db_config(
          {"vector_db_url": "", "vector_db_key": "", "vector_db_provider": "pgvector"}
@@ -174,6 +212,8 @@ async def main():
      tables_in_database = await vector_engine.get_table_names()
      assert len(tables_in_database) == 0, "PostgreSQL database is not empty"

+     await test_vector_engine_search_none_limit()
+

  if __name__ == "__main__":
      import asyncio
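Both the LanceDB and pgvector versions of test_vector_engine_search_none_limit check the same contract: passing limit=None should return every match rather than a silently applied default cap. A minimal sketch of that contract, assuming a configured vector engine and an existing Entity_name collection (the helper function below is illustrative):

# Sketch only: compare a capped search against limit=None on the same query.
from cognee.infrastructure.databases.vector import get_vector_engine


async def compare_limits(query_text: str):
    vector_engine = get_vector_engine()
    query_vector = (await vector_engine.embedding_engine.embed_text([query_text]))[0]

    capped = await vector_engine.search(
        collection_name="Entity_name", query_vector=query_vector, limit=10
    )
    uncapped = await vector_engine.search(
        collection_name="Entity_name", query_vector=query_vector, limit=None
    )

    # With limit=None the adapter should return all matches, so the
    # uncapped result can never be smaller than the capped one.
    assert len(uncapped) >= len(capped)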
cognee/tests/test_relational_db_migration.py
@@ -197,6 +197,80 @@ async def relational_db_migration():
      print(f"All checks passed for {graph_db_provider} provider with '{relationship_label}' edges!")


+ async def test_schema_only_migration():
+     # 1. Setup test DB and extract schema
+     migration_engine = await setup_test_db()
+     schema = await migration_engine.extract_schema()
+
+     # 2. Setup graph engine
+     graph_engine = await get_graph_engine()
+
+     # 4. Migrate schema only
+     await migrate_relational_database(graph_engine, schema=schema, schema_only=True)
+
+     # 5. Verify number of tables through search
+     search_results = await cognee.search(
+         query_text="How many tables are there in this database",
+         query_type=cognee.SearchType.GRAPH_COMPLETION,
+         top_k=30,
+     )
+     assert any("11" in r for r in search_results), (
+         "Number of tables in the database reported in search_results is either None or not equal to 11"
+     )
+
+     graph_db_provider = os.getenv("GRAPH_DATABASE_PROVIDER", "networkx").lower()
+
+     edge_counts = {
+         "is_part_of": 0,
+         "has_relationship": 0,
+         "foreign_key": 0,
+     }
+
+     if graph_db_provider == "neo4j":
+         for rel_type in edge_counts.keys():
+             query_str = f"""
+             MATCH ()-[r:{rel_type}]->()
+             RETURN count(r) as c
+             """
+             rows = await graph_engine.query(query_str)
+             edge_counts[rel_type] = rows[0]["c"]
+
+     elif graph_db_provider == "kuzu":
+         for rel_type in edge_counts.keys():
+             query_str = f"""
+             MATCH ()-[r:EDGE]->()
+             WHERE r.relationship_name = '{rel_type}'
+             RETURN count(r) as c
+             """
+             rows = await graph_engine.query(query_str)
+             edge_counts[rel_type] = rows[0][0]
+
+     elif graph_db_provider == "networkx":
+         nodes, edges = await graph_engine.get_graph_data()
+         for _, _, key, _ in edges:
+             if key in edge_counts:
+                 edge_counts[key] += 1
+
+     else:
+         raise ValueError(f"Unsupported graph database provider: {graph_db_provider}")
+
+     # 7. Assert counts match expected values
+     expected_counts = {
+         "is_part_of": 11,
+         "has_relationship": 22,
+         "foreign_key": 11,
+     }
+
+     for rel_type, expected in expected_counts.items():
+         actual = edge_counts[rel_type]
+         assert actual == expected, (
+             f"Expected {expected} edges for relationship '{rel_type}', but found {actual}"
+         )
+
+     print("Schema-only migration edge counts validated successfully!")
+     print(f"Edge counts: {edge_counts}")
+
+
  async def test_migration_sqlite():
      database_to_migrate_path = os.path.join(pathlib.Path(__file__).parent, "test_data/")

@@ -209,6 +283,7 @@ async def test_migration_sqlite():
      )

      await relational_db_migration()
+     await test_schema_only_migration()


  async def test_migration_postgres():
@@ -224,6 +299,7 @@ async def test_migration_postgres():
          }
      )
      await relational_db_migration()
+     await test_schema_only_migration()


  async def main():
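The new test_schema_only_migration drives the schema_only=True path added to migrate_relational_database (entry 143 in the file list): table, column, and foreign-key structure is mirrored into the graph without copying row data. A hedged sketch of that call sequence; the import paths follow the module layout in the file list and should be treated as assumptions:

# Sketch only: schema-only migration of a relational database into the graph.
# Assumes `migration_engine` is a cognee relational adapter for the source DB.
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.tasks.ingestion.migrate_relational_database import migrate_relational_database


async def mirror_schema_only(migration_engine):
    schema = await migration_engine.extract_schema()
    graph_engine = await get_graph_engine()

    # schema_only=True creates nodes/edges for tables, columns and foreign keys
    # without ingesting any row-level data.
    await migrate_relational_database(graph_engine, schema=schema, schema_only=True)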
cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py
@@ -1,5 +1,5 @@
  import pytest
- from unittest.mock import AsyncMock, patch
+ from unittest.mock import AsyncMock, patch, MagicMock
  from cognee.tasks.storage.index_graph_edges import index_graph_edges


@@ -16,6 +16,7 @@ async def test_index_graph_edges_success():
          ],
      )
      mock_vector_engine = AsyncMock()
+     mock_vector_engine.embedding_engine.get_batch_size = MagicMock(return_value=100)

      # Patch the globals of the function so that when it does:
      # vector_engine = get_vector_engine()
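The updated unit test mocks a get_batch_size() method on the embedding engine, matching the batch-size support added to EmbeddingEngine and its adapters (entries 28-32 in the file list). A hedged sketch of how a caller might use it; the method name and calling convention are inferred from the mocked attribute and are assumptions:

# Sketch only: embed texts in batches sized by the embedding engine.
# get_batch_size() is inferred from the mocked attribute in the test above.
from cognee.infrastructure.databases.vector import get_vector_engine


async def embed_in_batches(texts: list[str]) -> list[list[float]]:
    vector_engine = get_vector_engine()
    batch_size = vector_engine.embedding_engine.get_batch_size()

    vectors: list[list[float]] = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start : start + batch_size]
        vectors.extend(await vector_engine.embedding_engine.embed_text(batch))
    return vectors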