cognee 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- cognee/__init__.py +1 -0
- cognee/api/health.py +2 -12
- cognee/api/v1/add/add.py +46 -6
- cognee/api/v1/add/routers/get_add_router.py +5 -1
- cognee/api/v1/cognify/cognify.py +29 -9
- cognee/api/v1/datasets/datasets.py +11 -0
- cognee/api/v1/responses/default_tools.py +0 -1
- cognee/api/v1/responses/dispatch_function.py +1 -1
- cognee/api/v1/responses/routers/default_tools.py +0 -1
- cognee/api/v1/search/search.py +11 -9
- cognee/api/v1/settings/routers/get_settings_router.py +7 -1
- cognee/api/v1/ui/ui.py +47 -16
- cognee/api/v1/update/routers/get_update_router.py +1 -1
- cognee/api/v1/update/update.py +3 -3
- cognee/cli/_cognee.py +61 -10
- cognee/cli/commands/add_command.py +3 -3
- cognee/cli/commands/cognify_command.py +3 -3
- cognee/cli/commands/config_command.py +9 -7
- cognee/cli/commands/delete_command.py +3 -3
- cognee/cli/commands/search_command.py +3 -7
- cognee/cli/config.py +0 -1
- cognee/context_global_variables.py +5 -0
- cognee/exceptions/exceptions.py +1 -1
- cognee/infrastructure/databases/cache/__init__.py +2 -0
- cognee/infrastructure/databases/cache/cache_db_interface.py +79 -0
- cognee/infrastructure/databases/cache/config.py +44 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +67 -0
- cognee/infrastructure/databases/cache/redis/RedisAdapter.py +243 -0
- cognee/infrastructure/databases/exceptions/__init__.py +1 -0
- cognee/infrastructure/databases/exceptions/exceptions.py +18 -2
- cognee/infrastructure/databases/graph/get_graph_engine.py +1 -1
- cognee/infrastructure/databases/graph/graph_db_interface.py +5 -0
- cognee/infrastructure/databases/graph/kuzu/adapter.py +67 -44
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +13 -3
- cognee/infrastructure/databases/graph/neo4j_driver/deadlock_retry.py +1 -1
- cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +1 -1
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +1 -1
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +21 -3
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +17 -10
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +17 -4
- cognee/infrastructure/databases/vector/embeddings/config.py +2 -3
- cognee/infrastructure/databases/vector/exceptions/exceptions.py +1 -1
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -1
- cognee/infrastructure/files/exceptions.py +1 -1
- cognee/infrastructure/files/storage/LocalFileStorage.py +9 -9
- cognee/infrastructure/files/storage/S3FileStorage.py +11 -11
- cognee/infrastructure/files/utils/guess_file_type.py +6 -0
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +0 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +19 -9
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +17 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +17 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +32 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/__init__.py +0 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +109 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +33 -8
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +40 -18
- cognee/infrastructure/loaders/LoaderEngine.py +27 -7
- cognee/infrastructure/loaders/external/__init__.py +7 -0
- cognee/infrastructure/loaders/external/advanced_pdf_loader.py +2 -8
- cognee/infrastructure/loaders/external/beautiful_soup_loader.py +310 -0
- cognee/infrastructure/loaders/supported_loaders.py +7 -0
- cognee/modules/data/exceptions/exceptions.py +1 -1
- cognee/modules/data/methods/__init__.py +3 -0
- cognee/modules/data/methods/get_dataset_data.py +4 -1
- cognee/modules/data/methods/has_dataset_data.py +21 -0
- cognee/modules/engine/models/TableRow.py +0 -1
- cognee/modules/ingestion/save_data_to_file.py +9 -2
- cognee/modules/pipelines/exceptions/exceptions.py +1 -1
- cognee/modules/pipelines/operations/pipeline.py +12 -1
- cognee/modules/pipelines/operations/run_tasks.py +25 -197
- cognee/modules/pipelines/operations/run_tasks_data_item.py +260 -0
- cognee/modules/pipelines/operations/run_tasks_distributed.py +121 -38
- cognee/modules/retrieval/EntityCompletionRetriever.py +48 -8
- cognee/modules/retrieval/base_graph_retriever.py +3 -1
- cognee/modules/retrieval/base_retriever.py +3 -1
- cognee/modules/retrieval/chunks_retriever.py +5 -1
- cognee/modules/retrieval/code_retriever.py +20 -2
- cognee/modules/retrieval/completion_retriever.py +50 -9
- cognee/modules/retrieval/cypher_search_retriever.py +11 -1
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +47 -8
- cognee/modules/retrieval/graph_completion_cot_retriever.py +32 -1
- cognee/modules/retrieval/graph_completion_retriever.py +54 -10
- cognee/modules/retrieval/lexical_retriever.py +20 -2
- cognee/modules/retrieval/natural_language_retriever.py +10 -1
- cognee/modules/retrieval/summaries_retriever.py +5 -1
- cognee/modules/retrieval/temporal_retriever.py +62 -10
- cognee/modules/retrieval/user_qa_feedback.py +3 -2
- cognee/modules/retrieval/utils/completion.py +5 -0
- cognee/modules/retrieval/utils/description_to_codepart_search.py +1 -1
- cognee/modules/retrieval/utils/session_cache.py +156 -0
- cognee/modules/search/methods/get_search_type_tools.py +0 -5
- cognee/modules/search/methods/no_access_control_search.py +12 -1
- cognee/modules/search/methods/search.py +34 -2
- cognee/modules/search/types/SearchType.py +0 -1
- cognee/modules/settings/get_settings.py +23 -0
- cognee/modules/users/methods/get_authenticated_user.py +3 -1
- cognee/modules/users/methods/get_default_user.py +1 -6
- cognee/modules/users/roles/methods/create_role.py +2 -2
- cognee/modules/users/tenants/methods/create_tenant.py +2 -2
- cognee/shared/exceptions/exceptions.py +1 -1
- cognee/tasks/codingagents/coding_rule_associations.py +1 -2
- cognee/tasks/documents/exceptions/exceptions.py +1 -1
- cognee/tasks/graph/extract_graph_from_data.py +2 -0
- cognee/tasks/ingestion/data_item_to_text_file.py +3 -3
- cognee/tasks/ingestion/ingest_data.py +11 -5
- cognee/tasks/ingestion/save_data_item_to_storage.py +12 -1
- cognee/tasks/storage/add_data_points.py +3 -10
- cognee/tasks/storage/index_data_points.py +19 -14
- cognee/tasks/storage/index_graph_edges.py +25 -11
- cognee/tasks/web_scraper/__init__.py +34 -0
- cognee/tasks/web_scraper/config.py +26 -0
- cognee/tasks/web_scraper/default_url_crawler.py +446 -0
- cognee/tasks/web_scraper/models.py +46 -0
- cognee/tasks/web_scraper/types.py +4 -0
- cognee/tasks/web_scraper/utils.py +142 -0
- cognee/tasks/web_scraper/web_scraper_task.py +396 -0
- cognee/tests/cli_tests/cli_unit_tests/test_cli_utils.py +0 -1
- cognee/tests/integration/web_url_crawler/test_default_url_crawler.py +13 -0
- cognee/tests/integration/web_url_crawler/test_tavily_crawler.py +19 -0
- cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py +344 -0
- cognee/tests/subprocesses/reader.py +25 -0
- cognee/tests/subprocesses/simple_cognify_1.py +31 -0
- cognee/tests/subprocesses/simple_cognify_2.py +31 -0
- cognee/tests/subprocesses/writer.py +32 -0
- cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py +0 -2
- cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py +8 -3
- cognee/tests/tasks/entity_extraction/entity_extraction_test.py +89 -0
- cognee/tests/tasks/web_scraping/web_scraping_test.py +172 -0
- cognee/tests/test_add_docling_document.py +56 -0
- cognee/tests/test_chromadb.py +7 -11
- cognee/tests/test_concurrent_subprocess_access.py +76 -0
- cognee/tests/test_conversation_history.py +240 -0
- cognee/tests/test_kuzu.py +27 -15
- cognee/tests/test_lancedb.py +7 -11
- cognee/tests/test_library.py +32 -2
- cognee/tests/test_neo4j.py +24 -16
- cognee/tests/test_neptune_analytics_vector.py +7 -11
- cognee/tests/test_permissions.py +9 -13
- cognee/tests/test_pgvector.py +4 -4
- cognee/tests/test_remote_kuzu.py +8 -11
- cognee/tests/test_s3_file_storage.py +1 -1
- cognee/tests/test_search_db.py +6 -8
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +89 -0
- cognee/tests/unit/modules/retrieval/conversation_history_test.py +154 -0
- {cognee-0.3.5.dist-info → cognee-0.3.7.dist-info}/METADATA +22 -7
- {cognee-0.3.5.dist-info → cognee-0.3.7.dist-info}/RECORD +155 -128
- {cognee-0.3.5.dist-info → cognee-0.3.7.dist-info}/entry_points.txt +1 -0
- distributed/Dockerfile +0 -3
- distributed/entrypoint.py +21 -9
- distributed/signal.py +5 -0
- distributed/workers/data_point_saving_worker.py +64 -34
- distributed/workers/graph_saving_worker.py +71 -47
- cognee/infrastructure/databases/graph/memgraph/memgraph_adapter.py +0 -1116
- cognee/modules/retrieval/insights_retriever.py +0 -133
- cognee/tests/test_memgraph.py +0 -109
- cognee/tests/unit/modules/retrieval/insights_retriever_test.py +0 -251
- distributed/poetry.lock +0 -12238
- distributed/pyproject.toml +0 -185
- {cognee-0.3.5.dist-info → cognee-0.3.7.dist-info}/WHEEL +0 -0
- {cognee-0.3.5.dist-info → cognee-0.3.7.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.5.dist-info → cognee-0.3.7.dist-info}/licenses/NOTICE.md +0 -0
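Two of the larger additions above are the cognee/tasks/web_scraper package, which lets cognee.add ingest URLs directly, and the cognee/infrastructure/databases/cache package, which backs the new conversation-history feature with Redis. A minimal sketch of the URL-ingestion flow, distilled from the new web_scraping_test.py shown below — the parameter names come from that test; that the omitted DefaultCrawlerConfig fields have usable defaults is an assumption:

import asyncio
import cognee
from cognee.tasks.web_scraper.config import DefaultCrawlerConfig

async def scrape_and_query():
    # BeautifulSoup-based crawling without a browser; extraction_rules map
    # result keys to CSS selectors, as in the test below.
    config = DefaultCrawlerConfig(
        extraction_rules={"quotes": {"selector": ".quote span.text", "all": True}},
        use_playwright=False,
    )
    await cognee.add(data="https://quotes.toscrape.com/", soup_crawler_config=config)
    await cognee.cognify()
    return await cognee.search(
        "Who is quoted on this page?",
        query_type=cognee.SearchType.GRAPH_COMPLETION,
    )

if __name__ == "__main__":
    print(asyncio.run(scrape_and_query()))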
cognee/tests/tasks/web_scraping/web_scraping_test.py
ADDED
@@ -0,0 +1,172 @@
+import asyncio
+import cognee
+from cognee.tasks.web_scraper.config import DefaultCrawlerConfig
+from cognee.tasks.web_scraper import cron_web_scraper_task
+
+
+async def test_web_scraping_using_bs4():
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system()
+
+    url = "https://quotes.toscrape.com/"
+    rules = {
+        "quotes": {"selector": ".quote span.text", "all": True},
+        "authors": {"selector": ".quote small", "all": True},
+    }
+
+    soup_config = DefaultCrawlerConfig(
+        concurrency=5,
+        crawl_delay=0.5,
+        timeout=15.0,
+        max_retries=2,
+        retry_delay_factor=0.5,
+        extraction_rules=rules,
+        use_playwright=False,
+    )
+
+    await cognee.add(
+        data=url,
+        soup_crawler_config=soup_config,
+        incremental_loading=False,
+    )
+
+    await cognee.cognify()
+
+    results = await cognee.search(
+        "Who said 'The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking'?",
+        query_type=cognee.SearchType.GRAPH_COMPLETION,
+    )
+    assert "Albert Einstein" in results[0]
+    print("Test passed! Found Albert Einstein in scraped data.")
+
+
+async def test_web_scraping_using_bs4_and_incremental_loading():
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    url = "https://books.toscrape.com/"
+    rules = {"titles": "article.product_pod h3 a", "prices": "article.product_pod p.price_color"}
+
+    soup_config = DefaultCrawlerConfig(
+        concurrency=1,
+        crawl_delay=0.1,
+        timeout=10.0,
+        max_retries=1,
+        retry_delay_factor=0.5,
+        extraction_rules=rules,
+        use_playwright=False,
+        structured=True,
+    )
+
+    await cognee.add(
+        data=url,
+        soup_crawler_config=soup_config,
+        incremental_loading=True,
+    )
+
+    await cognee.cognify()
+
+    results = await cognee.search(
+        "What is the price of 'A Light in the Attic' book?",
+        query_type=cognee.SearchType.GRAPH_COMPLETION,
+    )
+    assert "51.77" in results[0]
+    print("Test passed! Found 'A Light in the Attic' in scraped data.")
+
+
+async def test_web_scraping_using_tavily():
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    url = "https://quotes.toscrape.com/"
+
+    await cognee.add(
+        data=url,
+        incremental_loading=False,
+    )
+
+    await cognee.cognify()
+
+    results = await cognee.search(
+        "Who said 'The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking'?",
+        query_type=cognee.SearchType.GRAPH_COMPLETION,
+    )
+    assert "Albert Einstein" in results[0]
+    print("Test passed! Found Albert Einstein in scraped data.")
+
+
+async def test_web_scraping_using_tavily_and_incremental_loading():
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    url = "https://quotes.toscrape.com/"
+
+    await cognee.add(
+        data=url,
+        incremental_loading=True,
+    )
+
+    await cognee.cognify()
+
+    results = await cognee.search(
+        "Who said 'The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking'?",
+        query_type=cognee.SearchType.GRAPH_COMPLETION,
+    )
+    assert "Albert Einstein" in results[0]
+    print("Test passed! Found Albert Einstein in scraped data.")
+
+
+# ---------- cron job tests ----------
+async def test_cron_web_scraper():
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    urls = ["https://quotes.toscrape.com/", "https://books.toscrape.com/"]
+    extraction_rules = {
+        "quotes": ".quote .text",
+        "authors": ".quote .author",
+        "titles": "article.product_pod h3 a",
+        "prices": "article.product_pod p.price_color",
+    }
+
+    # Run cron_web_scraper_task
+    await cron_web_scraper_task(
+        url=urls,
+        job_name="cron_scraping_job",
+        extraction_rules=extraction_rules,
+    )
+    results = await cognee.search(
+        "Who said 'The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking'?",
+        query_type=cognee.SearchType.GRAPH_COMPLETION,
+    )
+
+    assert "Albert Einstein" in results[0]
+
+    results_books = await cognee.search(
+        "What is the price of 'A Light in the Attic' book?",
+        query_type=cognee.SearchType.GRAPH_COMPLETION,
+    )
+
+    assert "51.77" in results_books[0]
+
+    print("Cron job web_scraping test passed!")
+
+
+async def main():
+    print("Starting BS4 incremental loading test...")
+    await test_web_scraping_using_bs4_and_incremental_loading()
+
+    print("Starting BS4 normal test...")
+    await test_web_scraping_using_bs4()
+
+    print("Starting Tavily incremental loading test...")
+    await test_web_scraping_using_tavily_and_incremental_loading()
+
+    print("Starting Tavily normal test...")
+    await test_web_scraping_using_tavily()
+
+    print("Starting cron job test...")
+    await test_cron_web_scraper()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
cognee/tests/test_add_docling_document.py
ADDED
@@ -0,0 +1,56 @@
+import asyncio
+import cognee
+
+import os
+
+
+async def main():
+    # Get file path to document to process
+    from pathlib import Path
+
+    current_directory = Path(__file__).resolve().parent
+    file_path_artificial = os.path.join(
+        current_directory, "test_data", "artificial-intelligence.pdf"
+    )
+    file_path_png = os.path.join(current_directory, "test_data", "example_copy.png")
+    file_path_pptx = os.path.join(current_directory, "test_data", "example.pptx")
+
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    # Import necessary converter, and convert file to DoclingDocument format
+    from docling.document_converter import DocumentConverter
+
+    converter = DocumentConverter()
+
+    result = converter.convert(file_path_artificial)
+    await cognee.add(result.document)
+
+    result = converter.convert(file_path_png)
+    await cognee.add(result.document)
+
+    result = converter.convert(file_path_pptx)
+    await cognee.add(result.document)
+
+    await cognee.cognify()
+
+    answer = await cognee.search("Tell me about Artificial Intelligence.")
+    assert len(answer) != 0
+
+    answer = await cognee.search("Do programmers change light bulbs?")
+    assert len(answer) != 0
+    lowercase_answer = answer[0].lower()
+    assert ("no" in lowercase_answer) or ("none" in lowercase_answer)
+
+    answer = await cognee.search("What colours are there in the presentation table?")
+    assert len(answer) != 0
+    lowercase_answer = answer[0].lower()
+    assert (
+        ("red" in lowercase_answer)
+        and ("blue" in lowercase_answer)
+        and ("green" in lowercase_answer)
+    )
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
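The test above doubles as a reference for the new Docling ingestion path: cognee.add now accepts a DoclingDocument directly. A condensed sketch of just that step, assuming only what the test shows (the file name is a placeholder):

import asyncio
import cognee
from docling.document_converter import DocumentConverter

async def ingest_with_docling(path: str):
    # Convert any Docling-supported file (PDF, PNG, PPTX, ...) and hand the
    # resulting DoclingDocument straight to cognee.add, as the test above does.
    result = DocumentConverter().convert(path)
    await cognee.add(result.document)
    await cognee.cognify()

if __name__ == "__main__":
    asyncio.run(ingest_with_docling("report.pdf"))  # placeholder path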
cognee/tests/test_chromadb.py
CHANGED
@@ -133,20 +133,16 @@ async def main():
     dataset_name_1 = "natural_language"
     dataset_name_2 = "quantum"

-
+    explanation_file_path_nlp = os.path.join(
         pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
     )
-    await cognee.add([
+    await cognee.add([explanation_file_path_nlp], dataset_name_1)

-
-
-
-    The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly.
-    Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate.
-    In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited.
-    """
+    explanation_file_path_quantum = os.path.join(
+        pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
+    )

-    await cognee.add([
+    await cognee.add([explanation_file_path_quantum], dataset_name_2)

     await cognee.cognify([dataset_name_2, dataset_name_1])

@@ -159,7 +155,7 @@ async def main():
     random_node_name = random_node.payload["text"]

     search_results = await cognee.search(
-        query_type=SearchType.
+        query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name
     )
     assert len(search_results) != 0, "The search results list is empty."
     print("\n\nExtracted sentences are:\n")
cognee/tests/test_concurrent_subprocess_access.py
ADDED
@@ -0,0 +1,76 @@
+import os
+import asyncio
+import cognee
+import pathlib
+import subprocess
+
+from cognee.shared.logging_utils import get_logger
+
+
+logger = get_logger()
+
+
+async def concurrent_subprocess_access():
+    data_directory_path = str(
+        pathlib.Path(
+            os.path.join(pathlib.Path(__file__).parent, ".data_storage/concurrent_tasks")
+        ).resolve()
+    )
+    cognee_directory_path = str(
+        pathlib.Path(
+            os.path.join(pathlib.Path(__file__).parent, ".cognee_system/concurrent_tasks")
+        ).resolve()
+    )
+
+    subprocess_directory_path = str(
+        pathlib.Path(os.path.join(pathlib.Path(__file__).parent, "subprocesses/")).resolve()
+    )
+
+    writer_path = subprocess_directory_path + "/writer.py"
+    reader_path = subprocess_directory_path + "/reader.py"
+
+    cognee.config.data_root_directory(data_directory_path)
+    cognee.config.system_root_directory(cognee_directory_path)
+
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    writer_process = subprocess.Popen([os.sys.executable, str(writer_path)])
+
+    reader_process = subprocess.Popen([os.sys.executable, str(reader_path)])
+
+    # Wait for both processes to complete
+    writer_process.wait()
+    reader_process.wait()
+
+    logger.info("Basic write read subprocess example finished")
+
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    text = """
+    This is the text of the first cognify subprocess
+    """
+    await cognee.add(text, dataset_name="first_cognify_dataset")
+
+    text = """
+    This is the text of the second cognify subprocess
+    """
+    await cognee.add(text, dataset_name="second_cognify_dataset")
+
+    first_cognify_path = subprocess_directory_path + "/simple_cognify_1.py"
+    second_cognify_path = subprocess_directory_path + "/simple_cognify_2.py"
+
+    first_cognify_process = subprocess.Popen([os.sys.executable, str(first_cognify_path)])
+
+    second_cognify_process = subprocess.Popen([os.sys.executable, str(second_cognify_path)])
+
+    # Wait for both processes to complete
+    first_cognify_process.wait()
+    second_cognify_process.wait()
+
+    logger.info("Database concurrent subprocess example finished")
+
+
+if __name__ == "__main__":
+    asyncio.run(concurrent_subprocess_access())
cognee/tests/test_conversation_history.py
ADDED
@@ -0,0 +1,240 @@
+"""
+End-to-end integration test for conversation history feature.
+
+Tests all retrievers that save conversation history to Redis cache:
+1. GRAPH_COMPLETION
+2. RAG_COMPLETION
+3. GRAPH_COMPLETION_COT
+4. GRAPH_COMPLETION_CONTEXT_EXTENSION
+5. GRAPH_SUMMARY_COMPLETION
+6. TEMPORAL
+"""
+
+import os
+import shutil
+import cognee
+import pathlib
+
+from cognee.infrastructure.databases.cache import get_cache_engine
+from cognee.modules.search.types import SearchType
+from cognee.shared.logging_utils import get_logger
+from cognee.modules.users.methods import get_default_user
+
+logger = get_logger()
+
+
+async def main():
+    data_directory_path = str(
+        pathlib.Path(
+            os.path.join(
+                pathlib.Path(__file__).parent,
+                ".data_storage/test_conversation_history",
+            )
+        ).resolve()
+    )
+    cognee_directory_path = str(
+        pathlib.Path(
+            os.path.join(
+                pathlib.Path(__file__).parent,
+                ".cognee_system/test_conversation_history",
+            )
+        ).resolve()
+    )
+
+    cognee.config.data_root_directory(data_directory_path)
+    cognee.config.system_root_directory(cognee_directory_path)
+
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    dataset_name = "conversation_history_test"
+
+    text_1 = """TechCorp is a technology company based in San Francisco. They specialize in artificial intelligence and machine learning."""
+    text_2 = (
+        """DataCo is a data analytics company. They help businesses make sense of their data."""
+    )
+
+    await cognee.add(text_1, dataset_name)
+    await cognee.add(text_2, dataset_name)
+
+    await cognee.cognify([dataset_name])
+
+    user = await get_default_user()
+
+    cache_engine = get_cache_engine()
+    assert cache_engine is not None, "Cache engine should be available for testing"
+
+    session_id_1 = "test_session_graph"
+
+    await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION,
+        query_text="What is TechCorp?",
+        session_id=session_id_1,
+    )
+
+    history1 = await cache_engine.get_latest_qa(str(user.id), session_id_1, last_n=10)
+    assert len(history1) == 1, f"Expected at least 1 Q&A in history, got {len(history1)}"
+    our_qa = [h for h in history1 if h["question"] == "What is TechCorp?"]
+    assert len(our_qa) >= 1, "Expected to find 'What is TechCorp?' in history"
+    assert "answer" in our_qa[0] and "context" in our_qa[0], (
+        "Q&A should contain answer and context fields"
+    )
+
+    result2 = await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION,
+        query_text="Tell me more about it",
+        session_id=session_id_1,
+    )
+
+    assert isinstance(result2, list) and len(result2) > 0, (
+        f"Second query should return non-empty list, got: {result2!r}"
+    )
+
+    history2 = await cache_engine.get_latest_qa(str(user.id), session_id_1, last_n=10)
+    our_questions = [
+        h for h in history2 if h["question"] in ["What is TechCorp?", "Tell me more about it"]
+    ]
+    assert len(our_questions) == 2, (
+        f"Expected at least 2 Q&A pairs in history after 2 queries, got {len(our_questions)}"
+    )
+
+    session_id_2 = "test_session_separate"
+
+    result3 = await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION,
+        query_text="What is DataCo?",
+        session_id=session_id_2,
+    )
+
+    assert isinstance(result3, list) and len(result3) > 0, (
+        f"Different session should return non-empty list, got: {result3!r}"
+    )
+
+    history3 = await cache_engine.get_latest_qa(str(user.id), session_id_2, last_n=10)
+    our_qa_session2 = [h for h in history3 if h["question"] == "What is DataCo?"]
+    assert len(our_qa_session2) == 1, "Session 2 should have 'What is DataCo?' question"
+
+    result4 = await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION,
+        query_text="Test default session",
+        session_id=None,
+    )
+
+    assert isinstance(result4, list) and len(result4) > 0, (
+        f"Default session should return non-empty list, got: {result4!r}"
+    )
+
+    history_default = await cache_engine.get_latest_qa(str(user.id), "default_session", last_n=10)
+    our_qa_default = [h for h in history_default if h["question"] == "Test default session"]
+    assert len(our_qa_default) == 1, "Should find 'Test default session' in default_session"
+
+    session_id_rag = "test_session_rag"
+
+    result_rag = await cognee.search(
+        query_type=SearchType.RAG_COMPLETION,
+        query_text="What companies are mentioned?",
+        session_id=session_id_rag,
+    )
+
+    assert isinstance(result_rag, list) and len(result_rag) > 0, (
+        f"RAG_COMPLETION should return non-empty list, got: {result_rag!r}"
+    )
+
+    history_rag = await cache_engine.get_latest_qa(str(user.id), session_id_rag, last_n=10)
+    our_qa_rag = [h for h in history_rag if h["question"] == "What companies are mentioned?"]
+    assert len(our_qa_rag) == 1, "Should find RAG question in history"
+
+    session_id_cot = "test_session_cot"
+
+    result_cot = await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION_COT,
+        query_text="What do you know about TechCorp?",
+        session_id=session_id_cot,
+    )
+
+    assert isinstance(result_cot, list) and len(result_cot) > 0, (
+        f"GRAPH_COMPLETION_COT should return non-empty list, got: {result_cot!r}"
+    )
+
+    history_cot = await cache_engine.get_latest_qa(str(user.id), session_id_cot, last_n=10)
+    our_qa_cot = [h for h in history_cot if h["question"] == "What do you know about TechCorp?"]
+    assert len(our_qa_cot) == 1, "Should find CoT question in history"
+
+    session_id_ext = "test_session_ext"
+
+    result_ext = await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION_CONTEXT_EXTENSION,
+        query_text="Tell me about DataCo",
+        session_id=session_id_ext,
+    )
+
+    assert isinstance(result_ext, list) and len(result_ext) > 0, (
+        f"GRAPH_COMPLETION_CONTEXT_EXTENSION should return non-empty list, got: {result_ext!r}"
+    )
+
+    history_ext = await cache_engine.get_latest_qa(str(user.id), session_id_ext, last_n=10)
+    our_qa_ext = [h for h in history_ext if h["question"] == "Tell me about DataCo"]
+    assert len(our_qa_ext) == 1, "Should find Context Extension question in history"
+
+    session_id_summary = "test_session_summary"
+
+    result_summary = await cognee.search(
+        query_type=SearchType.GRAPH_SUMMARY_COMPLETION,
+        query_text="What are the key points about TechCorp?",
+        session_id=session_id_summary,
+    )
+
+    assert isinstance(result_summary, list) and len(result_summary) > 0, (
+        f"GRAPH_SUMMARY_COMPLETION should return non-empty list, got: {result_summary!r}"
+    )
+
+    # Verify saved
+    history_summary = await cache_engine.get_latest_qa(str(user.id), session_id_summary, last_n=10)
+    our_qa_summary = [
+        h for h in history_summary if h["question"] == "What are the key points about TechCorp?"
+    ]
+    assert len(our_qa_summary) == 1, "Should find Summary question in history"
+
+    session_id_temporal = "test_session_temporal"
+
+    result_temporal = await cognee.search(
+        query_type=SearchType.TEMPORAL,
+        query_text="Tell me about the companies",
+        session_id=session_id_temporal,
+    )
+
+    assert isinstance(result_temporal, list) and len(result_temporal) > 0, (
+        f"TEMPORAL should return non-empty list, got: {result_temporal!r}"
+    )
+
+    history_temporal = await cache_engine.get_latest_qa(
+        str(user.id), session_id_temporal, last_n=10
+    )
+    our_qa_temporal = [
+        h for h in history_temporal if h["question"] == "Tell me about the companies"
+    ]
+    assert len(our_qa_temporal) == 1, "Should find Temporal question in history"
+
+    from cognee.modules.retrieval.utils.session_cache import (
+        get_conversation_history,
+    )
+
+    formatted_history = await get_conversation_history(session_id=session_id_1)
+
+    assert "Previous conversation:" in formatted_history, (
+        "Formatted history should contain 'Previous conversation:' header"
+    )
+    assert "QUESTION:" in formatted_history, "Formatted history should contain 'QUESTION:' prefix"
+    assert "CONTEXT:" in formatted_history, "Formatted history should contain 'CONTEXT:' prefix"
+    assert "ANSWER:" in formatted_history, "Formatted history should contain 'ANSWER:' prefix"
+
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    logger.info("All conversation history tests passed successfully")
+
+
+if __name__ == "__main__":
+    import asyncio
+
+    asyncio.run(main())
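Condensed from the 240-line test above, the session-history round trip added in this release looks roughly like this. The session_id keyword, both history lookups, and all import paths appear verbatim in the test; the session name is a placeholder, and this is a sketch rather than the canonical API reference:

import asyncio
import cognee
from cognee.infrastructure.databases.cache import get_cache_engine
from cognee.modules.retrieval.utils.session_cache import get_conversation_history
from cognee.modules.search.types import SearchType
from cognee.modules.users.methods import get_default_user

async def session_round_trip():
    # Searches that share a session_id accumulate Q&A history in the cache.
    await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION,
        query_text="What is TechCorp?",
        session_id="demo_session",
    )
    user = await get_default_user()
    cache_engine = get_cache_engine()
    # Raw entries are dicts with "question", "answer" and "context" fields.
    history = await cache_engine.get_latest_qa(str(user.id), "demo_session", last_n=10)
    # Prompt-ready rendering with QUESTION:/CONTEXT:/ANSWER: prefixes.
    formatted = await get_conversation_history(session_id="demo_session")
    return history, formatted

if __name__ == "__main__":
    asyncio.run(session_round_trip())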
cognee/tests/test_kuzu.py
CHANGED
@@ -38,22 +38,35 @@ async def main():

     dataset_name = "cs_explanations"

-
+    explanation_file_path_nlp = os.path.join(
         pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
     )
-    await cognee.add([
+    await cognee.add([explanation_file_path_nlp], dataset_name)

-
-
-
-
-
-
-
-
+    explanation_file_path_quantum = os.path.join(
+        pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
+    )
+
+    from cognee.infrastructure.databases.graph import get_graph_engine
+
+    graph_engine = await get_graph_engine()
+
+    is_empty = await graph_engine.is_empty()
+
+    assert is_empty, "Kuzu graph database is not empty"
+
+    await cognee.add([explanation_file_path_quantum], dataset_name)
+
+    is_empty = await graph_engine.is_empty()
+
+    assert is_empty, "Kuzu graph database should be empty before cognify"

     await cognee.cognify([dataset_name])

+    is_empty = await graph_engine.is_empty()
+
+    assert not is_empty, "Kuzu graph database should not be empty"
+
     from cognee.infrastructure.databases.vector import get_vector_engine

     vector_engine = get_vector_engine()
@@ -61,7 +74,7 @@ async def main():
     random_node_name = random_node.payload["text"]

     search_results = await cognee.search(
-        query_type=SearchType.
+        query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name
     )
     assert len(search_results) != 0, "The search results list is empty."
     print("\n\nExtracted sentences are:\n")
@@ -117,11 +130,10 @@ async def main():
         assert not os.path.isdir(data_root_directory), "Local data files are not deleted"

         await cognee.prune.prune_system(metadata=True)
-        from cognee.infrastructure.databases.graph import get_graph_engine

-
-
-        assert
+        is_empty = await graph_engine.is_empty()
+
+        assert is_empty, "Kuzu graph database is not empty"

     finally:
         # Ensure cleanup even if tests fail