cognee 0.3.6__py3-none-any.whl → 0.3.7.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. cognee/__init__.py +1 -0
  2. cognee/api/health.py +2 -12
  3. cognee/api/v1/add/add.py +46 -6
  4. cognee/api/v1/add/routers/get_add_router.py +11 -2
  5. cognee/api/v1/cognify/cognify.py +29 -9
  6. cognee/api/v1/cognify/routers/get_cognify_router.py +2 -1
  7. cognee/api/v1/datasets/datasets.py +11 -0
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +8 -0
  9. cognee/api/v1/delete/routers/get_delete_router.py +2 -0
  10. cognee/api/v1/memify/routers/get_memify_router.py +2 -1
  11. cognee/api/v1/permissions/routers/get_permissions_router.py +6 -0
  12. cognee/api/v1/responses/default_tools.py +0 -1
  13. cognee/api/v1/responses/dispatch_function.py +1 -1
  14. cognee/api/v1/responses/routers/default_tools.py +0 -1
  15. cognee/api/v1/search/routers/get_search_router.py +3 -3
  16. cognee/api/v1/search/search.py +11 -9
  17. cognee/api/v1/settings/routers/get_settings_router.py +7 -1
  18. cognee/api/v1/sync/routers/get_sync_router.py +3 -0
  19. cognee/api/v1/ui/ui.py +45 -16
  20. cognee/api/v1/update/routers/get_update_router.py +3 -1
  21. cognee/api/v1/update/update.py +3 -3
  22. cognee/api/v1/users/routers/get_visualize_router.py +2 -0
  23. cognee/cli/_cognee.py +61 -10
  24. cognee/cli/commands/add_command.py +3 -3
  25. cognee/cli/commands/cognify_command.py +3 -3
  26. cognee/cli/commands/config_command.py +9 -7
  27. cognee/cli/commands/delete_command.py +3 -3
  28. cognee/cli/commands/search_command.py +3 -7
  29. cognee/cli/config.py +0 -1
  30. cognee/context_global_variables.py +5 -0
  31. cognee/exceptions/exceptions.py +1 -1
  32. cognee/infrastructure/databases/cache/__init__.py +2 -0
  33. cognee/infrastructure/databases/cache/cache_db_interface.py +79 -0
  34. cognee/infrastructure/databases/cache/config.py +44 -0
  35. cognee/infrastructure/databases/cache/get_cache_engine.py +67 -0
  36. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +243 -0
  37. cognee/infrastructure/databases/exceptions/__init__.py +1 -0
  38. cognee/infrastructure/databases/exceptions/exceptions.py +18 -2
  39. cognee/infrastructure/databases/graph/get_graph_engine.py +1 -1
  40. cognee/infrastructure/databases/graph/graph_db_interface.py +5 -0
  41. cognee/infrastructure/databases/graph/kuzu/adapter.py +76 -47
  42. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +13 -3
  43. cognee/infrastructure/databases/graph/neo4j_driver/deadlock_retry.py +1 -1
  44. cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +1 -1
  45. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +1 -1
  46. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +21 -3
  47. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +17 -10
  48. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +17 -4
  49. cognee/infrastructure/databases/vector/embeddings/config.py +2 -3
  50. cognee/infrastructure/databases/vector/exceptions/exceptions.py +1 -1
  51. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -1
  52. cognee/infrastructure/files/exceptions.py +1 -1
  53. cognee/infrastructure/files/storage/LocalFileStorage.py +9 -9
  54. cognee/infrastructure/files/storage/S3FileStorage.py +11 -11
  55. cognee/infrastructure/files/utils/guess_file_type.py +6 -0
  56. cognee/infrastructure/llm/prompts/feedback_reaction_prompt.txt +14 -0
  57. cognee/infrastructure/llm/prompts/feedback_report_prompt.txt +13 -0
  58. cognee/infrastructure/llm/prompts/feedback_user_context_prompt.txt +5 -0
  59. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +0 -5
  60. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +19 -9
  61. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +17 -5
  62. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +17 -5
  63. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +32 -0
  64. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/__init__.py +0 -0
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +109 -0
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +33 -8
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +40 -18
  68. cognee/infrastructure/loaders/LoaderEngine.py +27 -7
  69. cognee/infrastructure/loaders/external/__init__.py +7 -0
  70. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +2 -8
  71. cognee/infrastructure/loaders/external/beautiful_soup_loader.py +310 -0
  72. cognee/infrastructure/loaders/supported_loaders.py +7 -0
  73. cognee/modules/data/exceptions/exceptions.py +1 -1
  74. cognee/modules/data/methods/__init__.py +3 -0
  75. cognee/modules/data/methods/get_dataset_data.py +4 -1
  76. cognee/modules/data/methods/has_dataset_data.py +21 -0
  77. cognee/modules/engine/models/TableRow.py +0 -1
  78. cognee/modules/ingestion/save_data_to_file.py +9 -2
  79. cognee/modules/pipelines/exceptions/exceptions.py +1 -1
  80. cognee/modules/pipelines/operations/pipeline.py +12 -1
  81. cognee/modules/pipelines/operations/run_tasks.py +25 -197
  82. cognee/modules/pipelines/operations/run_tasks_base.py +7 -0
  83. cognee/modules/pipelines/operations/run_tasks_data_item.py +260 -0
  84. cognee/modules/pipelines/operations/run_tasks_distributed.py +121 -38
  85. cognee/modules/pipelines/operations/run_tasks_with_telemetry.py +9 -1
  86. cognee/modules/retrieval/EntityCompletionRetriever.py +48 -8
  87. cognee/modules/retrieval/base_graph_retriever.py +3 -1
  88. cognee/modules/retrieval/base_retriever.py +3 -1
  89. cognee/modules/retrieval/chunks_retriever.py +5 -1
  90. cognee/modules/retrieval/code_retriever.py +20 -2
  91. cognee/modules/retrieval/completion_retriever.py +50 -9
  92. cognee/modules/retrieval/cypher_search_retriever.py +11 -1
  93. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +47 -8
  94. cognee/modules/retrieval/graph_completion_cot_retriever.py +152 -22
  95. cognee/modules/retrieval/graph_completion_retriever.py +54 -10
  96. cognee/modules/retrieval/lexical_retriever.py +20 -2
  97. cognee/modules/retrieval/natural_language_retriever.py +10 -1
  98. cognee/modules/retrieval/summaries_retriever.py +5 -1
  99. cognee/modules/retrieval/temporal_retriever.py +62 -10
  100. cognee/modules/retrieval/user_qa_feedback.py +3 -2
  101. cognee/modules/retrieval/utils/completion.py +30 -4
  102. cognee/modules/retrieval/utils/description_to_codepart_search.py +1 -1
  103. cognee/modules/retrieval/utils/session_cache.py +156 -0
  104. cognee/modules/search/methods/get_search_type_tools.py +0 -5
  105. cognee/modules/search/methods/no_access_control_search.py +12 -1
  106. cognee/modules/search/methods/search.py +51 -5
  107. cognee/modules/search/types/SearchType.py +0 -1
  108. cognee/modules/settings/get_settings.py +23 -0
  109. cognee/modules/users/methods/get_authenticated_user.py +3 -1
  110. cognee/modules/users/methods/get_default_user.py +1 -6
  111. cognee/modules/users/roles/methods/create_role.py +2 -2
  112. cognee/modules/users/tenants/methods/create_tenant.py +2 -2
  113. cognee/shared/exceptions/exceptions.py +1 -1
  114. cognee/shared/logging_utils.py +18 -11
  115. cognee/shared/utils.py +24 -2
  116. cognee/tasks/codingagents/coding_rule_associations.py +1 -2
  117. cognee/tasks/documents/exceptions/exceptions.py +1 -1
  118. cognee/tasks/feedback/__init__.py +13 -0
  119. cognee/tasks/feedback/create_enrichments.py +84 -0
  120. cognee/tasks/feedback/extract_feedback_interactions.py +230 -0
  121. cognee/tasks/feedback/generate_improved_answers.py +130 -0
  122. cognee/tasks/feedback/link_enrichments_to_feedback.py +67 -0
  123. cognee/tasks/feedback/models.py +26 -0
  124. cognee/tasks/graph/extract_graph_from_data.py +2 -0
  125. cognee/tasks/ingestion/data_item_to_text_file.py +3 -3
  126. cognee/tasks/ingestion/ingest_data.py +11 -5
  127. cognee/tasks/ingestion/save_data_item_to_storage.py +12 -1
  128. cognee/tasks/storage/add_data_points.py +3 -10
  129. cognee/tasks/storage/index_data_points.py +19 -14
  130. cognee/tasks/storage/index_graph_edges.py +25 -11
  131. cognee/tasks/web_scraper/__init__.py +34 -0
  132. cognee/tasks/web_scraper/config.py +26 -0
  133. cognee/tasks/web_scraper/default_url_crawler.py +446 -0
  134. cognee/tasks/web_scraper/models.py +46 -0
  135. cognee/tasks/web_scraper/types.py +4 -0
  136. cognee/tasks/web_scraper/utils.py +142 -0
  137. cognee/tasks/web_scraper/web_scraper_task.py +396 -0
  138. cognee/tests/cli_tests/cli_unit_tests/test_cli_utils.py +0 -1
  139. cognee/tests/integration/web_url_crawler/test_default_url_crawler.py +13 -0
  140. cognee/tests/integration/web_url_crawler/test_tavily_crawler.py +19 -0
  141. cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py +344 -0
  142. cognee/tests/subprocesses/reader.py +25 -0
  143. cognee/tests/subprocesses/simple_cognify_1.py +31 -0
  144. cognee/tests/subprocesses/simple_cognify_2.py +31 -0
  145. cognee/tests/subprocesses/writer.py +32 -0
  146. cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py +0 -2
  147. cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py +8 -3
  148. cognee/tests/tasks/entity_extraction/entity_extraction_test.py +89 -0
  149. cognee/tests/tasks/web_scraping/web_scraping_test.py +172 -0
  150. cognee/tests/test_add_docling_document.py +56 -0
  151. cognee/tests/test_chromadb.py +7 -11
  152. cognee/tests/test_concurrent_subprocess_access.py +76 -0
  153. cognee/tests/test_conversation_history.py +240 -0
  154. cognee/tests/test_feedback_enrichment.py +174 -0
  155. cognee/tests/test_kuzu.py +27 -15
  156. cognee/tests/test_lancedb.py +7 -11
  157. cognee/tests/test_library.py +32 -2
  158. cognee/tests/test_neo4j.py +24 -16
  159. cognee/tests/test_neptune_analytics_vector.py +7 -11
  160. cognee/tests/test_permissions.py +9 -13
  161. cognee/tests/test_pgvector.py +4 -4
  162. cognee/tests/test_remote_kuzu.py +8 -11
  163. cognee/tests/test_s3_file_storage.py +1 -1
  164. cognee/tests/test_search_db.py +6 -8
  165. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +89 -0
  166. cognee/tests/unit/modules/retrieval/conversation_history_test.py +154 -0
  167. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +51 -0
  168. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/METADATA +21 -6
  169. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/RECORD +178 -139
  170. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/entry_points.txt +1 -0
  171. distributed/Dockerfile +0 -3
  172. distributed/entrypoint.py +21 -9
  173. distributed/signal.py +5 -0
  174. distributed/workers/data_point_saving_worker.py +64 -34
  175. distributed/workers/graph_saving_worker.py +71 -47
  176. cognee/infrastructure/databases/graph/memgraph/memgraph_adapter.py +0 -1116
  177. cognee/modules/retrieval/insights_retriever.py +0 -133
  178. cognee/tests/test_memgraph.py +0 -109
  179. cognee/tests/unit/modules/retrieval/insights_retriever_test.py +0 -251
  180. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/WHEEL +0 -0
  181. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/licenses/LICENSE +0 -0
  182. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,172 @@
1
+ import asyncio
2
+ import cognee
3
+ from cognee.tasks.web_scraper.config import DefaultCrawlerConfig
4
+ from cognee.tasks.web_scraper import cron_web_scraper_task
5
+
6
+
7
async def test_web_scraping_using_bs4():
    """Scrape quotes.toscrape.com with the BeautifulSoup crawler (no Playwright),
    cognify the scraped content, and verify a quote can be attributed via search."""
    await cognee.prune.prune_data()
    await cognee.prune.prune_system()

    target_url = "https://quotes.toscrape.com/"
    extraction_rules = {
        "quotes": {"selector": ".quote span.text", "all": True},
        "authors": {"selector": ".quote small", "all": True},
    }

    crawler_config = DefaultCrawlerConfig(
        concurrency=5,
        crawl_delay=0.5,
        timeout=15.0,
        max_retries=2,
        retry_delay_factor=0.5,
        extraction_rules=extraction_rules,
        use_playwright=False,
    )

    await cognee.add(
        data=target_url,
        soup_crawler_config=crawler_config,
        incremental_loading=False,
    )
    await cognee.cognify()

    results = await cognee.search(
        "Who said 'The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking'?",
        query_type=cognee.SearchType.GRAPH_COMPLETION,
    )
    assert "Albert Einstein" in results[0]
    print("Test passed! Found Albert Einstein in scraped data.")
41
+
42
+
43
async def test_web_scraping_using_bs4_and_incremental_loading():
    """Scrape books.toscrape.com with the BeautifulSoup crawler in structured mode
    with incremental loading enabled, then verify a book price is retrievable."""
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    target_url = "https://books.toscrape.com/"
    extraction_rules = {
        "titles": "article.product_pod h3 a",
        "prices": "article.product_pod p.price_color",
    }

    crawler_config = DefaultCrawlerConfig(
        concurrency=1,
        crawl_delay=0.1,
        timeout=10.0,
        max_retries=1,
        retry_delay_factor=0.5,
        extraction_rules=extraction_rules,
        use_playwright=False,
        structured=True,
    )

    await cognee.add(
        data=target_url,
        soup_crawler_config=crawler_config,
        incremental_loading=True,
    )
    await cognee.cognify()

    results = await cognee.search(
        "What is the price of 'A Light in the Attic' book?",
        query_type=cognee.SearchType.GRAPH_COMPLETION,
    )
    assert "51.77" in results[0]
    print("Test passed! Found 'A Light in the Attic' in scraped data.")
75
+
76
+
77
async def test_web_scraping_using_tavily():
    """Ingest quotes.toscrape.com through the default (Tavily) crawler path and
    verify quote attribution through a graph-completion search."""
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    await cognee.add(
        data="https://quotes.toscrape.com/",
        incremental_loading=False,
    )
    await cognee.cognify()

    results = await cognee.search(
        "Who said 'The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking'?",
        query_type=cognee.SearchType.GRAPH_COMPLETION,
    )
    assert "Albert Einstein" in results[0]
    print("Test passed! Found Albert Einstein in scraped data.")
96
+
97
+
98
async def test_web_scraping_using_tavily_and_incremental_loading():
    """Same as the plain Tavily scenario but with incremental loading enabled."""
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    await cognee.add(
        data="https://quotes.toscrape.com/",
        incremental_loading=True,
    )
    await cognee.cognify()

    results = await cognee.search(
        "Who said 'The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking'?",
        query_type=cognee.SearchType.GRAPH_COMPLETION,
    )
    assert "Albert Einstein" in results[0]
    print("Test passed! Found Albert Einstein in scraped data.")
117
+
118
+
119
+ # ---------- cron job tests ----------
120
async def test_cron_web_scraper():
    """Run cron_web_scraper_task over two sites at once and verify both the
    quote attribution and the book-price data landed in the graph."""
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    urls = ["https://quotes.toscrape.com/", "https://books.toscrape.com/"]
    extraction_rules = {
        "quotes": ".quote .text",
        "authors": ".quote .author",
        "titles": "article.product_pod h3 a",
        "prices": "article.product_pod p.price_color",
    }

    # Run cron_web_scraper_task
    await cron_web_scraper_task(
        url=urls,
        job_name="cron_scraping_job",
        extraction_rules=extraction_rules,
    )

    quote_results = await cognee.search(
        "Who said 'The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking'?",
        query_type=cognee.SearchType.GRAPH_COMPLETION,
    )
    assert "Albert Einstein" in quote_results[0]

    results_books = await cognee.search(
        "What is the price of 'A Light in the Attic' book?",
        query_type=cognee.SearchType.GRAPH_COMPLETION,
    )
    assert "51.77" in results_books[0]

    print("Cron job web_scraping test passed!")
152
+
153
+
154
async def main():
    """Execute every web-scraping scenario in sequence, announcing each one."""
    scenarios = [
        ("Starting BS4 incremental loading test...", test_web_scraping_using_bs4_and_incremental_loading),
        ("Starting BS4 normal test...", test_web_scraping_using_bs4),
        ("Starting Tavily incremental loading test...", test_web_scraping_using_tavily_and_incremental_loading),
        ("Starting Tavily normal test...", test_web_scraping_using_tavily),
        ("Starting cron job test...", test_cron_web_scraper),
    ]
    for banner, scenario in scenarios:
        print(banner)
        await scenario()


if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,56 @@
1
+ import asyncio
2
+ import cognee
3
+
4
+ import os
5
+
6
+
7
async def main():
    """Convert three fixture files (PDF, PNG, PPTX) to DoclingDocument via
    docling, add each converted document to cognee, cognify, and sanity-check
    the search answers against the known fixture content."""
    # Get file paths to the documents to process.
    from pathlib import Path

    current_directory = Path(__file__).resolve().parent
    test_data_directory = os.path.join(current_directory, "test_data")
    fixture_paths = [
        os.path.join(test_data_directory, "artificial-intelligence.pdf"),
        os.path.join(test_data_directory, "example_copy.png"),
        os.path.join(test_data_directory, "example.pptx"),
    ]

    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    # Import the converter and turn each fixture into DoclingDocument format.
    from docling.document_converter import DocumentConverter

    converter = DocumentConverter()
    for fixture_path in fixture_paths:
        conversion = converter.convert(fixture_path)
        await cognee.add(conversion.document)

    await cognee.cognify()

    answer = await cognee.search("Tell me about Artificial Intelligence.")
    assert len(answer) != 0

    answer = await cognee.search("Do programmers change light bulbs?")
    assert len(answer) != 0
    lowercase_answer = answer[0].lower()
    assert ("no" in lowercase_answer) or ("none" in lowercase_answer)

    answer = await cognee.search("What colours are there in the presentation table?")
    assert len(answer) != 0
    lowercase_answer = answer[0].lower()
    assert (
        ("red" in lowercase_answer)
        and ("blue" in lowercase_answer)
        and ("green" in lowercase_answer)
    )


if __name__ == "__main__":
    asyncio.run(main())
@@ -133,20 +133,16 @@ async def main():
133
133
  dataset_name_1 = "natural_language"
134
134
  dataset_name_2 = "quantum"
135
135
 
136
- explanation_file_path = os.path.join(
136
+ explanation_file_path_nlp = os.path.join(
137
137
  pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
138
138
  )
139
- await cognee.add([explanation_file_path], dataset_name_1)
139
+ await cognee.add([explanation_file_path_nlp], dataset_name_1)
140
140
 
141
- text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena.
142
- At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states.
143
- Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible.
144
- The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly.
145
- Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate.
146
- In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited.
147
- """
141
+ explanation_file_path_quantum = os.path.join(
142
+ pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
143
+ )
148
144
 
149
- await cognee.add([text], dataset_name_2)
145
+ await cognee.add([explanation_file_path_quantum], dataset_name_2)
150
146
 
151
147
  await cognee.cognify([dataset_name_2, dataset_name_1])
152
148
 
@@ -159,7 +155,7 @@ async def main():
159
155
  random_node_name = random_node.payload["text"]
160
156
 
161
157
  search_results = await cognee.search(
162
- query_type=SearchType.INSIGHTS, query_text=random_node_name
158
+ query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name
163
159
  )
164
160
  assert len(search_results) != 0, "The search results list is empty."
165
161
  print("\n\nExtracted sentences are:\n")
@@ -0,0 +1,76 @@
1
+ import os
2
+ import asyncio
3
+ import cognee
4
+ import pathlib
5
+ import subprocess
6
+
7
+ from cognee.shared.logging_utils import get_logger
8
+
9
+
10
+ logger = get_logger()
11
+
12
+
13
async def concurrent_subprocess_access():
    """Verify that separate OS processes can access cognee storage concurrently.

    Two scenarios are exercised:
      1. A writer and a reader subprocess running against the same storage.
      2. Two subprocesses cognifying different datasets at the same time.

    Raises:
        AssertionError: if any subprocess exits with a non-zero status
            (previously child failures were silently ignored).
    """
    import sys

    base_directory = pathlib.Path(__file__).parent
    data_directory_path = str((base_directory / ".data_storage" / "concurrent_tasks").resolve())
    cognee_directory_path = str((base_directory / ".cognee_system" / "concurrent_tasks").resolve())
    subprocess_directory = (base_directory / "subprocesses").resolve()

    writer_path = str(subprocess_directory / "writer.py")
    reader_path = str(subprocess_directory / "reader.py")

    cognee.config.data_root_directory(data_directory_path)
    cognee.config.system_root_directory(cognee_directory_path)

    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    # sys.executable is the documented way to locate the running interpreter;
    # os.sys is an unintended re-export and must not be relied on.
    writer_process = subprocess.Popen([sys.executable, writer_path])
    reader_process = subprocess.Popen([sys.executable, reader_path])

    # Wait for both processes to complete and surface any failure.
    assert writer_process.wait() == 0, "writer subprocess exited with a non-zero status"
    assert reader_process.wait() == 0, "reader subprocess exited with a non-zero status"

    logger.info("Basic write read subprocess example finished")

    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    text = """
    This is the text of the first cognify subprocess
    """
    await cognee.add(text, dataset_name="first_cognify_dataset")

    text = """
    This is the text of the second cognify subprocess
    """
    await cognee.add(text, dataset_name="second_cognify_dataset")

    first_cognify_path = str(subprocess_directory / "simple_cognify_1.py")
    second_cognify_path = str(subprocess_directory / "simple_cognify_2.py")

    first_cognify_process = subprocess.Popen([sys.executable, first_cognify_path])
    second_cognify_process = subprocess.Popen([sys.executable, second_cognify_path])

    # Wait for both processes to complete and surface any failure.
    assert first_cognify_process.wait() == 0, "first cognify subprocess exited with a non-zero status"
    assert second_cognify_process.wait() == 0, "second cognify subprocess exited with a non-zero status"

    logger.info("Database concurrent subprocess example finished")


if __name__ == "__main__":
    asyncio.run(concurrent_subprocess_access())
@@ -0,0 +1,240 @@
1
+ """
2
+ End-to-end integration test for conversation history feature.
3
+
4
+ Tests all retrievers that save conversation history to Redis cache:
5
+ 1. GRAPH_COMPLETION
6
+ 2. RAG_COMPLETION
7
+ 3. GRAPH_COMPLETION_COT
8
+ 4. GRAPH_COMPLETION_CONTEXT_EXTENSION
9
+ 5. GRAPH_SUMMARY_COMPLETION
10
+ 6. TEMPORAL
11
+ """
12
+
13
+ import os
14
+ import shutil
15
+ import cognee
16
+ import pathlib
17
+
18
+ from cognee.infrastructure.databases.cache import get_cache_engine
19
+ from cognee.modules.search.types import SearchType
20
+ from cognee.shared.logging_utils import get_logger
21
+ from cognee.modules.users.methods import get_default_user
22
+
23
+ logger = get_logger()
24
+
25
+
26
async def _search_and_verify_saved(
    cache_engine, user_id, query_type, query_text, session_id, cache_session_id=None
):
    """Run one search and assert its Q&A pair was persisted to the session cache.

    Args:
        cache_engine: engine returned by get_cache_engine().
        user_id: stringified user id the history is stored under.
        query_type: SearchType member to exercise.
        query_text: question to send; also the key looked up in history.
        session_id: session passed to cognee.search (may be None).
        cache_session_id: cache key to read history under when it differs from
            session_id (search(session_id=None) stores under "default_session").

    Returns:
        The (non-empty) search result list.
    """
    result = await cognee.search(
        query_type=query_type, query_text=query_text, session_id=session_id
    )
    assert isinstance(result, list) and len(result) > 0, (
        f"{query_type} should return non-empty list, got: {result!r}"
    )

    history = await cache_engine.get_latest_qa(
        user_id, cache_session_id or session_id, last_n=10
    )
    matches = [h for h in history if h["question"] == query_text]
    assert len(matches) == 1, f"Expected to find {query_text!r} exactly once in session history"
    return result


async def main():
    """End-to-end test of conversation history persistence.

    Ingests two small texts, then runs one search per retriever type that is
    expected to save Q&A history (GRAPH_COMPLETION, RAG_COMPLETION,
    GRAPH_COMPLETION_COT, GRAPH_COMPLETION_CONTEXT_EXTENSION,
    GRAPH_SUMMARY_COMPLETION, TEMPORAL) and asserts each Q&A pair lands in the
    session cache. Also checks session isolation, the default-session
    fallback, and the formatted history rendering.
    """
    data_directory_path = str(
        pathlib.Path(
            os.path.join(
                pathlib.Path(__file__).parent,
                ".data_storage/test_conversation_history",
            )
        ).resolve()
    )
    cognee_directory_path = str(
        pathlib.Path(
            os.path.join(
                pathlib.Path(__file__).parent,
                ".cognee_system/test_conversation_history",
            )
        ).resolve()
    )

    cognee.config.data_root_directory(data_directory_path)
    cognee.config.system_root_directory(cognee_directory_path)

    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    dataset_name = "conversation_history_test"

    text_1 = """TechCorp is a technology company based in San Francisco. They specialize in artificial intelligence and machine learning."""
    text_2 = (
        """DataCo is a data analytics company. They help businesses make sense of their data."""
    )

    await cognee.add(text_1, dataset_name)
    await cognee.add(text_2, dataset_name)

    await cognee.cognify([dataset_name])

    user = await get_default_user()
    user_id = str(user.id)

    cache_engine = get_cache_engine()
    assert cache_engine is not None, "Cache engine should be available for testing"

    # --- GRAPH_COMPLETION: history is created and grows within one session ---
    session_id_1 = "test_session_graph"

    await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION,
        query_text="What is TechCorp?",
        session_id=session_id_1,
    )

    history1 = await cache_engine.get_latest_qa(user_id, session_id_1, last_n=10)
    # Message fixed: the assertion checks for exactly one entry, not "at least".
    assert len(history1) == 1, f"Expected exactly 1 Q&A in history, got {len(history1)}"
    our_qa = [h for h in history1 if h["question"] == "What is TechCorp?"]
    assert len(our_qa) >= 1, "Expected to find 'What is TechCorp?' in history"
    assert "answer" in our_qa[0] and "context" in our_qa[0], (
        "Q&A should contain answer and context fields"
    )

    result2 = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION,
        query_text="Tell me more about it",
        session_id=session_id_1,
    )
    assert isinstance(result2, list) and len(result2) > 0, (
        f"Second query should return non-empty list, got: {result2!r}"
    )

    history2 = await cache_engine.get_latest_qa(user_id, session_id_1, last_n=10)
    our_questions = [
        h for h in history2 if h["question"] in ["What is TechCorp?", "Tell me more about it"]
    ]
    # Message fixed: the assertion checks for exactly two entries, not "at least".
    assert len(our_questions) == 2, (
        f"Expected exactly 2 Q&A pairs in history after 2 queries, got {len(our_questions)}"
    )

    # --- session isolation: a second session keeps its own history ---
    await _search_and_verify_saved(
        cache_engine,
        user_id,
        SearchType.GRAPH_COMPLETION,
        "What is DataCo?",
        "test_session_separate",
    )

    # --- session_id=None falls back to the "default_session" cache key ---
    await _search_and_verify_saved(
        cache_engine,
        user_id,
        SearchType.GRAPH_COMPLETION,
        "Test default session",
        None,
        cache_session_id="default_session",
    )

    # --- remaining retriever types that persist conversation history ---
    await _search_and_verify_saved(
        cache_engine,
        user_id,
        SearchType.RAG_COMPLETION,
        "What companies are mentioned?",
        "test_session_rag",
    )
    await _search_and_verify_saved(
        cache_engine,
        user_id,
        SearchType.GRAPH_COMPLETION_COT,
        "What do you know about TechCorp?",
        "test_session_cot",
    )
    await _search_and_verify_saved(
        cache_engine,
        user_id,
        SearchType.GRAPH_COMPLETION_CONTEXT_EXTENSION,
        "Tell me about DataCo",
        "test_session_ext",
    )
    await _search_and_verify_saved(
        cache_engine,
        user_id,
        SearchType.GRAPH_SUMMARY_COMPLETION,
        "What are the key points about TechCorp?",
        "test_session_summary",
    )
    await _search_and_verify_saved(
        cache_engine,
        user_id,
        SearchType.TEMPORAL,
        "Tell me about the companies",
        "test_session_temporal",
    )

    # --- formatted history rendering ---
    from cognee.modules.retrieval.utils.session_cache import (
        get_conversation_history,
    )

    formatted_history = await get_conversation_history(session_id=session_id_1)

    assert "Previous conversation:" in formatted_history, (
        "Formatted history should contain 'Previous conversation:' header"
    )
    assert "QUESTION:" in formatted_history, "Formatted history should contain 'QUESTION:' prefix"
    assert "CONTEXT:" in formatted_history, "Formatted history should contain 'CONTEXT:' prefix"
    assert "ANSWER:" in formatted_history, "Formatted history should contain 'ANSWER:' prefix"

    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    logger.info("All conversation history tests passed successfully")


if __name__ == "__main__":
    import asyncio

    asyncio.run(main())