cognee 0.3.6__py3-none-any.whl → 0.3.7.dev1__py3-none-any.whl

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
Files changed (182)
  1. cognee/__init__.py +1 -0
  2. cognee/api/health.py +2 -12
  3. cognee/api/v1/add/add.py +46 -6
  4. cognee/api/v1/add/routers/get_add_router.py +11 -2
  5. cognee/api/v1/cognify/cognify.py +29 -9
  6. cognee/api/v1/cognify/routers/get_cognify_router.py +2 -1
  7. cognee/api/v1/datasets/datasets.py +11 -0
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +8 -0
  9. cognee/api/v1/delete/routers/get_delete_router.py +2 -0
  10. cognee/api/v1/memify/routers/get_memify_router.py +2 -1
  11. cognee/api/v1/permissions/routers/get_permissions_router.py +6 -0
  12. cognee/api/v1/responses/default_tools.py +0 -1
  13. cognee/api/v1/responses/dispatch_function.py +1 -1
  14. cognee/api/v1/responses/routers/default_tools.py +0 -1
  15. cognee/api/v1/search/routers/get_search_router.py +3 -3
  16. cognee/api/v1/search/search.py +11 -9
  17. cognee/api/v1/settings/routers/get_settings_router.py +7 -1
  18. cognee/api/v1/sync/routers/get_sync_router.py +3 -0
  19. cognee/api/v1/ui/ui.py +45 -16
  20. cognee/api/v1/update/routers/get_update_router.py +3 -1
  21. cognee/api/v1/update/update.py +3 -3
  22. cognee/api/v1/users/routers/get_visualize_router.py +2 -0
  23. cognee/cli/_cognee.py +61 -10
  24. cognee/cli/commands/add_command.py +3 -3
  25. cognee/cli/commands/cognify_command.py +3 -3
  26. cognee/cli/commands/config_command.py +9 -7
  27. cognee/cli/commands/delete_command.py +3 -3
  28. cognee/cli/commands/search_command.py +3 -7
  29. cognee/cli/config.py +0 -1
  30. cognee/context_global_variables.py +5 -0
  31. cognee/exceptions/exceptions.py +1 -1
  32. cognee/infrastructure/databases/cache/__init__.py +2 -0
  33. cognee/infrastructure/databases/cache/cache_db_interface.py +79 -0
  34. cognee/infrastructure/databases/cache/config.py +44 -0
  35. cognee/infrastructure/databases/cache/get_cache_engine.py +67 -0
  36. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +243 -0
  37. cognee/infrastructure/databases/exceptions/__init__.py +1 -0
  38. cognee/infrastructure/databases/exceptions/exceptions.py +18 -2
  39. cognee/infrastructure/databases/graph/get_graph_engine.py +1 -1
  40. cognee/infrastructure/databases/graph/graph_db_interface.py +5 -0
  41. cognee/infrastructure/databases/graph/kuzu/adapter.py +76 -47
  42. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +13 -3
  43. cognee/infrastructure/databases/graph/neo4j_driver/deadlock_retry.py +1 -1
  44. cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +1 -1
  45. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +1 -1
  46. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +21 -3
  47. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +17 -10
  48. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +17 -4
  49. cognee/infrastructure/databases/vector/embeddings/config.py +2 -3
  50. cognee/infrastructure/databases/vector/exceptions/exceptions.py +1 -1
  51. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -1
  52. cognee/infrastructure/files/exceptions.py +1 -1
  53. cognee/infrastructure/files/storage/LocalFileStorage.py +9 -9
  54. cognee/infrastructure/files/storage/S3FileStorage.py +11 -11
  55. cognee/infrastructure/files/utils/guess_file_type.py +6 -0
  56. cognee/infrastructure/llm/prompts/feedback_reaction_prompt.txt +14 -0
  57. cognee/infrastructure/llm/prompts/feedback_report_prompt.txt +13 -0
  58. cognee/infrastructure/llm/prompts/feedback_user_context_prompt.txt +5 -0
  59. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +0 -5
  60. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +19 -9
  61. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +17 -5
  62. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +17 -5
  63. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +32 -0
  64. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/__init__.py +0 -0
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +109 -0
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +33 -8
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +40 -18
  68. cognee/infrastructure/loaders/LoaderEngine.py +27 -7
  69. cognee/infrastructure/loaders/external/__init__.py +7 -0
  70. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +2 -8
  71. cognee/infrastructure/loaders/external/beautiful_soup_loader.py +310 -0
  72. cognee/infrastructure/loaders/supported_loaders.py +7 -0
  73. cognee/modules/data/exceptions/exceptions.py +1 -1
  74. cognee/modules/data/methods/__init__.py +3 -0
  75. cognee/modules/data/methods/get_dataset_data.py +4 -1
  76. cognee/modules/data/methods/has_dataset_data.py +21 -0
  77. cognee/modules/engine/models/TableRow.py +0 -1
  78. cognee/modules/ingestion/save_data_to_file.py +9 -2
  79. cognee/modules/pipelines/exceptions/exceptions.py +1 -1
  80. cognee/modules/pipelines/operations/pipeline.py +12 -1
  81. cognee/modules/pipelines/operations/run_tasks.py +25 -197
  82. cognee/modules/pipelines/operations/run_tasks_base.py +7 -0
  83. cognee/modules/pipelines/operations/run_tasks_data_item.py +260 -0
  84. cognee/modules/pipelines/operations/run_tasks_distributed.py +121 -38
  85. cognee/modules/pipelines/operations/run_tasks_with_telemetry.py +9 -1
  86. cognee/modules/retrieval/EntityCompletionRetriever.py +48 -8
  87. cognee/modules/retrieval/base_graph_retriever.py +3 -1
  88. cognee/modules/retrieval/base_retriever.py +3 -1
  89. cognee/modules/retrieval/chunks_retriever.py +5 -1
  90. cognee/modules/retrieval/code_retriever.py +20 -2
  91. cognee/modules/retrieval/completion_retriever.py +50 -9
  92. cognee/modules/retrieval/cypher_search_retriever.py +11 -1
  93. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +47 -8
  94. cognee/modules/retrieval/graph_completion_cot_retriever.py +152 -22
  95. cognee/modules/retrieval/graph_completion_retriever.py +54 -10
  96. cognee/modules/retrieval/lexical_retriever.py +20 -2
  97. cognee/modules/retrieval/natural_language_retriever.py +10 -1
  98. cognee/modules/retrieval/summaries_retriever.py +5 -1
  99. cognee/modules/retrieval/temporal_retriever.py +62 -10
  100. cognee/modules/retrieval/user_qa_feedback.py +3 -2
  101. cognee/modules/retrieval/utils/completion.py +30 -4
  102. cognee/modules/retrieval/utils/description_to_codepart_search.py +1 -1
  103. cognee/modules/retrieval/utils/session_cache.py +156 -0
  104. cognee/modules/search/methods/get_search_type_tools.py +0 -5
  105. cognee/modules/search/methods/no_access_control_search.py +12 -1
  106. cognee/modules/search/methods/search.py +51 -5
  107. cognee/modules/search/types/SearchType.py +0 -1
  108. cognee/modules/settings/get_settings.py +23 -0
  109. cognee/modules/users/methods/get_authenticated_user.py +3 -1
  110. cognee/modules/users/methods/get_default_user.py +1 -6
  111. cognee/modules/users/roles/methods/create_role.py +2 -2
  112. cognee/modules/users/tenants/methods/create_tenant.py +2 -2
  113. cognee/shared/exceptions/exceptions.py +1 -1
  114. cognee/shared/logging_utils.py +18 -11
  115. cognee/shared/utils.py +24 -2
  116. cognee/tasks/codingagents/coding_rule_associations.py +1 -2
  117. cognee/tasks/documents/exceptions/exceptions.py +1 -1
  118. cognee/tasks/feedback/__init__.py +13 -0
  119. cognee/tasks/feedback/create_enrichments.py +84 -0
  120. cognee/tasks/feedback/extract_feedback_interactions.py +230 -0
  121. cognee/tasks/feedback/generate_improved_answers.py +130 -0
  122. cognee/tasks/feedback/link_enrichments_to_feedback.py +67 -0
  123. cognee/tasks/feedback/models.py +26 -0
  124. cognee/tasks/graph/extract_graph_from_data.py +2 -0
  125. cognee/tasks/ingestion/data_item_to_text_file.py +3 -3
  126. cognee/tasks/ingestion/ingest_data.py +11 -5
  127. cognee/tasks/ingestion/save_data_item_to_storage.py +12 -1
  128. cognee/tasks/storage/add_data_points.py +3 -10
  129. cognee/tasks/storage/index_data_points.py +19 -14
  130. cognee/tasks/storage/index_graph_edges.py +25 -11
  131. cognee/tasks/web_scraper/__init__.py +34 -0
  132. cognee/tasks/web_scraper/config.py +26 -0
  133. cognee/tasks/web_scraper/default_url_crawler.py +446 -0
  134. cognee/tasks/web_scraper/models.py +46 -0
  135. cognee/tasks/web_scraper/types.py +4 -0
  136. cognee/tasks/web_scraper/utils.py +142 -0
  137. cognee/tasks/web_scraper/web_scraper_task.py +396 -0
  138. cognee/tests/cli_tests/cli_unit_tests/test_cli_utils.py +0 -1
  139. cognee/tests/integration/web_url_crawler/test_default_url_crawler.py +13 -0
  140. cognee/tests/integration/web_url_crawler/test_tavily_crawler.py +19 -0
  141. cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py +344 -0
  142. cognee/tests/subprocesses/reader.py +25 -0
  143. cognee/tests/subprocesses/simple_cognify_1.py +31 -0
  144. cognee/tests/subprocesses/simple_cognify_2.py +31 -0
  145. cognee/tests/subprocesses/writer.py +32 -0
  146. cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py +0 -2
  147. cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py +8 -3
  148. cognee/tests/tasks/entity_extraction/entity_extraction_test.py +89 -0
  149. cognee/tests/tasks/web_scraping/web_scraping_test.py +172 -0
  150. cognee/tests/test_add_docling_document.py +56 -0
  151. cognee/tests/test_chromadb.py +7 -11
  152. cognee/tests/test_concurrent_subprocess_access.py +76 -0
  153. cognee/tests/test_conversation_history.py +240 -0
  154. cognee/tests/test_feedback_enrichment.py +174 -0
  155. cognee/tests/test_kuzu.py +27 -15
  156. cognee/tests/test_lancedb.py +7 -11
  157. cognee/tests/test_library.py +32 -2
  158. cognee/tests/test_neo4j.py +24 -16
  159. cognee/tests/test_neptune_analytics_vector.py +7 -11
  160. cognee/tests/test_permissions.py +9 -13
  161. cognee/tests/test_pgvector.py +4 -4
  162. cognee/tests/test_remote_kuzu.py +8 -11
  163. cognee/tests/test_s3_file_storage.py +1 -1
  164. cognee/tests/test_search_db.py +6 -8
  165. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +89 -0
  166. cognee/tests/unit/modules/retrieval/conversation_history_test.py +154 -0
  167. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +51 -0
  168. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/METADATA +21 -6
  169. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/RECORD +178 -139
  170. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/entry_points.txt +1 -0
  171. distributed/Dockerfile +0 -3
  172. distributed/entrypoint.py +21 -9
  173. distributed/signal.py +5 -0
  174. distributed/workers/data_point_saving_worker.py +64 -34
  175. distributed/workers/graph_saving_worker.py +71 -47
  176. cognee/infrastructure/databases/graph/memgraph/memgraph_adapter.py +0 -1116
  177. cognee/modules/retrieval/insights_retriever.py +0 -133
  178. cognee/tests/test_memgraph.py +0 -109
  179. cognee/tests/unit/modules/retrieval/insights_retriever_test.py +0 -251
  180. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/WHEEL +0 -0
  181. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/licenses/LICENSE +0 -0
  182. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/licenses/NOTICE.md +0 -0
{cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/entry_points.txt CHANGED
@@ -1,2 +1,3 @@
  [console_scripts]
+ cognee = cognee.cli._cognee:main
  cognee-cli = cognee.cli._cognee:main
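
The added cognee console script is a second alias for the CLI entry point already exposed as cognee-cli; both map to cognee.cli._cognee:main. A minimal sketch (not part of the package; assumes Python 3.10+ and the wheel installed) that lists how the two scripts resolve:

    # Illustrative check only: enumerate the installed console scripts and show
    # that both names point at the same module:function target.
    from importlib.metadata import entry_points

    for script in entry_points(group="console_scripts"):
        if script.name in ("cognee", "cognee-cli"):
            print(script.name, "->", script.value)  # e.g. cognee -> cognee.cli._cognee:main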
distributed/Dockerfile CHANGED
@@ -29,6 +29,3 @@ RUN poetry install --extras neo4j --extras postgres --extras aws --extras distri

  COPY cognee/ /app/cognee
  COPY distributed/ /app/distributed
- RUN chmod +x /app/distributed/entrypoint.sh
-
- ENTRYPOINT ["/app/distributed/entrypoint.sh"]
distributed/entrypoint.py CHANGED
@@ -10,6 +10,7 @@ from distributed.app import app
  from distributed.queues import add_nodes_and_edges_queue, add_data_points_queue
  from distributed.workers.graph_saving_worker import graph_saving_worker
  from distributed.workers.data_point_saving_worker import data_point_saving_worker
+ from distributed.signal import QueueSignal

  logger = get_logger()

@@ -23,13 +24,14 @@ async def main():
      await add_nodes_and_edges_queue.clear.aio()
      await add_data_points_queue.clear.aio()

-     number_of_graph_saving_workers = 1  # Total number of graph_saving_worker to spawn
-     number_of_data_point_saving_workers = 5  # Total number of graph_saving_worker to spawn
+     number_of_graph_saving_workers = 1  # Total number of graph_saving_worker to spawn (MAX 1)
+     number_of_data_point_saving_workers = (
+         10  # Total number of graph_saving_worker to spawn (MAX 10)
+     )

-     results = []
      consumer_futures = []

-     # await prune.prune_data()  # We don't want to delete files on s3
+     await prune.prune_data()  # This prunes the data from the file storage
      # Delete DBs and saved files from metastore
      await prune.prune_system(metadata=True)

@@ -45,16 +47,28 @@ async def main():
          worker_future = data_point_saving_worker.spawn()
          consumer_futures.append(worker_future)

+     """ Example: Setting and adding S3 path as input
      s3_bucket_path = os.getenv("S3_BUCKET_PATH")
      s3_data_path = "s3://" + s3_bucket_path

      await cognee.add(s3_data_path, dataset_name="s3-files")
+     """
+     await cognee.add(
+         [
+             "Audi is a German car manufacturer",
+             "The Netherlands is next to Germany",
+             "Berlin is the capital of Germany",
+             "The Rhine is a major European river",
+             "BMW produces luxury vehicles",
+         ],
+         dataset_name="s3-files",
+     )

      await cognee.cognify(datasets=["s3-files"])

-     # Push empty tuple into the queue to signal the end of data.
-     await add_nodes_and_edges_queue.put.aio(())
-     await add_data_points_queue.put.aio(())
+     # Put Processing end signal into the queues to stop the consumers
+     await add_nodes_and_edges_queue.put.aio(QueueSignal.STOP)
+     await add_data_points_queue.put.aio(QueueSignal.STOP)

      for consumer_future in consumer_futures:
          try:
@@ -64,8 +78,6 @@ async def main():
          except Exception as e:
              logger.error(e)

-     print(results)
-

  if __name__ == "__main__":
      asyncio.run(main())
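
The queue shutdown now uses a sentinel value instead of an empty tuple: the producer enqueues QueueSignal.STOP once, and a consumer that sees it re-enqueues it before exiting so the remaining consumers also stop. A minimal sketch of that pattern with plain asyncio (not Modal queues; all names here are illustrative):

    # Sentinel-based shutdown for multiple consumers, sketched with asyncio.Queue.
    import asyncio

    STOP = "STOP"  # stands in for QueueSignal.STOP

    async def consumer(name: str, queue: asyncio.Queue) -> None:
        while True:
            item = await queue.get()
            if item == STOP:
                await queue.put(STOP)  # pass the sentinel on so other consumers stop too
                print(f"{name} stopping")
                return
            print(f"{name} processed {item}")

    async def main() -> None:
        queue: asyncio.Queue = asyncio.Queue()
        consumers = [asyncio.create_task(consumer(f"worker-{i}", queue)) for i in range(3)]
        for item in ("a", "b", "c"):
            await queue.put(item)
        await queue.put(STOP)  # one sentinel is enough; consumers re-enqueue it
        await asyncio.gather(*consumers)

    asyncio.run(main())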
distributed/signal.py ADDED
@@ -0,0 +1,5 @@
+ from enum import Enum
+
+
+ class QueueSignal(str, Enum):
+     STOP = "STOP"
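
Because QueueSignal subclasses str, its STOP member compares equal to the plain string "STOP", so the signal is still recognized even if it reaches a consumer as a bare string. A standard-library-only illustration (not part of the package):

    from enum import Enum

    class QueueSignal(str, Enum):
        STOP = "STOP"

    assert QueueSignal.STOP == "STOP"               # str subclass: equal to the raw value
    assert QueueSignal("STOP") is QueueSignal.STOP  # lookup by value returns the member
    print(QueueSignal.STOP.value)                   # "STOP"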
distributed/workers/data_point_saving_worker.py CHANGED
@@ -1,16 +1,17 @@
+ import os
  import modal
  import asyncio
  from sqlalchemy.exc import OperationalError, DBAPIError
  from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential

  from distributed.app import app
+ from distributed.signal import QueueSignal
  from distributed.modal_image import image
  from distributed.queues import add_data_points_queue

  from cognee.shared.logging_utils import get_logger
  from cognee.infrastructure.databases.vector import get_vector_engine

-
  logger = get_logger("data_point_saving_worker")


@@ -39,54 +40,83 @@ def is_deadlock_error(error):
      return False


+ secret_name = os.environ.get("MODAL_SECRET_NAME", "distributed_cognee")
+
+
  @app.function(
      retries=3,
      image=image,
      timeout=86400,
-     max_containers=5,
-     secrets=[modal.Secret.from_name("distributed_cognee")],
+     max_containers=10,
+     secrets=[modal.Secret.from_name(secret_name)],
  )
  async def data_point_saving_worker():
      print("Started processing of data points; starting vector engine queue.")
      vector_engine = get_vector_engine()
+     # Defines how many data packets do we glue together from the modal queue before embedding call and ingestion
+     BATCH_SIZE = 25
+     stop_seen = False

      while True:
+         if stop_seen:
+             print("Finished processing all data points; stopping vector engine queue consumer.")
+             return True
+
          if await add_data_points_queue.len.aio() != 0:
              try:
-                 add_data_points_request = await add_data_points_queue.get.aio(block=False)
-             except modal.exception.DeserializationError as error:
-                 logger.error(f"Deserialization error: {str(error)}")
-                 continue
-
-             if len(add_data_points_request) == 0:
-                 print("Finished processing all data points; stopping vector engine queue.")
-                 return True
-
-             if len(add_data_points_request) == 2:
-                 (collection_name, data_points) = add_data_points_request
-
-                 print(f"Adding {len(data_points)} data points to '{collection_name}' collection.")
-
-                 @retry(
-                     retry=retry_if_exception_type(VectorDatabaseDeadlockError),
-                     stop=stop_after_attempt(3),
-                     wait=wait_exponential(multiplier=2, min=1, max=6),
-                 )
-                 async def add_data_points():
-                     try:
-                         await vector_engine.create_data_points(
-                             collection_name, data_points, distributed=False
+                 print("Remaining elements in queue:")
+                 print(await add_data_points_queue.len.aio())
+
+                 # collect batched requests
+                 batched_points = {}
+                 for _ in range(min(BATCH_SIZE, await add_data_points_queue.len.aio())):
+                     add_data_points_request = await add_data_points_queue.get.aio(block=False)
+
+                     if not add_data_points_request:
+                         continue
+
+                     if add_data_points_request == QueueSignal.STOP:
+                         await add_data_points_queue.put.aio(QueueSignal.STOP)
+                         stop_seen = True
+                         break
+
+                     if len(add_data_points_request) == 2:
+                         collection_name, data_points = add_data_points_request
+                         if collection_name not in batched_points:
+                             batched_points[collection_name] = []
+                         batched_points[collection_name].extend(data_points)
+                     else:
+                         print("NoneType or invalid request detected.")
+
+                 if batched_points:
+                     for collection_name, data_points in batched_points.items():
+                         print(
+                             f"Adding {len(data_points)} data points to '{collection_name}' collection."
                          )
-                     except DBAPIError as error:
-                         if is_deadlock_error(error):
-                             raise VectorDatabaseDeadlockError()
-                     except OperationalError as error:
-                         if is_deadlock_error(error):
-                             raise VectorDatabaseDeadlockError()

-                 await add_data_points()
+                         @retry(
+                             retry=retry_if_exception_type(VectorDatabaseDeadlockError),
+                             stop=stop_after_attempt(3),
+                             wait=wait_exponential(multiplier=2, min=1, max=6),
+                         )
+                         async def add_data_points():
+                             try:
+                                 await vector_engine.create_data_points(
+                                     collection_name, data_points, distributed=False
+                                 )
+                             except DBAPIError as error:
+                                 if is_deadlock_error(error):
+                                     raise VectorDatabaseDeadlockError()
+                             except OperationalError as error:
+                                 if is_deadlock_error(error):
+                                     raise VectorDatabaseDeadlockError()
+
+                         await add_data_points()
+                         print(f"Finished adding data points to '{collection_name}'.")

-             print("Finished adding data points.")
+             except modal.exception.DeserializationError as error:
+                 logger.error(f"Deserialization error: {str(error)}")
+                 continue

          else:
              print("No jobs, go to sleep.")
distributed/workers/graph_saving_worker.py CHANGED
@@ -1,8 +1,10 @@
+ import os
  import modal
  import asyncio
  from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential

  from distributed.app import app
+ from distributed.signal import QueueSignal
  from distributed.modal_image import image
  from distributed.queues import add_nodes_and_edges_queue

@@ -10,7 +12,6 @@ from cognee.shared.logging_utils import get_logger
  from cognee.infrastructure.databases.graph import get_graph_engine
  from cognee.infrastructure.databases.graph.config import get_graph_config

-
  logger = get_logger("graph_saving_worker")


@@ -37,68 +38,91 @@ def is_deadlock_error(error):
      return False


+ secret_name = os.environ.get("MODAL_SECRET_NAME", "distributed_cognee")
+
+
  @app.function(
      retries=3,
      image=image,
      timeout=86400,
-     max_containers=5,
-     secrets=[modal.Secret.from_name("distributed_cognee")],
+     max_containers=1,
+     secrets=[modal.Secret.from_name(secret_name)],
  )
  async def graph_saving_worker():
      print("Started processing of nodes and edges; starting graph engine queue.")
      graph_engine = await get_graph_engine()
+     # Defines how many data packets do we glue together from the queue before ingesting them into the graph database
+     BATCH_SIZE = 25
+     stop_seen = False

      while True:
+         if stop_seen:
+             print("Finished processing all data points; stopping graph engine queue consumer.")
+             return True
+
          if await add_nodes_and_edges_queue.len.aio() != 0:
              try:
-                 nodes_and_edges = await add_nodes_and_edges_queue.get.aio(block=False)
+                 print("Remaining elements in queue:")
+                 print(await add_nodes_and_edges_queue.len.aio())
+
+                 all_nodes, all_edges = [], []
+                 for _ in range(min(BATCH_SIZE, await add_nodes_and_edges_queue.len.aio())):
+                     nodes_and_edges = await add_nodes_and_edges_queue.get.aio(block=False)
+
+                     if not nodes_and_edges:
+                         continue
+
+                     if nodes_and_edges == QueueSignal.STOP:
+                         await add_nodes_and_edges_queue.put.aio(QueueSignal.STOP)
+                         stop_seen = True
+                         break
+
+                     if len(nodes_and_edges) == 2:
+                         nodes, edges = nodes_and_edges
+                         all_nodes.extend(nodes)
+                         all_edges.extend(edges)
+                     else:
+                         print("None Type detected.")
+
+                 if all_nodes or all_edges:
+                     print(f"Adding {len(all_nodes)} nodes and {len(all_edges)} edges.")
+
+                     @retry(
+                         retry=retry_if_exception_type(GraphDatabaseDeadlockError),
+                         stop=stop_after_attempt(3),
+                         wait=wait_exponential(multiplier=2, min=1, max=6),
+                     )
+                     async def save_graph_nodes(new_nodes):
+                         try:
+                             await graph_engine.add_nodes(new_nodes, distributed=False)
+                         except Exception as error:
+                             if is_deadlock_error(error):
+                                 raise GraphDatabaseDeadlockError()
+
+                     @retry(
+                         retry=retry_if_exception_type(GraphDatabaseDeadlockError),
+                         stop=stop_after_attempt(3),
+                         wait=wait_exponential(multiplier=2, min=1, max=6),
+                     )
+                     async def save_graph_edges(new_edges):
+                         try:
+                             await graph_engine.add_edges(new_edges, distributed=False)
+                         except Exception as error:
+                             if is_deadlock_error(error):
+                                 raise GraphDatabaseDeadlockError()
+
+                     if all_nodes:
+                         await save_graph_nodes(all_nodes)
+
+                     if all_edges:
+                         await save_graph_edges(all_edges)
+
+                     print("Finished adding nodes and edges.")
+
              except modal.exception.DeserializationError as error:
                  logger.error(f"Deserialization error: {str(error)}")
                  continue

-             if len(nodes_and_edges) == 0:
-                 print("Finished processing all nodes and edges; stopping graph engine queue.")
-                 return True
-
-             if len(nodes_and_edges) == 2:
-                 print(
-                     f"Adding {len(nodes_and_edges[0])} nodes and {len(nodes_and_edges[1])} edges."
-                 )
-                 nodes = nodes_and_edges[0]
-                 edges = nodes_and_edges[1]
-
-                 @retry(
-                     retry=retry_if_exception_type(GraphDatabaseDeadlockError),
-                     stop=stop_after_attempt(3),
-                     wait=wait_exponential(multiplier=2, min=1, max=6),
-                 )
-                 async def save_graph_nodes(new_nodes):
-                     try:
-                         await graph_engine.add_nodes(new_nodes, distributed=False)
-                     except Exception as error:
-                         if is_deadlock_error(error):
-                             raise GraphDatabaseDeadlockError()
-
-                 @retry(
-                     retry=retry_if_exception_type(GraphDatabaseDeadlockError),
-                     stop=stop_after_attempt(3),
-                     wait=wait_exponential(multiplier=2, min=1, max=6),
-                 )
-                 async def save_graph_edges(new_edges):
-                     try:
-                         await graph_engine.add_edges(new_edges, distributed=False)
-                     except Exception as error:
-                         if is_deadlock_error(error):
-                             raise GraphDatabaseDeadlockError()
-
-                 if nodes:
-                     await save_graph_nodes(nodes)
-
-                 if edges:
-                     await save_graph_edges(edges)
-
-                 print("Finished adding nodes and edges.")
-
          else:
              print("No jobs, go to sleep.")
              await asyncio.sleep(5)
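
Both workers keep a tenacity-based deadlock retry around their writes: up to three attempts with exponential backoff, retrying only when the failure is classified as a deadlock. A self-contained sketch of that policy (the exception class and failing function below are stand-ins, not the package's code):

    from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential

    class GraphDatabaseDeadlockError(Exception):
        pass

    attempts = 0

    @retry(
        retry=retry_if_exception_type(GraphDatabaseDeadlockError),
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=2, min=1, max=6),
    )
    def write_with_retry():
        global attempts
        attempts += 1
        if attempts < 3:
            raise GraphDatabaseDeadlockError()  # simulate a transient deadlock
        return "ok"

    print(write_with_retry(), "after", attempts, "attempts")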