cognee 0.2.2.dev0__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214)
  1. cognee/api/client.py +41 -3
  2. cognee/api/health.py +332 -0
  3. cognee/api/v1/add/add.py +5 -2
  4. cognee/api/v1/add/routers/get_add_router.py +3 -0
  5. cognee/api/v1/cognify/code_graph_pipeline.py +3 -1
  6. cognee/api/v1/cognify/cognify.py +8 -0
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -1
  8. cognee/api/v1/config/config.py +3 -1
  9. cognee/api/v1/datasets/routers/get_datasets_router.py +1 -7
  10. cognee/api/v1/delete/delete.py +16 -12
  11. cognee/api/v1/responses/routers/get_responses_router.py +3 -1
  12. cognee/api/v1/search/search.py +10 -0
  13. cognee/api/v1/settings/routers/get_settings_router.py +0 -2
  14. cognee/base_config.py +1 -0
  15. cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +5 -6
  16. cognee/infrastructure/databases/graph/config.py +2 -0
  17. cognee/infrastructure/databases/graph/get_graph_engine.py +58 -12
  18. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -10
  19. cognee/infrastructure/databases/graph/kuzu/adapter.py +12 -7
  20. cognee/infrastructure/databases/graph/kuzu/kuzu_migrate.py +1 -1
  21. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +48 -13
  22. cognee/infrastructure/databases/graph/neptune_driver/__init__.py +15 -0
  23. cognee/infrastructure/databases/graph/neptune_driver/adapter.py +1427 -0
  24. cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +115 -0
  25. cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +224 -0
  26. cognee/infrastructure/databases/graph/networkx/adapter.py +3 -3
  27. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +449 -0
  28. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +1 -0
  29. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +8 -3
  30. cognee/infrastructure/databases/vector/create_vector_engine.py +31 -15
  31. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +3 -1
  32. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +21 -6
  33. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +4 -3
  34. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +3 -1
  35. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +22 -16
  36. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +36 -34
  37. cognee/infrastructure/databases/vector/vector_db_interface.py +78 -7
  38. cognee/infrastructure/files/utils/get_data_file_path.py +39 -0
  39. cognee/infrastructure/files/utils/guess_file_type.py +2 -2
  40. cognee/infrastructure/files/utils/open_data_file.py +4 -23
  41. cognee/infrastructure/llm/LLMGateway.py +137 -0
  42. cognee/infrastructure/llm/__init__.py +14 -4
  43. cognee/infrastructure/llm/config.py +29 -1
  44. cognee/infrastructure/llm/prompts/answer_hotpot_question.txt +1 -1
  45. cognee/infrastructure/llm/prompts/answer_hotpot_using_cognee_search.txt +1 -1
  46. cognee/infrastructure/llm/prompts/answer_simple_question.txt +1 -1
  47. cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt +1 -1
  48. cognee/infrastructure/llm/prompts/categorize_categories.txt +1 -1
  49. cognee/infrastructure/llm/prompts/classify_content.txt +1 -1
  50. cognee/infrastructure/llm/prompts/context_for_question.txt +1 -1
  51. cognee/infrastructure/llm/prompts/graph_context_for_question.txt +1 -1
  52. cognee/infrastructure/llm/prompts/natural_language_retriever_system.txt +1 -1
  53. cognee/infrastructure/llm/prompts/patch_gen_instructions.txt +1 -1
  54. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +130 -0
  55. cognee/infrastructure/llm/prompts/summarize_code.txt +2 -2
  56. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +57 -0
  57. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +533 -0
  58. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/config.py +94 -0
  59. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py +37 -0
  60. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +21 -0
  61. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +131 -0
  62. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +266 -0
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +137 -0
  64. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +550 -0
  65. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py +26 -0
  66. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +962 -0
  67. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +52 -0
  68. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +166 -0
  69. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +109 -0
  70. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +343 -0
  71. cognee/{modules/data → infrastructure/llm/structured_output_framework/baml/baml_src}/extraction/__init__.py +1 -0
  72. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +89 -0
  73. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +33 -0
  74. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +18 -0
  75. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +3 -0
  76. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_categories.py +12 -0
  77. cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/extract_summary.py +16 -7
  78. cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/extract_content_graph.py +7 -6
  79. cognee/infrastructure/llm/{anthropic → structured_output_framework/litellm_instructor/llm/anthropic}/adapter.py +10 -4
  80. cognee/infrastructure/llm/{gemini → structured_output_framework/litellm_instructor/llm/gemini}/adapter.py +6 -5
  81. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/__init__.py +0 -0
  82. cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/litellm_instructor/llm/generic_llm_api}/adapter.py +7 -3
  83. cognee/infrastructure/llm/{get_llm_client.py → structured_output_framework/litellm_instructor/llm/get_llm_client.py} +18 -6
  84. cognee/infrastructure/llm/{llm_interface.py → structured_output_framework/litellm_instructor/llm/llm_interface.py} +2 -2
  85. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/__init__.py +0 -0
  86. cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor/llm/ollama}/adapter.py +4 -2
  87. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/__init__.py +0 -0
  88. cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm/openai}/adapter.py +6 -4
  89. cognee/infrastructure/llm/{rate_limiter.py → structured_output_framework/litellm_instructor/llm/rate_limiter.py} +0 -5
  90. cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +4 -2
  91. cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +7 -3
  92. cognee/infrastructure/llm/tokenizer/__init__.py +4 -0
  93. cognee/infrastructure/llm/utils.py +3 -1
  94. cognee/infrastructure/loaders/LoaderEngine.py +156 -0
  95. cognee/infrastructure/loaders/LoaderInterface.py +73 -0
  96. cognee/infrastructure/loaders/__init__.py +18 -0
  97. cognee/infrastructure/loaders/core/__init__.py +7 -0
  98. cognee/infrastructure/loaders/core/audio_loader.py +98 -0
  99. cognee/infrastructure/loaders/core/image_loader.py +114 -0
  100. cognee/infrastructure/loaders/core/text_loader.py +90 -0
  101. cognee/infrastructure/loaders/create_loader_engine.py +32 -0
  102. cognee/infrastructure/loaders/external/__init__.py +22 -0
  103. cognee/infrastructure/loaders/external/pypdf_loader.py +96 -0
  104. cognee/infrastructure/loaders/external/unstructured_loader.py +127 -0
  105. cognee/infrastructure/loaders/get_loader_engine.py +18 -0
  106. cognee/infrastructure/loaders/supported_loaders.py +18 -0
  107. cognee/infrastructure/loaders/use_loader.py +21 -0
  108. cognee/infrastructure/loaders/utils/__init__.py +0 -0
  109. cognee/modules/data/methods/__init__.py +1 -0
  110. cognee/modules/data/methods/get_authorized_dataset.py +23 -0
  111. cognee/modules/data/models/Data.py +11 -1
  112. cognee/modules/data/processing/document_types/AudioDocument.py +2 -2
  113. cognee/modules/data/processing/document_types/ImageDocument.py +2 -2
  114. cognee/modules/data/processing/document_types/PdfDocument.py +4 -11
  115. cognee/modules/engine/utils/generate_edge_id.py +5 -0
  116. cognee/modules/graph/cognee_graph/CogneeGraph.py +9 -18
  117. cognee/modules/graph/methods/get_formatted_graph_data.py +7 -1
  118. cognee/modules/graph/utils/get_graph_from_model.py +93 -101
  119. cognee/modules/ingestion/data_types/TextData.py +8 -2
  120. cognee/modules/ingestion/save_data_to_file.py +1 -1
  121. cognee/modules/pipelines/exceptions/__init__.py +1 -0
  122. cognee/modules/pipelines/exceptions/exceptions.py +12 -0
  123. cognee/modules/pipelines/models/DataItemStatus.py +5 -0
  124. cognee/modules/pipelines/models/PipelineRunInfo.py +6 -0
  125. cognee/modules/pipelines/models/__init__.py +1 -0
  126. cognee/modules/pipelines/operations/pipeline.py +10 -2
  127. cognee/modules/pipelines/operations/run_tasks.py +251 -19
  128. cognee/modules/retrieval/code_retriever.py +3 -5
  129. cognee/modules/retrieval/completion_retriever.py +1 -1
  130. cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +0 -2
  131. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -2
  132. cognee/modules/retrieval/graph_completion_cot_retriever.py +8 -9
  133. cognee/modules/retrieval/natural_language_retriever.py +3 -5
  134. cognee/modules/retrieval/utils/completion.py +6 -9
  135. cognee/modules/retrieval/utils/description_to_codepart_search.py +2 -3
  136. cognee/modules/search/methods/search.py +5 -1
  137. cognee/modules/search/operations/__init__.py +1 -0
  138. cognee/modules/search/operations/select_search_type.py +42 -0
  139. cognee/modules/search/types/SearchType.py +1 -0
  140. cognee/modules/settings/get_settings.py +0 -4
  141. cognee/modules/settings/save_vector_db_config.py +1 -1
  142. cognee/shared/data_models.py +3 -1
  143. cognee/shared/logging_utils.py +0 -5
  144. cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
  145. cognee/tasks/documents/extract_chunks_from_documents.py +10 -12
  146. cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +4 -6
  147. cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +4 -6
  148. cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +6 -7
  149. cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +4 -7
  150. cognee/tasks/graph/extract_graph_from_code.py +3 -2
  151. cognee/tasks/graph/extract_graph_from_data.py +4 -3
  152. cognee/tasks/graph/infer_data_ontology.py +5 -6
  153. cognee/tasks/ingestion/data_item_to_text_file.py +79 -0
  154. cognee/tasks/ingestion/ingest_data.py +91 -61
  155. cognee/tasks/ingestion/resolve_data_directories.py +3 -0
  156. cognee/tasks/repo_processor/get_repo_file_dependencies.py +3 -0
  157. cognee/tasks/storage/index_data_points.py +1 -1
  158. cognee/tasks/storage/index_graph_edges.py +4 -1
  159. cognee/tasks/summarization/summarize_code.py +2 -3
  160. cognee/tasks/summarization/summarize_text.py +3 -2
  161. cognee/tests/test_cognee_server_start.py +12 -7
  162. cognee/tests/test_deduplication.py +2 -2
  163. cognee/tests/test_deletion.py +58 -17
  164. cognee/tests/test_graph_visualization_permissions.py +161 -0
  165. cognee/tests/test_neptune_analytics_graph.py +309 -0
  166. cognee/tests/test_neptune_analytics_hybrid.py +176 -0
  167. cognee/tests/{test_qdrant.py → test_neptune_analytics_vector.py} +86 -16
  168. cognee/tests/test_pgvector.py +5 -5
  169. cognee/tests/test_s3.py +1 -6
  170. cognee/tests/unit/infrastructure/databases/test_rate_limiter.py +11 -10
  171. cognee/tests/unit/infrastructure/databases/vector/__init__.py +0 -0
  172. cognee/tests/unit/infrastructure/mock_embedding_engine.py +1 -1
  173. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -5
  174. cognee/tests/unit/infrastructure/test_rate_limiting_realistic.py +6 -4
  175. cognee/tests/unit/infrastructure/test_rate_limiting_retry.py +1 -1
  176. cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py +61 -3
  177. cognee/tests/unit/modules/search/search_methods_test.py +55 -0
  178. {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/METADATA +12 -6
  179. {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/RECORD +195 -156
  180. cognee/infrastructure/databases/vector/pinecone/adapter.py +0 -8
  181. cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +0 -514
  182. cognee/infrastructure/databases/vector/qdrant/__init__.py +0 -2
  183. cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +0 -527
  184. cognee/infrastructure/databases/vector/weaviate_db/__init__.py +0 -1
  185. cognee/modules/data/extraction/extract_categories.py +0 -14
  186. distributed/Dockerfile +0 -34
  187. distributed/app.py +0 -4
  188. distributed/entrypoint.py +0 -71
  189. distributed/entrypoint.sh +0 -5
  190. distributed/modal_image.py +0 -11
  191. distributed/queues.py +0 -5
  192. distributed/tasks/queued_add_data_points.py +0 -13
  193. distributed/tasks/queued_add_edges.py +0 -13
  194. distributed/tasks/queued_add_nodes.py +0 -13
  195. distributed/test.py +0 -28
  196. distributed/utils.py +0 -19
  197. distributed/workers/data_point_saving_worker.py +0 -93
  198. distributed/workers/graph_saving_worker.py +0 -104
  199. /cognee/infrastructure/databases/{graph/memgraph → hybrid/neptune_analytics}/__init__.py +0 -0
  200. /cognee/infrastructure/{llm → databases/vector/embeddings}/embedding_rate_limiter.py +0 -0
  201. /cognee/infrastructure/{databases/vector/pinecone → llm/structured_output_framework}/__init__.py +0 -0
  202. /cognee/infrastructure/llm/{anthropic → structured_output_framework/baml/baml_src}/__init__.py +0 -0
  203. /cognee/infrastructure/llm/{gemini/__init__.py → structured_output_framework/baml/baml_src/extraction/extract_categories.py} +0 -0
  204. /cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/baml/baml_src/extraction/knowledge_graph}/__init__.py +0 -0
  205. /cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor}/__init__.py +0 -0
  206. /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/__init__.py +0 -0
  207. /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/texts.json +0 -0
  208. /cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm}/__init__.py +0 -0
  209. {distributed → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic}/__init__.py +0 -0
  210. {distributed/tasks → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini}/__init__.py +0 -0
  211. /cognee/modules/data/{extraction/knowledge_graph → methods}/add_model_class_to_graph.py +0 -0
  212. {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/WHEEL +0 -0
  213. {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/licenses/LICENSE +0 -0
  214. {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/licenses/NOTICE.md +0 -0
@@ -7,7 +7,7 @@ from pydantic import BaseModel
7
7
  from cognee.infrastructure.databases.graph import get_graph_engine
8
8
  from cognee.infrastructure.databases.vector import get_vector_engine
9
9
  from cognee.infrastructure.engine.models import DataPoint
10
- from cognee.modules.data.extraction.extract_categories import extract_categories
10
+ from cognee.infrastructure.llm.LLMGateway import LLMGateway
11
11
  from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
12
12
 
13
13
 
@@ -40,7 +40,7 @@ async def chunk_naive_llm_classifier(
40
40
  return data_chunks
41
41
 
42
42
  chunk_classifications = await asyncio.gather(
43
- *[extract_categories(chunk.text, classification_model) for chunk in data_chunks],
43
+ *[LLMGateway.extract_categories(chunk.text, classification_model) for chunk in data_chunks],
44
44
  )
45
45
 
46
46
  classification_data_points = []
@@ -8,7 +8,6 @@ from cognee.modules.data.models import Data
8
8
  from cognee.infrastructure.databases.relational import get_relational_engine
9
9
  from cognee.modules.chunking.TextChunker import TextChunker
10
10
  from cognee.modules.chunking.Chunker import Chunker
11
- from cognee.modules.data.processing.document_types.exceptions.exceptions import PyPdfInternalError
12
11
 
13
12
 
14
13
  async def update_document_token_count(document_id: UUID, token_count: int) -> None:
@@ -40,15 +39,14 @@ async def extract_chunks_from_documents(
40
39
  """
41
40
  for document in documents:
42
41
  document_token_count = 0
43
- try:
44
- async for document_chunk in document.read(
45
- max_chunk_size=max_chunk_size, chunker_cls=chunker
46
- ):
47
- document_token_count += document_chunk.chunk_size
48
- document_chunk.belongs_to_set = document.belongs_to_set
49
- yield document_chunk
50
-
51
- await update_document_token_count(document.id, document_token_count)
52
- except PyPdfInternalError:
53
- pass
42
+
43
+ async for document_chunk in document.read(
44
+ max_chunk_size=max_chunk_size, chunker_cls=chunker
45
+ ):
46
+ document_token_count += document_chunk.chunk_size
47
+ document_chunk.belongs_to_set = document.belongs_to_set
48
+ yield document_chunk
49
+
50
+ await update_document_token_count(document.id, document_token_count)
51
+
54
52
  # todo rita
@@ -6,8 +6,7 @@ from pydantic import BaseModel
6
6
  from cognee.infrastructure.entities.BaseEntityExtractor import BaseEntityExtractor
7
7
  from cognee.modules.engine.models import Entity
8
8
  from cognee.modules.engine.models.EntityType import EntityType
9
- from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt
10
- from cognee.infrastructure.llm.get_llm_client import get_llm_client
9
+ from cognee.infrastructure.llm.LLMGateway import LLMGateway
11
10
 
12
11
  logger = get_logger("llm_entity_extractor")
13
12
 
@@ -51,11 +50,10 @@ class LLMEntityExtractor(BaseEntityExtractor):
51
50
  try:
52
51
  logger.info(f"Extracting entities from text: {text[:100]}...")
53
52
 
54
- llm_client = get_llm_client()
55
- user_prompt = render_prompt(self.user_prompt_template, {"text": text})
56
- system_prompt = read_query_prompt(self.system_prompt_template)
53
+ user_prompt = LLMGateway.render_prompt(self.user_prompt_template, {"text": text})
54
+ system_prompt = LLMGateway.read_query_prompt(self.system_prompt_template)
57
55
 
58
- response = await llm_client.acreate_structured_output(
56
+ response = await LLMGateway.acreate_structured_output(
59
57
  text_input=user_prompt,
60
58
  system_prompt=system_prompt,
61
59
  response_model=EntityList,
@@ -1,8 +1,7 @@
1
1
  from typing import List, Tuple
2
2
  from pydantic import BaseModel
3
3
 
4
- from cognee.infrastructure.llm.get_llm_client import get_llm_client
5
- from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt
4
+ from cognee.infrastructure.llm.LLMGateway import LLMGateway
6
5
  from cognee.root_dir import get_absolute_path
7
6
 
8
7
 
@@ -17,7 +16,6 @@ async def extract_content_nodes_and_relationship_names(
17
16
  content: str, existing_nodes: List[str], n_rounds: int = 2
18
17
  ) -> Tuple[List[str], List[str]]:
19
18
  """Extracts node names and relationship_names from content through multiple rounds of analysis."""
20
- llm_client = get_llm_client()
21
19
  all_nodes: List[str] = existing_nodes.copy()
22
20
  all_relationship_names: List[str] = []
23
21
  existing_node_set = {node.lower() for node in all_nodes}
@@ -34,15 +32,15 @@ async def extract_content_nodes_and_relationship_names(
34
32
  }
35
33
 
36
34
  base_directory = get_absolute_path("./tasks/graph/cascade_extract/prompts")
37
- text_input = render_prompt(
35
+ text_input = LLMGateway.render_prompt(
38
36
  "extract_graph_relationship_names_prompt_input.txt",
39
37
  context,
40
38
  base_directory=base_directory,
41
39
  )
42
- system_prompt = read_query_prompt(
40
+ system_prompt = LLMGateway.read_query_prompt(
43
41
  "extract_graph_relationship_names_prompt_system.txt", base_directory=base_directory
44
42
  )
45
- response = await llm_client.acreate_structured_output(
43
+ response = await LLMGateway.acreate_structured_output(
46
44
  text_input=text_input,
47
45
  system_prompt=system_prompt,
48
46
  response_model=PotentialNodesAndRelationshipNames,
@@ -1,6 +1,6 @@
1
- from typing import List, Tuple
2
- from cognee.infrastructure.llm.get_llm_client import get_llm_client
3
- from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt
1
+ from typing import List
2
+
3
+ from cognee.infrastructure.llm.LLMGateway import LLMGateway
4
4
  from cognee.shared.data_models import KnowledgeGraph
5
5
  from cognee.root_dir import get_absolute_path
6
6
 
@@ -9,7 +9,6 @@ async def extract_edge_triplets(
9
9
  content: str, nodes: List[str], relationship_names: List[str], n_rounds: int = 2
10
10
  ) -> KnowledgeGraph:
11
11
  """Creates a knowledge graph by identifying relationships between the provided nodes."""
12
- llm_client = get_llm_client()
13
12
  final_graph = KnowledgeGraph(nodes=[], edges=[])
14
13
  existing_nodes = set()
15
14
  existing_node_ids = set()
@@ -27,13 +26,13 @@ async def extract_edge_triplets(
27
26
  }
28
27
 
29
28
  base_directory = get_absolute_path("./tasks/graph/cascade_extract/prompts")
30
- text_input = render_prompt(
29
+ text_input = LLMGateway.render_prompt(
31
30
  "extract_graph_edge_triplets_prompt_input.txt", context, base_directory=base_directory
32
31
  )
33
- system_prompt = read_query_prompt(
32
+ system_prompt = LLMGateway.read_query_prompt(
34
33
  "extract_graph_edge_triplets_prompt_system.txt", base_directory=base_directory
35
34
  )
36
- extracted_graph = await llm_client.acreate_structured_output(
35
+ extracted_graph = await LLMGateway.acreate_structured_output(
37
36
  text_input=text_input, system_prompt=system_prompt, response_model=KnowledgeGraph
38
37
  )
39
38
 
@@ -1,9 +1,7 @@
1
1
  from typing import List
2
2
  from pydantic import BaseModel
3
3
 
4
- from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
5
- from cognee.infrastructure.llm.get_llm_client import get_llm_client
6
- from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt
4
+ from cognee.infrastructure.llm.LLMGateway import LLMGateway
7
5
  from cognee.root_dir import get_absolute_path
8
6
 
9
7
 
@@ -15,7 +13,6 @@ class PotentialNodes(BaseModel):
15
13
 
16
14
  async def extract_nodes(text: str, n_rounds: int = 2) -> List[str]:
17
15
  """Extracts node names from content through multiple rounds of analysis."""
18
- llm_client = get_llm_client()
19
16
  all_nodes: List[str] = []
20
17
  existing_nodes = set()
21
18
 
@@ -27,13 +24,13 @@ async def extract_nodes(text: str, n_rounds: int = 2) -> List[str]:
27
24
  "text": text,
28
25
  }
29
26
  base_directory = get_absolute_path("./tasks/graph/cascade_extract/prompts")
30
- text_input = render_prompt(
27
+ text_input = LLMGateway.render_prompt(
31
28
  "extract_graph_nodes_prompt_input.txt", context, base_directory=base_directory
32
29
  )
33
- system_prompt = read_query_prompt(
30
+ system_prompt = LLMGateway.read_query_prompt(
34
31
  "extract_graph_nodes_prompt_system.txt", base_directory=base_directory
35
32
  )
36
- response = await llm_client.acreate_structured_output(
33
+ response = await LLMGateway.acreate_structured_output(
37
34
  text_input=text_input, system_prompt=system_prompt, response_model=PotentialNodes
38
35
  )
39
36
 
@@ -1,7 +1,8 @@
1
1
  import asyncio
2
2
  from typing import Type, List
3
3
  from pydantic import BaseModel
4
- from cognee.modules.data.extraction.knowledge_graph import extract_content_graph
4
+
5
+ from cognee.infrastructure.llm.LLMGateway import LLMGateway
5
6
  from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
6
7
  from cognee.tasks.storage import add_data_points
7
8
 
@@ -17,7 +18,7 @@ async def extract_graph_from_code(
17
18
  - Graph nodes are stored using the `add_data_points` function for later retrieval or analysis.
18
19
  """
19
20
  chunk_graphs = await asyncio.gather(
20
- *[extract_content_graph(chunk.text, graph_model) for chunk in data_chunks]
21
+ *[LLMGateway.extract_content_graph(chunk.text, graph_model) for chunk in data_chunks]
21
22
  )
22
23
 
23
24
  for chunk_index, chunk in enumerate(data_chunks):
@@ -3,15 +3,15 @@ from typing import Type, List
3
3
  from pydantic import BaseModel
4
4
 
5
5
  from cognee.infrastructure.databases.graph import get_graph_engine
6
+ from cognee.tasks.storage.add_data_points import add_data_points
6
7
  from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
7
8
  from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
8
- from cognee.modules.data.extraction.knowledge_graph import extract_content_graph
9
9
  from cognee.modules.graph.utils import (
10
10
  expand_with_nodes_and_edges,
11
11
  retrieve_existing_edges,
12
12
  )
13
13
  from cognee.shared.data_models import KnowledgeGraph
14
- from cognee.tasks.storage.add_data_points import add_data_points
14
+ from cognee.infrastructure.llm.LLMGateway import LLMGateway
15
15
 
16
16
 
17
17
  async def integrate_chunk_graphs(
@@ -40,6 +40,7 @@ async def integrate_chunk_graphs(
40
40
 
41
41
  if len(graph_nodes) > 0:
42
42
  await add_data_points(graph_nodes)
43
+
43
44
  if len(graph_edges) > 0:
44
45
  await graph_engine.add_edges(graph_edges)
45
46
 
@@ -55,7 +56,7 @@ async def extract_graph_from_data(
55
56
  Extracts and integrates a knowledge graph from the text content of document chunks using a specified graph model.
56
57
  """
57
58
  chunk_graphs = await asyncio.gather(
58
- *[extract_content_graph(chunk.text, graph_model) for chunk in data_chunks]
59
+ *[LLMGateway.extract_content_graph(chunk.text, graph_model) for chunk in data_chunks]
59
60
  )
60
61
 
61
62
  # Note: Filter edges with missing source or target nodes
@@ -15,19 +15,19 @@ from pydantic import BaseModel
15
15
 
16
16
  from cognee.modules.graph.exceptions import EntityNotFoundError
17
17
  from cognee.modules.ingestion.exceptions import IngestionError
18
- from cognee.infrastructure.llm.prompts import read_query_prompt
19
- from cognee.infrastructure.llm.get_llm_client import get_llm_client
18
+
20
19
  from cognee.infrastructure.data.chunking.config import get_chunk_config
21
20
  from cognee.infrastructure.data.chunking.get_chunking_engine import get_chunk_engine
22
21
  from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine
23
22
  from cognee.infrastructure.files.utils.extract_text_from_file import extract_text_from_file
24
23
  from cognee.infrastructure.files.utils.guess_file_type import guess_file_type, FileTypeException
25
- from cognee.modules.data.extraction.knowledge_graph.add_model_class_to_graph import (
24
+ from cognee.modules.data.methods.add_model_class_to_graph import (
26
25
  add_model_class_to_graph,
27
26
  )
28
27
  from cognee.tasks.graph.models import NodeModel, GraphOntology
29
28
  from cognee.shared.data_models import KnowledgeGraph
30
29
  from cognee.modules.engine.utils import generate_node_id, generate_node_name
30
+ from cognee.infrastructure.llm.LLMGateway import LLMGateway
31
31
 
32
32
  logger = get_logger("task:infer_data_ontology")
33
33
 
@@ -52,11 +52,10 @@ async def extract_ontology(content: str, response_model: Type[BaseModel]):
52
52
 
53
53
  The structured ontology extracted from the content.
54
54
  """
55
- llm_client = get_llm_client()
56
55
 
57
- system_prompt = read_query_prompt("extract_ontology.txt")
56
+ system_prompt = LLMGateway.read_query_prompt("extract_ontology.txt")
58
57
 
59
- ontology = await llm_client.acreate_structured_output(content, system_prompt, response_model)
58
+ ontology = await LLMGateway.acreate_structured_output(content, system_prompt, response_model)
60
59
 
61
60
  return ontology
62
61
 
@@ -0,0 +1,79 @@
1
+ import os
2
+ from urllib.parse import urlparse
3
+ from typing import List, Tuple
4
+ from pathlib import Path
5
+ import tempfile
6
+
7
+ from cognee.infrastructure.loaders.LoaderInterface import LoaderInterface
8
+ from cognee.modules.ingestion.exceptions import IngestionError
9
+ from cognee.infrastructure.loaders import get_loader_engine
10
+ from cognee.shared.logging_utils import get_logger
11
+ from cognee.infrastructure.files.utils.open_data_file import open_data_file
12
+
13
+ from pydantic_settings import BaseSettings, SettingsConfigDict
14
+
15
+ logger = get_logger(__name__)
16
+
17
+
18
+ class SaveDataSettings(BaseSettings):
19
+ accept_local_file_path: bool = True
20
+
21
+ model_config = SettingsConfigDict(env_file=".env", extra="allow")
22
+
23
+
24
+ settings = SaveDataSettings()
25
+
26
+
27
+ async def pull_from_s3(file_path, destination_file) -> None:
28
+ async with open_data_file(file_path) as file:
29
+ while True:
30
+ chunk = file.read(8192)
31
+ if not chunk:
32
+ break
33
+ destination_file.write(chunk)
34
+
35
+
36
+ async def data_item_to_text_file(
37
+ data_item_path: str, preferred_loaders: List[str]
38
+ ) -> Tuple[str, LoaderInterface]:
39
+ if isinstance(data_item_path, str):
40
+ parsed_url = urlparse(data_item_path)
41
+
42
+ # data is s3 file path
43
+ if parsed_url.scheme == "s3":
44
+ # TODO: Rework this to work with file streams and not saving data to temp storage
45
+ # Note: proper suffix information is needed for OpenAI to handle mp3 files
46
+ path_info = Path(parsed_url.path)
47
+ with tempfile.NamedTemporaryFile(mode="wb", suffix=path_info.suffix) as temp_file:
48
+ await pull_from_s3(data_item_path, temp_file)
49
+ temp_file.flush() # Data needs to be saved to local storage
50
+ loader = get_loader_engine()
51
+ return await loader.load_file(temp_file.name, preferred_loaders), loader.get_loader(
52
+ temp_file.name, preferred_loaders
53
+ )
54
+
55
+ # data is local file path
56
+ elif parsed_url.scheme == "file":
57
+ if settings.accept_local_file_path:
58
+ loader = get_loader_engine()
59
+ return await loader.load_file(data_item_path, preferred_loaders), loader.get_loader(
60
+ data_item_path, preferred_loaders
61
+ )
62
+ else:
63
+ raise IngestionError(message="Local files are not accepted.")
64
+
65
+ # data is an absolute file path
66
+ elif data_item_path.startswith("/") or (
67
+ os.name == "nt" and len(data_item_path) > 1 and data_item_path[1] == ":"
68
+ ):
69
+ # Handle both Unix absolute paths (/path) and Windows absolute paths (C:\path)
70
+ if settings.accept_local_file_path:
71
+ loader = get_loader_engine()
72
+ return await loader.load_file(data_item_path, preferred_loaders), loader.get_loader(
73
+ data_item_path, preferred_loaders
74
+ )
75
+ else:
76
+ raise IngestionError(message="Local files are not accepted.")
77
+
78
+ # data is not a supported type
79
+ raise IngestionError(message=f"Data type not supported: {type(data_item_path)}")
@@ -1,16 +1,16 @@
1
1
  import json
2
2
  import inspect
3
- from os import path
4
3
  from uuid import UUID
5
4
  from typing import Union, BinaryIO, Any, List, Optional
6
5
 
7
6
  import cognee.modules.ingestion as ingestion
8
- from cognee.infrastructure.files.utils.open_data_file import open_data_file
9
7
  from cognee.infrastructure.databases.relational import get_relational_engine
10
8
  from cognee.modules.data.models import Data
11
9
  from cognee.modules.users.models import User
12
10
  from cognee.modules.users.methods import get_default_user
13
11
  from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
12
+ from cognee.infrastructure.files.utils.open_data_file import open_data_file
13
+ from cognee.infrastructure.files.utils.get_data_file_path import get_data_file_path
14
14
  from cognee.modules.data.methods import (
15
15
  get_authorized_existing_datasets,
16
16
  get_dataset_data,
@@ -18,6 +18,7 @@ from cognee.modules.data.methods import (
18
18
  )
19
19
 
20
20
  from .save_data_item_to_storage import save_data_item_to_storage
21
+ from .data_item_to_text_file import data_item_to_text_file
21
22
 
22
23
 
23
24
  async def ingest_data(
@@ -26,6 +27,7 @@ async def ingest_data(
26
27
  user: User,
27
28
  node_set: Optional[List[str]] = None,
28
29
  dataset_id: UUID = None,
30
+ preferred_loaders: List[str] = None,
29
31
  ):
30
32
  if not user:
31
33
  user = await get_default_user()
@@ -42,6 +44,7 @@ async def ingest_data(
42
44
  user: User,
43
45
  node_set: Optional[List[str]] = None,
44
46
  dataset_id: UUID = None,
47
+ preferred_loaders: List[str] = None,
45
48
  ):
46
49
  new_datapoints = []
47
50
  existing_data_points = []
@@ -74,71 +77,96 @@ async def ingest_data(
74
77
  dataset_data_map = {str(data.id): True for data in dataset_data}
75
78
 
76
79
  for data_item in data:
77
- file_path = await save_data_item_to_storage(data_item)
80
+ # Get file path of data item or create a file if it doesn't exist
81
+ original_file_path = await save_data_item_to_storage(data_item)
82
+
83
+ # Transform file path to be OS usable
84
+ actual_file_path = get_data_file_path(original_file_path)
78
85
 
79
- # Ingest data and add metadata
80
- async with open_data_file(file_path) as file:
86
+ # Store all input data as text files in Cognee data storage
87
+ cognee_storage_file_path, loader_engine = await data_item_to_text_file(
88
+ actual_file_path, preferred_loaders
89
+ )
90
+
91
+ # Find metadata from original file
92
+ async with open_data_file(original_file_path) as file:
81
93
  classified_data = ingestion.classify(file)
82
94
 
83
- # data_id is the hash of file contents + owner id to avoid duplicate data
95
+ # data_id is the hash of original file contents + owner id to avoid duplicate data
84
96
  data_id = ingestion.identify(classified_data, user)
97
+ original_file_metadata = classified_data.get_metadata()
85
98
 
86
- file_metadata = classified_data.get_metadata()
87
-
88
- from sqlalchemy import select
89
-
90
- db_engine = get_relational_engine()
91
-
92
- # Check to see if data should be updated
93
- async with db_engine.get_async_session() as session:
94
- data_point = (
95
- await session.execute(select(Data).filter(Data.id == data_id))
96
- ).scalar_one_or_none()
97
-
98
- ext_metadata = get_external_metadata_dict(data_item)
99
-
100
- if node_set:
101
- ext_metadata["node_set"] = node_set
102
-
103
- if data_point is not None:
104
- data_point.name = file_metadata["name"]
105
- data_point.raw_data_location = file_metadata["file_path"]
106
- data_point.extension = file_metadata["extension"]
107
- data_point.mime_type = file_metadata["mime_type"]
108
- data_point.owner_id = user.id
109
- data_point.content_hash = file_metadata["content_hash"]
110
- data_point.file_size = file_metadata["file_size"]
111
- data_point.external_metadata = ext_metadata
112
- data_point.node_set = json.dumps(node_set) if node_set else None
113
- data_point.tenant_id = user.tenant_id if user.tenant_id else None
114
-
115
- # Check if data is already in dataset
116
- if str(data_point.id) in dataset_data_map:
117
- existing_data_points.append(data_point)
118
- else:
119
- dataset_new_data_points.append(data_point)
120
- dataset_data_map[str(data_point.id)] = True
99
+ # Find metadata from Cognee data storage text file
100
+ async with open_data_file(cognee_storage_file_path) as file:
101
+ classified_data = ingestion.classify(file)
102
+ storage_file_metadata = classified_data.get_metadata()
103
+
104
+ from sqlalchemy import select
105
+
106
+ db_engine = get_relational_engine()
107
+
108
+ # Check to see if data should be updated
109
+ async with db_engine.get_async_session() as session:
110
+ data_point = (
111
+ await session.execute(select(Data).filter(Data.id == data_id))
112
+ ).scalar_one_or_none()
113
+
114
+ # TODO: Maybe allow getting of external metadata through ingestion loader?
115
+ ext_metadata = get_external_metadata_dict(data_item)
116
+
117
+ if node_set:
118
+ ext_metadata["node_set"] = node_set
119
+
120
+ if data_point is not None:
121
+ data_point.name = original_file_metadata["name"]
122
+ data_point.raw_data_location = cognee_storage_file_path
123
+ data_point.original_data_location = original_file_metadata["file_path"]
124
+ data_point.extension = storage_file_metadata["extension"]
125
+ data_point.mime_type = storage_file_metadata["mime_type"]
126
+ data_point.original_extension = original_file_metadata["extension"]
127
+ data_point.original_mime_type = original_file_metadata["mime_type"]
128
+ data_point.loader_engine = loader_engine.loader_name
129
+ data_point.owner_id = user.id
130
+ data_point.content_hash = original_file_metadata["content_hash"]
131
+ data_point.raw_content_hash = storage_file_metadata["content_hash"]
132
+ data_point.file_size = original_file_metadata["file_size"]
133
+ data_point.external_metadata = ext_metadata
134
+ data_point.node_set = json.dumps(node_set) if node_set else None
135
+ data_point.tenant_id = user.tenant_id if user.tenant_id else None
136
+
137
+ # Check if data is already in dataset
138
+ if str(data_point.id) in dataset_data_map:
139
+ existing_data_points.append(data_point)
121
140
  else:
122
- if str(data_id) in dataset_data_map:
123
- continue
124
-
125
- data_point = Data(
126
- id=data_id,
127
- name=file_metadata["name"],
128
- raw_data_location=file_metadata["file_path"],
129
- extension=file_metadata["extension"],
130
- mime_type=file_metadata["mime_type"],
131
- owner_id=user.id,
132
- content_hash=file_metadata["content_hash"],
133
- external_metadata=ext_metadata,
134
- node_set=json.dumps(node_set) if node_set else None,
135
- data_size=file_metadata["file_size"],
136
- tenant_id=user.tenant_id if user.tenant_id else None,
137
- token_count=-1,
138
- )
139
-
140
- new_datapoints.append(data_point)
141
+ dataset_new_data_points.append(data_point)
141
142
  dataset_data_map[str(data_point.id)] = True
143
+ else:
144
+ if str(data_id) in dataset_data_map:
145
+ continue
146
+
147
+ data_point = Data(
148
+ id=data_id,
149
+ name=original_file_metadata["name"],
150
+ raw_data_location=cognee_storage_file_path,
151
+ original_data_location=original_file_metadata["file_path"],
152
+ extension=storage_file_metadata["extension"],
153
+ mime_type=storage_file_metadata["mime_type"],
154
+ original_extension=original_file_metadata["extension"],
155
+ original_mime_type=original_file_metadata["mime_type"],
156
+ loader_engine=loader_engine.loader_name,
157
+ owner_id=user.id,
158
+ content_hash=original_file_metadata["content_hash"],
159
+ raw_content_hash=storage_file_metadata["content_hash"],
160
+ external_metadata=ext_metadata,
161
+ node_set=json.dumps(node_set) if node_set else None,
162
+ data_size=original_file_metadata["file_size"],
163
+ tenant_id=user.tenant_id if user.tenant_id else None,
164
+ pipeline_status={},
165
+ token_count=-1,
166
+ )
167
+
168
+ new_datapoints.append(data_point)
169
+ dataset_data_map[str(data_point.id)] = True
142
170
 
143
171
  async with db_engine.get_async_session() as session:
144
172
  if dataset not in session:
@@ -160,4 +188,6 @@ async def ingest_data(
160
188
 
161
189
  return existing_data_points + dataset_new_data_points + new_datapoints
162
190
 
163
- return await store_data_to_dataset(data, dataset_name, user, node_set, dataset_id)
191
+ return await store_data_to_dataset(
192
+ data, dataset_name, user, node_set, dataset_id, preferred_loaders
193
+ )
@@ -40,6 +40,9 @@ async def resolve_data_directories(
40
40
  if include_subdirectories:
41
41
  base_path = item if item.endswith("/") else item + "/"
42
42
  s3_keys = fs.glob(base_path + "**")
43
+ # If path is not directory attempt to add item directly
44
+ if not s3_keys:
45
+ s3_keys = fs.ls(item)
43
46
  else:
44
47
  s3_keys = fs.ls(item)
45
48
  # Filter out keys that represent directories using fs.isdir
@@ -103,6 +103,9 @@ async def get_repo_file_dependencies(
103
103
  extraction of dependencies (default is False). (default False)
104
104
  """
105
105
 
106
+ if isinstance(repo_path, list) and len(repo_path) == 1:
107
+ repo_path = repo_path[0]
108
+
106
109
  if not os.path.exists(repo_path):
107
110
  raise FileNotFoundError(f"Repository path {repo_path} does not exist.")
108
111
 
@@ -38,7 +38,7 @@ async def index_data_points(data_points: list[DataPoint]):
38
38
  index_name = index_name_and_field[:first_occurence]
39
39
  field_name = index_name_and_field[first_occurence + 1 :]
40
40
  try:
41
- # In case the ammount if indexable points is too large we need to send them in batches
41
+ # In case the amount of indexable points is too large we need to send them in batches
42
42
  batch_size = 100
43
43
  for i in range(0, len(indexable_points), batch_size):
44
44
  batch = indexable_points[i : i + batch_size]
@@ -1,3 +1,4 @@
1
+ from cognee.modules.engine.utils.generate_edge_id import generate_edge_id
1
2
  from cognee.shared.logging_utils import get_logger, ERROR
2
3
  from collections import Counter
3
4
 
@@ -49,7 +50,9 @@ async def index_graph_edges(batch_size: int = 1024):
49
50
  )
50
51
 
51
52
  for text, count in edge_types.items():
52
- edge = EdgeType(relationship_name=text, number_of_edges=count)
53
+ edge = EdgeType(
54
+ id=generate_edge_id(edge_id=text), relationship_name=text, number_of_edges=count
55
+ )
53
56
  data_point_type = type(edge)
54
57
 
55
58
  for field_name in edge.metadata["index_fields"]:
@@ -3,8 +3,7 @@ from typing import AsyncGenerator, Union
3
3
  from uuid import uuid5
4
4
 
5
5
  from cognee.infrastructure.engine import DataPoint
6
- from cognee.modules.data.extraction.extract_summary import extract_code_summary
7
-
6
+ from cognee.infrastructure.llm.LLMGateway import LLMGateway
8
7
  from .models import CodeSummary
9
8
 
10
9
 
@@ -17,7 +16,7 @@ async def summarize_code(
17
16
  code_data_points = [file for file in code_graph_nodes if hasattr(file, "source_code")]
18
17
 
19
18
  file_summaries = await asyncio.gather(
20
- *[extract_code_summary(file.source_code) for file in code_data_points]
19
+ *[LLMGateway.extract_code_summary(file.source_code) for file in code_data_points]
21
20
  )
22
21
 
23
22
  file_summaries_map = {
@@ -2,8 +2,9 @@ import asyncio
2
2
  from typing import Type
3
3
  from uuid import uuid5
4
4
  from pydantic import BaseModel
5
- from cognee.modules.data.extraction.extract_summary import extract_summary
5
+
6
6
  from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
7
+ from cognee.infrastructure.llm.LLMGateway import LLMGateway
7
8
  from cognee.modules.cognify.config import get_cognify_config
8
9
  from .models import TextSummary
9
10
 
@@ -42,7 +43,7 @@ async def summarize_text(
42
43
  summarization_model = cognee_config.summarization_model
43
44
 
44
45
  chunk_summaries = await asyncio.gather(
45
- *[extract_summary(chunk.text, summarization_model) for chunk in data_chunks]
46
+ *[LLMGateway.extract_summary(chunk.text, summarization_model) for chunk in data_chunks]
46
47
  )
47
48
 
48
49
  summaries = [