langflow-base-nightly 0.5.0.dev35__py3-none-any.whl → 0.5.0.dev37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. langflow/api/v1/knowledge_bases.py +16 -9
  2. langflow/api/v2/files.py +3 -1
  3. langflow/base/data/kb_utils.py +33 -0
  4. langflow/base/models/model.py +3 -3
  5. langflow/components/agents/mcp_component.py +40 -55
  6. langflow/components/data/kb_ingest.py +116 -43
  7. langflow/components/data/kb_retrieval.py +24 -26
  8. langflow/components/docling/__init__.py +198 -0
  9. langflow/components/docling/docling_inline.py +102 -60
  10. langflow/components/processing/save_file.py +6 -32
  11. langflow/components/vectorstores/astradb.py +30 -19
  12. langflow/frontend/assets/{SlackIcon-B260Qg_R.js → SlackIcon-CnvyOamQ.js} +1 -1
  13. langflow/frontend/assets/{Wikipedia-BB2mbgyd.js → Wikipedia-nyTEXdr2.js} +1 -1
  14. langflow/frontend/assets/{Wolfram-DytXC9hF.js → Wolfram-BYMQkNSq.js} +1 -1
  15. langflow/frontend/assets/{index-BdIWbCEL.js → index-8WdfSTTz.js} +1 -1
  16. langflow/frontend/assets/{index-D87Zw62M.js → index-8yMsjVV2.js} +1 -1
  17. langflow/frontend/assets/{index-DyJDHm2D.js → index-B1YN7oMV.js} +1 -1
  18. langflow/frontend/assets/{index-BEDxAk3N.js → index-B3Sur4Z3.js} +1 -1
  19. langflow/frontend/assets/{index-DhzEUXfr.js → index-B748uLP1.js} +1 -1
  20. langflow/frontend/assets/{index-4eRtaV45.js → index-BB15_iOb.js} +1 -1
  21. langflow/frontend/assets/{index-C_1RBTul.js → index-BBxAPk1y.js} +1 -1
  22. langflow/frontend/assets/{index-DHlEwAxb.js → index-BCCGvqay.js} +1 -1
  23. langflow/frontend/assets/{index-ci4XHjbJ.js → index-BChjg6Az.js} +3 -3
  24. langflow/frontend/assets/{index-B9Mo3ndZ.js → index-BEMw2Np8.js} +1 -1
  25. langflow/frontend/assets/{index-BKvKC-12.js → index-BFp_O-c9.js} +1 -1
  26. langflow/frontend/assets/{index-Ym6gz0T6.js → index-BIQQCMvz.js} +1 -1
  27. langflow/frontend/assets/{index-CwIxqYlT.js → index-BIXaW2aY.js} +1 -1
  28. langflow/frontend/assets/{index-BxkZkBgQ.js → index-BIzTEqFh.js} +1 -1
  29. langflow/frontend/assets/{index-C76aBV_h.js → index-BLGYN-9b.js} +1 -1
  30. langflow/frontend/assets/{index-B-c82Fnu.js → index-BOB_zsjl.js} +1 -1
  31. langflow/frontend/assets/{index-BbsND1Qg.js → index-BOeo01QB.js} +1 -1
  32. langflow/frontend/assets/{index-DztLFiip.js → index-BQ6NUdMY.js} +1 -1
  33. langflow/frontend/assets/{index-G_U_kPAd.js → index-BR0bkVqX.js} +1 -1
  34. langflow/frontend/assets/{index-R7q8cAek.js → index-BRYjyhAd.js} +1 -1
  35. langflow/frontend/assets/{index-Ccb5B8zG.js → index-BRxvproo.js} +1 -1
  36. langflow/frontend/assets/{index-B8y58M9b.js → index-BTEW9e8P.js} +1 -1
  37. langflow/frontend/assets/{index-DdzVmJHE.js → index-BTrsh9LS.js} +1 -1
  38. langflow/frontend/assets/{index-CkSzjCqM.js → index-BVEZDXxS.js} +1 -1
  39. langflow/frontend/assets/{index-BLROcaSz.js → index-BWmPX4iQ.js} +1 -1
  40. langflow/frontend/assets/{index-Ct9_T9ox.js → index-BX5D-USa.js} +1 -1
  41. langflow/frontend/assets/{index-DtJyCbzF.js → index-BZgXW854.js} +1 -1
  42. langflow/frontend/assets/{index-CkQ-bJ4G.js → index-BbJjt5m4.js} +1 -1
  43. langflow/frontend/assets/{index-D5PeCofu.js → index-BbRm7beF.js} +1 -1
  44. langflow/frontend/assets/{index-Uq2ij_SS.js → index-Bd6WtbKA.js} +1 -1
  45. langflow/frontend/assets/{index-sS6XLk3j.js → index-BhIOhlCH.js} +1 -1
  46. langflow/frontend/assets/{index-dkS0ek2S.js → index-BkPYpfgw.js} +1 -1
  47. langflow/frontend/assets/{index-BOYTBrh9.js → index-BmX5CoED.js} +1 -1
  48. langflow/frontend/assets/{index-CMGZGIx_.js → index-Bnqod3vk.js} +1 -1
  49. langflow/frontend/assets/{index-Bisa4IQF.js → index-Boso-xEw.js} +1 -1
  50. langflow/frontend/assets/{index-DqSH4x-R.js → index-BqPpO6KG.js} +1 -1
  51. langflow/frontend/assets/{index-B5ed-sAv.js → index-Bsa0xZyL.js} +1 -1
  52. langflow/frontend/assets/{index-CoUlHbtg.js → index-Bv8h2Z-q.js} +1 -1
  53. langflow/frontend/assets/{index-tOy_uloT.js → index-BvT7L317.js} +1 -1
  54. langflow/frontend/assets/{index-D-zkHcob.js → index-BvwZfF2i.js} +1 -1
  55. langflow/frontend/assets/{index-DxIs8VSp.js → index-Bvxg4_ux.js} +1 -1
  56. langflow/frontend/assets/{index-CqDUqHfd.js → index-BxEuHa76.js} +1 -1
  57. langflow/frontend/assets/{index-DX7XsAcx.js → index-BzEUlaw_.js} +1 -1
  58. langflow/frontend/assets/{index-BCK-ZyIh.js → index-BzL_EoKd.js} +1 -1
  59. langflow/frontend/assets/{index-BNbWMmAV.js → index-C-2hghRJ.js} +1 -1
  60. langflow/frontend/assets/{index-CWWo2zOA.js → index-C26RqKWL.js} +1 -1
  61. langflow/frontend/assets/{index-BcgB3rXH.js → index-C6jri9Wm.js} +1 -1
  62. langflow/frontend/assets/{index-mBjJYD9q.js → index-C7QWbnLK.js} +1 -1
  63. langflow/frontend/assets/{index-D0HmkH0H.js → index-C82JjCPD.js} +1 -1
  64. langflow/frontend/assets/{index-Cpgkb0Q3.js → index-CCePCqkT.js} +1 -1
  65. langflow/frontend/assets/{index-IFGgPiye.js → index-CCxGSSTT.js} +1 -1
  66. langflow/frontend/assets/{index-BOEf7-ty.js → index-CFDvOtKC.js} +1 -1
  67. langflow/frontend/assets/{index-Ba3RTMXI.js → index-CJo_cyWW.js} +1 -1
  68. langflow/frontend/assets/{index-Cx__T92e.js → index-CLPdN-q6.js} +1 -1
  69. langflow/frontend/assets/{index-CF4dtI6S.js → index-CQMoqLAu.js} +1 -1
  70. langflow/frontend/assets/{index-3qMh9x6K.js → index-CTrt1Q_j.js} +1 -1
  71. langflow/frontend/assets/{index-rcdQpNcU.js → index-CVQmT7ZL.js} +1 -1
  72. langflow/frontend/assets/{index-Dpz3oBf5.js → index-CWdkbVsd.js} +1 -1
  73. langflow/frontend/assets/{index-D0s9f6Re.js → index-CYDAYm-i.js} +1 -1
  74. langflow/frontend/assets/{index-BjENqyKe.js → index-CYe8Ipef.js} +1 -1
  75. langflow/frontend/assets/{index-ByFXr9Iq.js → index-CZQ9rXNa.js} +1 -1
  76. langflow/frontend/assets/{index-VcXZzovW.js → index-C_TdzfAn.js} +1 -1
  77. langflow/frontend/assets/{index-LrMzDsq9.js → index-C_veJlEb.js} +1 -1
  78. langflow/frontend/assets/{index-BdYgKk1d.js → index-CaQ_H9ww.js} +1 -1
  79. langflow/frontend/assets/{index-CHFO5O4g.js → index-Car-zdor.js} +1 -1
  80. langflow/frontend/assets/{index-DZzbmg3J.js → index-ChXJpBz4.js} +1 -1
  81. langflow/frontend/assets/{index-C7x9R_Yo.js → index-CmplyEaa.js} +1 -1
  82. langflow/frontend/assets/{index-Cd5zuUUK.js → index-CpcbQZIF.js} +1 -1
  83. langflow/frontend/assets/{index-D9eflZfP.js → index-CpvYQ0ug.js} +1 -1
  84. langflow/frontend/assets/{index-DS1EgA10.js → index-CvcEzq4x.js} +1 -1
  85. langflow/frontend/assets/{index-hOkEW3JP.js → index-CxvP91st.js} +1 -1
  86. langflow/frontend/assets/{index-DasrI03Y.js → index-CyPvTB63.js} +1 -1
  87. langflow/frontend/assets/{index-BJrY2Fiu.js → index-D-9TI74R.js} +1 -1
  88. langflow/frontend/assets/{index-BlBl2tvQ.js → index-D3DDfngy.js} +1 -1
  89. langflow/frontend/assets/{index-DzeIsaBm.js → index-D5_DsUJc.js} +1 -1
  90. langflow/frontend/assets/{index-AY5Dm2mG.js → index-D6PSjHxP.js} +1 -1
  91. langflow/frontend/assets/{index-C9N80hP8.js → index-D8GJngXa.js} +1 -1
  92. langflow/frontend/assets/{index-BxWXWRmZ.js → index-D8lOi1GI.js} +1 -1
  93. langflow/frontend/assets/{index-DWkMJnbd.js → index-DCRk27Tp.js} +1 -1
  94. langflow/frontend/assets/{index-BnLT29qW.js → index-DF5VwgU6.js} +1 -1
  95. langflow/frontend/assets/{index-7xXgqu09.js → index-DGRMNe9n.js} +1 -1
  96. langflow/frontend/assets/{index-3TJWUdmx.js → index-DHq8TQPB.js} +1 -1
  97. langflow/frontend/assets/{index-BVtf6m9S.js → index-DIDDfmlJ.js} +1 -1
  98. langflow/frontend/assets/{index-B2ggrBuR.js → index-DIkNW9Cd.js} +1 -1
  99. langflow/frontend/assets/{index-r1LZg-PY.js → index-DJB12jIC.js} +1 -1
  100. langflow/frontend/assets/{index-DS9I4y48.js → index-DK1Ptcc4.js} +1 -1
  101. langflow/frontend/assets/{index-CG7cp0nD.js → index-DKHNourL.js} +1 -1
  102. langflow/frontend/assets/{index-BeNby7qF.js → index-DPCzHdsC.js} +1 -1
  103. langflow/frontend/assets/{index-COL0eiWI.js → index-DVlceYFD.js} +1 -1
  104. langflow/frontend/assets/{index-DK8vNpXK.js → index-DZTC5pdT.js} +1 -1
  105. langflow/frontend/assets/{index-Baka5dKE.js → index-Db71w3lq.js} +1 -1
  106. langflow/frontend/assets/{index-Du9aJK7m.js → index-DbMFlnHE.js} +1 -1
  107. langflow/frontend/assets/{index-CvQ0w8Pj.js → index-DfngcQxO.js} +1 -1
  108. langflow/frontend/assets/{index-DIqSyDVO.js → index-DfxYyS3M.js} +1 -1
  109. langflow/frontend/assets/{index-3uOAA_XX.js → index-Dg-63Si_.js} +1 -1
  110. langflow/frontend/assets/{index-BsBWP-Dh.js → index-DjQETUy8.js} +1 -1
  111. langflow/frontend/assets/{index-CDFLVFB4.js → index-DkXy1WFo.js} +1 -1
  112. langflow/frontend/assets/{index-B8TlNgn-.js → index-DkelbYy7.js} +1 -1
  113. langflow/frontend/assets/{index-GODbXlHC.js → index-DmMDPoi0.js} +1 -1
  114. langflow/frontend/assets/{index-DpQKtcXu.js → index-DnEGCgih.js} +1 -1
  115. langflow/frontend/assets/{index-VHmUHUUU.js → index-DpClkXIV.js} +1 -1
  116. langflow/frontend/assets/{index-BRWNIt9F.js → index-Dq5ilsem.js} +1 -1
  117. langflow/frontend/assets/{index-DDNNv4C0.js → index-Dqd4RjYA.js} +1 -1
  118. langflow/frontend/assets/{index-C2Xd7UkR.js → index-Dsps-jKu.js} +1 -1
  119. langflow/frontend/assets/{index-BVHvIhT5.js → index-Du_18NCU.js} +1 -1
  120. langflow/frontend/assets/{index-C7V5U9yH.js → index-DysKpOuj.js} +1 -1
  121. langflow/frontend/assets/{index-Bxml6wXu.js → index-DytJENYD.js} +1 -1
  122. langflow/frontend/assets/{index-BWq9GTzt.js → index-DzW2mfkK.js} +1 -1
  123. langflow/frontend/assets/{index-js8ceOaP.js → index-FUxmznS-.js} +1 -1
  124. langflow/frontend/assets/{index-DuAeoC-H.js → index-Gkrq-vzm.js} +1 -1
  125. langflow/frontend/assets/{index-DPX6X_bw.js → index-HK3bVMYA.js} +1 -1
  126. langflow/frontend/assets/{index-BEKoRwsX.js → index-LbYjHKkn.js} +1 -1
  127. langflow/frontend/assets/{index-C8KD3LPb.js → index-OazXJdEl.js} +1 -1
  128. langflow/frontend/assets/{index-DpJiH-Rk.js → index-Q9vDw0Xl.js} +1 -1
  129. langflow/frontend/assets/{index-DWr_zPkx.js → index-Ui4xUImO.js} +1 -1
  130. langflow/frontend/assets/{index-BejHxU5W.js → index-WPFivmdQ.js} +1 -1
  131. langflow/frontend/assets/{index-lKEJpUsF.js → index-_UcqeEjm.js} +1 -1
  132. langflow/frontend/assets/{index-VZnN0P6C.js → index-ajRge-Mg.js} +1 -1
  133. langflow/frontend/assets/{index-BQB-iDYl.js → index-cvZdgWHQ.js} +1 -1
  134. langflow/frontend/assets/{index-AlJ7td-D.js → index-dcnYpT9N.js} +1 -1
  135. langflow/frontend/assets/{index-DKEXZFUO.js → index-l7bzB8Ex.js} +1 -1
  136. langflow/frontend/assets/index-nVwHLjuV.js +1 -0
  137. langflow/frontend/assets/{index-BtJ2o21k.js → index-pCQ_yw8m.js} +1 -1
  138. langflow/frontend/assets/{index-B536IPXH.js → index-rXV1G1aB.js} +1 -1
  139. langflow/frontend/assets/{index-BIkqesA-.js → index-tVYiABdp.js} +1 -1
  140. langflow/frontend/assets/{index-CJwYfDBz.js → index-xuIrH2Dq.js} +1 -1
  141. langflow/frontend/assets/{index-BXMhmvTj.js → index-yCHsaqs8.js} +1 -1
  142. langflow/frontend/assets/{index-BqUeOc7Y.js → index-ya2uXE8v.js} +1 -1
  143. langflow/frontend/assets/lazyIconImports-t6wEndt1.js +2 -0
  144. langflow/frontend/assets/{use-post-add-user-HN0rRnhv.js → use-post-add-user-BrBYH9eR.js} +1 -1
  145. langflow/frontend/index.html +1 -1
  146. langflow/initial_setup/starter_projects/Hybrid Search RAG.json +2 -2
  147. langflow/initial_setup/starter_projects/Knowledge Ingestion.json +2 -2
  148. langflow/initial_setup/starter_projects/Knowledge Retrieval.json +2 -2
  149. langflow/initial_setup/starter_projects/News Aggregator.json +2 -19
  150. langflow/initial_setup/starter_projects/Nvidia Remix.json +2 -19
  151. langflow/initial_setup/starter_projects/Vector Store RAG.json +4 -4
  152. langflow/processing/process.py +1 -1
  153. {langflow_base_nightly-0.5.0.dev35.dist-info → langflow_base_nightly-0.5.0.dev37.dist-info}/METADATA +1 -1
  154. {langflow_base_nightly-0.5.0.dev35.dist-info → langflow_base_nightly-0.5.0.dev37.dist-info}/RECORD +156 -155
  155. langflow/frontend/assets/lazyIconImports-Bh1TFfvH.js +0 -2
  156. {langflow_base_nightly-0.5.0.dev35.dist-info → langflow_base_nightly-0.5.0.dev37.dist-info}/WHEEL +0 -0
  157. {langflow_base_nightly-0.5.0.dev35.dist-info → langflow_base_nightly-0.5.0.dev37.dist-info}/entry_points.txt +0 -0
langflow/components/data/kb_retrieval.py

@@ -5,13 +5,16 @@ from typing import Any
 from cryptography.fernet import InvalidToken
 from langchain_chroma import Chroma
 from loguru import logger
+from pydantic import SecretStr
 
+from langflow.base.data.kb_utils import get_knowledge_bases
 from langflow.custom import Component
 from langflow.io import BoolInput, DropdownInput, IntInput, MessageTextInput, Output, SecretStrInput
 from langflow.schema.data import Data
 from langflow.schema.dataframe import DataFrame
 from langflow.services.auth.utils import decrypt_api_key
-from langflow.services.deps import get_settings_service
+from langflow.services.database.models.user.crud import get_user_by_id
+from langflow.services.deps import get_settings_service, session_scope
 
 settings = get_settings_service().settings
 knowledge_directory = settings.knowledge_bases_dir
@@ -33,11 +36,7 @@ class KBRetrievalComponent(Component):
             display_name="Knowledge",
             info="Select the knowledge to load data from.",
             required=True,
-            options=[
-                str(d.name) for d in KNOWLEDGE_BASES_ROOT_PATH.iterdir() if not d.name.startswith(".") and d.is_dir()
-            ]
-            if KNOWLEDGE_BASES_ROOT_PATH.exists()
-            else [],
+            options=[],
             refresh_button=True,
             real_time_refresh=True,
         ),
@@ -79,21 +78,13 @@ class KBRetrievalComponent(Component):
         ),
     ]
 
-    def _get_knowledge_bases(self) -> list[str]:
-        """Retrieve a list of available knowledge bases.
-
-        Returns:
-            A list of knowledge base names.
-        """
-        if not KNOWLEDGE_BASES_ROOT_PATH.exists():
-            return []
-
-        return [str(d.name) for d in KNOWLEDGE_BASES_ROOT_PATH.iterdir() if not d.name.startswith(".") and d.is_dir()]
-
-    def update_build_config(self, build_config, field_value, field_name=None):  # noqa: ARG002
+    async def update_build_config(self, build_config, field_value, field_name=None):  # noqa: ARG002
         if field_name == "knowledge_base":
             # Update the knowledge base options dynamically
-            build_config["knowledge_base"]["options"] = self._get_knowledge_bases()
+            build_config["knowledge_base"]["options"] = await get_knowledge_bases(
+                KNOWLEDGE_BASES_ROOT_PATH,
+                user_id=self.user_id,  # Use the user_id from the component context
+            )
 
             # If the selected knowledge base is not available, reset it
             if build_config["knowledge_base"]["value"] not in build_config["knowledge_base"]["options"]:
@@ -129,15 +120,12 @@ class KBRetrievalComponent(Component):
 
     def _build_embeddings(self, metadata: dict):
         """Build embedding model from metadata."""
+        runtime_api_key = self.api_key.get_secret_value() if isinstance(self.api_key, SecretStr) else self.api_key
         provider = metadata.get("embedding_provider")
         model = metadata.get("embedding_model")
-        api_key = metadata.get("api_key")
+        api_key = runtime_api_key or metadata.get("api_key")
         chunk_size = metadata.get("chunk_size")
 
-        # If user provided a key in the input, it overrides the stored one.
-        if self.api_key and self.api_key.get_secret_value():
-            api_key = self.api_key.get_secret_value()
-
         # Handle various providers
         if provider == "OpenAI":
             from langchain_openai import OpenAIEmbeddings
@@ -174,13 +162,23 @@ class KBRetrievalComponent(Component):
             msg = f"Embedding provider '{provider}' is not supported for retrieval."
             raise NotImplementedError(msg)
 
-    def get_chroma_kb_data(self) -> DataFrame:
+    async def get_chroma_kb_data(self) -> DataFrame:
         """Retrieve data from the selected knowledge base by reading the Chroma collection.
 
         Returns:
             A DataFrame containing the data rows from the knowledge base.
         """
-        kb_path = KNOWLEDGE_BASES_ROOT_PATH / self.knowledge_base
+        # Get the current user
+        async with session_scope() as db:
+            if not self.user_id:
+                msg = "User ID is required for fetching Knowledge Base data."
+                raise ValueError(msg)
+            current_user = await get_user_by_id(db, self.user_id)
+            if not current_user:
+                msg = f"User with ID {self.user_id} not found."
+                raise ValueError(msg)
+            kb_user = current_user.username
+        kb_path = KNOWLEDGE_BASES_ROOT_PATH / kb_user / self.knowledge_base
 
         metadata = self._get_kb_metadata(kb_path)
         if not metadata:
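Note on the kb_retrieval.py changes above: knowledge bases are now resolved per user. The dropdown options come from the async get_knowledge_bases helper (added in langflow/base/data/kb_utils.py, whose body is not shown in this diff), and the Chroma path is built as KNOWLEDGE_BASES_ROOT_PATH / <username> / <kb_name>. A minimal sketch of that layout, assuming the helper effectively lists per-user subdirectories; the list_user_kbs name is illustrative, not part of the package:

    # Illustrative sketch of the per-user knowledge-base layout implied by the diff above.
    # The real helper lives in langflow/base/data/kb_utils.py and is async; this is a
    # simplified, synchronous stand-in that reads the same directory structure.
    from pathlib import Path

    def list_user_kbs(root: Path, username: str) -> list[str]:
        """List knowledge bases stored under <root>/<username>/<kb_name>."""
        user_dir = root / username
        if not user_dir.exists():
            return []
        return [d.name for d in user_dir.iterdir() if d.is_dir() and not d.name.startswith(".")]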
langflow/components/docling/__init__.py

@@ -1,7 +1,13 @@
 from __future__ import annotations
 
+import signal
+import sys
+import traceback
+from contextlib import suppress
 from typing import TYPE_CHECKING, Any
 
+from loguru import logger
+
 from langflow.components._importing import import_mod
 
 if TYPE_CHECKING:
@@ -41,3 +47,195 @@ def __getattr__(attr_name: str) -> Any:
 
 def __dir__() -> list[str]:
     return list(__all__)
+
+
+def docling_worker(file_paths: list[str], queue, pipeline: str, ocr_engine: str):
+    """Worker function for processing files with Docling in a separate process."""
+    # Signal handling for graceful shutdown
+    shutdown_requested = False
+
+    def signal_handler(signum: int, frame) -> None:  # noqa: ARG001
+        """Handle shutdown signals gracefully."""
+        nonlocal shutdown_requested
+        signal_names: dict[int, str] = {signal.SIGTERM: "SIGTERM", signal.SIGINT: "SIGINT"}
+        signal_name = signal_names.get(signum, f"signal {signum}")
+
+        logger.debug(f"Docling worker received {signal_name}, initiating graceful shutdown...")
+        shutdown_requested = True
+
+        # Send shutdown notification to parent process
+        with suppress(Exception):
+            queue.put({"error": f"Worker interrupted by {signal_name}", "shutdown": True})
+
+        # Exit gracefully
+        sys.exit(0)
+
+    def check_shutdown() -> None:
+        """Check if shutdown was requested and exit if so."""
+        if shutdown_requested:
+            logger.info("Shutdown requested, exiting worker...")
+
+            with suppress(Exception):
+                queue.put({"error": "Worker shutdown requested", "shutdown": True})
+
+            sys.exit(0)
+
+    # Register signal handlers early
+    try:
+        signal.signal(signal.SIGTERM, signal_handler)
+        signal.signal(signal.SIGINT, signal_handler)
+        logger.debug("Signal handlers registered for graceful shutdown")
+    except (OSError, ValueError) as e:
+        # Some signals might not be available on all platforms
+        logger.warning(f"Warning: Could not register signal handlers: {e}")
+
+    # Check for shutdown before heavy imports
+    check_shutdown()
+
+    try:
+        from docling.datamodel.base_models import ConversionStatus, InputFormat
+        from docling.datamodel.pipeline_options import (
+            OcrOptions,
+            PdfPipelineOptions,
+            VlmPipelineOptions,
+        )
+        from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
+        from docling.models.factories import get_ocr_factory
+        from docling.pipeline.vlm_pipeline import VlmPipeline
+
+        # Check for shutdown after imports
+        check_shutdown()
+        logger.debug("Docling dependencies loaded successfully")
+
+    except ModuleNotFoundError:
+        msg = (
+            "Docling is an optional dependency of Langflow. "
+            "Install with `uv pip install 'langflow[docling]'` "
+            "or refer to the documentation"
+        )
+        queue.put({"error": msg})
+        return
+    except ImportError as e:
+        # A different import failed (e.g., a transitive dependency); preserve details.
+        queue.put({"error": f"Failed to import a Docling dependency: {e}"})
+        return
+    except KeyboardInterrupt:
+        logger.warning("KeyboardInterrupt during imports, exiting...")
+        queue.put({"error": "Worker interrupted during imports", "shutdown": True})
+        return
+
+    # Configure the standard PDF pipeline
+    def _get_standard_opts() -> PdfPipelineOptions:
+        check_shutdown()  # Check before heavy operations
+
+        pipeline_options = PdfPipelineOptions()
+        pipeline_options.do_ocr = ocr_engine != ""
+        if pipeline_options.do_ocr:
+            ocr_factory = get_ocr_factory(
+                allow_external_plugins=False,
+            )
+
+            ocr_options: OcrOptions = ocr_factory.create_options(
+                kind=ocr_engine,
+            )
+            pipeline_options.ocr_options = ocr_options
+        return pipeline_options
+
+    # Configure the VLM pipeline
+    def _get_vlm_opts() -> VlmPipelineOptions:
+        check_shutdown()  # Check before heavy operations
+        return VlmPipelineOptions()
+
+    # Configure the main format options and create the DocumentConverter()
+    def _get_converter() -> DocumentConverter:
+        check_shutdown()  # Check before heavy operations
+
+        if pipeline == "standard":
+            pdf_format_option = PdfFormatOption(
+                pipeline_options=_get_standard_opts(),
+            )
+        elif pipeline == "vlm":
+            pdf_format_option = PdfFormatOption(pipeline_cls=VlmPipeline, pipeline_options=_get_vlm_opts())
+        else:
+            msg = f"Unknown pipeline: {pipeline!r}"
+            raise ValueError(msg)
+
+        format_options: dict[InputFormat, FormatOption] = {
+            InputFormat.PDF: pdf_format_option,
+            InputFormat.IMAGE: pdf_format_option,
+        }
+
+        return DocumentConverter(format_options=format_options)
+
+    try:
+        # Check for shutdown before creating converter (can be slow)
+        check_shutdown()
+        logger.info(f"Initializing {pipeline} pipeline with OCR: {ocr_engine or 'disabled'}")
+
+        converter = _get_converter()
+
+        # Check for shutdown before processing files
+        check_shutdown()
+        logger.info(f"Starting to process {len(file_paths)} files...")
+
+        # Process files with periodic shutdown checks
+        results = []
+        for i, file_path in enumerate(file_paths):
+            # Check for shutdown before processing each file
+            check_shutdown()
+
+            logger.debug(f"Processing file {i + 1}/{len(file_paths)}: {file_path}")
+
+            try:
+                # Process single file (we can't easily interrupt convert_all)
+                single_result = converter.convert_all([file_path])
+                results.extend(single_result)
+
+                # Check for shutdown after each file
+                check_shutdown()
+
+            except (OSError, ValueError, RuntimeError, ImportError) as file_error:
+                # Handle specific file processing errors
+                logger.error(f"Error processing file {file_path}: {file_error}")
+                # Continue with other files, but check for shutdown
+                check_shutdown()
+            except Exception as file_error:  # noqa: BLE001
+                # Catch any other unexpected errors to prevent worker crash
+                logger.error(f"Unexpected error processing file {file_path}: {file_error}")
+                # Continue with other files, but check for shutdown
+                check_shutdown()
+
+        # Final shutdown check before sending results
+        check_shutdown()
+
+        # Process the results while maintaining the original structure
+        processed_data = [
+            {"document": res.document, "file_path": str(res.input.file), "status": res.status.name}
+            if res.status == ConversionStatus.SUCCESS
+            else None
+            for res in results
+        ]
+
+        logger.info(f"Successfully processed {len([d for d in processed_data if d])} files")
+        queue.put(processed_data)
+
+    except KeyboardInterrupt:
+        logger.warning("KeyboardInterrupt during processing, exiting gracefully...")
+        queue.put({"error": "Worker interrupted during processing", "shutdown": True})
+        return
+    except Exception as e:  # noqa: BLE001
+        if shutdown_requested:
+            logger.exception("Exception occurred during shutdown, exiting...")
+            return
+
+        # Send any processing error to the main process with traceback
+        error_info = {"error": str(e), "traceback": traceback.format_exc()}
+        logger.error(f"Error in worker: {error_info}")
+        queue.put(error_info)
+    finally:
+        logger.info("Docling worker finishing...")
+        # Ensure we don't leave any hanging processes
+        if shutdown_requested:
+            logger.debug("Worker shutdown completed")
+        else:
+            logger.debug("Worker completed normally")
langflow/components/docling/docling_inline.py

@@ -1,4 +1,9 @@
+import time
+from multiprocessing import Queue, get_context
+from queue import Empty
+
 from langflow.base.data import BaseFileComponent
+from langflow.components.docling import docling_worker
 from langflow.inputs import DropdownInput
 from langflow.schema import Data
 
@@ -69,73 +74,110 @@ class DoclingInlineComponent(BaseFileComponent):
         *BaseFileComponent._base_outputs,
     ]
 
-    def process_files(self, file_list: list[BaseFileComponent.BaseFile]) -> list[BaseFileComponent.BaseFile]:
-        try:
-            from docling.datamodel.base_models import ConversionStatus, InputFormat
-            from docling.datamodel.pipeline_options import (
-                OcrOptions,
-                PdfPipelineOptions,
-                VlmPipelineOptions,
-            )
-            from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
-            from docling.models.factories import get_ocr_factory
-            from docling.pipeline.vlm_pipeline import VlmPipeline
-        except ImportError as e:
-            msg = (
-                "Docling is not installed. Please install it with `uv pip install docling` or"
-                " `uv pip install langflow[docling]`."
-            )
-            raise ImportError(msg) from e
-
-        # Configure the standard PDF pipeline
-        def _get_standard_opts() -> PdfPipelineOptions:
-            pipeline_options = PdfPipelineOptions()
-            pipeline_options.do_ocr = self.ocr_engine != ""
-            if pipeline_options.do_ocr:
-                ocr_factory = get_ocr_factory(
-                    allow_external_plugins=False,
-                )
-
-                ocr_options: OcrOptions = ocr_factory.create_options(
-                    kind=self.ocr_engine,
-                )
-                pipeline_options.ocr_options = ocr_options
-            return pipeline_options
-
-        # Configure the VLM pipeline
-        def _get_vlm_opts() -> VlmPipelineOptions:
-            return VlmPipelineOptions()
-
-        # Configure the main format options and create the DocumentConverter()
-        def _get_converter() -> DocumentConverter:
-            if self.pipeline == "standard":
-                pdf_format_option = PdfFormatOption(
-                    pipeline_options=_get_standard_opts(),
-                )
-            elif self.pipeline == "vlm":
-                pdf_format_option = PdfFormatOption(pipeline_cls=VlmPipeline, pipeline_options=_get_vlm_opts())
-
-            format_options: dict[InputFormat, FormatOption] = {
-                InputFormat.PDF: pdf_format_option,
-                InputFormat.IMAGE: pdf_format_option,
-            }
-
-            return DocumentConverter(format_options=format_options)
+    def _wait_for_result_with_process_monitoring(self, queue: Queue, proc, timeout: int = 300):
+        """Wait for result from queue while monitoring process health.
+
+        Handles cases where process crashes without sending result.
+        """
+        start_time = time.time()
+
+        while time.time() - start_time < timeout:
+            # Check if process is still alive
+            if not proc.is_alive():
+                # Process died, try to get any result it might have sent
+                try:
+                    result = queue.get_nowait()
+                except Empty:
+                    # Process died without sending result
+                    msg = f"Worker process crashed unexpectedly without producing result. Exit code: {proc.exitcode}"
+                    raise RuntimeError(msg) from None
+                else:
+                    self.log("Process completed and result retrieved")
+                    return result
+
+            # Poll the queue instead of blocking
+            try:
+                result = queue.get(timeout=1)
+            except Empty:
+                # No result yet, continue monitoring
+                continue
+            else:
+                self.log("Result received from worker process")
+                return result
+
+        # Overall timeout reached
+        msg = f"Process timed out after {timeout} seconds"
+        raise TimeoutError(msg)
+
+    def _terminate_process_gracefully(self, proc, timeout_terminate: int = 10, timeout_kill: int = 5):
+        """Terminate process gracefully with escalating signals.
+
+        First tries SIGTERM, then SIGKILL if needed.
+        """
+        if not proc.is_alive():
+            return
+
+        self.log("Attempting graceful process termination with SIGTERM")
+        proc.terminate()  # Send SIGTERM
+        proc.join(timeout=timeout_terminate)
+
+        if proc.is_alive():
+            self.log("Process didn't respond to SIGTERM, using SIGKILL")
+            proc.kill()  # Send SIGKILL
+            proc.join(timeout=timeout_kill)
+
+        if proc.is_alive():
+            self.log("Warning: Process still alive after SIGKILL")
 
+    def process_files(self, file_list: list[BaseFileComponent.BaseFile]) -> list[BaseFileComponent.BaseFile]:
         file_paths = [file.path for file in file_list if file.path]
 
         if not file_paths:
             self.log("No files to process.")
             return file_list
 
-        converter = _get_converter()
-        results = converter.convert_all(file_paths)
+        ctx = get_context("spawn")
+        queue: Queue = ctx.Queue()
+        proc = ctx.Process(
+            target=docling_worker,
+            args=(file_paths, queue, self.pipeline, self.ocr_engine),
+        )
 
-        processed_data: list[Data | None] = [
-            Data(data={"doc": res.document, "file_path": str(res.input.file)})
-            if res.status == ConversionStatus.SUCCESS
-            else None
-            for res in results
-        ]
+        result = None
+        proc.start()
 
+        try:
+            result = self._wait_for_result_with_process_monitoring(queue, proc, timeout=300)
+        except KeyboardInterrupt:
+            self.log("Docling process cancelled by user")
+            result = []
+        except Exception as e:
+            self.log(f"Error during processing: {e}")
+            raise
+        finally:
+            # Improved cleanup with graceful termination
+            try:
+                self._terminate_process_gracefully(proc)
+            finally:
+                # Always close and cleanup queue resources
+                try:
+                    queue.close()
+                    queue.join_thread()
+                except Exception as e:  # noqa: BLE001
+                    # Ignore cleanup errors, but log them
+                    self.log(f"Warning: Error during queue cleanup - {e}")
+
+        # Check if there was an error in the worker
+        if isinstance(result, dict) and "error" in result:
+            msg = result["error"]
+            if msg.startswith("Docling is not installed"):
+                raise ImportError(msg)
+            # Handle interrupt gracefully - return empty result instead of raising error
+            if "Worker interrupted by SIGINT" in msg or "shutdown" in result:
+                self.log("Docling process cancelled by user")
+                result = []
+            else:
+                raise RuntimeError(msg)
+
+        processed_data = [Data(data={"doc": r["document"], "file_path": r["file_path"]}) if r else None for r in result]
         return self.rollup_data(file_list, processed_data)
langflow/components/processing/save_file.py

@@ -1,7 +1,6 @@
 import json
 from collections.abc import AsyncIterator, Iterator
 from pathlib import Path
-from typing import TYPE_CHECKING
 
 import orjson
 import pandas as pd
@@ -10,16 +9,12 @@ from fastapi.encoders import jsonable_encoder
 
 from langflow.api.v2.files import upload_user_file
 from langflow.custom import Component
-from langflow.io import DropdownInput, HandleInput, SecretStrInput, StrInput
+from langflow.io import DropdownInput, HandleInput, StrInput
 from langflow.schema import Data, DataFrame, Message
-from langflow.services.auth.utils import create_user_longterm_token, get_current_user
 from langflow.services.database.models.user.crud import get_user_by_id
-from langflow.services.deps import get_session, get_settings_service, get_storage_service
+from langflow.services.deps import get_settings_service, get_storage_service, session_scope
 from langflow.template.field.base import Output
 
-if TYPE_CHECKING:
-    from langflow.services.database.models.user.model import User
-
 
 class SaveToFileComponent(Component):
     display_name = "Save File"
@@ -55,13 +50,6 @@ class SaveToFileComponent(Component):
             value="",
             advanced=True,
         ),
-        SecretStrInput(
-            name="api_key",
-            display_name="Langflow API Key",
-            info="Langflow API key for authentication when saving the file.",
-            required=False,
-            advanced=True,
-        ),
     ]
 
     outputs = [Output(display_name="File Path", name="message", method="save_to_file")]
@@ -148,25 +136,11 @@ class SaveToFileComponent(Component):
             raise FileNotFoundError(msg)
 
         with file_path.open("rb") as f:
-            async for db in get_session():
-                # TODO: In 1.6, this may need to be removed or adjusted
-                # Try to get the super user token, if possible
-                current_user: User | None = None
-                if self.api_key:
-                    current_user = await get_current_user(
-                        token="",
-                        query_param=self.api_key,
-                        header_param="",
-                        db=db,
-                    )
-                else:
-                    user_id, _ = await create_user_longterm_token(db)
-                    current_user = await get_user_by_id(db, user_id)
-
-                # Fail if the user is not found
-                if not current_user:
-                    msg = "User not found. Please provide a valid API key or ensure the user exists."
+            async with session_scope() as db:
+                if not self.user_id:
+                    msg = "User ID is required for file saving."
                     raise ValueError(msg)
+                current_user = await get_user_by_id(db, self.user_id)
 
                 await upload_user_file(
                     file=UploadFile(filename=file_path.name, file=f, size=file_path.stat().st_size),
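Taken together with the kb_retrieval.py hunks earlier, save_file.py follows the same shift: instead of accepting a Langflow API key input, components now resolve the acting user from self.user_id inside a session_scope() database session. A condensed sketch of that shared pattern, assuming a caller that has a user_id available; the resolve_current_user name is illustrative, not part of the package:

    # Illustrative sketch of the user-resolution pattern now used by both components.
    from langflow.services.database.models.user.crud import get_user_by_id
    from langflow.services.deps import session_scope

    async def resolve_current_user(user_id):
        if not user_id:
            msg = "User ID is required."
            raise ValueError(msg)
        async with session_scope() as db:
            user = await get_user_by_id(db, user_id)
        if not user:
            msg = f"User with ID {user_id} not found."
            raise ValueError(msg)
        return user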