langflow-base-nightly 0.5.0.dev35__py3-none-any.whl → 0.5.0.dev37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langflow/api/v1/knowledge_bases.py +16 -9
- langflow/api/v2/files.py +3 -1
- langflow/base/data/kb_utils.py +33 -0
- langflow/base/models/model.py +3 -3
- langflow/components/agents/mcp_component.py +40 -55
- langflow/components/data/kb_ingest.py +116 -43
- langflow/components/data/kb_retrieval.py +24 -26
- langflow/components/docling/__init__.py +198 -0
- langflow/components/docling/docling_inline.py +102 -60
- langflow/components/processing/save_file.py +6 -32
- langflow/components/vectorstores/astradb.py +30 -19
- langflow/frontend/assets/{SlackIcon-B260Qg_R.js → SlackIcon-CnvyOamQ.js} +1 -1
- langflow/frontend/assets/{Wikipedia-BB2mbgyd.js → Wikipedia-nyTEXdr2.js} +1 -1
- langflow/frontend/assets/{Wolfram-DytXC9hF.js → Wolfram-BYMQkNSq.js} +1 -1
- langflow/frontend/assets/{index-BdIWbCEL.js → index-8WdfSTTz.js} +1 -1
- langflow/frontend/assets/{index-D87Zw62M.js → index-8yMsjVV2.js} +1 -1
- langflow/frontend/assets/{index-DyJDHm2D.js → index-B1YN7oMV.js} +1 -1
- langflow/frontend/assets/{index-BEDxAk3N.js → index-B3Sur4Z3.js} +1 -1
- langflow/frontend/assets/{index-DhzEUXfr.js → index-B748uLP1.js} +1 -1
- langflow/frontend/assets/{index-4eRtaV45.js → index-BB15_iOb.js} +1 -1
- langflow/frontend/assets/{index-C_1RBTul.js → index-BBxAPk1y.js} +1 -1
- langflow/frontend/assets/{index-DHlEwAxb.js → index-BCCGvqay.js} +1 -1
- langflow/frontend/assets/{index-ci4XHjbJ.js → index-BChjg6Az.js} +3 -3
- langflow/frontend/assets/{index-B9Mo3ndZ.js → index-BEMw2Np8.js} +1 -1
- langflow/frontend/assets/{index-BKvKC-12.js → index-BFp_O-c9.js} +1 -1
- langflow/frontend/assets/{index-Ym6gz0T6.js → index-BIQQCMvz.js} +1 -1
- langflow/frontend/assets/{index-CwIxqYlT.js → index-BIXaW2aY.js} +1 -1
- langflow/frontend/assets/{index-BxkZkBgQ.js → index-BIzTEqFh.js} +1 -1
- langflow/frontend/assets/{index-C76aBV_h.js → index-BLGYN-9b.js} +1 -1
- langflow/frontend/assets/{index-B-c82Fnu.js → index-BOB_zsjl.js} +1 -1
- langflow/frontend/assets/{index-BbsND1Qg.js → index-BOeo01QB.js} +1 -1
- langflow/frontend/assets/{index-DztLFiip.js → index-BQ6NUdMY.js} +1 -1
- langflow/frontend/assets/{index-G_U_kPAd.js → index-BR0bkVqX.js} +1 -1
- langflow/frontend/assets/{index-R7q8cAek.js → index-BRYjyhAd.js} +1 -1
- langflow/frontend/assets/{index-Ccb5B8zG.js → index-BRxvproo.js} +1 -1
- langflow/frontend/assets/{index-B8y58M9b.js → index-BTEW9e8P.js} +1 -1
- langflow/frontend/assets/{index-DdzVmJHE.js → index-BTrsh9LS.js} +1 -1
- langflow/frontend/assets/{index-CkSzjCqM.js → index-BVEZDXxS.js} +1 -1
- langflow/frontend/assets/{index-BLROcaSz.js → index-BWmPX4iQ.js} +1 -1
- langflow/frontend/assets/{index-Ct9_T9ox.js → index-BX5D-USa.js} +1 -1
- langflow/frontend/assets/{index-DtJyCbzF.js → index-BZgXW854.js} +1 -1
- langflow/frontend/assets/{index-CkQ-bJ4G.js → index-BbJjt5m4.js} +1 -1
- langflow/frontend/assets/{index-D5PeCofu.js → index-BbRm7beF.js} +1 -1
- langflow/frontend/assets/{index-Uq2ij_SS.js → index-Bd6WtbKA.js} +1 -1
- langflow/frontend/assets/{index-sS6XLk3j.js → index-BhIOhlCH.js} +1 -1
- langflow/frontend/assets/{index-dkS0ek2S.js → index-BkPYpfgw.js} +1 -1
- langflow/frontend/assets/{index-BOYTBrh9.js → index-BmX5CoED.js} +1 -1
- langflow/frontend/assets/{index-CMGZGIx_.js → index-Bnqod3vk.js} +1 -1
- langflow/frontend/assets/{index-Bisa4IQF.js → index-Boso-xEw.js} +1 -1
- langflow/frontend/assets/{index-DqSH4x-R.js → index-BqPpO6KG.js} +1 -1
- langflow/frontend/assets/{index-B5ed-sAv.js → index-Bsa0xZyL.js} +1 -1
- langflow/frontend/assets/{index-CoUlHbtg.js → index-Bv8h2Z-q.js} +1 -1
- langflow/frontend/assets/{index-tOy_uloT.js → index-BvT7L317.js} +1 -1
- langflow/frontend/assets/{index-D-zkHcob.js → index-BvwZfF2i.js} +1 -1
- langflow/frontend/assets/{index-DxIs8VSp.js → index-Bvxg4_ux.js} +1 -1
- langflow/frontend/assets/{index-CqDUqHfd.js → index-BxEuHa76.js} +1 -1
- langflow/frontend/assets/{index-DX7XsAcx.js → index-BzEUlaw_.js} +1 -1
- langflow/frontend/assets/{index-BCK-ZyIh.js → index-BzL_EoKd.js} +1 -1
- langflow/frontend/assets/{index-BNbWMmAV.js → index-C-2hghRJ.js} +1 -1
- langflow/frontend/assets/{index-CWWo2zOA.js → index-C26RqKWL.js} +1 -1
- langflow/frontend/assets/{index-BcgB3rXH.js → index-C6jri9Wm.js} +1 -1
- langflow/frontend/assets/{index-mBjJYD9q.js → index-C7QWbnLK.js} +1 -1
- langflow/frontend/assets/{index-D0HmkH0H.js → index-C82JjCPD.js} +1 -1
- langflow/frontend/assets/{index-Cpgkb0Q3.js → index-CCePCqkT.js} +1 -1
- langflow/frontend/assets/{index-IFGgPiye.js → index-CCxGSSTT.js} +1 -1
- langflow/frontend/assets/{index-BOEf7-ty.js → index-CFDvOtKC.js} +1 -1
- langflow/frontend/assets/{index-Ba3RTMXI.js → index-CJo_cyWW.js} +1 -1
- langflow/frontend/assets/{index-Cx__T92e.js → index-CLPdN-q6.js} +1 -1
- langflow/frontend/assets/{index-CF4dtI6S.js → index-CQMoqLAu.js} +1 -1
- langflow/frontend/assets/{index-3qMh9x6K.js → index-CTrt1Q_j.js} +1 -1
- langflow/frontend/assets/{index-rcdQpNcU.js → index-CVQmT7ZL.js} +1 -1
- langflow/frontend/assets/{index-Dpz3oBf5.js → index-CWdkbVsd.js} +1 -1
- langflow/frontend/assets/{index-D0s9f6Re.js → index-CYDAYm-i.js} +1 -1
- langflow/frontend/assets/{index-BjENqyKe.js → index-CYe8Ipef.js} +1 -1
- langflow/frontend/assets/{index-ByFXr9Iq.js → index-CZQ9rXNa.js} +1 -1
- langflow/frontend/assets/{index-VcXZzovW.js → index-C_TdzfAn.js} +1 -1
- langflow/frontend/assets/{index-LrMzDsq9.js → index-C_veJlEb.js} +1 -1
- langflow/frontend/assets/{index-BdYgKk1d.js → index-CaQ_H9ww.js} +1 -1
- langflow/frontend/assets/{index-CHFO5O4g.js → index-Car-zdor.js} +1 -1
- langflow/frontend/assets/{index-DZzbmg3J.js → index-ChXJpBz4.js} +1 -1
- langflow/frontend/assets/{index-C7x9R_Yo.js → index-CmplyEaa.js} +1 -1
- langflow/frontend/assets/{index-Cd5zuUUK.js → index-CpcbQZIF.js} +1 -1
- langflow/frontend/assets/{index-D9eflZfP.js → index-CpvYQ0ug.js} +1 -1
- langflow/frontend/assets/{index-DS1EgA10.js → index-CvcEzq4x.js} +1 -1
- langflow/frontend/assets/{index-hOkEW3JP.js → index-CxvP91st.js} +1 -1
- langflow/frontend/assets/{index-DasrI03Y.js → index-CyPvTB63.js} +1 -1
- langflow/frontend/assets/{index-BJrY2Fiu.js → index-D-9TI74R.js} +1 -1
- langflow/frontend/assets/{index-BlBl2tvQ.js → index-D3DDfngy.js} +1 -1
- langflow/frontend/assets/{index-DzeIsaBm.js → index-D5_DsUJc.js} +1 -1
- langflow/frontend/assets/{index-AY5Dm2mG.js → index-D6PSjHxP.js} +1 -1
- langflow/frontend/assets/{index-C9N80hP8.js → index-D8GJngXa.js} +1 -1
- langflow/frontend/assets/{index-BxWXWRmZ.js → index-D8lOi1GI.js} +1 -1
- langflow/frontend/assets/{index-DWkMJnbd.js → index-DCRk27Tp.js} +1 -1
- langflow/frontend/assets/{index-BnLT29qW.js → index-DF5VwgU6.js} +1 -1
- langflow/frontend/assets/{index-7xXgqu09.js → index-DGRMNe9n.js} +1 -1
- langflow/frontend/assets/{index-3TJWUdmx.js → index-DHq8TQPB.js} +1 -1
- langflow/frontend/assets/{index-BVtf6m9S.js → index-DIDDfmlJ.js} +1 -1
- langflow/frontend/assets/{index-B2ggrBuR.js → index-DIkNW9Cd.js} +1 -1
- langflow/frontend/assets/{index-r1LZg-PY.js → index-DJB12jIC.js} +1 -1
- langflow/frontend/assets/{index-DS9I4y48.js → index-DK1Ptcc4.js} +1 -1
- langflow/frontend/assets/{index-CG7cp0nD.js → index-DKHNourL.js} +1 -1
- langflow/frontend/assets/{index-BeNby7qF.js → index-DPCzHdsC.js} +1 -1
- langflow/frontend/assets/{index-COL0eiWI.js → index-DVlceYFD.js} +1 -1
- langflow/frontend/assets/{index-DK8vNpXK.js → index-DZTC5pdT.js} +1 -1
- langflow/frontend/assets/{index-Baka5dKE.js → index-Db71w3lq.js} +1 -1
- langflow/frontend/assets/{index-Du9aJK7m.js → index-DbMFlnHE.js} +1 -1
- langflow/frontend/assets/{index-CvQ0w8Pj.js → index-DfngcQxO.js} +1 -1
- langflow/frontend/assets/{index-DIqSyDVO.js → index-DfxYyS3M.js} +1 -1
- langflow/frontend/assets/{index-3uOAA_XX.js → index-Dg-63Si_.js} +1 -1
- langflow/frontend/assets/{index-BsBWP-Dh.js → index-DjQETUy8.js} +1 -1
- langflow/frontend/assets/{index-CDFLVFB4.js → index-DkXy1WFo.js} +1 -1
- langflow/frontend/assets/{index-B8TlNgn-.js → index-DkelbYy7.js} +1 -1
- langflow/frontend/assets/{index-GODbXlHC.js → index-DmMDPoi0.js} +1 -1
- langflow/frontend/assets/{index-DpQKtcXu.js → index-DnEGCgih.js} +1 -1
- langflow/frontend/assets/{index-VHmUHUUU.js → index-DpClkXIV.js} +1 -1
- langflow/frontend/assets/{index-BRWNIt9F.js → index-Dq5ilsem.js} +1 -1
- langflow/frontend/assets/{index-DDNNv4C0.js → index-Dqd4RjYA.js} +1 -1
- langflow/frontend/assets/{index-C2Xd7UkR.js → index-Dsps-jKu.js} +1 -1
- langflow/frontend/assets/{index-BVHvIhT5.js → index-Du_18NCU.js} +1 -1
- langflow/frontend/assets/{index-C7V5U9yH.js → index-DysKpOuj.js} +1 -1
- langflow/frontend/assets/{index-Bxml6wXu.js → index-DytJENYD.js} +1 -1
- langflow/frontend/assets/{index-BWq9GTzt.js → index-DzW2mfkK.js} +1 -1
- langflow/frontend/assets/{index-js8ceOaP.js → index-FUxmznS-.js} +1 -1
- langflow/frontend/assets/{index-DuAeoC-H.js → index-Gkrq-vzm.js} +1 -1
- langflow/frontend/assets/{index-DPX6X_bw.js → index-HK3bVMYA.js} +1 -1
- langflow/frontend/assets/{index-BEKoRwsX.js → index-LbYjHKkn.js} +1 -1
- langflow/frontend/assets/{index-C8KD3LPb.js → index-OazXJdEl.js} +1 -1
- langflow/frontend/assets/{index-DpJiH-Rk.js → index-Q9vDw0Xl.js} +1 -1
- langflow/frontend/assets/{index-DWr_zPkx.js → index-Ui4xUImO.js} +1 -1
- langflow/frontend/assets/{index-BejHxU5W.js → index-WPFivmdQ.js} +1 -1
- langflow/frontend/assets/{index-lKEJpUsF.js → index-_UcqeEjm.js} +1 -1
- langflow/frontend/assets/{index-VZnN0P6C.js → index-ajRge-Mg.js} +1 -1
- langflow/frontend/assets/{index-BQB-iDYl.js → index-cvZdgWHQ.js} +1 -1
- langflow/frontend/assets/{index-AlJ7td-D.js → index-dcnYpT9N.js} +1 -1
- langflow/frontend/assets/{index-DKEXZFUO.js → index-l7bzB8Ex.js} +1 -1
- langflow/frontend/assets/index-nVwHLjuV.js +1 -0
- langflow/frontend/assets/{index-BtJ2o21k.js → index-pCQ_yw8m.js} +1 -1
- langflow/frontend/assets/{index-B536IPXH.js → index-rXV1G1aB.js} +1 -1
- langflow/frontend/assets/{index-BIkqesA-.js → index-tVYiABdp.js} +1 -1
- langflow/frontend/assets/{index-CJwYfDBz.js → index-xuIrH2Dq.js} +1 -1
- langflow/frontend/assets/{index-BXMhmvTj.js → index-yCHsaqs8.js} +1 -1
- langflow/frontend/assets/{index-BqUeOc7Y.js → index-ya2uXE8v.js} +1 -1
- langflow/frontend/assets/lazyIconImports-t6wEndt1.js +2 -0
- langflow/frontend/assets/{use-post-add-user-HN0rRnhv.js → use-post-add-user-BrBYH9eR.js} +1 -1
- langflow/frontend/index.html +1 -1
- langflow/initial_setup/starter_projects/Hybrid Search RAG.json +2 -2
- langflow/initial_setup/starter_projects/Knowledge Ingestion.json +2 -2
- langflow/initial_setup/starter_projects/Knowledge Retrieval.json +2 -2
- langflow/initial_setup/starter_projects/News Aggregator.json +2 -19
- langflow/initial_setup/starter_projects/Nvidia Remix.json +2 -19
- langflow/initial_setup/starter_projects/Vector Store RAG.json +4 -4
- langflow/processing/process.py +1 -1
- {langflow_base_nightly-0.5.0.dev35.dist-info → langflow_base_nightly-0.5.0.dev37.dist-info}/METADATA +1 -1
- {langflow_base_nightly-0.5.0.dev35.dist-info → langflow_base_nightly-0.5.0.dev37.dist-info}/RECORD +156 -155
- langflow/frontend/assets/lazyIconImports-Bh1TFfvH.js +0 -2
- {langflow_base_nightly-0.5.0.dev35.dist-info → langflow_base_nightly-0.5.0.dev37.dist-info}/WHEEL +0 -0
- {langflow_base_nightly-0.5.0.dev35.dist-info → langflow_base_nightly-0.5.0.dev37.dist-info}/entry_points.txt +0 -0
|
@@ -5,13 +5,16 @@ from typing import Any
|
|
|
5
5
|
from cryptography.fernet import InvalidToken
|
|
6
6
|
from langchain_chroma import Chroma
|
|
7
7
|
from loguru import logger
|
|
8
|
+
from pydantic import SecretStr
|
|
8
9
|
|
|
10
|
+
from langflow.base.data.kb_utils import get_knowledge_bases
|
|
9
11
|
from langflow.custom import Component
|
|
10
12
|
from langflow.io import BoolInput, DropdownInput, IntInput, MessageTextInput, Output, SecretStrInput
|
|
11
13
|
from langflow.schema.data import Data
|
|
12
14
|
from langflow.schema.dataframe import DataFrame
|
|
13
15
|
from langflow.services.auth.utils import decrypt_api_key
|
|
14
|
-
from langflow.services.
|
|
16
|
+
from langflow.services.database.models.user.crud import get_user_by_id
|
|
17
|
+
from langflow.services.deps import get_settings_service, session_scope
|
|
15
18
|
|
|
16
19
|
settings = get_settings_service().settings
|
|
17
20
|
knowledge_directory = settings.knowledge_bases_dir
|
|
@@ -33,11 +36,7 @@ class KBRetrievalComponent(Component):
|
|
|
33
36
|
display_name="Knowledge",
|
|
34
37
|
info="Select the knowledge to load data from.",
|
|
35
38
|
required=True,
|
|
36
|
-
options=[
|
|
37
|
-
str(d.name) for d in KNOWLEDGE_BASES_ROOT_PATH.iterdir() if not d.name.startswith(".") and d.is_dir()
|
|
38
|
-
]
|
|
39
|
-
if KNOWLEDGE_BASES_ROOT_PATH.exists()
|
|
40
|
-
else [],
|
|
39
|
+
options=[],
|
|
41
40
|
refresh_button=True,
|
|
42
41
|
real_time_refresh=True,
|
|
43
42
|
),
|
|
@@ -79,21 +78,13 @@ class KBRetrievalComponent(Component):
|
|
|
79
78
|
),
|
|
80
79
|
]
|
|
81
80
|
|
|
82
|
-
def
|
|
83
|
-
"""Retrieve a list of available knowledge bases.
|
|
84
|
-
|
|
85
|
-
Returns:
|
|
86
|
-
A list of knowledge base names.
|
|
87
|
-
"""
|
|
88
|
-
if not KNOWLEDGE_BASES_ROOT_PATH.exists():
|
|
89
|
-
return []
|
|
90
|
-
|
|
91
|
-
return [str(d.name) for d in KNOWLEDGE_BASES_ROOT_PATH.iterdir() if not d.name.startswith(".") and d.is_dir()]
|
|
92
|
-
|
|
93
|
-
def update_build_config(self, build_config, field_value, field_name=None): # noqa: ARG002
|
|
81
|
+
async def update_build_config(self, build_config, field_value, field_name=None): # noqa: ARG002
|
|
94
82
|
if field_name == "knowledge_base":
|
|
95
83
|
# Update the knowledge base options dynamically
|
|
96
|
-
build_config["knowledge_base"]["options"] =
|
|
84
|
+
build_config["knowledge_base"]["options"] = await get_knowledge_bases(
|
|
85
|
+
KNOWLEDGE_BASES_ROOT_PATH,
|
|
86
|
+
user_id=self.user_id, # Use the user_id from the component context
|
|
87
|
+
)
|
|
97
88
|
|
|
98
89
|
# If the selected knowledge base is not available, reset it
|
|
99
90
|
if build_config["knowledge_base"]["value"] not in build_config["knowledge_base"]["options"]:
|
|
@@ -129,15 +120,12 @@ class KBRetrievalComponent(Component):
|
|
|
129
120
|
|
|
130
121
|
def _build_embeddings(self, metadata: dict):
|
|
131
122
|
"""Build embedding model from metadata."""
|
|
123
|
+
runtime_api_key = self.api_key.get_secret_value() if isinstance(self.api_key, SecretStr) else self.api_key
|
|
132
124
|
provider = metadata.get("embedding_provider")
|
|
133
125
|
model = metadata.get("embedding_model")
|
|
134
|
-
api_key = metadata.get("api_key")
|
|
126
|
+
api_key = runtime_api_key or metadata.get("api_key")
|
|
135
127
|
chunk_size = metadata.get("chunk_size")
|
|
136
128
|
|
|
137
|
-
# If user provided a key in the input, it overrides the stored one.
|
|
138
|
-
if self.api_key and self.api_key.get_secret_value():
|
|
139
|
-
api_key = self.api_key.get_secret_value()
|
|
140
|
-
|
|
141
129
|
# Handle various providers
|
|
142
130
|
if provider == "OpenAI":
|
|
143
131
|
from langchain_openai import OpenAIEmbeddings
|
|
@@ -174,13 +162,23 @@ class KBRetrievalComponent(Component):
|
|
|
174
162
|
msg = f"Embedding provider '{provider}' is not supported for retrieval."
|
|
175
163
|
raise NotImplementedError(msg)
|
|
176
164
|
|
|
177
|
-
def get_chroma_kb_data(self) -> DataFrame:
|
|
165
|
+
async def get_chroma_kb_data(self) -> DataFrame:
|
|
178
166
|
"""Retrieve data from the selected knowledge base by reading the Chroma collection.
|
|
179
167
|
|
|
180
168
|
Returns:
|
|
181
169
|
A DataFrame containing the data rows from the knowledge base.
|
|
182
170
|
"""
|
|
183
|
-
|
|
171
|
+
# Get the current user
|
|
172
|
+
async with session_scope() as db:
|
|
173
|
+
if not self.user_id:
|
|
174
|
+
msg = "User ID is required for fetching Knowledge Base data."
|
|
175
|
+
raise ValueError(msg)
|
|
176
|
+
current_user = await get_user_by_id(db, self.user_id)
|
|
177
|
+
if not current_user:
|
|
178
|
+
msg = f"User with ID {self.user_id} not found."
|
|
179
|
+
raise ValueError(msg)
|
|
180
|
+
kb_user = current_user.username
|
|
181
|
+
kb_path = KNOWLEDGE_BASES_ROOT_PATH / kb_user / self.knowledge_base
|
|
184
182
|
|
|
185
183
|
metadata = self._get_kb_metadata(kb_path)
|
|
186
184
|
if not metadata:
|
|
@@ -1,7 +1,13 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import signal
|
|
4
|
+
import sys
|
|
5
|
+
import traceback
|
|
6
|
+
from contextlib import suppress
|
|
3
7
|
from typing import TYPE_CHECKING, Any
|
|
4
8
|
|
|
9
|
+
from loguru import logger
|
|
10
|
+
|
|
5
11
|
from langflow.components._importing import import_mod
|
|
6
12
|
|
|
7
13
|
if TYPE_CHECKING:
|
|
@@ -41,3 +47,195 @@ def __getattr__(attr_name: str) -> Any:
|
|
|
41
47
|
|
|
42
48
|
def __dir__() -> list[str]:
|
|
43
49
|
return list(__all__)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def docling_worker(file_paths: list[str], queue, pipeline: str, ocr_engine: str):
|
|
53
|
+
"""Worker function for processing files with Docling in a separate process."""
|
|
54
|
+
# Signal handling for graceful shutdown
|
|
55
|
+
shutdown_requested = False
|
|
56
|
+
|
|
57
|
+
def signal_handler(signum: int, frame) -> None: # noqa: ARG001
|
|
58
|
+
"""Handle shutdown signals gracefully."""
|
|
59
|
+
nonlocal shutdown_requested
|
|
60
|
+
signal_names: dict[int, str] = {signal.SIGTERM: "SIGTERM", signal.SIGINT: "SIGINT"}
|
|
61
|
+
signal_name = signal_names.get(signum, f"signal {signum}")
|
|
62
|
+
|
|
63
|
+
logger.debug(f"Docling worker received {signal_name}, initiating graceful shutdown...")
|
|
64
|
+
shutdown_requested = True
|
|
65
|
+
|
|
66
|
+
# Send shutdown notification to parent process
|
|
67
|
+
with suppress(Exception):
|
|
68
|
+
queue.put({"error": f"Worker interrupted by {signal_name}", "shutdown": True})
|
|
69
|
+
|
|
70
|
+
# Exit gracefully
|
|
71
|
+
sys.exit(0)
|
|
72
|
+
|
|
73
|
+
def check_shutdown() -> None:
|
|
74
|
+
"""Check if shutdown was requested and exit if so."""
|
|
75
|
+
if shutdown_requested:
|
|
76
|
+
logger.info("Shutdown requested, exiting worker...")
|
|
77
|
+
|
|
78
|
+
with suppress(Exception):
|
|
79
|
+
queue.put({"error": "Worker shutdown requested", "shutdown": True})
|
|
80
|
+
|
|
81
|
+
sys.exit(0)
|
|
82
|
+
|
|
83
|
+
# Register signal handlers early
|
|
84
|
+
try:
|
|
85
|
+
signal.signal(signal.SIGTERM, signal_handler)
|
|
86
|
+
signal.signal(signal.SIGINT, signal_handler)
|
|
87
|
+
logger.debug("Signal handlers registered for graceful shutdown")
|
|
88
|
+
except (OSError, ValueError) as e:
|
|
89
|
+
# Some signals might not be available on all platforms
|
|
90
|
+
logger.warning(f"Warning: Could not register signal handlers: {e}")
|
|
91
|
+
|
|
92
|
+
# Check for shutdown before heavy imports
|
|
93
|
+
check_shutdown()
|
|
94
|
+
|
|
95
|
+
try:
|
|
96
|
+
from docling.datamodel.base_models import ConversionStatus, InputFormat
|
|
97
|
+
from docling.datamodel.pipeline_options import (
|
|
98
|
+
OcrOptions,
|
|
99
|
+
PdfPipelineOptions,
|
|
100
|
+
VlmPipelineOptions,
|
|
101
|
+
)
|
|
102
|
+
from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
|
|
103
|
+
from docling.models.factories import get_ocr_factory
|
|
104
|
+
from docling.pipeline.vlm_pipeline import VlmPipeline
|
|
105
|
+
|
|
106
|
+
# Check for shutdown after imports
|
|
107
|
+
check_shutdown()
|
|
108
|
+
logger.debug("Docling dependencies loaded successfully")
|
|
109
|
+
|
|
110
|
+
except ModuleNotFoundError:
|
|
111
|
+
msg = (
|
|
112
|
+
"Docling is an optional dependency of Langflow. "
|
|
113
|
+
"Install with `uv pip install 'langflow[docling]'` "
|
|
114
|
+
"or refer to the documentation"
|
|
115
|
+
)
|
|
116
|
+
queue.put({"error": msg})
|
|
117
|
+
return
|
|
118
|
+
except ImportError as e:
|
|
119
|
+
# A different import failed (e.g., a transitive dependency); preserve details.
|
|
120
|
+
queue.put({"error": f"Failed to import a Docling dependency: {e}"})
|
|
121
|
+
return
|
|
122
|
+
except KeyboardInterrupt:
|
|
123
|
+
logger.warning("KeyboardInterrupt during imports, exiting...")
|
|
124
|
+
queue.put({"error": "Worker interrupted during imports", "shutdown": True})
|
|
125
|
+
return
|
|
126
|
+
|
|
127
|
+
# Configure the standard PDF pipeline
|
|
128
|
+
def _get_standard_opts() -> PdfPipelineOptions:
|
|
129
|
+
check_shutdown() # Check before heavy operations
|
|
130
|
+
|
|
131
|
+
pipeline_options = PdfPipelineOptions()
|
|
132
|
+
pipeline_options.do_ocr = ocr_engine != ""
|
|
133
|
+
if pipeline_options.do_ocr:
|
|
134
|
+
ocr_factory = get_ocr_factory(
|
|
135
|
+
allow_external_plugins=False,
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
ocr_options: OcrOptions = ocr_factory.create_options(
|
|
139
|
+
kind=ocr_engine,
|
|
140
|
+
)
|
|
141
|
+
pipeline_options.ocr_options = ocr_options
|
|
142
|
+
return pipeline_options
|
|
143
|
+
|
|
144
|
+
# Configure the VLM pipeline
|
|
145
|
+
def _get_vlm_opts() -> VlmPipelineOptions:
|
|
146
|
+
check_shutdown() # Check before heavy operations
|
|
147
|
+
return VlmPipelineOptions()
|
|
148
|
+
|
|
149
|
+
# Configure the main format options and create the DocumentConverter()
|
|
150
|
+
def _get_converter() -> DocumentConverter:
|
|
151
|
+
check_shutdown() # Check before heavy operations
|
|
152
|
+
|
|
153
|
+
if pipeline == "standard":
|
|
154
|
+
pdf_format_option = PdfFormatOption(
|
|
155
|
+
pipeline_options=_get_standard_opts(),
|
|
156
|
+
)
|
|
157
|
+
elif pipeline == "vlm":
|
|
158
|
+
pdf_format_option = PdfFormatOption(pipeline_cls=VlmPipeline, pipeline_options=_get_vlm_opts())
|
|
159
|
+
else:
|
|
160
|
+
msg = f"Unknown pipeline: {pipeline!r}"
|
|
161
|
+
raise ValueError(msg)
|
|
162
|
+
|
|
163
|
+
format_options: dict[InputFormat, FormatOption] = {
|
|
164
|
+
InputFormat.PDF: pdf_format_option,
|
|
165
|
+
InputFormat.IMAGE: pdf_format_option,
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
return DocumentConverter(format_options=format_options)
|
|
169
|
+
|
|
170
|
+
try:
|
|
171
|
+
# Check for shutdown before creating converter (can be slow)
|
|
172
|
+
check_shutdown()
|
|
173
|
+
logger.info(f"Initializing {pipeline} pipeline with OCR: {ocr_engine or 'disabled'}")
|
|
174
|
+
|
|
175
|
+
converter = _get_converter()
|
|
176
|
+
|
|
177
|
+
# Check for shutdown before processing files
|
|
178
|
+
check_shutdown()
|
|
179
|
+
logger.info(f"Starting to process {len(file_paths)} files...")
|
|
180
|
+
|
|
181
|
+
# Process files with periodic shutdown checks
|
|
182
|
+
results = []
|
|
183
|
+
for i, file_path in enumerate(file_paths):
|
|
184
|
+
# Check for shutdown before processing each file
|
|
185
|
+
check_shutdown()
|
|
186
|
+
|
|
187
|
+
logger.debug(f"Processing file {i + 1}/{len(file_paths)}: {file_path}")
|
|
188
|
+
|
|
189
|
+
try:
|
|
190
|
+
# Process single file (we can't easily interrupt convert_all)
|
|
191
|
+
single_result = converter.convert_all([file_path])
|
|
192
|
+
results.extend(single_result)
|
|
193
|
+
|
|
194
|
+
# Check for shutdown after each file
|
|
195
|
+
check_shutdown()
|
|
196
|
+
|
|
197
|
+
except (OSError, ValueError, RuntimeError, ImportError) as file_error:
|
|
198
|
+
# Handle specific file processing errors
|
|
199
|
+
logger.error(f"Error processing file {file_path}: {file_error}")
|
|
200
|
+
# Continue with other files, but check for shutdown
|
|
201
|
+
check_shutdown()
|
|
202
|
+
except Exception as file_error: # noqa: BLE001
|
|
203
|
+
# Catch any other unexpected errors to prevent worker crash
|
|
204
|
+
logger.error(f"Unexpected error processing file {file_path}: {file_error}")
|
|
205
|
+
# Continue with other files, but check for shutdown
|
|
206
|
+
check_shutdown()
|
|
207
|
+
|
|
208
|
+
# Final shutdown check before sending results
|
|
209
|
+
check_shutdown()
|
|
210
|
+
|
|
211
|
+
# Process the results while maintaining the original structure
|
|
212
|
+
processed_data = [
|
|
213
|
+
{"document": res.document, "file_path": str(res.input.file), "status": res.status.name}
|
|
214
|
+
if res.status == ConversionStatus.SUCCESS
|
|
215
|
+
else None
|
|
216
|
+
for res in results
|
|
217
|
+
]
|
|
218
|
+
|
|
219
|
+
logger.info(f"Successfully processed {len([d for d in processed_data if d])} files")
|
|
220
|
+
queue.put(processed_data)
|
|
221
|
+
|
|
222
|
+
except KeyboardInterrupt:
|
|
223
|
+
logger.warning("KeyboardInterrupt during processing, exiting gracefully...")
|
|
224
|
+
queue.put({"error": "Worker interrupted during processing", "shutdown": True})
|
|
225
|
+
return
|
|
226
|
+
except Exception as e: # noqa: BLE001
|
|
227
|
+
if shutdown_requested:
|
|
228
|
+
logger.exception("Exception occurred during shutdown, exiting...")
|
|
229
|
+
return
|
|
230
|
+
|
|
231
|
+
# Send any processing error to the main process with traceback
|
|
232
|
+
error_info = {"error": str(e), "traceback": traceback.format_exc()}
|
|
233
|
+
logger.error(f"Error in worker: {error_info}")
|
|
234
|
+
queue.put(error_info)
|
|
235
|
+
finally:
|
|
236
|
+
logger.info("Docling worker finishing...")
|
|
237
|
+
# Ensure we don't leave any hanging processes
|
|
238
|
+
if shutdown_requested:
|
|
239
|
+
logger.debug("Worker shutdown completed")
|
|
240
|
+
else:
|
|
241
|
+
logger.debug("Worker completed normally")
|
|
@@ -1,4 +1,9 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from multiprocessing import Queue, get_context
|
|
3
|
+
from queue import Empty
|
|
4
|
+
|
|
1
5
|
from langflow.base.data import BaseFileComponent
|
|
6
|
+
from langflow.components.docling import docling_worker
|
|
2
7
|
from langflow.inputs import DropdownInput
|
|
3
8
|
from langflow.schema import Data
|
|
4
9
|
|
|
@@ -69,73 +74,110 @@ class DoclingInlineComponent(BaseFileComponent):
|
|
|
69
74
|
*BaseFileComponent._base_outputs,
|
|
70
75
|
]
|
|
71
76
|
|
|
72
|
-
def
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
77
|
+
def _wait_for_result_with_process_monitoring(self, queue: Queue, proc, timeout: int = 300):
|
|
78
|
+
"""Wait for result from queue while monitoring process health.
|
|
79
|
+
|
|
80
|
+
Handles cases where process crashes without sending result.
|
|
81
|
+
"""
|
|
82
|
+
start_time = time.time()
|
|
83
|
+
|
|
84
|
+
while time.time() - start_time < timeout:
|
|
85
|
+
# Check if process is still alive
|
|
86
|
+
if not proc.is_alive():
|
|
87
|
+
# Process died, try to get any result it might have sent
|
|
88
|
+
try:
|
|
89
|
+
result = queue.get_nowait()
|
|
90
|
+
except Empty:
|
|
91
|
+
# Process died without sending result
|
|
92
|
+
msg = f"Worker process crashed unexpectedly without producing result. Exit code: {proc.exitcode}"
|
|
93
|
+
raise RuntimeError(msg) from None
|
|
94
|
+
else:
|
|
95
|
+
self.log("Process completed and result retrieved")
|
|
96
|
+
return result
|
|
97
|
+
|
|
98
|
+
# Poll the queue instead of blocking
|
|
99
|
+
try:
|
|
100
|
+
result = queue.get(timeout=1)
|
|
101
|
+
except Empty:
|
|
102
|
+
# No result yet, continue monitoring
|
|
103
|
+
continue
|
|
104
|
+
else:
|
|
105
|
+
self.log("Result received from worker process")
|
|
106
|
+
return result
|
|
107
|
+
|
|
108
|
+
# Overall timeout reached
|
|
109
|
+
msg = f"Process timed out after {timeout} seconds"
|
|
110
|
+
raise TimeoutError(msg)
|
|
111
|
+
|
|
112
|
+
def _terminate_process_gracefully(self, proc, timeout_terminate: int = 10, timeout_kill: int = 5):
|
|
113
|
+
"""Terminate process gracefully with escalating signals.
|
|
114
|
+
|
|
115
|
+
First tries SIGTERM, then SIGKILL if needed.
|
|
116
|
+
"""
|
|
117
|
+
if not proc.is_alive():
|
|
118
|
+
return
|
|
119
|
+
|
|
120
|
+
self.log("Attempting graceful process termination with SIGTERM")
|
|
121
|
+
proc.terminate() # Send SIGTERM
|
|
122
|
+
proc.join(timeout=timeout_terminate)
|
|
123
|
+
|
|
124
|
+
if proc.is_alive():
|
|
125
|
+
self.log("Process didn't respond to SIGTERM, using SIGKILL")
|
|
126
|
+
proc.kill() # Send SIGKILL
|
|
127
|
+
proc.join(timeout=timeout_kill)
|
|
128
|
+
|
|
129
|
+
if proc.is_alive():
|
|
130
|
+
self.log("Warning: Process still alive after SIGKILL")
|
|
124
131
|
|
|
132
|
+
def process_files(self, file_list: list[BaseFileComponent.BaseFile]) -> list[BaseFileComponent.BaseFile]:
|
|
125
133
|
file_paths = [file.path for file in file_list if file.path]
|
|
126
134
|
|
|
127
135
|
if not file_paths:
|
|
128
136
|
self.log("No files to process.")
|
|
129
137
|
return file_list
|
|
130
138
|
|
|
131
|
-
|
|
132
|
-
|
|
139
|
+
ctx = get_context("spawn")
|
|
140
|
+
queue: Queue = ctx.Queue()
|
|
141
|
+
proc = ctx.Process(
|
|
142
|
+
target=docling_worker,
|
|
143
|
+
args=(file_paths, queue, self.pipeline, self.ocr_engine),
|
|
144
|
+
)
|
|
133
145
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
if res.status == ConversionStatus.SUCCESS
|
|
137
|
-
else None
|
|
138
|
-
for res in results
|
|
139
|
-
]
|
|
146
|
+
result = None
|
|
147
|
+
proc.start()
|
|
140
148
|
|
|
149
|
+
try:
|
|
150
|
+
result = self._wait_for_result_with_process_monitoring(queue, proc, timeout=300)
|
|
151
|
+
except KeyboardInterrupt:
|
|
152
|
+
self.log("Docling process cancelled by user")
|
|
153
|
+
result = []
|
|
154
|
+
except Exception as e:
|
|
155
|
+
self.log(f"Error during processing: {e}")
|
|
156
|
+
raise
|
|
157
|
+
finally:
|
|
158
|
+
# Improved cleanup with graceful termination
|
|
159
|
+
try:
|
|
160
|
+
self._terminate_process_gracefully(proc)
|
|
161
|
+
finally:
|
|
162
|
+
# Always close and cleanup queue resources
|
|
163
|
+
try:
|
|
164
|
+
queue.close()
|
|
165
|
+
queue.join_thread()
|
|
166
|
+
except Exception as e: # noqa: BLE001
|
|
167
|
+
# Ignore cleanup errors, but log them
|
|
168
|
+
self.log(f"Warning: Error during queue cleanup - {e}")
|
|
169
|
+
|
|
170
|
+
# Check if there was an error in the worker
|
|
171
|
+
if isinstance(result, dict) and "error" in result:
|
|
172
|
+
msg = result["error"]
|
|
173
|
+
if msg.startswith("Docling is not installed"):
|
|
174
|
+
raise ImportError(msg)
|
|
175
|
+
# Handle interrupt gracefully - return empty result instead of raising error
|
|
176
|
+
if "Worker interrupted by SIGINT" in msg or "shutdown" in result:
|
|
177
|
+
self.log("Docling process cancelled by user")
|
|
178
|
+
result = []
|
|
179
|
+
else:
|
|
180
|
+
raise RuntimeError(msg)
|
|
181
|
+
|
|
182
|
+
processed_data = [Data(data={"doc": r["document"], "file_path": r["file_path"]}) if r else None for r in result]
|
|
141
183
|
return self.rollup_data(file_list, processed_data)
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from collections.abc import AsyncIterator, Iterator
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from typing import TYPE_CHECKING
|
|
5
4
|
|
|
6
5
|
import orjson
|
|
7
6
|
import pandas as pd
|
|
@@ -10,16 +9,12 @@ from fastapi.encoders import jsonable_encoder
|
|
|
10
9
|
|
|
11
10
|
from langflow.api.v2.files import upload_user_file
|
|
12
11
|
from langflow.custom import Component
|
|
13
|
-
from langflow.io import DropdownInput, HandleInput,
|
|
12
|
+
from langflow.io import DropdownInput, HandleInput, StrInput
|
|
14
13
|
from langflow.schema import Data, DataFrame, Message
|
|
15
|
-
from langflow.services.auth.utils import create_user_longterm_token, get_current_user
|
|
16
14
|
from langflow.services.database.models.user.crud import get_user_by_id
|
|
17
|
-
from langflow.services.deps import
|
|
15
|
+
from langflow.services.deps import get_settings_service, get_storage_service, session_scope
|
|
18
16
|
from langflow.template.field.base import Output
|
|
19
17
|
|
|
20
|
-
if TYPE_CHECKING:
|
|
21
|
-
from langflow.services.database.models.user.model import User
|
|
22
|
-
|
|
23
18
|
|
|
24
19
|
class SaveToFileComponent(Component):
|
|
25
20
|
display_name = "Save File"
|
|
@@ -55,13 +50,6 @@ class SaveToFileComponent(Component):
|
|
|
55
50
|
value="",
|
|
56
51
|
advanced=True,
|
|
57
52
|
),
|
|
58
|
-
SecretStrInput(
|
|
59
|
-
name="api_key",
|
|
60
|
-
display_name="Langflow API Key",
|
|
61
|
-
info="Langflow API key for authentication when saving the file.",
|
|
62
|
-
required=False,
|
|
63
|
-
advanced=True,
|
|
64
|
-
),
|
|
65
53
|
]
|
|
66
54
|
|
|
67
55
|
outputs = [Output(display_name="File Path", name="message", method="save_to_file")]
|
|
@@ -148,25 +136,11 @@ class SaveToFileComponent(Component):
|
|
|
148
136
|
raise FileNotFoundError(msg)
|
|
149
137
|
|
|
150
138
|
with file_path.open("rb") as f:
|
|
151
|
-
async
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
current_user: User | None = None
|
|
155
|
-
if self.api_key:
|
|
156
|
-
current_user = await get_current_user(
|
|
157
|
-
token="",
|
|
158
|
-
query_param=self.api_key,
|
|
159
|
-
header_param="",
|
|
160
|
-
db=db,
|
|
161
|
-
)
|
|
162
|
-
else:
|
|
163
|
-
user_id, _ = await create_user_longterm_token(db)
|
|
164
|
-
current_user = await get_user_by_id(db, user_id)
|
|
165
|
-
|
|
166
|
-
# Fail if the user is not found
|
|
167
|
-
if not current_user:
|
|
168
|
-
msg = "User not found. Please provide a valid API key or ensure the user exists."
|
|
139
|
+
async with session_scope() as db:
|
|
140
|
+
if not self.user_id:
|
|
141
|
+
msg = "User ID is required for file saving."
|
|
169
142
|
raise ValueError(msg)
|
|
143
|
+
current_user = await get_user_by_id(db, self.user_id)
|
|
170
144
|
|
|
171
145
|
await upload_user_file(
|
|
172
146
|
file=UploadFile(filename=file_path.name, file=f, size=file_path.stat().st_size),
|