lfx-nightly 0.1.13.dev11__py3-none-any.whl → 0.1.13.dev12__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lfx-nightly might be problematic. Click here for more details.

@@ -8,6 +8,7 @@ if TYPE_CHECKING:
8
8
  from .chunk_docling_document import ChunkDoclingDocumentComponent
9
9
  from .docling_inline import DoclingInlineComponent
10
10
  from .docling_remote import DoclingRemoteComponent
11
+ from .docling_remote_vlm import DoclingRemoteVLMComponent
11
12
  from .export_docling_document import ExportDoclingDocumentComponent
12
13
 
13
14
  _dynamic_imports = {
@@ -15,12 +16,14 @@ _dynamic_imports = {
15
16
  "DoclingInlineComponent": "docling_inline",
16
17
  "DoclingRemoteComponent": "docling_remote",
17
18
  "ExportDoclingDocumentComponent": "export_docling_document",
19
+ "DoclingRemoteVLMComponent": "docling_remote_vlm",
18
20
  }
19
21
 
20
22
  __all__ = [
21
23
  "ChunkDoclingDocumentComponent",
22
24
  "DoclingInlineComponent",
23
25
  "DoclingRemoteComponent",
26
+ "DoclingRemoteVLMComponent",
24
27
  "ExportDoclingDocumentComponent",
25
28
  ]
26
29
 
@@ -0,0 +1,284 @@
1
+ from typing import Any
2
+
3
+ import requests
4
+ from docling.datamodel.base_models import ConversionStatus, InputFormat
5
+ from docling.datamodel.pipeline_options import (
6
+ ApiVlmOptions,
7
+ ResponseFormat,
8
+ VlmPipelineOptions,
9
+ )
10
+ from docling.document_converter import DocumentConverter, PdfFormatOption
11
+ from docling.pipeline.vlm_pipeline import VlmPipeline
12
+ from langflow.base.data import BaseFileComponent
13
+ from langflow.inputs import DropdownInput, SecretStrInput, StrInput
14
+ from langflow.schema import Data
15
+ from langflow.schema.dotdict import dotdict
16
+
17
+ from lfx.components.ibm.watsonx import WatsonxAIComponent
18
+ from lfx.log.logger import logger
19
+
20
+
21
+ class DoclingRemoteVLMComponent(BaseFileComponent):
22
+ display_name = "Docling Remote VLM"
23
+ description = (
24
+ "Uses Docling to process input documents running a VLM pipeline with a remote model"
25
+ "(OpenAI-compatible API or IBM Cloud)."
26
+ )
27
+ documentation = "https://docling-project.github.io/docling/examples/vlm_pipeline_api_model/"
28
+ trace_type = "tool"
29
+ icon = "Docling"
30
+ name = "DoclingRemoteVLM"
31
+
32
+ # https://docling-project.github.io/docling/usage/supported_formats/
33
+ VALID_EXTENSIONS = [
34
+ "adoc",
35
+ "asciidoc",
36
+ "asc",
37
+ "bmp",
38
+ "csv",
39
+ "dotx",
40
+ "dotm",
41
+ "docm",
42
+ "docx",
43
+ "htm",
44
+ "html",
45
+ "jpeg",
46
+ "json",
47
+ "md",
48
+ "pdf",
49
+ "png",
50
+ "potx",
51
+ "ppsx",
52
+ "pptm",
53
+ "potm",
54
+ "ppsm",
55
+ "pptx",
56
+ "tiff",
57
+ "txt",
58
+ "xls",
59
+ "xlsx",
60
+ "xhtml",
61
+ "xml",
62
+ "webp",
63
+ ]
64
+
65
+ inputs = [
66
+ *BaseFileComponent.get_base_inputs(),
67
+ DropdownInput(
68
+ name="provider",
69
+ display_name="Provider",
70
+ info="Select which remote VLM provider to use.",
71
+ options=["IBM Cloud", "OpenAI-Compatible"],
72
+ value="IBM Cloud",
73
+ real_time_refresh=True,
74
+ ),
75
+ # IBM Cloud inputs
76
+ SecretStrInput(
77
+ name="watsonx_api_key",
78
+ display_name="Watsonx API Key",
79
+ info="IBM Cloud API key used for authentication (leave blank to load from .env).",
80
+ required=False,
81
+ ),
82
+ StrInput(
83
+ name="watsonx_project_id",
84
+ display_name="Watsonx Project ID",
85
+ required=False,
86
+ info="The Watsonx project ID or deployment space ID associated with the model.",
87
+ value="",
88
+ ),
89
+ DropdownInput(
90
+ name="url",
91
+ display_name="Watsonx API Endpoint",
92
+ info="The base URL of the Watsonx API.",
93
+ options=[
94
+ "https://us-south.ml.cloud.ibm.com",
95
+ "https://eu-de.ml.cloud.ibm.com",
96
+ "https://eu-gb.ml.cloud.ibm.com",
97
+ "https://au-syd.ml.cloud.ibm.com",
98
+ "https://jp-tok.ml.cloud.ibm.com",
99
+ "https://ca-tor.ml.cloud.ibm.com",
100
+ ],
101
+ real_time_refresh=True,
102
+ ),
103
+ DropdownInput(
104
+ name="model_name",
105
+ display_name="Model Name",
106
+ options=[],
107
+ value=None,
108
+ dynamic=True,
109
+ required=False,
110
+ ),
111
+ # OpenAI inputs
112
+ StrInput(
113
+ name="openai_base_url",
114
+ display_name="OpenAI-Compatible API Base URL",
115
+ info="Example: https://openrouter.ai/api/",
116
+ required=False,
117
+ show=False,
118
+ ),
119
+ SecretStrInput(
120
+ name="openai_api_key",
121
+ display_name="API Key",
122
+ info="API key for OpenAI-compatible endpoints (leave blank if not required).",
123
+ required=False,
124
+ show=False,
125
+ ),
126
+ StrInput(
127
+ name="openai_model",
128
+ display_name="OpenAI Model Name",
129
+ info="Model ID for OpenAI-compatible provider (e.g. gpt-4o-mini).",
130
+ required=False,
131
+ show=False,
132
+ ),
133
+ StrInput(name="vlm_prompt", display_name="Prompt", info="Prompt for VLM.", required=False),
134
+ ]
135
+
136
+ outputs = [*BaseFileComponent.get_base_outputs()]
137
+
138
+ @staticmethod
139
+ def fetch_models(base_url: str) -> list[str]:
140
+ """Fetch available models from the Watsonx.ai API."""
141
+ try:
142
+ endpoint = f"{base_url}/ml/v1/foundation_model_specs"
143
+ params = {"version": "2024-09-16", "filters": "function_text_chat,!lifecycle_withdrawn"}
144
+ response = requests.get(endpoint, params=params, timeout=10)
145
+ response.raise_for_status()
146
+ data = response.json()
147
+ models = [model["model_id"] for model in data.get("resources", [])]
148
+ return sorted(models)
149
+ except (requests.RequestException, requests.HTTPError, requests.Timeout, ConnectionError, ValueError):
150
+ logger.exception("Error fetching models. Using default models.")
151
+ return WatsonxAIComponent._default_models # noqa: SLF001
152
+
153
+ def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None):
154
+ """Update shown fields based on chosen provider."""
155
+ logger.info(f"update_build_config called: field_name={field_name}, field_value={field_value}")
156
+
157
+ if field_name == "provider":
158
+ provider_choice = field_value
159
+
160
+ if provider_choice == "IBM Cloud":
161
+ build_config.model_name.show = True
162
+ build_config.watsonx_api_key.show = True
163
+ build_config.watsonx_project_id.show = True
164
+ build_config.url.show = True
165
+
166
+ build_config.openai_base_url.show = False
167
+ build_config.openai_api_key.show = False
168
+ build_config.openai_model.show = False
169
+
170
+ elif provider_choice == "OpenAI-Compatible":
171
+ build_config.model_name.show = False
172
+ build_config.watsonx_api_key.show = False
173
+ build_config.watsonx_project_id.show = False
174
+ build_config.url.show = False
175
+
176
+ build_config.openai_base_url.show = True
177
+ build_config.openai_api_key.show = True
178
+ build_config.openai_model.show = True
179
+
180
+ if field_name == "url":
181
+ provider_value = build_config.provider.value if hasattr(build_config, "provider") else None
182
+ if provider_value == "IBM Cloud" and field_value:
183
+ models = self.fetch_models(base_url=field_value)
184
+ build_config.model_name.options = models
185
+ if models:
186
+ build_config.model_name.value = models[0]
187
+ logger.info(f"Updated Watsonx model list: {len(models)} models found.")
188
+
189
+ def watsonx_vlm_options(self, model: str, prompt: str):
190
+ """Creates Docling ApiVlmOptions for a watsonx VLM."""
191
+ api_key = getattr(self, "watsonx_api_key", "")
192
+ project_id = getattr(self, "watsonx_project_id", "")
193
+ base_url = getattr(self, "url", "https://us-south.ml.cloud.ibm.com")
194
+
195
+ def _get_iam_access_token(api_key: str) -> str:
196
+ res = requests.post(
197
+ url="https://iam.cloud.ibm.com/identity/token",
198
+ headers={"Content-Type": "application/x-www-form-urlencoded"},
199
+ data=f"grant_type=urn:ibm:params:oauth:grant-type:apikey&apikey={api_key}",
200
+ timeout=90,
201
+ )
202
+ res.raise_for_status()
203
+ return res.json()["access_token"]
204
+
205
+ access_token = _get_iam_access_token(api_key)
206
+ return ApiVlmOptions(
207
+ url=f"{base_url}/ml/v1/text/chat?version=2023-05-29",
208
+ params={"model_id": model, "project_id": project_id, "parameters": {"max_new_tokens": 400}},
209
+ headers={"Authorization": f"Bearer {access_token}"},
210
+ prompt=prompt,
211
+ timeout=60,
212
+ response_format=ResponseFormat.MARKDOWN,
213
+ )
214
+
215
+ def openai_compatible_vlm_options(
216
+ self,
217
+ model: str,
218
+ prompt: str,
219
+ response_format: ResponseFormat,
220
+ url: str,
221
+ temperature: float = 0.7,
222
+ max_tokens: int = 4096,
223
+ api_key: str = "",
224
+ *,
225
+ skip_special_tokens: bool = False,
226
+ ):
227
+ """Create OpenAI-compatible Docling ApiVlmOptions options (e.g., LM Studio, vLLM, Ollama)."""
228
+ api_key = getattr(self, "openai_api_key", api_key)
229
+ model_override = getattr(self, "openai_model", model)
230
+
231
+ headers = {}
232
+ if api_key:
233
+ headers["Authorization"] = f"Bearer {api_key}"
234
+
235
+ return ApiVlmOptions(
236
+ url=f"{url}/v1/chat/completions",
237
+ params={"model": model_override, "max_tokens": max_tokens, "skip_special_tokens": skip_special_tokens},
238
+ headers=headers,
239
+ prompt=prompt,
240
+ timeout=90,
241
+ scale=2.0,
242
+ temperature=temperature,
243
+ response_format=response_format,
244
+ )
245
+
246
+ def process_files(self, file_list: list[BaseFileComponent.BaseFile]) -> list[BaseFileComponent.BaseFile]:
247
+ file_paths = [file.path for file in file_list if file.path]
248
+ if not file_paths:
249
+ logger.warning("No files to process.")
250
+ return file_list
251
+
252
+ provider = getattr(self, "provider", "IBM Cloud")
253
+ prompt = getattr(self, "vlm_prompt", "")
254
+
255
+ if provider == "IBM Cloud":
256
+ model = getattr(self, "model_name", "")
257
+ vlm_opts = self.watsonx_vlm_options(model=model, prompt=prompt)
258
+ else:
259
+ model = getattr(self, "openai_model", "") or getattr(self, "model_name", "")
260
+ base_url = getattr(self, "openai_base_url", "")
261
+ vlm_opts = self.openai_compatible_vlm_options(
262
+ model=model,
263
+ prompt=prompt,
264
+ response_format=ResponseFormat.MARKDOWN,
265
+ url=base_url,
266
+ )
267
+
268
+ pipeline_options = VlmPipelineOptions(enable_remote_services=True)
269
+ pipeline_options.vlm_options = vlm_opts
270
+
271
+ converter = DocumentConverter(
272
+ format_options={
273
+ InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options, pipeline_cls=VlmPipeline)
274
+ }
275
+ )
276
+
277
+ results = converter.convert_all(file_paths)
278
+ processed_data = [
279
+ Data(data={"doc": res.document, "file_path": str(res.input.file)})
280
+ if res.status == ConversionStatus.SUCCESS
281
+ else None
282
+ for res in results
283
+ ]
284
+ return self.rollup_data(file_list, processed_data)
@@ -122,7 +122,12 @@ class ParserComponent(Component):
122
122
  formatted_text = self.pattern.format(**row.to_dict())
123
123
  lines.append(formatted_text)
124
124
  elif data is not None:
125
- formatted_text = self.pattern.format(**data.data)
125
+ # Use format_map with a dict that returns default_value for missing keys
126
+ class DefaultDict(dict):
127
+ def __missing__(self, key):
128
+ return data.default_value or ""
129
+
130
+ formatted_text = self.pattern.format_map(DefaultDict(data.data))
126
131
  lines.append(formatted_text)
127
132
 
128
133
  combined_text = self.sep.join(lines)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lfx-nightly
3
- Version: 0.1.13.dev11
3
+ Version: 0.1.13.dev12
4
4
  Summary: Langflow Executor - A lightweight CLI tool for executing and serving Langflow AI flows
5
5
  Author-email: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
6
6
  Requires-Python: <3.14,>=3.10
@@ -4,7 +4,7 @@ lfx/constants.py,sha256=Ert_SpwXhutgcTKEvtDArtkONXgyE5x68opMoQfukMA,203
4
4
  lfx/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  lfx/settings.py,sha256=wnx4zkOLQ8mvampYsnnvVV9GvEnRUuWQpKFSbFTCIp4,181
6
6
  lfx/type_extraction.py,sha256=eCZNl9nAQivKdaPv_9BK71N0JV9Rtr--veAht0dnQ4A,2921
7
- lfx/_assets/component_index.json,sha256=5u7MEm93Yt_-xBeIa5pRTGAor8cRFcUMetsV9O5Q2HY,3572648
7
+ lfx/_assets/component_index.json,sha256=WxdnXtEx5ZLGCM7ef5jLrw1-n6gDHT6bkS1KbKhZ3GY,3592770
8
8
  lfx/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  lfx/base/constants.py,sha256=v9vo0Ifg8RxDu__XqgGzIXHlsnUFyWM-SSux0uHHoz8,1187
10
10
  lfx/base/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -285,10 +285,11 @@ lfx/components/deactivated/vectara_self_query.py,sha256=nlRFL-FIdecgpaR70ohC8Tsl
285
285
  lfx/components/deactivated/vector_store.py,sha256=7L1Z8Nl0RZjEGAj1O2tMhb6I6jUNNc5MBOui4a2FkXE,728
286
286
  lfx/components/deepseek/__init__.py,sha256=gmyOcLeNEcnwSeowow0N0UhBDlSuZ_8x-DMUjwkNRFM,935
287
287
  lfx/components/deepseek/deepseek.py,sha256=yNrHoljXOMScKng-oSB-ceWhVZeuh11lmrAY7WiB2H0,4702
288
- lfx/components/docling/__init__.py,sha256=O4utz9GHFpTVe_Wy0PR80yA1irJQRnAFQWkoLCVj888,1424
288
+ lfx/components/docling/__init__.py,sha256=UPtKdQKonYMQZCmE-zuBhJwNbSKIoddgB_WTl0Yp7oM,1574
289
289
  lfx/components/docling/chunk_docling_document.py,sha256=OX-jj4nX3UZgopViMAGAnFgtLql0sgs6cVmU8p9QbqA,7600
290
290
  lfx/components/docling/docling_inline.py,sha256=12s4U860c-wkpmd2JYi6qxK1Wx_PF9j9BARLhXCL0E0,8496
291
291
  lfx/components/docling/docling_remote.py,sha256=Ju61E93tLBq6KsRRGVA1_ySWzEOdOFj9jS9kJ7gc3H4,6980
292
+ lfx/components/docling/docling_remote_vlm.py,sha256=aAEk2vepXzB9aHWEfgbmOrfRuLa2sEh4T1dVyGwIN-A,10538
292
293
  lfx/components/docling/export_docling_document.py,sha256=TeFt3TesCxSqW57nv-30gf2dX8qMDUHLRhwU-1ciq08,4681
293
294
  lfx/components/documentloaders/__init__.py,sha256=LNl2hG2InevQCUREFKhF9ylaTf_kwPsdjiDbx2ElX3M,69
294
295
  lfx/components/duckduckgo/__init__.py,sha256=Y4zaOLVOKsD_qwF7KRLek1pcaKKHa6lGUHObuQTR9iY,104
@@ -471,7 +472,7 @@ lfx/components/processing/message_to_data.py,sha256=0K8SIq6vuAvQ3K7siXstNint6R1-
471
472
  lfx/components/processing/parse_data.py,sha256=P6xEqbs3geWP0gYMdS9QIVJiIREEgDjxzENRLwiNgE0,2463
472
473
  lfx/components/processing/parse_dataframe.py,sha256=nUsFzxVkBXJhoPP9f6NmmKmwXSKs9IjaIvrr7DeLSSY,2518
473
474
  lfx/components/processing/parse_json_data.py,sha256=OdmZ2Kqdfb0uBCA5FdKSv4y_3OqfWY56Mesg1iO666Q,3160
474
- lfx/components/processing/parser.py,sha256=vxnub-7jUTAtMgcLTdZGzHuRby_B1d1HOntsxM3KU_E,5414
475
+ lfx/components/processing/parser.py,sha256=F1FJU8foJvj8AwlKBPlDhuhqEr4I_6XqDgJ0h1iBbpw,5648
475
476
  lfx/components/processing/prompt.py,sha256=c4LQPOQSvz1Z1e73uyOm8TaTxWDpCGcujBd-a6AxL1A,2761
476
477
  lfx/components/processing/python_repl_core.py,sha256=6kOu64pWyBwBpTqOTM9LPnSsnTX6q_J-Hqhmoxp0wFs,3472
477
478
  lfx/components/processing/regex.py,sha256=9n171_Ze--5gpKFJJyJlYafuEOwbPQPiyjhdLY3SUrY,2689
@@ -730,7 +731,7 @@ lfx/utils/schemas.py,sha256=NbOtVQBrn4d0BAu-0H_eCTZI2CXkKZlRY37XCSmuJwc,3865
730
731
  lfx/utils/util.py,sha256=Ww85wbr1-vjh2pXVtmTqoUVr6MXAW8S7eDx_Ys6HpE8,20696
731
732
  lfx/utils/util_strings.py,sha256=nU_IcdphNaj6bAPbjeL-c1cInQPfTBit8mp5Y57lwQk,1686
732
733
  lfx/utils/version.py,sha256=cHpbO0OJD2JQAvVaTH_6ibYeFbHJV0QDHs_YXXZ-bT8,671
733
- lfx_nightly-0.1.13.dev11.dist-info/METADATA,sha256=dherVA7JNlTrOnZxoaISknodoQzPGMe-n4Q3Szv43bw,8290
734
- lfx_nightly-0.1.13.dev11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
735
- lfx_nightly-0.1.13.dev11.dist-info/entry_points.txt,sha256=1724p3RHDQRT2CKx_QRzEIa7sFuSVO0Ux70YfXfoMT4,42
736
- lfx_nightly-0.1.13.dev11.dist-info/RECORD,,
734
+ lfx_nightly-0.1.13.dev12.dist-info/METADATA,sha256=pTwMBq4ciOBRLatxElSZFMwuB46Vhpm14_4k-aYNrNE,8290
735
+ lfx_nightly-0.1.13.dev12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
736
+ lfx_nightly-0.1.13.dev12.dist-info/entry_points.txt,sha256=1724p3RHDQRT2CKx_QRzEIa7sFuSVO0Ux70YfXfoMT4,42
737
+ lfx_nightly-0.1.13.dev12.dist-info/RECORD,,