PyPI - lfx-nightly - Versions diffs - 0.1.13.dev11__py3-none-any.whl → 0.1.13.dev12__py3-none-any.whl - Mend

lfx-nightly 0.1.13.dev11__py3-none-any.whl → 0.1.13.dev12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lfx-nightly might be problematic. Click here for more details.

Files changed (8) hide show

lfx/components/docling/__init__.py CHANGED Viewed

@@ -8,6 +8,7 @@ if TYPE_CHECKING:
     from .chunk_docling_document import ChunkDoclingDocumentComponent
     from .docling_inline import DoclingInlineComponent
     from .docling_remote import DoclingRemoteComponent
+    from .docling_remote_vlm import DoclingRemoteVLMComponent
     from .export_docling_document import ExportDoclingDocumentComponent
 _dynamic_imports = {
@@ -15,12 +16,14 @@ _dynamic_imports = {
     "DoclingInlineComponent": "docling_inline",
     "DoclingRemoteComponent": "docling_remote",
     "ExportDoclingDocumentComponent": "export_docling_document",
+    "DoclingRemoteVLMComponent": "docling_remote_vlm",
 }
 __all__ = [
     "ChunkDoclingDocumentComponent",
     "DoclingInlineComponent",
     "DoclingRemoteComponent",
+    "DoclingRemoteVLMComponent",
     "ExportDoclingDocumentComponent",
 ]

lfx/components/docling/docling_remote_vlm.py ADDED Viewed

@@ -0,0 +1,284 @@
+from typing import Any
+import requests
+from docling.datamodel.base_models import ConversionStatus, InputFormat
+from docling.datamodel.pipeline_options import (
+    ApiVlmOptions,
+    ResponseFormat,
+    VlmPipelineOptions,
+)
+from docling.document_converter import DocumentConverter, PdfFormatOption
+from docling.pipeline.vlm_pipeline import VlmPipeline
+from langflow.base.data import BaseFileComponent
+from langflow.inputs import DropdownInput, SecretStrInput, StrInput
+from langflow.schema import Data
+from langflow.schema.dotdict import dotdict
+from lfx.components.ibm.watsonx import WatsonxAIComponent
+from lfx.log.logger import logger
+class DoclingRemoteVLMComponent(BaseFileComponent):
+    display_name = "Docling Remote VLM"
+    description = (
+        "Uses Docling to process input documents running a VLM pipeline with a remote model"
+        "(OpenAI-compatible API or IBM Cloud)."
+    )
+    documentation = "https://docling-project.github.io/docling/examples/vlm_pipeline_api_model/"
+    trace_type = "tool"
+    icon = "Docling"
+    name = "DoclingRemoteVLM"
+    # https://docling-project.github.io/docling/usage/supported_formats/
+    VALID_EXTENSIONS = [
+        "adoc",
+        "asciidoc",
+        "asc",
+        "bmp",
+        "csv",
+        "dotx",
+        "dotm",
+        "docm",
+        "docx",
+        "htm",
+        "html",
+        "jpeg",
+        "json",
+        "md",
+        "pdf",
+        "png",
+        "potx",
+        "ppsx",
+        "pptm",
+        "potm",
+        "ppsm",
+        "pptx",
+        "tiff",
+        "txt",
+        "xls",
+        "xlsx",
+        "xhtml",
+        "xml",
+        "webp",
+    ]
+    inputs = [
+        *BaseFileComponent.get_base_inputs(),
+        DropdownInput(
+            name="provider",
+            display_name="Provider",
+            info="Select which remote VLM provider to use.",
+            options=["IBM Cloud", "OpenAI-Compatible"],
+            value="IBM Cloud",
+            real_time_refresh=True,
+        ),
+        # IBM Cloud inputs
+        SecretStrInput(
+            name="watsonx_api_key",
+            display_name="Watsonx API Key",
+            info="IBM Cloud API key used for authentication (leave blank to load from .env).",
+            required=False,
+        ),
+        StrInput(
+            name="watsonx_project_id",
+            display_name="Watsonx Project ID",
+            required=False,
+            info="The Watsonx project ID or deployment space ID associated with the model.",
+            value="",
+        ),
+        DropdownInput(
+            name="url",
+            display_name="Watsonx API Endpoint",
+            info="The base URL of the Watsonx API.",
+            options=[
+                "https://us-south.ml.cloud.ibm.com",
+                "https://eu-de.ml.cloud.ibm.com",
+                "https://eu-gb.ml.cloud.ibm.com",
+                "https://au-syd.ml.cloud.ibm.com",
+                "https://jp-tok.ml.cloud.ibm.com",
+                "https://ca-tor.ml.cloud.ibm.com",
+            ],
+            real_time_refresh=True,
+        ),
+        DropdownInput(
+            name="model_name",
+            display_name="Model Name",
+            options=[],
+            value=None,
+            dynamic=True,
+            required=False,
+        ),
+        # OpenAI inputs
+        StrInput(
+            name="openai_base_url",
+            display_name="OpenAI-Compatible API Base URL",
+            info="Example: https://openrouter.ai/api/",
+            required=False,
+            show=False,
+        ),
+        SecretStrInput(
+            name="openai_api_key",
+            display_name="API Key",
+            info="API key for OpenAI-compatible endpoints (leave blank if not required).",
+            required=False,
+            show=False,
+        ),
+        StrInput(
+            name="openai_model",
+            display_name="OpenAI Model Name",
+            info="Model ID for OpenAI-compatible provider (e.g. gpt-4o-mini).",
+            required=False,
+            show=False,
+        ),
+        StrInput(name="vlm_prompt", display_name="Prompt", info="Prompt for VLM.", required=False),
+    ]
+    outputs = [*BaseFileComponent.get_base_outputs()]
+    @staticmethod
+    def fetch_models(base_url: str) -> list[str]:
+        """Fetch available models from the Watsonx.ai API."""
+        try:
+            endpoint = f"{base_url}/ml/v1/foundation_model_specs"
+            params = {"version": "2024-09-16", "filters": "function_text_chat,!lifecycle_withdrawn"}
+            response = requests.get(endpoint, params=params, timeout=10)
+            response.raise_for_status()
+            data = response.json()
+            models = [model["model_id"] for model in data.get("resources", [])]
+            return sorted(models)
+        except (requests.RequestException, requests.HTTPError, requests.Timeout, ConnectionError, ValueError):
+            logger.exception("Error fetching models. Using default models.")
+            return WatsonxAIComponent._default_models  # noqa: SLF001
+    def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None):
+        """Update shown fields based on chosen provider."""
+        logger.info(f"update_build_config called: field_name={field_name}, field_value={field_value}")
+        if field_name == "provider":
+            provider_choice = field_value
+            if provider_choice == "IBM Cloud":
+                build_config.model_name.show = True
+                build_config.watsonx_api_key.show = True
+                build_config.watsonx_project_id.show = True
+                build_config.url.show = True
+                build_config.openai_base_url.show = False
+                build_config.openai_api_key.show = False
+                build_config.openai_model.show = False
+            elif provider_choice == "OpenAI-Compatible":
+                build_config.model_name.show = False
+                build_config.watsonx_api_key.show = False
+                build_config.watsonx_project_id.show = False
+                build_config.url.show = False
+                build_config.openai_base_url.show = True
+                build_config.openai_api_key.show = True
+                build_config.openai_model.show = True
+        if field_name == "url":
+            provider_value = build_config.provider.value if hasattr(build_config, "provider") else None
+            if provider_value == "IBM Cloud" and field_value:
+                models = self.fetch_models(base_url=field_value)
+                build_config.model_name.options = models
+                if models:
+                    build_config.model_name.value = models[0]
+                logger.info(f"Updated Watsonx model list: {len(models)} models found.")
+    def watsonx_vlm_options(self, model: str, prompt: str):
+        """Creates Docling ApiVlmOptions for a watsonx VLM."""
+        api_key = getattr(self, "watsonx_api_key", "")
+        project_id = getattr(self, "watsonx_project_id", "")
+        base_url = getattr(self, "url", "https://us-south.ml.cloud.ibm.com")
+        def _get_iam_access_token(api_key: str) -> str:
+            res = requests.post(
+                url="https://iam.cloud.ibm.com/identity/token",
+                headers={"Content-Type": "application/x-www-form-urlencoded"},
+                data=f"grant_type=urn:ibm:params:oauth:grant-type:apikey&apikey={api_key}",
+                timeout=90,
+            )
+            res.raise_for_status()
+            return res.json()["access_token"]
+        access_token = _get_iam_access_token(api_key)
+        return ApiVlmOptions(
+            url=f"{base_url}/ml/v1/text/chat?version=2023-05-29",
+            params={"model_id": model, "project_id": project_id, "parameters": {"max_new_tokens": 400}},
+            headers={"Authorization": f"Bearer {access_token}"},
+            prompt=prompt,
+            timeout=60,
+            response_format=ResponseFormat.MARKDOWN,
+        )
+    def openai_compatible_vlm_options(
+        self,
+        model: str,
+        prompt: str,
+        response_format: ResponseFormat,
+        url: str,
+        temperature: float = 0.7,
+        max_tokens: int = 4096,
+        api_key: str = "",
+        *,
+        skip_special_tokens: bool = False,
+    ):
+        """Create OpenAI-compatible Docling ApiVlmOptions options (e.g., LM Studio, vLLM, Ollama)."""
+        api_key = getattr(self, "openai_api_key", api_key)
+        model_override = getattr(self, "openai_model", model)
+        headers = {}
+        if api_key:
+            headers["Authorization"] = f"Bearer {api_key}"
+        return ApiVlmOptions(
+            url=f"{url}/v1/chat/completions",
+            params={"model": model_override, "max_tokens": max_tokens, "skip_special_tokens": skip_special_tokens},
+            headers=headers,
+            prompt=prompt,
+            timeout=90,
+            scale=2.0,
+            temperature=temperature,
+            response_format=response_format,
+        )
+    def process_files(self, file_list: list[BaseFileComponent.BaseFile]) -> list[BaseFileComponent.BaseFile]:
+        file_paths = [file.path for file in file_list if file.path]
+        if not file_paths:
+            logger.warning("No files to process.")
+            return file_list
+        provider = getattr(self, "provider", "IBM Cloud")
+        prompt = getattr(self, "vlm_prompt", "")
+        if provider == "IBM Cloud":
+            model = getattr(self, "model_name", "")
+            vlm_opts = self.watsonx_vlm_options(model=model, prompt=prompt)
+        else:
+            model = getattr(self, "openai_model", "") or getattr(self, "model_name", "")
+            base_url = getattr(self, "openai_base_url", "")
+            vlm_opts = self.openai_compatible_vlm_options(
+                model=model,
+                prompt=prompt,
+                response_format=ResponseFormat.MARKDOWN,
+                url=base_url,
+            )
+        pipeline_options = VlmPipelineOptions(enable_remote_services=True)
+        pipeline_options.vlm_options = vlm_opts
+        converter = DocumentConverter(
+            format_options={
+                InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options, pipeline_cls=VlmPipeline)
+            }
+        )
+        results = converter.convert_all(file_paths)
+        processed_data = [
+            Data(data={"doc": res.document, "file_path": str(res.input.file)})
+            if res.status == ConversionStatus.SUCCESS
+            else None
+            for res in results
+        ]
+        return self.rollup_data(file_list, processed_data)

lfx/components/processing/parser.py CHANGED Viewed

@@ -122,7 +122,12 @@ class ParserComponent(Component):
                 formatted_text = self.pattern.format(**row.to_dict())
                 lines.append(formatted_text)
         elif data is not None:
-            formatted_text = self.pattern.format(**data.data)
+            # Use format_map with a dict that returns default_value for missing keys
+            class DefaultDict(dict):
+                def __missing__(self, key):
+                    return data.default_value or ""
+            formatted_text = self.pattern.format_map(DefaultDict(data.data))
             lines.append(formatted_text)
         combined_text = self.sep.join(lines)

{lfx_nightly-0.1.13.dev11.dist-info → lfx_nightly-0.1.13.dev12.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lfx-nightly
-Version: 0.1.13.dev11
+Version: 0.1.13.dev12
 Summary: Langflow Executor - A lightweight CLI tool for executing and serving Langflow AI flows
 Author-email: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
 Requires-Python: <3.14,>=3.10

{lfx_nightly-0.1.13.dev11.dist-info → lfx_nightly-0.1.13.dev12.dist-info}/RECORD RENAMED Viewed

@@ -4,7 +4,7 @@ lfx/constants.py,sha256=Ert_SpwXhutgcTKEvtDArtkONXgyE5x68opMoQfukMA,203
 lfx/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lfx/settings.py,sha256=wnx4zkOLQ8mvampYsnnvVV9GvEnRUuWQpKFSbFTCIp4,181
 lfx/type_extraction.py,sha256=eCZNl9nAQivKdaPv_9BK71N0JV9Rtr--veAht0dnQ4A,2921
-lfx/_assets/component_index.json,sha256=5u7MEm93Yt_-xBeIa5pRTGAor8cRFcUMetsV9O5Q2HY,3572648
+lfx/_assets/component_index.json,sha256=WxdnXtEx5ZLGCM7ef5jLrw1-n6gDHT6bkS1KbKhZ3GY,3592770
 lfx/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lfx/base/constants.py,sha256=v9vo0Ifg8RxDu__XqgGzIXHlsnUFyWM-SSux0uHHoz8,1187
 lfx/base/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -285,10 +285,11 @@ lfx/components/deactivated/vectara_self_query.py,sha256=nlRFL-FIdecgpaR70ohC8Tsl
 lfx/components/deactivated/vector_store.py,sha256=7L1Z8Nl0RZjEGAj1O2tMhb6I6jUNNc5MBOui4a2FkXE,728
 lfx/components/deepseek/__init__.py,sha256=gmyOcLeNEcnwSeowow0N0UhBDlSuZ_8x-DMUjwkNRFM,935
 lfx/components/deepseek/deepseek.py,sha256=yNrHoljXOMScKng-oSB-ceWhVZeuh11lmrAY7WiB2H0,4702
-lfx/components/docling/__init__.py,sha256=O4utz9GHFpTVe_Wy0PR80yA1irJQRnAFQWkoLCVj888,1424
+lfx/components/docling/__init__.py,sha256=UPtKdQKonYMQZCmE-zuBhJwNbSKIoddgB_WTl0Yp7oM,1574
 lfx/components/docling/chunk_docling_document.py,sha256=OX-jj4nX3UZgopViMAGAnFgtLql0sgs6cVmU8p9QbqA,7600
 lfx/components/docling/docling_inline.py,sha256=12s4U860c-wkpmd2JYi6qxK1Wx_PF9j9BARLhXCL0E0,8496
 lfx/components/docling/docling_remote.py,sha256=Ju61E93tLBq6KsRRGVA1_ySWzEOdOFj9jS9kJ7gc3H4,6980
+lfx/components/docling/docling_remote_vlm.py,sha256=aAEk2vepXzB9aHWEfgbmOrfRuLa2sEh4T1dVyGwIN-A,10538
 lfx/components/docling/export_docling_document.py,sha256=TeFt3TesCxSqW57nv-30gf2dX8qMDUHLRhwU-1ciq08,4681
 lfx/components/documentloaders/__init__.py,sha256=LNl2hG2InevQCUREFKhF9ylaTf_kwPsdjiDbx2ElX3M,69
 lfx/components/duckduckgo/__init__.py,sha256=Y4zaOLVOKsD_qwF7KRLek1pcaKKHa6lGUHObuQTR9iY,104
@@ -471,7 +472,7 @@ lfx/components/processing/message_to_data.py,sha256=0K8SIq6vuAvQ3K7siXstNint6R1-
 lfx/components/processing/parse_data.py,sha256=P6xEqbs3geWP0gYMdS9QIVJiIREEgDjxzENRLwiNgE0,2463
 lfx/components/processing/parse_dataframe.py,sha256=nUsFzxVkBXJhoPP9f6NmmKmwXSKs9IjaIvrr7DeLSSY,2518
 lfx/components/processing/parse_json_data.py,sha256=OdmZ2Kqdfb0uBCA5FdKSv4y_3OqfWY56Mesg1iO666Q,3160
-lfx/components/processing/parser.py,sha256=vxnub-7jUTAtMgcLTdZGzHuRby_B1d1HOntsxM3KU_E,5414
+lfx/components/processing/parser.py,sha256=F1FJU8foJvj8AwlKBPlDhuhqEr4I_6XqDgJ0h1iBbpw,5648
 lfx/components/processing/prompt.py,sha256=c4LQPOQSvz1Z1e73uyOm8TaTxWDpCGcujBd-a6AxL1A,2761
 lfx/components/processing/python_repl_core.py,sha256=6kOu64pWyBwBpTqOTM9LPnSsnTX6q_J-Hqhmoxp0wFs,3472
 lfx/components/processing/regex.py,sha256=9n171_Ze--5gpKFJJyJlYafuEOwbPQPiyjhdLY3SUrY,2689
@@ -730,7 +731,7 @@ lfx/utils/schemas.py,sha256=NbOtVQBrn4d0BAu-0H_eCTZI2CXkKZlRY37XCSmuJwc,3865
 lfx/utils/util.py,sha256=Ww85wbr1-vjh2pXVtmTqoUVr6MXAW8S7eDx_Ys6HpE8,20696
 lfx/utils/util_strings.py,sha256=nU_IcdphNaj6bAPbjeL-c1cInQPfTBit8mp5Y57lwQk,1686
 lfx/utils/version.py,sha256=cHpbO0OJD2JQAvVaTH_6ibYeFbHJV0QDHs_YXXZ-bT8,671
-lfx_nightly-0.1.13.dev11.dist-info/METADATA,sha256=dherVA7JNlTrOnZxoaISknodoQzPGMe-n4Q3Szv43bw,8290
-lfx_nightly-0.1.13.dev11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-lfx_nightly-0.1.13.dev11.dist-info/entry_points.txt,sha256=1724p3RHDQRT2CKx_QRzEIa7sFuSVO0Ux70YfXfoMT4,42
-lfx_nightly-0.1.13.dev11.dist-info/RECORD,,
+lfx_nightly-0.1.13.dev12.dist-info/METADATA,sha256=pTwMBq4ciOBRLatxElSZFMwuB46Vhpm14_4k-aYNrNE,8290
+lfx_nightly-0.1.13.dev12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+lfx_nightly-0.1.13.dev12.dist-info/entry_points.txt,sha256=1724p3RHDQRT2CKx_QRzEIa7sFuSVO0Ux70YfXfoMT4,42
+lfx_nightly-0.1.13.dev12.dist-info/RECORD,,

{lfx_nightly-0.1.13.dev11.dist-info → lfx_nightly-0.1.13.dev12.dist-info}/WHEEL RENAMED Viewed

File without changes

{lfx_nightly-0.1.13.dev11.dist-info → lfx_nightly-0.1.13.dev12.dist-info}/entry_points.txt RENAMED Viewed

File without changes

lfx-nightly 0.1.13.dev11__py3-none-any.whl → 0.1.13.dev12__py3-none-any.whl

Potentially problematic release.

lfx-nightly 0.1.13.dev11__py3-none-any.whl → 0.1.13.dev12__py3-none-any.whl