PyPI - lfx-nightly - Versions diffs - 0.1.12.dev0__py3-none-any.whl → 0.1.12.dev1__py3-none-any.whl - Mend

lfx-nightly 0.1.12.dev0__py3-none-any.whl → 0.1.12.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

lfx/base/data/docling_utils.py CHANGED Viewed

@@ -1,14 +1,20 @@
+import importlib
 import signal
 import sys
 import traceback
 from contextlib import suppress
+from typing import TYPE_CHECKING
 from docling_core.types.doc import DoclingDocument
+from pydantic import BaseModel, SecretStr, TypeAdapter
 from lfx.log.logger import logger
 from lfx.schema.data import Data
 from lfx.schema.dataframe import DataFrame
+if TYPE_CHECKING:
+    from langchain_core.language_models.chat_models import BaseChatModel
 def extract_docling_documents(data_inputs: Data | list[Data] | DataFrame, doc_key: str) -> list[DoclingDocument]:
     documents: list[DoclingDocument] = []
@@ -57,7 +63,45 @@ def extract_docling_documents(data_inputs: Data | list[Data] | DataFrame, doc_ke
     return documents
-def docling_worker(file_paths: list[str], queue, pipeline: str, ocr_engine: str):
+def _unwrap_secrets(obj):
+    if isinstance(obj, SecretStr):
+        return obj.get_secret_value()
+    if isinstance(obj, dict):
+        return {k: _unwrap_secrets(v) for k, v in obj.items()}
+    if isinstance(obj, list):
+        return [_unwrap_secrets(v) for v in obj]
+    return obj
+def _dump_with_secrets(model: BaseModel):
+    return _unwrap_secrets(model.model_dump(mode="python", round_trip=True))
+def _serialize_pydantic_model(model: BaseModel):
+    return {
+        "__class_path__": f"{model.__class__.__module__}.{model.__class__.__name__}",
+        "config": _dump_with_secrets(model),
+    }
+def _deserialize_pydantic_model(data: dict):
+    module_name, class_name = data["__class_path__"].rsplit(".", 1)
+    module = importlib.import_module(module_name)
+    cls = getattr(module, class_name)
+    adapter = TypeAdapter(cls)
+    return adapter.validate_python(data["config"])
+def docling_worker(
+    *,
+    file_paths: list[str],
+    queue,
+    pipeline: str,
+    ocr_engine: str,
+    do_picture_classification: bool,
+    pic_desc_config: dict | None,
+    pic_desc_prompt: str,
+):
     """Worker function for processing files with Docling in a separate process."""
     # Signal handling for graceful shutdown
     shutdown_requested = False
@@ -106,6 +150,7 @@ def docling_worker(file_paths: list[str], queue, pipeline: str, ocr_engine: str)
         from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
         from docling.models.factories import get_ocr_factory
         from docling.pipeline.vlm_pipeline import VlmPipeline
+        from langchain_docling.picture_description import PictureDescriptionLangChainOptions
         # Check for shutdown after imports
         check_shutdown()
@@ -143,6 +188,19 @@ def docling_worker(file_paths: list[str], queue, pipeline: str, ocr_engine: str)
                 kind=ocr_engine,
             )
             pipeline_options.ocr_options = ocr_options
+        pipeline_options.do_picture_classification = do_picture_classification
+        if pic_desc_config:
+            pic_desc_llm: BaseChatModel = _deserialize_pydantic_model(pic_desc_config)
+            logger.info("Docling enabling the picture description stage.")
+            pipeline_options.do_picture_description = True
+            pipeline_options.allow_external_plugins = True
+            pipeline_options.picture_description_options = PictureDescriptionLangChainOptions(
+                llm=pic_desc_llm,
+                prompt=pic_desc_prompt,
+            )
         return pipeline_options
     # Configure the VLM pipeline

lfx/components/docling/docling_inline.py CHANGED Viewed

@@ -3,8 +3,8 @@ from multiprocessing import Queue, get_context
 from queue import Empty
 from lfx.base.data import BaseFileComponent
-from lfx.base.data.docling_utils import docling_worker
-from lfx.inputs import DropdownInput
+from lfx.base.data.docling_utils import _serialize_pydantic_model, docling_worker
+from lfx.inputs import BoolInput, DropdownInput, HandleInput, StrInput
 from lfx.schema import Data
@@ -67,6 +67,26 @@ class DoclingInlineComponent(BaseFileComponent):
             real_time_refresh=False,
             value="None",
         ),
+        BoolInput(
+            name="do_picture_classification",
+            display_name="Picture classification",
+            info="If enabled, the Docling pipeline will classify the pictures type.",
+            value=False,
+        ),
+        HandleInput(
+            name="pic_desc_llm",
+            display_name="Picture description LLM",
+            info="If connected, the model to use for running the picture description task.",
+            input_types=["LanguageModel"],
+            required=False,
+        ),
+        StrInput(
+            name="pic_desc_prompt",
+            display_name="Picture description prompt",
+            value="Describe the image in three sentences. Be concise and accurate.",
+            info="The user prompt to use when invoking the model.",
+            advanced=True,
+        ),
         # TODO: expose more Docling options
     ]
@@ -131,11 +151,7 @@ class DoclingInlineComponent(BaseFileComponent):
     def process_files(self, file_list: list[BaseFileComponent.BaseFile]) -> list[BaseFileComponent.BaseFile]:
         try:
-            from docling.datamodel.base_models import InputFormat
-            from docling.datamodel.pipeline_options import OcrOptions, PdfPipelineOptions, VlmPipelineOptions
-            from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
-            from docling.models.factories import get_ocr_factory
-            from docling.pipeline.vlm_pipeline import VlmPipeline
+            from docling.document_converter import DocumentConverter  # noqa: F401
         except ImportError as e:
             msg = (
                 "Docling is an optional dependency. Install with `uv pip install 'langflow[docling]'` or refer to the "
@@ -143,52 +159,29 @@ class DoclingInlineComponent(BaseFileComponent):
             )
             raise ImportError(msg) from e
-        # Configure the standard PDF pipeline
-        def _get_standard_opts() -> PdfPipelineOptions:
-            pipeline_options = PdfPipelineOptions()
-            pipeline_options.do_ocr = self.ocr_engine != "None"
-            if pipeline_options.do_ocr:
-                ocr_factory = get_ocr_factory(
-                    allow_external_plugins=False,
-                )
-                ocr_options: OcrOptions = ocr_factory.create_options(
-                    kind=self.ocr_engine,
-                )
-                pipeline_options.ocr_options = ocr_options
-            return pipeline_options
-        # Configure the VLM pipeline
-        def _get_vlm_opts() -> VlmPipelineOptions:
-            return VlmPipelineOptions()
-        # Configure the main format options and create the DocumentConverter()
-        def _get_converter() -> DocumentConverter:
-            if self.pipeline == "standard":
-                pdf_format_option = PdfFormatOption(
-                    pipeline_options=_get_standard_opts(),
-                )
-            elif self.pipeline == "vlm":
-                pdf_format_option = PdfFormatOption(pipeline_cls=VlmPipeline, pipeline_options=_get_vlm_opts())
-            format_options: dict[InputFormat, FormatOption] = {
-                InputFormat.PDF: pdf_format_option,
-                InputFormat.IMAGE: pdf_format_option,
-            }
-            return DocumentConverter(format_options=format_options)
         file_paths = [file.path for file in file_list if file.path]
         if not file_paths:
             self.log("No files to process.")
             return file_list
+        pic_desc_config: dict | None = None
+        if self.pic_desc_llm is not None:
+            pic_desc_config = _serialize_pydantic_model(self.pic_desc_llm)
         ctx = get_context("spawn")
         queue: Queue = ctx.Queue()
         proc = ctx.Process(
             target=docling_worker,
-            args=(file_paths, queue, self.pipeline, self.ocr_engine),
+            kwargs={
+                "file_paths": file_paths,
+                "queue": queue,
+                "pipeline": self.pipeline,
+                "ocr_engine": self.ocr_engine,
+                "do_picture_classification": self.do_picture_classification,
+                "pic_desc_config": pic_desc_config,
+                "pic_desc_prompt": self.pic_desc_prompt,
+            },
         )
         result = None

{lfx_nightly-0.1.12.dev0.dist-info → lfx_nightly-0.1.12.dev1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lfx-nightly
-Version: 0.1.12.dev0
+Version: 0.1.12.dev1
 Summary: Langflow Executor - A lightweight CLI tool for executing and serving Langflow AI flows
 Author-email: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
 Requires-Python: <3.14,>=3.10

{lfx_nightly-0.1.12.dev0.dist-info → lfx_nightly-0.1.12.dev1.dist-info}/RECORD RENAMED Viewed

@@ -29,7 +29,7 @@ lfx/base/curl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lfx/base/curl/parse.py,sha256=Yw6mMbGg7e-ffrBItEUJeTiljneCXlNyt5afzEP9eUI,6094
 lfx/base/data/__init__.py,sha256=lQsYYMyAg_jA9ZF7oc-LNZsRE2uMGT6g16WzsUByHqs,81
 lfx/base/data/base_file.py,sha256=XFj3u9OGHcRbWfzslzvvxn-qpaCeX0uUQ0fStUCo65I,25495
-lfx/base/data/docling_utils.py,sha256=2kwI_eOPg-Wr2mfuGkOXFsW-53VqV8_F-XUTWruYMXg,9744
+lfx/base/data/docling_utils.py,sha256=i0KpNNLgPJ0D226Tm5j_oaCv09w9IspBU2OwTDCfnBc,11625
 lfx/base/data/utils.py,sha256=eZJgkOvQ3MaURDfgkH2MiZZOBF5_D0nSlmDY6LgLRik,5960
 lfx/base/document_transformers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lfx/base/document_transformers/model.py,sha256=etVEmyakiEgflB-fayClPnFRhaEdXfdUu4cqpgtk8ek,1317
@@ -242,7 +242,7 @@ lfx/components/deepseek/__init__.py,sha256=gmyOcLeNEcnwSeowow0N0UhBDlSuZ_8x-DMUj
 lfx/components/deepseek/deepseek.py,sha256=yNrHoljXOMScKng-oSB-ceWhVZeuh11lmrAY7WiB2H0,4702
 lfx/components/docling/__init__.py,sha256=O4utz9GHFpTVe_Wy0PR80yA1irJQRnAFQWkoLCVj888,1424
 lfx/components/docling/chunk_docling_document.py,sha256=OX-jj4nX3UZgopViMAGAnFgtLql0sgs6cVmU8p9QbqA,7600
-lfx/components/docling/docling_inline.py,sha256=uq_YULsYVaz31A6HaHnE7rKacJXWAcEsC_LdWj_8arA,8278
+lfx/components/docling/docling_inline.py,sha256=-m8hTANtdUDUjsJtJTB1sl6MJMhXG8zMeBMwbn0w9Ig,7871
 lfx/components/docling/docling_remote.py,sha256=kwMS_-QMiM_JmPqvtHf4gDS73d2hZrIbtAPsN8bZxGE,6769
 lfx/components/docling/export_docling_document.py,sha256=TeFt3TesCxSqW57nv-30gf2dX8qMDUHLRhwU-1ciq08,4681
 lfx/components/documentloaders/__init__.py,sha256=LNl2hG2InevQCUREFKhF9ylaTf_kwPsdjiDbx2ElX3M,69
@@ -693,7 +693,7 @@ lfx/utils/schemas.py,sha256=NbOtVQBrn4d0BAu-0H_eCTZI2CXkKZlRY37XCSmuJwc,3865
 lfx/utils/util.py,sha256=xGR32XDRr_TtruhjnXfI7lEWmk-vgywHAy3kz5SBowc,15725
 lfx/utils/util_strings.py,sha256=nU_IcdphNaj6bAPbjeL-c1cInQPfTBit8mp5Y57lwQk,1686
 lfx/utils/version.py,sha256=cHpbO0OJD2JQAvVaTH_6ibYeFbHJV0QDHs_YXXZ-bT8,671
-lfx_nightly-0.1.12.dev0.dist-info/METADATA,sha256=2-HWdV_bpI8ChwlifgmnuZ7U8rEfbmdeaszShAytmIw,8000
-lfx_nightly-0.1.12.dev0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-lfx_nightly-0.1.12.dev0.dist-info/entry_points.txt,sha256=1724p3RHDQRT2CKx_QRzEIa7sFuSVO0Ux70YfXfoMT4,42
-lfx_nightly-0.1.12.dev0.dist-info/RECORD,,
+lfx_nightly-0.1.12.dev1.dist-info/METADATA,sha256=eMZwEM_BySUNrUL6AE3XpjsO-k1I1zchBEvtSHaZF4M,8000
+lfx_nightly-0.1.12.dev1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+lfx_nightly-0.1.12.dev1.dist-info/entry_points.txt,sha256=1724p3RHDQRT2CKx_QRzEIa7sFuSVO0Ux70YfXfoMT4,42
+lfx_nightly-0.1.12.dev1.dist-info/RECORD,,

{lfx_nightly-0.1.12.dev0.dist-info → lfx_nightly-0.1.12.dev1.dist-info}/WHEEL RENAMED Viewed

File without changes

{lfx_nightly-0.1.12.dev0.dist-info → lfx_nightly-0.1.12.dev1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

lfx-nightly 0.1.12.dev0__py3-none-any.whl → 0.1.12.dev1__py3-none-any.whl

lfx-nightly 0.1.12.dev0__py3-none-any.whl → 0.1.12.dev1__py3-none-any.whl