lfx-nightly 0.1.12.dev0__py3-none-any.whl → 0.1.12.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,20 @@
1
+ import importlib
1
2
  import signal
2
3
  import sys
3
4
  import traceback
4
5
  from contextlib import suppress
6
+ from typing import TYPE_CHECKING
5
7
 
6
8
  from docling_core.types.doc import DoclingDocument
9
+ from pydantic import BaseModel, SecretStr, TypeAdapter
7
10
 
8
11
  from lfx.log.logger import logger
9
12
  from lfx.schema.data import Data
10
13
  from lfx.schema.dataframe import DataFrame
11
14
 
15
+ if TYPE_CHECKING:
16
+ from langchain_core.language_models.chat_models import BaseChatModel
17
+
12
18
 
13
19
  def extract_docling_documents(data_inputs: Data | list[Data] | DataFrame, doc_key: str) -> list[DoclingDocument]:
14
20
  documents: list[DoclingDocument] = []
@@ -57,7 +63,45 @@ def extract_docling_documents(data_inputs: Data | list[Data] | DataFrame, doc_ke
57
63
  return documents
58
64
 
59
65
 
60
- def docling_worker(file_paths: list[str], queue, pipeline: str, ocr_engine: str):
66
+ def _unwrap_secrets(obj):
67
+ if isinstance(obj, SecretStr):
68
+ return obj.get_secret_value()
69
+ if isinstance(obj, dict):
70
+ return {k: _unwrap_secrets(v) for k, v in obj.items()}
71
+ if isinstance(obj, list):
72
+ return [_unwrap_secrets(v) for v in obj]
73
+ return obj
74
+
75
+
76
+ def _dump_with_secrets(model: BaseModel):
77
+ return _unwrap_secrets(model.model_dump(mode="python", round_trip=True))
78
+
79
+
80
+ def _serialize_pydantic_model(model: BaseModel):
81
+ return {
82
+ "__class_path__": f"{model.__class__.__module__}.{model.__class__.__name__}",
83
+ "config": _dump_with_secrets(model),
84
+ }
85
+
86
+
87
+ def _deserialize_pydantic_model(data: dict):
88
+ module_name, class_name = data["__class_path__"].rsplit(".", 1)
89
+ module = importlib.import_module(module_name)
90
+ cls = getattr(module, class_name)
91
+ adapter = TypeAdapter(cls)
92
+ return adapter.validate_python(data["config"])
93
+
94
+
95
+ def docling_worker(
96
+ *,
97
+ file_paths: list[str],
98
+ queue,
99
+ pipeline: str,
100
+ ocr_engine: str,
101
+ do_picture_classification: bool,
102
+ pic_desc_config: dict | None,
103
+ pic_desc_prompt: str,
104
+ ):
61
105
  """Worker function for processing files with Docling in a separate process."""
62
106
  # Signal handling for graceful shutdown
63
107
  shutdown_requested = False
@@ -106,6 +150,7 @@ def docling_worker(file_paths: list[str], queue, pipeline: str, ocr_engine: str)
106
150
  from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
107
151
  from docling.models.factories import get_ocr_factory
108
152
  from docling.pipeline.vlm_pipeline import VlmPipeline
153
+ from langchain_docling.picture_description import PictureDescriptionLangChainOptions
109
154
 
110
155
  # Check for shutdown after imports
111
156
  check_shutdown()
@@ -143,6 +188,19 @@ def docling_worker(file_paths: list[str], queue, pipeline: str, ocr_engine: str)
143
188
  kind=ocr_engine,
144
189
  )
145
190
  pipeline_options.ocr_options = ocr_options
191
+
192
+ pipeline_options.do_picture_classification = do_picture_classification
193
+
194
+ if pic_desc_config:
195
+ pic_desc_llm: BaseChatModel = _deserialize_pydantic_model(pic_desc_config)
196
+
197
+ logger.info("Docling enabling the picture description stage.")
198
+ pipeline_options.do_picture_description = True
199
+ pipeline_options.allow_external_plugins = True
200
+ pipeline_options.picture_description_options = PictureDescriptionLangChainOptions(
201
+ llm=pic_desc_llm,
202
+ prompt=pic_desc_prompt,
203
+ )
146
204
  return pipeline_options
147
205
 
148
206
  # Configure the VLM pipeline
@@ -3,8 +3,8 @@ from multiprocessing import Queue, get_context
3
3
  from queue import Empty
4
4
 
5
5
  from lfx.base.data import BaseFileComponent
6
- from lfx.base.data.docling_utils import docling_worker
7
- from lfx.inputs import DropdownInput
6
+ from lfx.base.data.docling_utils import _serialize_pydantic_model, docling_worker
7
+ from lfx.inputs import BoolInput, DropdownInput, HandleInput, StrInput
8
8
  from lfx.schema import Data
9
9
 
10
10
 
@@ -67,6 +67,26 @@ class DoclingInlineComponent(BaseFileComponent):
67
67
  real_time_refresh=False,
68
68
  value="None",
69
69
  ),
70
+ BoolInput(
71
+ name="do_picture_classification",
72
+ display_name="Picture classification",
73
+ info="If enabled, the Docling pipeline will classify the pictures type.",
74
+ value=False,
75
+ ),
76
+ HandleInput(
77
+ name="pic_desc_llm",
78
+ display_name="Picture description LLM",
79
+ info="If connected, the model to use for running the picture description task.",
80
+ input_types=["LanguageModel"],
81
+ required=False,
82
+ ),
83
+ StrInput(
84
+ name="pic_desc_prompt",
85
+ display_name="Picture description prompt",
86
+ value="Describe the image in three sentences. Be concise and accurate.",
87
+ info="The user prompt to use when invoking the model.",
88
+ advanced=True,
89
+ ),
70
90
  # TODO: expose more Docling options
71
91
  ]
72
92
 
@@ -131,11 +151,7 @@ class DoclingInlineComponent(BaseFileComponent):
131
151
 
132
152
  def process_files(self, file_list: list[BaseFileComponent.BaseFile]) -> list[BaseFileComponent.BaseFile]:
133
153
  try:
134
- from docling.datamodel.base_models import InputFormat
135
- from docling.datamodel.pipeline_options import OcrOptions, PdfPipelineOptions, VlmPipelineOptions
136
- from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
137
- from docling.models.factories import get_ocr_factory
138
- from docling.pipeline.vlm_pipeline import VlmPipeline
154
+ from docling.document_converter import DocumentConverter # noqa: F401
139
155
  except ImportError as e:
140
156
  msg = (
141
157
  "Docling is an optional dependency. Install with `uv pip install 'langflow[docling]'` or refer to the "
@@ -143,52 +159,29 @@ class DoclingInlineComponent(BaseFileComponent):
143
159
  )
144
160
  raise ImportError(msg) from e
145
161
 
146
- # Configure the standard PDF pipeline
147
- def _get_standard_opts() -> PdfPipelineOptions:
148
- pipeline_options = PdfPipelineOptions()
149
- pipeline_options.do_ocr = self.ocr_engine != "None"
150
- if pipeline_options.do_ocr:
151
- ocr_factory = get_ocr_factory(
152
- allow_external_plugins=False,
153
- )
154
-
155
- ocr_options: OcrOptions = ocr_factory.create_options(
156
- kind=self.ocr_engine,
157
- )
158
- pipeline_options.ocr_options = ocr_options
159
- return pipeline_options
160
-
161
- # Configure the VLM pipeline
162
- def _get_vlm_opts() -> VlmPipelineOptions:
163
- return VlmPipelineOptions()
164
-
165
- # Configure the main format options and create the DocumentConverter()
166
- def _get_converter() -> DocumentConverter:
167
- if self.pipeline == "standard":
168
- pdf_format_option = PdfFormatOption(
169
- pipeline_options=_get_standard_opts(),
170
- )
171
- elif self.pipeline == "vlm":
172
- pdf_format_option = PdfFormatOption(pipeline_cls=VlmPipeline, pipeline_options=_get_vlm_opts())
173
-
174
- format_options: dict[InputFormat, FormatOption] = {
175
- InputFormat.PDF: pdf_format_option,
176
- InputFormat.IMAGE: pdf_format_option,
177
- }
178
-
179
- return DocumentConverter(format_options=format_options)
180
-
181
162
  file_paths = [file.path for file in file_list if file.path]
182
163
 
183
164
  if not file_paths:
184
165
  self.log("No files to process.")
185
166
  return file_list
186
167
 
168
+ pic_desc_config: dict | None = None
169
+ if self.pic_desc_llm is not None:
170
+ pic_desc_config = _serialize_pydantic_model(self.pic_desc_llm)
171
+
187
172
  ctx = get_context("spawn")
188
173
  queue: Queue = ctx.Queue()
189
174
  proc = ctx.Process(
190
175
  target=docling_worker,
191
- args=(file_paths, queue, self.pipeline, self.ocr_engine),
176
+ kwargs={
177
+ "file_paths": file_paths,
178
+ "queue": queue,
179
+ "pipeline": self.pipeline,
180
+ "ocr_engine": self.ocr_engine,
181
+ "do_picture_classification": self.do_picture_classification,
182
+ "pic_desc_config": pic_desc_config,
183
+ "pic_desc_prompt": self.pic_desc_prompt,
184
+ },
192
185
  )
193
186
 
194
187
  result = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lfx-nightly
3
- Version: 0.1.12.dev0
3
+ Version: 0.1.12.dev1
4
4
  Summary: Langflow Executor - A lightweight CLI tool for executing and serving Langflow AI flows
5
5
  Author-email: Gabriel Luiz Freitas Almeida <gabriel@langflow.org>
6
6
  Requires-Python: <3.14,>=3.10
@@ -29,7 +29,7 @@ lfx/base/curl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
29
  lfx/base/curl/parse.py,sha256=Yw6mMbGg7e-ffrBItEUJeTiljneCXlNyt5afzEP9eUI,6094
30
30
  lfx/base/data/__init__.py,sha256=lQsYYMyAg_jA9ZF7oc-LNZsRE2uMGT6g16WzsUByHqs,81
31
31
  lfx/base/data/base_file.py,sha256=XFj3u9OGHcRbWfzslzvvxn-qpaCeX0uUQ0fStUCo65I,25495
32
- lfx/base/data/docling_utils.py,sha256=2kwI_eOPg-Wr2mfuGkOXFsW-53VqV8_F-XUTWruYMXg,9744
32
+ lfx/base/data/docling_utils.py,sha256=i0KpNNLgPJ0D226Tm5j_oaCv09w9IspBU2OwTDCfnBc,11625
33
33
  lfx/base/data/utils.py,sha256=eZJgkOvQ3MaURDfgkH2MiZZOBF5_D0nSlmDY6LgLRik,5960
34
34
  lfx/base/document_transformers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
35
  lfx/base/document_transformers/model.py,sha256=etVEmyakiEgflB-fayClPnFRhaEdXfdUu4cqpgtk8ek,1317
@@ -242,7 +242,7 @@ lfx/components/deepseek/__init__.py,sha256=gmyOcLeNEcnwSeowow0N0UhBDlSuZ_8x-DMUj
242
242
  lfx/components/deepseek/deepseek.py,sha256=yNrHoljXOMScKng-oSB-ceWhVZeuh11lmrAY7WiB2H0,4702
243
243
  lfx/components/docling/__init__.py,sha256=O4utz9GHFpTVe_Wy0PR80yA1irJQRnAFQWkoLCVj888,1424
244
244
  lfx/components/docling/chunk_docling_document.py,sha256=OX-jj4nX3UZgopViMAGAnFgtLql0sgs6cVmU8p9QbqA,7600
245
- lfx/components/docling/docling_inline.py,sha256=uq_YULsYVaz31A6HaHnE7rKacJXWAcEsC_LdWj_8arA,8278
245
+ lfx/components/docling/docling_inline.py,sha256=-m8hTANtdUDUjsJtJTB1sl6MJMhXG8zMeBMwbn0w9Ig,7871
246
246
  lfx/components/docling/docling_remote.py,sha256=kwMS_-QMiM_JmPqvtHf4gDS73d2hZrIbtAPsN8bZxGE,6769
247
247
  lfx/components/docling/export_docling_document.py,sha256=TeFt3TesCxSqW57nv-30gf2dX8qMDUHLRhwU-1ciq08,4681
248
248
  lfx/components/documentloaders/__init__.py,sha256=LNl2hG2InevQCUREFKhF9ylaTf_kwPsdjiDbx2ElX3M,69
@@ -693,7 +693,7 @@ lfx/utils/schemas.py,sha256=NbOtVQBrn4d0BAu-0H_eCTZI2CXkKZlRY37XCSmuJwc,3865
693
693
  lfx/utils/util.py,sha256=xGR32XDRr_TtruhjnXfI7lEWmk-vgywHAy3kz5SBowc,15725
694
694
  lfx/utils/util_strings.py,sha256=nU_IcdphNaj6bAPbjeL-c1cInQPfTBit8mp5Y57lwQk,1686
695
695
  lfx/utils/version.py,sha256=cHpbO0OJD2JQAvVaTH_6ibYeFbHJV0QDHs_YXXZ-bT8,671
696
- lfx_nightly-0.1.12.dev0.dist-info/METADATA,sha256=2-HWdV_bpI8ChwlifgmnuZ7U8rEfbmdeaszShAytmIw,8000
697
- lfx_nightly-0.1.12.dev0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
698
- lfx_nightly-0.1.12.dev0.dist-info/entry_points.txt,sha256=1724p3RHDQRT2CKx_QRzEIa7sFuSVO0Ux70YfXfoMT4,42
699
- lfx_nightly-0.1.12.dev0.dist-info/RECORD,,
696
+ lfx_nightly-0.1.12.dev1.dist-info/METADATA,sha256=eMZwEM_BySUNrUL6AE3XpjsO-k1I1zchBEvtSHaZF4M,8000
697
+ lfx_nightly-0.1.12.dev1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
698
+ lfx_nightly-0.1.12.dev1.dist-info/entry_points.txt,sha256=1724p3RHDQRT2CKx_QRzEIa7sFuSVO0Ux70YfXfoMT4,42
699
+ lfx_nightly-0.1.12.dev1.dist-info/RECORD,,