lfx-docling 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
+ """lfx-docling: Docling document processing components."""
2
+
3
+ from lfx_docling.components.docling.chunk_docling_document import ChunkDoclingDocumentComponent
4
+ from lfx_docling.components.docling.docling_inline import DoclingInlineComponent
5
+ from lfx_docling.components.docling.docling_remote import DoclingRemoteComponent
6
+ from lfx_docling.components.docling.export_docling_document import ExportDoclingDocumentComponent
7
+
8
+ __all__ = [
9
+ "ChunkDoclingDocumentComponent",
10
+ "DoclingInlineComponent",
11
+ "DoclingRemoteComponent",
12
+ "ExportDoclingDocumentComponent",
13
+ ]
@@ -0,0 +1,11 @@
1
+ from .chunk_docling_document import ChunkDoclingDocumentComponent
2
+ from .docling_inline import DoclingInlineComponent
3
+ from .docling_remote import DoclingRemoteComponent
4
+ from .export_docling_document import ExportDoclingDocumentComponent
5
+
6
+ __all__ = [
7
+ "ChunkDoclingDocumentComponent",
8
+ "DoclingInlineComponent",
9
+ "DoclingRemoteComponent",
10
+ "ExportDoclingDocumentComponent",
11
+ ]
@@ -0,0 +1,224 @@
1
+ import json
2
+ from typing import Any
3
+
4
+ from lfx.base.data.docling_utils import extract_docling_documents
5
+ from lfx.custom import Component
6
+ from lfx.io import BoolInput, DropdownInput, HandleInput, IntInput, MessageTextInput, Output, StrInput
7
+ from lfx.schema import Data, DataFrame
8
+
9
+ _CHUNKING_INSTALL_HINT = (
10
+ "Install them with `uv pip install 'lfx-docling[chunking]'`, "
11
+ "`uv pip install 'langflow[docling-chunking]'`, or "
12
+ "`uv pip install 'docling-core[chunking]' tiktoken`."
13
+ )
14
+
15
+
16
+ def _load_docling_chunker_dependencies() -> tuple[type[Any], type[Any]]:
17
+ try:
18
+ from docling_core.transforms.chunker.doc_chunk import DocMeta as DocMetaCls
19
+ from docling_core.transforms.chunker.hierarchical_chunker import HierarchicalChunker as HierarchicalChunkerCls
20
+ except (ImportError, RuntimeError) as e:
21
+ msg = f"Docling chunking dependencies are not installed. {_CHUNKING_INSTALL_HINT}"
22
+ raise ImportError(msg) from e
23
+ return DocMetaCls, HierarchicalChunkerCls
24
+
25
+
26
+ class ChunkDoclingDocumentComponent(Component):
27
+ display_name: str = "Chunk DoclingDocument"
28
+ description: str = "Use DoclingDocument chunkers to split the document into chunks."
29
+ documentation = "https://docling-project.github.io/docling/concepts/chunking/"
30
+ icon = "Docling"
31
+ name = "ChunkDoclingDocument"
32
+
33
+ inputs = [
34
+ HandleInput(
35
+ name="data_inputs",
36
+ display_name="JSON or Table",
37
+ info="The data with documents to split in chunks.",
38
+ input_types=["Data", "JSON", "DataFrame", "Table"],
39
+ required=True,
40
+ ),
41
+ DropdownInput(
42
+ name="chunker",
43
+ display_name="Chunker",
44
+ options=["HybridChunker", "HierarchicalChunker"],
45
+ info=("Which chunker to use."),
46
+ value="HybridChunker",
47
+ real_time_refresh=True,
48
+ input_types=["Message"],
49
+ ),
50
+ DropdownInput(
51
+ name="provider",
52
+ display_name="Provider",
53
+ options=["Hugging Face", "OpenAI"],
54
+ info=("Which tokenizer provider."),
55
+ value="Hugging Face",
56
+ show=True,
57
+ real_time_refresh=True,
58
+ advanced=True,
59
+ dynamic=True,
60
+ ),
61
+ StrInput(
62
+ name="hf_model_name",
63
+ display_name="HF model name",
64
+ info=(
65
+ "Model name of the tokenizer to use with the HybridChunker when Hugging Face is chosen as a tokenizer."
66
+ ),
67
+ value="sentence-transformers/all-MiniLM-L6-v2",
68
+ show=True,
69
+ advanced=True,
70
+ dynamic=True,
71
+ ),
72
+ StrInput(
73
+ name="openai_model_name",
74
+ display_name="OpenAI model name",
75
+ info=("Model name of the tokenizer to use with the HybridChunker when OpenAI is chosen as a tokenizer."),
76
+ value="gpt-4o",
77
+ show=False,
78
+ advanced=True,
79
+ dynamic=True,
80
+ ),
81
+ IntInput(
82
+ name="max_tokens",
83
+ display_name="Maximum tokens",
84
+ info=("Maximum number of tokens for the HybridChunker."),
85
+ show=True,
86
+ required=False,
87
+ advanced=True,
88
+ dynamic=True,
89
+ input_types=["Message"],
90
+ ),
91
+ BoolInput(
92
+ name="merge_peers",
93
+ display_name="Merge peers",
94
+ info="Merge undersized chunks sharing the same relevant metadata.",
95
+ value=True,
96
+ show=True,
97
+ advanced=True,
98
+ dynamic=True,
99
+ ),
100
+ BoolInput(
101
+ name="always_emit_headings",
102
+ display_name="Always emit headings",
103
+ info="Emit headings even for empty sections.",
104
+ value=False,
105
+ show=True,
106
+ advanced=True,
107
+ dynamic=True,
108
+ ),
109
+ MessageTextInput(
110
+ name="doc_key",
111
+ display_name="Doc Key",
112
+ info="The key to use for the DoclingDocument column.",
113
+ value="doc",
114
+ advanced=True,
115
+ ),
116
+ ]
117
+
118
+ outputs = [
119
+ Output(display_name="Table", name="dataframe", method="chunk_documents"),
120
+ ]
121
+
122
def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:
    """Show or hide the tokenizer-related inputs after a dropdown change.

    Args:
        build_config: Mapping of input name to its config dict; the ``show``
            flags are updated in place.
        field_value: The new value of the input that changed.
        field_name: Name of the input that changed. Only ``"chunker"`` and
            ``"provider"`` have any effect.

    Returns:
        The same ``build_config`` object.
    """
    if field_name == "chunker":
        selected_provider = build_config["provider"]["value"]
        hybrid = field_value == "HybridChunker"
        # Every tokenizer option is hidden unless the HybridChunker is chosen;
        # of the two model-name inputs only the one matching the current
        # provider is shown.
        visibility = {
            "provider": hybrid,
            "hf_model_name": hybrid and selected_provider == "Hugging Face",
            "openai_model_name": hybrid and selected_provider == "OpenAI",
            "max_tokens": hybrid,
            "merge_peers": hybrid,
            "always_emit_headings": hybrid,
        }
        for input_name, visible in visibility.items():
            build_config[input_name]["show"] = visible
    elif field_name == "provider" and build_config["chunker"]["value"] == "HybridChunker":
        # An unrecognized provider leaves both model-name inputs untouched.
        if field_value in ("Hugging Face", "OpenAI"):
            build_config["hf_model_name"]["show"] = field_value == "Hugging Face"
            build_config["openai_model_name"]["show"] = field_value == "OpenAI"

    return build_config
151
+
152
+ def _docs_to_data(self, docs) -> list[Data]:
153
+ return [Data(text=doc.page_content, data=doc.metadata) for doc in docs]
154
+
155
def chunk_documents(self) -> DataFrame:
    """Split the input DoclingDocuments into chunks and return them as a table.

    Every chunk becomes one record with three fields: ``text`` (the
    chunker's contextualized text), ``document_id`` (the source document's
    ``origin.binary_hash`` as a string, or an empty string when the document
    has no origin) and ``doc_items`` (a JSON list with the ``self_ref`` of
    each item in the chunk).

    Returns:
        A DataFrame built from one Data record per chunk.

    Raises:
        ImportError: If an optional chunking or tokenizer dependency is missing.
        ValueError: If the selected chunker or tokenizer provider is unknown.
        TypeError: If anything fails while chunking; the original error is
            chained as the cause.
    """
    documents, warning = extract_docling_documents(self.data_inputs, self.doc_key)
    if warning:
        self.status = warning

    doc_meta_cls, hierarchical_chunker_cls = _load_docling_chunker_dependencies()
    chunker: Any
    if self.chunker == "HybridChunker":
        try:
            from docling_core.transforms.chunker.hybrid_chunker import HybridChunker
        except (ImportError, RuntimeError) as e:
            msg = f"HybridChunker is not installed. {_CHUNKING_INSTALL_HINT}"
            raise ImportError(msg) from e

        # A missing or zero value means "no explicit limit".
        max_tokens: int | None = self.max_tokens or None
        tokenizer: Any
        if self.provider == "Hugging Face":
            try:
                from docling_core.transforms.chunker.tokenizer.huggingface import HuggingFaceTokenizer
            except (ImportError, RuntimeError) as e:
                msg = f"HuggingFaceTokenizer is not installed. {_CHUNKING_INSTALL_HINT}"
                raise ImportError(msg) from e
            tokenizer = HuggingFaceTokenizer.from_pretrained(
                model_name=self.hf_model_name,
                max_tokens=max_tokens,
            )
        elif self.provider == "OpenAI":
            try:
                import tiktoken
                from docling_core.transforms.chunker.tokenizer.openai import OpenAITokenizer
            except (ImportError, RuntimeError) as e:
                msg = f"OpenAITokenizer is not installed. {_CHUNKING_INSTALL_HINT}"
                raise ImportError(msg) from e
            if max_tokens is None:
                max_tokens = 128 * 1024  # context window length required for OpenAI tokenizers
            tokenizer = OpenAITokenizer(
                tokenizer=tiktoken.encoding_for_model(self.openai_model_name), max_tokens=max_tokens
            )
        else:
            # Without this branch `tokenizer` would be unbound below and the
            # user would see an UnboundLocalError instead of a clear message.
            msg = f"Unknown tokenizer provider: {self.provider}"
            raise ValueError(msg)

        chunker = HybridChunker(
            tokenizer=tokenizer,
            merge_peers=bool(self.merge_peers),
            always_emit_headings=bool(self.always_emit_headings),
        )
    elif self.chunker == "HierarchicalChunker":
        chunker = hierarchical_chunker_cls()
    else:
        msg = f"Unknown chunker: {self.chunker}"
        raise ValueError(msg)

    results: list[Data] = []
    try:
        for doc in documents:
            # The id is the same for every chunk of a document, so compute it
            # once. Documents without an origin get an empty id instead of
            # failing on `None.binary_hash`.
            origin = getattr(doc, "origin", None)
            document_id = f"{origin.binary_hash}" if origin is not None else ""

            for chunk in chunker.chunk(dl_doc=doc):
                enriched_text = chunker.contextualize(chunk=chunk)
                meta = doc_meta_cls.model_validate(chunk.meta)

                results.append(
                    Data(
                        data={
                            "text": enriched_text,
                            "document_id": document_id,
                            "doc_items": json.dumps([item.self_ref for item in meta.doc_items]),
                        }
                    )
                )
    except Exception as e:
        # TypeError is kept as the raised type so existing callers that catch
        # it keep working.
        msg = f"Error splitting text: {e}"
        raise TypeError(msg) from e

    return DataFrame(results)
@@ -0,0 +1,350 @@
1
+ import json
2
+ import subprocess
3
+ import sys
4
+ import textwrap
5
+ import time
6
+
7
+ from lfx.base.data import BaseFileComponent
8
+ from lfx.base.data.docling_utils import _serialize_pydantic_model
9
+ from lfx.inputs import BoolInput, DropdownInput, HandleInput, StrInput
10
+ from lfx.schema import Data, DataFrame
11
+
12
+
13
+ class DoclingInlineComponent(BaseFileComponent):
14
+ display_name = "Docling"
15
+ description = "Uses Docling to process input documents running the Docling models locally."
16
+ documentation = "https://docling-project.github.io/docling/"
17
+ trace_type = "tool"
18
+ icon = "Docling"
19
+ name = "DoclingInline"
20
+
21
+ # https://docling-project.github.io/docling/usage/supported_formats/
22
# File extensions this component accepts. "jpg" is listed next to "jpeg" so
# that both spellings of the same image format are accepted, matching the
# list used by DoclingRemoteComponent.
VALID_EXTENSIONS = [
    "adoc",
    "asciidoc",
    "asc",
    "bmp",
    "csv",
    "dotx",
    "dotm",
    "docm",
    "docx",
    "htm",
    "html",
    "jpeg",
    "jpg",
    "json",
    "md",
    "pdf",
    "png",
    "potx",
    "ppsx",
    "pptm",
    "potm",
    "ppsm",
    "pptx",
    "tiff",
    "txt",
    "xls",
    "xlsx",
    "xhtml",
    "xml",
    "webp",
]
53
+
54
+ inputs = [
55
+ *BaseFileComponent.get_base_inputs(),
56
+ DropdownInput(
57
+ name="pipeline",
58
+ display_name="Pipeline",
59
+ info="Docling pipeline to use",
60
+ options=["standard", "vlm"],
61
+ value="standard",
62
+ ),
63
+ DropdownInput(
64
+ name="ocr_engine",
65
+ display_name="OCR Engine",
66
+ info="OCR engine to use. None will disable OCR.",
67
+ options=["None", "easyocr", "tesserocr", "rapidocr", "ocrmac"],
68
+ value="None",
69
+ ),
70
+ BoolInput(
71
+ name="do_picture_classification",
72
+ display_name="Picture classification",
73
+ info="If enabled, the Docling pipeline will classify the pictures type.",
74
+ value=False,
75
+ ),
76
+ HandleInput(
77
+ name="pic_desc_llm",
78
+ display_name="Picture description LLM",
79
+ info="If connected, the model to use for running the picture description task.",
80
+ input_types=["LanguageModel"],
81
+ required=False,
82
+ ),
83
+ StrInput(
84
+ name="pic_desc_prompt",
85
+ display_name="Picture description prompt",
86
+ value="Describe the image in three sentences. Be concise and accurate.",
87
+ info="The user prompt to use when invoking the model.",
88
+ advanced=True,
89
+ ),
90
+ # TODO: expose more Docling options
91
+ ]
92
+
93
+ outputs = [
94
+ *BaseFileComponent.get_base_outputs(),
95
+ ]
96
+
97
def build(self) -> DataFrame:
    """Return the result of ``load_files()``.

    Declared explicitly because static bundle validation cannot see the
    output method that BaseFileComponent provides through inheritance.
    """
    loaded = self.load_files()
    return loaded
100
+
101
+ # ------------------------------------------------------------------ #
102
+ # Child script that runs Docling in a separate OS process. #
103
+ # Uses subprocess.Popen (same pattern as Read File advanced mode) #
104
+ # instead of multiprocessing/threading so that: #
105
+ # 1. It works reliably under Gunicorn's fork-based workers #
106
+ # 2. The parent's event loop stays free for SSE heartbeats #
107
+ # 3. No pickling / signal-handler conflicts #
108
+ # ------------------------------------------------------------------ #
109
+ _CHILD_SCRIPT: str = textwrap.dedent(r"""
110
+ import json, sys
111
+
112
+ def main():
113
+ cfg = json.loads(sys.stdin.read())
114
+ file_paths = cfg["file_paths"]
115
+ pipeline = cfg["pipeline"]
116
+ ocr_engine = cfg["ocr_engine"]
117
+ do_picture_cls = cfg["do_picture_classification"]
118
+ pic_desc_config = cfg.get("pic_desc_config")
119
+ pic_desc_prompt = cfg.get("pic_desc_prompt", "")
120
+
121
+ try:
122
+ from docling.datamodel.base_models import ConversionStatus, InputFormat
123
+ from docling.datamodel.pipeline_options import PdfPipelineOptions
124
+ from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
125
+ except ImportError as e:
126
+ print(json.dumps({"ok": False, "error": f"Docling is not installed: {e}"}))
127
+ return
128
+
129
+ # --- build converter ------------------------------------------------
130
+ try:
131
+ pipe = PdfPipelineOptions()
132
+ pipe.do_ocr = ocr_engine not in ("", "None")
133
+ if pipe.do_ocr:
134
+ try:
135
+ from docling.models.factories import get_ocr_factory
136
+ fac = get_ocr_factory(allow_external_plugins=False)
137
+ pipe.ocr_options = fac.create_options(kind=ocr_engine)
138
+ except Exception:
139
+ pipe.do_ocr = False
140
+
141
+ pipe.do_picture_classification = do_picture_cls
142
+
143
+ if pic_desc_config:
144
+ try:
145
+ import importlib
146
+ from pydantic import TypeAdapter
147
+ try:
148
+ from langchain_docling.picture_description import PictureDescriptionLangChainOptions
149
+ except ImportError:
150
+ print(json.dumps({
151
+ "ok": False,
152
+ "error": (
153
+ "langchain-docling is not installed. Install it with "
154
+ "`uv pip install 'langflow[docling-image-description]'` or "
155
+ "`uv pip install 'lfx-docling[image-description]'`."
156
+ )
157
+ }))
158
+ return
159
+ mod_name, cls_name = pic_desc_config["__class_path__"].rsplit(".", 1)
160
+ mod = importlib.import_module(mod_name)
161
+ cls = getattr(mod, cls_name)
162
+ adapter = TypeAdapter(cls)
163
+ llm = adapter.validate_python(pic_desc_config["config"])
164
+ pipe.do_picture_description = True
165
+ pipe.allow_external_plugins = True
166
+ pipe.picture_description_options = PictureDescriptionLangChainOptions(
167
+ llm=llm, prompt=pic_desc_prompt,
168
+ )
169
+ except Exception as e:
170
+ print(json.dumps({"ok": False, "error": f"Picture description setup failed: {e}"}))
171
+ return
172
+
173
+ if pipeline == "vlm":
174
+ try:
175
+ from docling.datamodel.pipeline_options import VlmPipelineOptions
176
+ from docling.pipeline.vlm_pipeline import VlmPipeline
177
+ vlm_opts = VlmPipelineOptions()
178
+ if sys.platform == "darwin":
179
+ try:
180
+ from docling.datamodel.vlm_model_specs import GRANITEDOCLING_MLX
181
+ vlm_opts.vlm_options = GRANITEDOCLING_MLX
182
+ except ImportError:
183
+ from docling.datamodel.vlm_model_specs import GRANITEDOCLING_TRANSFORMERS
184
+ vlm_opts.vlm_options = GRANITEDOCLING_TRANSFORMERS
185
+ fmt = {}
186
+ if hasattr(InputFormat, "PDF"):
187
+ fmt[InputFormat.PDF] = PdfFormatOption(
188
+ pipeline_cls=VlmPipeline, pipeline_options=vlm_opts,
189
+ )
190
+ if hasattr(InputFormat, "IMAGE"):
191
+ fmt[InputFormat.IMAGE] = PdfFormatOption(
192
+ pipeline_cls=VlmPipeline, pipeline_options=vlm_opts,
193
+ )
194
+ converter = DocumentConverter(format_options=fmt)
195
+ except Exception as e:
196
+ print(json.dumps({"ok": False, "error": f"VLM pipeline setup failed: {e}"}))
197
+ return
198
+ else:
199
+ pdf_opt = PdfFormatOption(pipeline_options=pipe)
200
+ fmt = {}
201
+ if hasattr(InputFormat, "PDF"):
202
+ fmt[InputFormat.PDF] = pdf_opt
203
+ if hasattr(InputFormat, "IMAGE"):
204
+ fmt[InputFormat.IMAGE] = pdf_opt
205
+ converter = DocumentConverter(format_options=fmt)
206
+ except Exception as e:
207
+ print(json.dumps({"ok": False, "error": f"Converter creation failed: {e}"}))
208
+ return
209
+
210
+ # --- process files --------------------------------------------------
211
+ results = []
212
+ for fp in file_paths:
213
+ try:
214
+ res = converter.convert(fp)
215
+ ok = False
216
+ if hasattr(res, "status"):
217
+ try:
218
+ ok = res.status == ConversionStatus.SUCCESS
219
+ except Exception:
220
+ ok = str(res.status).lower() == "success"
221
+ if not ok and getattr(res, "document", None) is not None:
222
+ ok = True
223
+ if ok and res.document is not None:
224
+ doc_json = res.document.export_to_dict()
225
+ results.append({
226
+ "document": doc_json,
227
+ "file_path": str(fp),
228
+ "status": "SUCCESS",
229
+ })
230
+ else:
231
+ results.append(None)
232
+ except Exception as e:
233
+ sys.stderr.write(f"Error processing {fp}: {e}\n")
234
+ results.append(None)
235
+
236
+ print(json.dumps({"ok": True, "results": results}))
237
+
238
+ if __name__ == "__main__":
239
+ main()
240
+ """)
241
+
242
def process_files(self, file_list: list[BaseFileComponent.BaseFile]) -> list[BaseFileComponent.BaseFile]:
    """Convert the given files with Docling running in a child Python process.

    The child runs ``self._CHILD_SCRIPT``, receives its configuration as JSON
    on stdin and prints a single JSON payload on stdout. This method polls
    the child, logs a heartbeat while it runs, and turns each returned
    document into a ``Data`` record with ``doc`` and ``file_path`` keys.

    Args:
        file_list: Files to convert; entries without a path are not sent to
            the child.

    Returns:
        ``file_list`` unchanged when there is nothing to process, otherwise
        the result of ``self.rollup_data(file_list, processed_data)``.

    Raises:
        ImportError: If Docling is not installed, or the child reports that a
            dependency is not installed.
        TimeoutError: If the child runs longer than the timeout.
        RuntimeError: If the child produces no usable output or reports an error.
    """
    # Check that docling is installed without actually importing it.
    # The real import (PyTorch, transformers, etc.) happens in the child
    # subprocess. Importing it here would spike memory and get the
    # Gunicorn worker SIGKILL'd by the OOM killer.
    import importlib.util
    import tempfile

    if importlib.util.find_spec("docling") is None:
        msg = (
            "Docling is an optional dependency. Install with `uv pip install 'langflow[docling]'` or "
            "`uv pip install 'lfx-docling[local]'`."
        )
        raise ImportError(msg)

    file_paths = [str(file.path) for file in file_list if file.path]

    if not file_paths:
        self.log("No files to process.")
        return file_list

    pic_desc_config: dict | None = None
    if self.pic_desc_llm is not None:
        pic_desc_config = _serialize_pydantic_model(self.pic_desc_llm)

    args = {
        "file_paths": file_paths,
        "pipeline": self.pipeline,
        "ocr_engine": self.ocr_engine,
        "do_picture_classification": self.do_picture_classification,
        "pic_desc_config": pic_desc_config,
        "pic_desc_prompt": self.pic_desc_prompt,
    }

    # Use Popen with a polling loop (same pattern as Read File advanced mode).
    # This avoids multiprocessing/threading issues under Gunicorn and keeps the
    # SSE event stream alive via periodic heartbeat logs.
    docling_timeout = 600  # 10 minutes
    poll_interval = 5

    # Use temporary files for stdout/stderr to avoid pipe buffer deadlocks:
    # with subprocess.PIPE the OS pipe buffer can fill up, the child blocks on
    # write, and the parent - which only reads *after* the child exits -
    # waits forever.
    with tempfile.TemporaryFile() as stdout_file, tempfile.TemporaryFile() as stderr_file:
        proc = subprocess.Popen(  # noqa: S603
            [sys.executable, "-u", "-c", self._CHILD_SCRIPT],
            stdin=subprocess.PIPE,
            stdout=stdout_file,
            stderr=stderr_file,
        )
        try:
            proc.stdin.write(json.dumps(args).encode("utf-8"))
            proc.stdin.close()

            start = time.monotonic()
            while proc.poll() is None:
                elapsed = time.monotonic() - start
                if elapsed >= docling_timeout:
                    msg = (
                        f"Docling processing timed out after {docling_timeout}s. "
                        "Try processing fewer or smaller files."
                    )
                    raise TimeoutError(msg)
                self.log(f"Docling processing in progress ({int(elapsed)}s elapsed)...")
                time.sleep(poll_interval)
        finally:
            # Whatever interrupted the wait (timeout, logging failure, a
            # broken stdin pipe, cancellation), never leave the child running
            # or unreaped.
            if proc.poll() is None:
                proc.kill()
            proc.wait()

        stdout_file.seek(0)
        stderr_file.seek(0)
        stdout_bytes = stdout_file.read()
        stderr_bytes = stderr_file.read()

    if not stdout_bytes.strip():
        err_msg = stderr_bytes.decode("utf-8", errors="replace") if stderr_bytes else "no output"
        msg = f"Docling subprocess error: {err_msg}"
        raise RuntimeError(msg)

    try:
        stdout_text = stdout_bytes.decode("utf-8")
        try:
            payload = json.loads(stdout_text)
        except json.JSONDecodeError:
            # Libraries imported by the child may print to stdout before the
            # result. The child prints its payload with a single print() of
            # json.dumps(), i.e. as one line, so retry with the last line only.
            payload = json.loads(stdout_text.strip().splitlines()[-1])
        if not isinstance(payload, dict):
            type_msg = f"expected a JSON object, got {type(payload).__name__}"
            raise TypeError(type_msg)  # noqa: TRY301
    except Exception as e:
        err_msg = stderr_bytes.decode("utf-8", errors="replace")
        msg = f"Invalid JSON from Docling subprocess: {e}. stderr={err_msg}"
        raise RuntimeError(msg) from e

    if not payload.get("ok"):
        error_msg = str(payload.get("error", "Unknown Docling error"))
        if "not installed" in error_msg.lower():
            raise ImportError(error_msg)
        raise RuntimeError(error_msg)

    # Reconstruct DoclingDocument objects from JSON dicts returned by the child
    from docling_core.types.doc import DoclingDocument

    raw_results = payload.get("results", [])
    processed_data: list[Data | None] = []
    for r in raw_results:
        if r is None:
            processed_data.append(None)
            continue
        try:
            doc = DoclingDocument.model_validate(r["document"])
        except Exception:  # noqa: BLE001
            # Fall back to keeping the raw dict if validation fails
            doc = r["document"]
        processed_data.append(Data(data={"doc": doc, "file_path": r["file_path"]}))

    return self.rollup_data(file_list, processed_data)
@@ -0,0 +1,353 @@
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import json
5
+ import time
6
+ from concurrent.futures import Future, ThreadPoolExecutor
7
+ from pathlib import Path # noqa: TC003
8
+ from typing import Any
9
+
10
+ import httpx
11
+ from lfx.base.data import BaseFileComponent
12
+ from lfx.base.data.docling_utils import coerce_docling_document
13
+ from lfx.inputs import IntInput, NestedDictInput, StrInput, TableInput
14
+ from lfx.inputs.inputs import FloatInput
15
+ from lfx.schema import Data, DataFrame, dotdict
16
+ from lfx.utils.util import transform_localhost_url
17
+
18
+
19
+ class DoclingRemoteComponent(BaseFileComponent):
20
+ display_name = "Docling Serve"
21
+ description = "Uses Docling to process input documents connecting to your instance of Docling Serve."
22
+ documentation = "https://docling-project.github.io/docling/"
23
+ trace_type = "tool"
24
+ icon = "Docling"
25
+ name = "DoclingRemote"
26
+
27
+ MAX_500_RETRIES = 5
28
+
29
+ # https://docling-project.github.io/docling/usage/supported_formats/
30
+ VALID_EXTENSIONS = [
31
+ "adoc",
32
+ "asciidoc",
33
+ "asc",
34
+ "bmp",
35
+ "csv",
36
+ "dotx",
37
+ "dotm",
38
+ "docm",
39
+ "docx",
40
+ "htm",
41
+ "html",
42
+ "jpeg",
43
+ "jpg",
44
+ "json",
45
+ "md",
46
+ "pdf",
47
+ "png",
48
+ "potx",
49
+ "ppsx",
50
+ "pptm",
51
+ "potm",
52
+ "ppsm",
53
+ "pptx",
54
+ "tiff",
55
+ "txt",
56
+ "xls",
57
+ "xlsx",
58
+ "xhtml",
59
+ "xml",
60
+ "webp",
61
+ ]
62
+
63
+ inputs = [
64
+ *BaseFileComponent.get_base_inputs(),
65
+ StrInput(
66
+ name="api_url",
67
+ display_name="Server address",
68
+ info="URL of the Docling Serve instance.",
69
+ required=True,
70
+ ),
71
+ StrInput(
72
+ name="task_id",
73
+ display_name="Task ID",
74
+ info=(
75
+ "Optional task ID from a previous Docling Serve upload. "
76
+ "If provided, file input is ignored and the component polls for this task's results."
77
+ ),
78
+ required=False,
79
+ ),
80
+ IntInput(
81
+ name="max_concurrency",
82
+ display_name="Concurrency",
83
+ info="Maximum number of concurrent requests for the server.",
84
+ advanced=True,
85
+ value=2,
86
+ input_types=["Message"],
87
+ ),
88
+ FloatInput(
89
+ name="max_poll_timeout",
90
+ display_name="Maximum poll time",
91
+ info="Maximum waiting time for the document conversion to complete.",
92
+ advanced=True,
93
+ value=3600,
94
+ input_types=["Message"],
95
+ ),
96
+ TableInput(
97
+ name="api_headers",
98
+ display_name="HTTP headers",
99
+ advanced=True,
100
+ required=False,
101
+ info=("Optional headers required for connecting to Docling Serve."),
102
+ table_schema=[
103
+ {
104
+ "name": "key",
105
+ "display_name": "Key",
106
+ "type": "string",
107
+ "description": "Key name",
108
+ },
109
+ {
110
+ "name": "value",
111
+ "display_name": "Value",
112
+ "load_from_db": True,
113
+ "type": "string",
114
+ "description": "Value of the header",
115
+ },
116
+ ],
117
+ value=[],
118
+ real_time_refresh=True,
119
+ input_types=["Data", "JSON"],
120
+ ),
121
+ NestedDictInput(
122
+ name="docling_serve_opts",
123
+ display_name="Docling options",
124
+ advanced=True,
125
+ required=False,
126
+ info=(
127
+ "Optional dictionary of additional options. "
128
+ "See https://github.com/docling-project/docling-serve/blob/main/docs/usage.md for more information."
129
+ ),
130
+ input_types=["Message"],
131
+ ),
132
+ ]
133
+
134
+ outputs = [
135
+ *BaseFileComponent.get_base_outputs(),
136
+ ]
137
+
138
def build(self) -> DataFrame:
    """Delegate to ``load_files()`` and return what it produces.

    The method exists only because static bundle validation cannot see the
    output method inherited from BaseFileComponent.
    """
    table = self.load_files()
    return table
141
+
142
+ @staticmethod
143
+ def _add_header(headers: dict[str, str], key: Any, value: Any) -> None:
144
+ key_str = str(key).strip()
145
+ if not key_str or key_str == "None":
146
+ return
147
+ headers[key_str] = str(value)
148
+
149
+ def _process_headers_input(self, headers_input: Any, component_headers_dict: dict[str, str]) -> None:
150
+ if not headers_input:
151
+ return
152
+
153
+ items = headers_input if isinstance(headers_input, list) else [headers_input]
154
+
155
+ for item in items:
156
+ if not item:
157
+ continue
158
+
159
+ # Case 1: Data object
160
+ if hasattr(item, "data") and isinstance(item.data, dict):
161
+ data = item.data
162
+ if "key" in data and "value" in data:
163
+ self._add_header(component_headers_dict, data["key"], data["value"])
164
+ else:
165
+ # Fallback: merge all keys from Data object
166
+ for k, v in data.items():
167
+ if k not in ("text_key", "default_value"):
168
+ self._add_header(component_headers_dict, k, v)
169
+
170
+ # Case 2: Dictionary (Table row)
171
+ elif isinstance(item, dict):
172
+ if "key" in item and "value" in item:
173
+ self._add_header(component_headers_dict, item["key"], item["value"])
174
+ else:
175
+ # Fallback: merge all keys
176
+ for k, v in item.items():
177
+ self._add_header(component_headers_dict, k, v)
178
+
179
+ # Case 3: Message object
180
+ elif hasattr(item, "text") and isinstance(item.text, str):
181
+ try:
182
+ parsed = json.loads(item.text)
183
+ if isinstance(parsed, dict):
184
+ for k, v in parsed.items():
185
+ self._add_header(component_headers_dict, k, v)
186
+ except json.JSONDecodeError:
187
+ pass
188
+
189
+ def _process_headers(self) -> dict[str, str]:
190
+ """Process the headers input into a valid dictionary."""
191
+ component_headers_dict: dict[str, str] = {}
192
+ self._process_headers_input(self.api_headers, component_headers_dict)
193
+ return component_headers_dict
194
+
195
def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None) -> dotdict:
    """Normalize the ``api_headers`` value; defer everything else to the base class.

    When ``api_headers`` changes to a dict, it is stored as a list of
    ``{"key": ..., "value": ...}`` rows; when it changes to ``None`` it is
    stored as an empty list. Any other change is passed to the parent
    implementation.
    """
    handled_here = field_name == "api_headers" and (isinstance(field_value, dict) or field_value is None)
    if not handled_here:
        # Default behavior
        return super().update_build_config(build_config, field_value, field_name)

    if field_value is None:
        rows = []
    else:
        rows = [{"key": name, "value": header_value} for name, header_value in field_value.items()]
    build_config["api_headers"]["value"] = rows
    return build_config
206
+
207
+ def _poll_and_fetch_result(
208
+ self, client: httpx.Client, base_url: str, task_id: str, file_path: str | None = None
209
+ ) -> Data | None:
210
+ """Poll for task completion and fetch the result.
211
+
212
+ Args:
213
+ client: The HTTP client to use for requests.
214
+ base_url: The base URL of the Docling Serve API.
215
+ task_id: The task ID to poll for.
216
+ file_path: Optional file path to include in the result data.
217
+
218
+ Returns:
219
+ Data object with the DoclingDocument, or None if processing failed.
220
+ """
221
+ http_failures = 0
222
+ retry_status_start = 500
223
+ retry_status_end = 600
224
+ start_wait_time = time.monotonic()
225
+
226
+ task_status = None
227
+ while task_status not in ("success", "failure"):
228
+ processing_time = time.monotonic() - start_wait_time
229
+ if processing_time >= self.max_poll_timeout:
230
+ msg = (
231
+ f"Processing time {processing_time=} exceeds the maximum poll timeout {self.max_poll_timeout=}."
232
+ "Please increase the max_poll_timeout parameter or review why the processing "
233
+ "takes long on the server."
234
+ )
235
+ self.log(msg)
236
+ raise RuntimeError(msg)
237
+
238
+ response = client.get(f"{base_url}/status/poll/{task_id}")
239
+
240
+ if retry_status_start <= response.status_code < retry_status_end:
241
+ http_failures += 1
242
+ if http_failures > self.MAX_500_RETRIES:
243
+ self.log(f"The status requests got a http response {response.status_code} too many times.")
244
+ return None
245
+ time.sleep(2)
246
+ continue
247
+
248
+ response.raise_for_status()
249
+ task = response.json()
250
+ task_status = task["task_status"]
251
+ if task_status not in ("success", "failure"):
252
+ time.sleep(2)
253
+
254
+ result_resp = client.get(f"{base_url}/result/{task_id}")
255
+ result_resp.raise_for_status()
256
+ result = result_resp.json()
257
+
258
+ if result.get("status") == "failure" or result.get("errors"):
259
+ errors = result.get("errors", [])
260
+ err_msg_list = []
261
+ for err in errors:
262
+ if isinstance(err, dict) and "error_message" in err:
263
+ err_msg_list.append(err["error_message"])
264
+ elif isinstance(err, str):
265
+ err_msg_list.append(err)
266
+
267
+ err_details = "; ".join(err_msg_list) if err_msg_list else "Unknown Docling processing error"
268
+
269
+ msg = f"Docling processing failed: {err_details}"
270
+ raise ValueError(msg)
271
+
272
+ if "json_content" not in result["document"] or result["document"]["json_content"] is None:
273
+ self.log("No JSON DoclingDocument found in the result.")
274
+ return None
275
+
276
+ try:
277
+ doc = coerce_docling_document(result["document"]["json_content"])
278
+ data_dict: dict[str, Any] = {"doc": doc}
279
+ if file_path:
280
+ data_dict["file_path"] = file_path
281
+ return Data(data=data_dict)
282
+ except Exception as e: # noqa: BLE001
283
+ self.log(f"Error validating the document. {e}")
284
+ return None
285
+
286
def _process_task_id(self) -> list[Data]:
    """Fetch the outcome of the already-submitted task ``self.task_id``.

    Returns:
        A one-element list with the polled result, or an empty list when
        polling produced nothing usable.
    """
    base_url = f"{transform_localhost_url(self.api_url)}/v1"

    with httpx.Client(headers=self._process_headers()) as client:
        outcome = self._poll_and_fetch_result(client, base_url, self.task_id)
        if outcome:
            return [outcome]
        return []
298
+
299
def load_files_base(self) -> list[Data]:
    """Return parsed data, either from an existing task or from the input files.

    With a ``task_id`` set, the files are not touched and that task's result
    is polled instead; otherwise the inherited file-loading path runs.

    Returns:
        list[Data]: Parsed data from the processed files or task.
    """
    if not self.task_id:
        return super().load_files_base()
    return self._process_task_id()
308
+
309
def process_files(self, file_list: list[BaseFileComponent.BaseFile]) -> list[BaseFileComponent.BaseFile]:
    """Convert each file through Docling Serve and attach the results.

    Every file with a path is base64-encoded and submitted to the
    ``/v1/convert/source/async`` endpoint, then polled until it finishes.
    Submissions run concurrently on a thread pool sharing one HTTP client.

    Args:
        file_list: Files to convert; entries without a path yield ``None``.

    Returns:
        The result of ``self.rollup_data(file_list, processed_data)``, where
        ``processed_data`` holds one entry per input file, in input order.

    Raises:
        Any error raised by a conversion is re-raised. HTTP, request, key and
        value errors are logged first.
    """
    transformed_url = transform_localhost_url(self.api_url)
    base_url = f"{transformed_url}/v1"

    def _convert_document(client: httpx.Client, file_path: Path, options: dict[str, Any]) -> Data | None:
        # Submit one file and wait for its converted document.
        encoded_doc = base64.b64encode(file_path.read_bytes()).decode()
        payload = {
            "options": options,
            "sources": [{"kind": "file", "base64_string": encoded_doc, "filename": file_path.name}],
        }

        response = client.post(f"{base_url}/convert/source/async", json=payload)
        response.raise_for_status()
        task = response.json()

        return self._poll_and_fetch_result(client, base_url, task["task_id"], str(file_path))

    # User-supplied options come last so they can override the defaults.
    docling_options = {
        "to_formats": ["json"],
        "image_export_mode": "placeholder",
        **(self.docling_serve_opts or {}),
    }

    # ThreadPoolExecutor rejects max_workers < 1; fall back to one worker for
    # an unset, zero or negative concurrency setting.
    max_workers = max(1, int(self.max_concurrency or 1))

    # One slot per input file keeps results positionally aligned with
    # file_list; files without a path keep their None.
    processed_data: list[Data | None] = [None] * len(file_list)
    with (
        httpx.Client(headers=self._process_headers()) as client,
        ThreadPoolExecutor(max_workers=max_workers) as executor,
    ):
        futures: list[tuple[int, Future]] = [
            (index, executor.submit(_convert_document, client, file.path, docling_options))
            for index, file in enumerate(file_list)
            if file.path is not None
        ]

        try:
            for index, future in futures:
                try:
                    processed_data[index] = future.result()
                except (httpx.HTTPStatusError, httpx.RequestError, KeyError, ValueError) as exc:
                    self.log(f"Docling remote processing failed: {exc}")
                    raise
        except BaseException:
            # Leaving the `with` block waits for the pool to shut down; drop
            # conversions that have not started so a failure is not delayed
            # by work whose result will be discarded.
            for _, pending in futures:
                pending.cancel()
            raise

    return self.rollup_data(file_list, processed_data)
@@ -0,0 +1,137 @@
1
+ from typing import Any
2
+
3
+ from lfx.base.data.docling_utils import coerce_docling_document, extract_docling_documents, get_docling_image_ref_mode
4
+ from lfx.custom import Component
5
+ from lfx.io import DropdownInput, HandleInput, MessageTextInput, Output, StrInput
6
+ from lfx.schema import Data, DataFrame
7
+
8
+
9
class ExportDoclingDocumentComponent(Component):
    """Component that exports DoclingDocument inputs as Markdown, HTML, plain text or DocTags."""

    display_name: str = "Export DoclingDocument"
    description: str = "Export DoclingDocument to markdown, html or other formats."
    documentation = "https://docling-project.github.io/docling/"
    icon = "Docling"
    name = "ExportDoclingDocument"

    inputs = [
        HandleInput(
            name="data_inputs",
            display_name="JSON or Table",
            info="The data with documents to export.",
            input_types=["Data", "JSON", "DataFrame", "Table"],
            required=True,
        ),
        DropdownInput(
            name="export_format",
            display_name="Export format",
            options=["Markdown", "HTML", "Plaintext", "DocTags"],
            info="Select the export format to convert the input.",
            value="Markdown",
            real_time_refresh=True,
        ),
        DropdownInput(
            name="image_mode",
            display_name="Image export mode",
            options=["placeholder", "embedded"],
            info=(
                "Specify how images are exported in the output. Placeholder will replace the images with a string, "
                "whereas Embedded will include them as base64 encoded images."
            ),
            value="placeholder",
        ),
        StrInput(
            name="md_image_placeholder",
            display_name="Image placeholder",
            info="Specify the image placeholder for markdown exports.",
            value="<!-- image -->",
            advanced=True,
        ),
        StrInput(
            name="md_page_break_placeholder",
            display_name="Page break placeholder",
            info="Add this placeholder between pages in the markdown output.",
            value="",
            advanced=True,
        ),
        MessageTextInput(
            name="doc_key",
            display_name="Doc Key",
            info="The key to use for the DoclingDocument column.",
            value="doc",
            advanced=True,
        ),
    ]

    outputs = [
        Output(display_name="Exported data", name="data", method="export_document"),
        Output(display_name="Table", name="dataframe", method="as_dataframe"),
    ]

    def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None) -> dict:
        """Toggle which option fields are shown when the export format changes.

        Args:
            build_config: Build configuration to update in place.
            field_value: New value of the field that changed.
            field_name: Name of the field that changed.

        Returns:
            dict: The same ``build_config`` object; untouched unless ``field_name`` is
            ``"export_format"`` and ``field_value`` is one of the known formats.
        """
        if field_name != "export_format":
            return build_config

        # format -> (show the markdown-only placeholders, show the image mode selector)
        visibility_by_format = {
            "Markdown": (True, True),
            "HTML": (False, True),
            "Plaintext": (False, False),
            "DocTags": (False, False),
        }
        visibility = visibility_by_format.get(field_value)
        if visibility is None:
            return build_config

        show_markdown_fields, show_image_mode = visibility
        build_config["md_image_placeholder"]["show"] = show_markdown_fields
        build_config["md_page_break_placeholder"]["show"] = show_markdown_fields
        build_config["image_mode"]["show"] = show_image_mode
        return build_config

    def _get_image_mode(self) -> Any:
        """Pass the selected ``image_mode`` through ``get_docling_image_ref_mode``."""
        return get_docling_image_ref_mode(self.image_mode)

    @staticmethod
    def _coerce_exportable_document(doc: Any) -> Any:
        """Pass ``doc`` through ``coerce_docling_document`` before exporting it."""
        return coerce_docling_document(doc)

    def _render_docling_content(self, doc: Any, image_mode: Any) -> str:
        """Export ``doc`` in the selected format; an unrecognised format yields ``""``."""
        selected_format = self.export_format
        if selected_format == "Markdown":
            return doc.export_to_markdown(
                image_mode=image_mode,
                image_placeholder=self.md_image_placeholder,
                page_break_placeholder=self.md_page_break_placeholder,
            )
        if selected_format == "HTML":
            return doc.export_to_html(image_mode=image_mode)
        if selected_format == "Plaintext":
            return doc.export_to_text()
        if selected_format == "DocTags":
            return doc.export_to_doctags()
        return ""

    def _docling_metadata(self, doc: Any) -> dict:
        """Collect the export format plus any non-empty name/origin details found on ``doc``."""
        details: dict = {"export_format": self.export_format}

        doc_name = getattr(doc, "name", None)
        if doc_name:
            details["name"] = doc_name

        origin = getattr(doc, "origin", None)
        if origin is None:
            return details

        filename = getattr(origin, "filename", None)
        if filename:
            details["filename"] = filename
        binary_hash = getattr(origin, "binary_hash", None)
        if binary_hash:
            details["document_id"] = str(binary_hash)
        mimetype = getattr(origin, "mimetype", None)
        if mimetype:
            details["mimetype"] = mimetype
        return details

    def export_document(self) -> list[Data]:
        """Export every document found in ``data_inputs`` to the selected format.

        Returns:
            list[Data]: One ``Data`` per document, holding the exported text together with
            the metadata gathered from the document.

        Raises:
            TypeError: Wraps any exception raised while resolving the image mode or while
                coercing, exporting or packaging a document.
        """
        documents, warning = extract_docling_documents(self.data_inputs, self.doc_key)
        if warning:
            self.status = warning

        exported: list[Data] = []
        try:
            image_mode = self._get_image_mode()
            for candidate in documents:
                document = self._coerce_exportable_document(candidate)
                text = self._render_docling_content(document, image_mode)
                # Preserve metadata from the DoclingDocument alongside the exported text.
                record = {"text": text, **self._docling_metadata(document)}
                exported.append(Data(text=text, data=record))
        except Exception as exc:
            msg = f"Error exporting document: {exc}"
            raise TypeError(msg) from exc

        return exported

    def as_dataframe(self) -> DataFrame:
        """Run ``export_document`` and wrap its results in a ``DataFrame``."""
        rows = self.export_document()
        return DataFrame(rows)
@@ -0,0 +1,16 @@
1
+ {
2
+ "$schema": "https://schemas.langflow.org/extension/v1.json",
3
+ "id": "lfx-docling",
4
+ "version": "0.1.0",
5
+ "name": "Docling",
6
+ "description": "Docling document processing components as a standalone Langflow Extension Bundle.",
7
+ "lfx": {
8
+ "compat": ["1"]
9
+ },
10
+ "bundles": [
11
+ {
12
+ "name": "docling",
13
+ "path": "components/docling"
14
+ }
15
+ ]
16
+ }
@@ -0,0 +1,77 @@
1
+ Metadata-Version: 2.4
2
+ Name: lfx-docling
3
+ Version: 0.1.0
4
+ Summary: Docling document processing components as a standalone Langflow Extension Bundle.
5
+ Project-URL: Homepage, https://github.com/langflow-ai/langflow
6
+ Project-URL: Documentation, https://docs.langflow.org/bundles-docling
7
+ Project-URL: Repository, https://github.com/langflow-ai/langflow
8
+ Author-email: Langflow <contact@langflow.org>
9
+ License: MIT
10
+ Keywords: bundle,docling,documents,extension,langflow,lfx
11
+ Requires-Python: <3.15,>=3.10
12
+ Requires-Dist: docling-core<3.0.0,>=2.36.1
13
+ Requires-Dist: httpx<1.0.0,>=0.28.1
14
+ Requires-Dist: lfx>=0.5.0
15
+ Provides-Extra: all
16
+ Requires-Dist: docling-core[chunking]<3.0.0,>=2.36.1; extra == 'all'
17
+ Requires-Dist: docling<3.0.0,>=2.36.1; (sys_platform != 'darwin' or platform_machine != 'x86_64') and extra == 'all'
18
+ Requires-Dist: langchain-docling>=1.1.0; extra == 'all'
19
+ Requires-Dist: ocrmac>=1.0.0; (sys_platform == 'darwin') and extra == 'all'
20
+ Requires-Dist: rapidocr-onnxruntime>=1.4.4; extra == 'all'
21
+ Requires-Dist: tesserocr>=2.8.0; extra == 'all'
22
+ Requires-Dist: tiktoken>=0.7.0; extra == 'all'
23
+ Requires-Dist: torch>=2.6.0; extra == 'all'
24
+ Requires-Dist: torchvision>=0.21.0; extra == 'all'
25
+ Provides-Extra: chunking
26
+ Requires-Dist: docling-core[chunking]<3.0.0,>=2.36.1; extra == 'chunking'
27
+ Requires-Dist: tiktoken>=0.7.0; extra == 'chunking'
28
+ Provides-Extra: image-description
29
+ Requires-Dist: docling<3.0.0,>=2.36.1; (sys_platform != 'darwin' or platform_machine != 'x86_64') and extra == 'image-description'
30
+ Requires-Dist: langchain-docling>=1.1.0; extra == 'image-description'
31
+ Requires-Dist: ocrmac>=1.0.0; (sys_platform == 'darwin') and extra == 'image-description'
32
+ Requires-Dist: rapidocr-onnxruntime>=1.4.4; extra == 'image-description'
33
+ Requires-Dist: tesserocr>=2.8.0; extra == 'image-description'
34
+ Requires-Dist: torch>=2.6.0; extra == 'image-description'
35
+ Requires-Dist: torchvision>=0.21.0; extra == 'image-description'
36
+ Provides-Extra: local
37
+ Requires-Dist: docling<3.0.0,>=2.36.1; (sys_platform != 'darwin' or platform_machine != 'x86_64') and extra == 'local'
38
+ Requires-Dist: ocrmac>=1.0.0; (sys_platform == 'darwin') and extra == 'local'
39
+ Requires-Dist: rapidocr-onnxruntime>=1.4.4; extra == 'local'
40
+ Requires-Dist: tesserocr>=2.8.0; extra == 'local'
41
+ Requires-Dist: torch>=2.6.0; extra == 'local'
42
+ Requires-Dist: torchvision>=0.21.0; extra == 'local'
43
+ Description-Content-Type: text/markdown
44
+
45
+ # Docling Bundle
46
+
47
+ Docling components for Langflow packaged as a standalone Extension Bundle.
48
+
49
+ ## Components
50
+
51
+ - Docling
52
+ - Docling Serve
53
+ - Export DoclingDocument
54
+ - Chunk DoclingDocument
55
+
56
+ ## Install
57
+
58
+ The bundle is installed with Langflow in the 1.10 workspace. The base package includes `docling-core` for the `DoclingDocument` schema. For standalone local conversion, install the `local` extra:
59
+
60
+ ```bash
61
+ uv pip install "lfx-docling[local]"
62
+ ```
63
+
64
+ Chunking and picture-description support use separate optional extras. Chunking
65
+ does not install the full local converter/OCR stack:
66
+
67
+ ```bash
68
+ uv pip install "lfx-docling[chunking]"
69
+ uv pip install "lfx-docling[image-description]"
70
+ ```
71
+
72
+ ## Develop
73
+
74
+ ```bash
75
+ uv run lfx extension validate src/bundles/docling/src/lfx_docling
76
+ uv run pytest src/bundles/docling/tests
77
+ ```
@@ -0,0 +1,11 @@
1
+ lfx_docling/__init__.py,sha256=cCRwW1jLZX9BJ5oV9TXYYcYRuhlVDPVDqyPjZBlaBxk,566
2
+ lfx_docling/extension.json,sha256=omS8BGiwJYDI-2zkjTeT1TLTH35ORLBas1ugmA_bdgI,362
3
+ lfx_docling/components/docling/__init__.py,sha256=6xxVeK1DutDKS7vdiuF9XFGbd5yJ9M_ep2e_YYqa32I,386
4
+ lfx_docling/components/docling/chunk_docling_document.py,sha256=9e8yUfXO4YHxjgBfb0Vm7cQ2XSFW4KPkgifrLu489Tc,9220
5
+ lfx_docling/components/docling/docling_inline.py,sha256=ZdOLflUAEm_bbqnucA86jLzDci2ExCmvH9f9rK7ogQw,14833
6
+ lfx_docling/components/docling/docling_remote.py,sha256=pE4ULahalE96hGkcedvEWRehWZmKJmZJUdbgkfMrUEk,13050
7
+ lfx_docling/components/docling/export_docling_document.py,sha256=9gGsRKBQMNRQx8cSkWyhBSlQlC7vHt_z04t7fbWSIoM,5836
8
+ lfx_docling-0.1.0.dist-info/METADATA,sha256=SlOPqGiirb0PkVqdOvnv0oPXspiFJO55tyKIo6Gy8Y4,3167
9
+ lfx_docling-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
10
+ lfx_docling-0.1.0.dist-info/entry_points.txt,sha256=ZMFLxk5y0VG5SrHLjZeXSZSA1ceNypO23fDHeHLmDnc,48
11
+ lfx_docling-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [langflow.extensions]
2
+ lfx-docling = lfx_docling