lfx-paddle 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lfx_paddle/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ """lfx-paddle: PaddleOCR bundle.
2
+
3
+ Distribution unit ``lfx-paddle``. At runtime Langflow's loader discovers
4
+ ``extension.json`` shipped alongside this ``__init__.py`` and registers the
5
+ bundle's component under the namespaced ID
6
+ ``ext:paddle:PaddleOCRComponent@official``.
7
+ """
8
+
9
+ from lfx_paddle.components.paddle.paddleocr import PaddleOCRComponent
10
+
11
+ __all__ = ["PaddleOCRComponent"]
@@ -0,0 +1,10 @@
1
+ """Component re-exports for the ``paddle`` bundle.
2
+
3
+ Saved-flow migration entries that target ``lfx.components.paddle.<Class>``
4
+ resolve through this package, so the moved Component class(es) must be
5
+ importable from here by name.
6
+ """
7
+
8
+ from .paddleocr import PaddleOCRComponent
9
+
10
+ __all__ = ["PaddleOCRComponent"]
@@ -0,0 +1,502 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import time
5
+ from typing import TYPE_CHECKING, Any
6
+
7
+ import httpx
8
+ from lfx.base.data.base_file import BaseFileComponent
9
+ from lfx.inputs.inputs import BoolInput, DropdownInput, FloatInput, IntInput, MessageTextInput, SecretStrInput
10
+ from lfx.schema.data import Data
11
+ from lfx.utils.ssrf_protection import is_ssrf_protection_enabled, validate_and_resolve_url
12
+ from lfx.utils.ssrf_transport import create_ssrf_protected_sync_client
13
+
14
+ if TYPE_CHECKING:
15
+ from pathlib import Path
16
+
17
+
18
+ class PaddleOCRComponent(BaseFileComponent):
19
# Component metadata attributes.
# NOTE(review): presumably read by the Langflow UI/registry -- confirm against BaseFileComponent.
display_name = "PaddleOCR"
description = "Use PaddleOCR for either layout-aware document parsing into Markdown or plain OCR text recognition."
documentation = "https://paddlepaddle.github.io/PaddleOCR/latest/en/version3.x/paddleocr_and_ppstructure.html"
icon = "file-search"
name = "PaddleOCR"

# File extensions this component declares as valid.
# NOTE(review): presumably enforced by BaseFileComponent -- confirm.
VALID_EXTENSIONS = ["png", "jpg", "jpeg", "bmp", "tiff", "webp", "pdf"]
# Service root used by ``process_files`` when the ``base_url`` input is empty.
DEFAULT_BASE_URL = "https://paddleocr.aistudio-app.com"
# Path appended to the service root to submit a job; ``/<job_id>`` is added when polling.
API_PATH = "/api/v2/ocr/jobs"
# Timeout in seconds for HTTP requests (client default and upper bound for each poll request).
REQUEST_TIMEOUT = 300.0
# Polling back-off in seconds: first wait, growth factor applied after each poll, and wait ceiling.
INITIAL_POLL_INTERVAL = 3.0
POLL_MULTIPLIER = 1.5
MAX_POLL_INTERVAL = 15.0
32
+
33
# Declarative input schema: the inherited file inputs followed by PaddleOCR-specific fields.
inputs = [
    # Inputs provided by the base file component come first.
    *BaseFileComponent.get_base_inputs(),
    # --- Connection and authentication ---
    SecretStrInput(
        name="access_token",
        display_name="AI Studio Access Token",
        required=True,
        info="AI Studio access token. Get it from https://aistudio.baidu.com/account/accessToken.",
    ),
    MessageTextInput(
        name="base_url",
        display_name="Base URL",
        required=False,
        value="",
        info="Optional PaddleOCR service root URL. Leave empty to use the official default service.",
        advanced=True,
    ),
    # --- Task and model selection ---
    # Changing the task type triggers ``update_build_config``, which swaps the model choices.
    DropdownInput(
        name="task_type",
        display_name="Task Type",
        options=["document_parsing", "ocr"],
        value="document_parsing",
        info=(
            "document_parsing: preserves reading order and layout as Markdown — "
            "best when you need structure-aware text (PDFs, scanned documents, tables).\n"
            "ocr: extracts text regions in scan order — best for images with simple text content."
        ),
        real_time_refresh=True,
    ),
    # Defaults match the ``document_parsing`` task type, which is the default above.
    DropdownInput(
        name="model",
        display_name="Model",
        options=["PP-StructureV3", "PaddleOCR-VL-1.6"],
        value="PP-StructureV3",
        info="PaddleOCR model to use for the selected task type.",
    ),
    # Overall polling budget in seconds; ``process_files`` falls back to 600 when it is falsy.
    IntInput(
        name="poll_timeout",
        display_name="Timeout (s)",
        value=600,
        info="Maximum time to wait for the PaddleOCR job to complete.",
        advanced=True,
    ),
    # --- Options sent for both task types ---
    BoolInput(
        name="use_doc_orientation_classify",
        display_name="Document Orientation Classification",
        value=False,
        advanced=True,
        info="OCR/document parsing option. Enable document orientation classification.",
    ),
    BoolInput(
        name="use_doc_unwarping",
        display_name="Document Unwarping",
        value=False,
        advanced=True,
        info="OCR/document parsing option. Enable document unwarping.",
    ),
    # --- Options sent only for the ``ocr`` task type (see ``_build_ocr_options``) ---
    BoolInput(
        name="use_textline_orientation",
        display_name="Text Line Orientation",
        value=False,
        advanced=True,
        info="OCR option. Enable text line orientation detection.",
    ),
    FloatInput(
        name="text_det_thresh",
        display_name="Text Detection Threshold",
        required=False,
        advanced=True,
        info="OCR option. Text detection threshold.",
    ),
    FloatInput(
        name="text_det_box_thresh",
        display_name="Text Detection Box Threshold",
        required=False,
        advanced=True,
        info="OCR option. Text detection box threshold.",
    ),
    FloatInput(
        name="text_det_unclip_ratio",
        display_name="Text Detection Unclip Ratio",
        required=False,
        advanced=True,
        info="OCR option. Text detection unclip ratio.",
    ),
    FloatInput(
        name="text_rec_score_thresh",
        display_name="Text Recognition Score Threshold",
        required=False,
        advanced=True,
        info="OCR option. Text recognition score threshold.",
    ),
    # --- Options sent only for ``document_parsing`` (see ``_build_document_parsing_options``) ---
    BoolInput(
        name="use_table_recognition",
        display_name="Table Recognition",
        value=True,
        advanced=True,
        info="Document parsing option. Enable table recognition.",
    ),
    BoolInput(
        name="use_formula_recognition",
        display_name="Formula Recognition",
        value=False,
        advanced=True,
        info="Document parsing option. Enable formula recognition.",
    ),
    BoolInput(
        name="use_chart_recognition",
        display_name="Chart Recognition",
        value=False,
        advanced=True,
        info="Document parsing option. Enable chart recognition.",
    ),
    BoolInput(
        name="use_seal_recognition",
        display_name="Seal Recognition",
        value=False,
        advanced=True,
        info="Document parsing option. Enable seal recognition.",
    ),
    BoolInput(
        name="prettify_markdown",
        display_name="Prettify Markdown",
        value=True,
        advanced=True,
        info="Document parsing option. Return prettier Markdown when supported.",
    ),
    FloatInput(
        name="temperature",
        display_name="Temperature",
        required=False,
        advanced=True,
        info="PaddleOCR-VL option. Sampling temperature.",
    ),
    FloatInput(
        name="top_p",
        display_name="Top P",
        required=False,
        advanced=True,
        info="PaddleOCR-VL option. Nucleus sampling top_p.",
    ),
    BoolInput(
        name="visualize",
        display_name="Visualize",
        value=False,
        advanced=True,
        info="Document parsing option. Generate visualization outputs when supported.",
    ),
]

# Outputs are exactly those of the base file component.
outputs = [*BaseFileComponent.get_base_outputs()]
183
+
184
def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None) -> dict:
    """Swap the ``model`` dropdown choices when the task type changes.

    Args:
        build_config: Mutable build configuration; its ``"model"`` entry is
            updated in place when ``field_name`` is ``"task_type"``.
        field_value: The new value of the changed field.
        field_name: Name of the field that changed, if any.

    Returns:
        The same ``build_config`` object that was passed in.
    """
    if field_name != "task_type":
        return build_config

    if field_value == "ocr":
        model_choices = ["PP-OCRv6", "PP-OCRv5"]
    else:
        model_choices = ["PP-StructureV3", "PaddleOCR-VL-1.6"]

    # The first choice of each list doubles as the selected default.
    build_config["model"]["options"] = model_choices
    build_config["model"]["value"] = model_choices[0]
    return build_config
194
+
195
def process_files(self, file_list: list[BaseFileComponent.BaseFile]) -> list[BaseFileComponent.BaseFile]:
    """Run every file through PaddleOCR and store the result on ``file.data``.

    Args:
        file_list: Files to process; returned unchanged (after logging) when empty.

    Returns:
        The same list, with each entry's ``data`` set to the parsed result.

    Raises:
        ValueError: If the access token is missing or blank.
        RuntimeError: If processing any file fails; the message comes from
            ``_format_paddleocr_error`` and the original error is chained.
    """
    if not file_list:
        self.log("No files to process.")
        return file_list

    token = str(self.access_token or "").strip()
    if not token:
        msg = "AI Studio Access Token is required."
        raise ValueError(msg)

    configured_root = str(self.base_url or "").strip()
    service_root = (configured_root or self.DEFAULT_BASE_URL).rstrip("/")
    request_headers = {
        "Authorization": f"Bearer {token}",
        "Client-Platform": "langflow",
    }
    timeout_seconds = int(self.poll_timeout or 600)

    # The service root is operator-configurable and the requests carry both the
    # bearer token and the uploaded file, so the URL is validated once up front
    # and the resolved IPs are reused for every request of this run. When SSRF
    # protection is disabled (the default) this is a no-op.
    _validated_url, pinned_ips = validate_and_resolve_url(service_root)

    try:
        for entry in file_list:
            entry.data = self._process_file(entry.path, service_root, pinned_ips, request_headers, timeout_seconds)
    except Exception as exc:
        friendly = self._format_paddleocr_error(exc)
        self.log(friendly)
        raise RuntimeError(friendly) from exc

    return file_list
228
+
229
+ def _process_file(
230
+ self, file_path: Path, base_url: str, base_ips: list[str], headers: dict[str, str], poll_timeout: int
231
+ ) -> Data:
232
+ options = self._build_ocr_options() if self.task_type == "ocr" else self._build_document_parsing_options()
233
+ job_id = self._submit_job(
234
+ base_url=base_url, base_ips=base_ips, headers=headers, file_path=file_path, options=options
235
+ )
236
+ jsonl_data = self._poll_job(
237
+ base_url=base_url, base_ips=base_ips, headers=headers, job_id=job_id, poll_timeout=poll_timeout
238
+ )
239
+
240
+ if self.task_type == "ocr":
241
+ return self._ocr_result_to_data(job_id, jsonl_data, file_path)
242
+ return self._document_result_to_data(job_id, jsonl_data, file_path)
243
+
244
def _submit_job(
    self, *, base_url: str, base_ips: list[str], headers: dict[str, str], file_path: Path, options: dict[str, Any]
) -> str:
    """Upload ``file_path`` as a new job and return the job ID.

    The file is sent as multipart form data together with the selected model
    and the JSON-encoded options.

    Raises:
        httpx.HTTPStatusError: If the service answers with an error status.
        ValueError: If the response carries no ``jobId`` (neither under
            ``data`` nor at the top level).
    """
    submit_url = f"{base_url}{self.API_PATH}"
    form_fields = {"model": self.model, "optionalPayload": json.dumps(options)}

    with file_path.open("rb") as upload, self._build_client(submit_url, base_ips) as client:
        response = client.post(
            submit_url,
            data=form_fields,
            files={"file": (file_path.name, upload)},
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        payload = response.json()

        # The ID may be nested under "data" or sit at the top level.
        nested = payload.get("data") or {}
        job_id = nested.get("jobId") or payload.get("jobId")
        if job_id:
            return job_id

        msg = f"PaddleOCR job ID not found in response: {payload}"
        raise ValueError(msg)
267
+
268
def _poll_job(
    self,
    *,
    base_url: str,
    base_ips: list[str],
    headers: dict[str, str],
    job_id: str,
    poll_timeout: int,
) -> list[dict[str, Any]]:
    """Poll the job status until it finishes, then fetch its result.

    The wait between polls starts at ``INITIAL_POLL_INTERVAL``, is multiplied
    by ``POLL_MULTIPLIER`` after each poll, and is capped at
    ``MAX_POLL_INTERVAL``.

    Raises:
        TimeoutError: If ``poll_timeout`` seconds elapse before the job is done.
        RuntimeError: If the service reports the state ``"failed"``.
        ValueError: If a finished job carries no result URL.
        httpx.HTTPStatusError: If a status request returns an error status.
    """
    status_url = f"{base_url}{self.API_PATH}/{job_id}"
    deadline = time.monotonic() + poll_timeout
    wait = self.INITIAL_POLL_INTERVAL

    with self._build_client(status_url, base_ips) as client:
        while (budget := deadline - time.monotonic()) > 0:
            # Cap each request by what is left of the budget, so one hung
            # request cannot overrun ``poll_timeout`` by ``REQUEST_TIMEOUT``.
            request_timeout = min(self.REQUEST_TIMEOUT, budget)
            response = client.get(status_url, headers=headers, timeout=request_timeout)
            response.raise_for_status()
            payload = response.json()
            job = payload.get("data") or {}
            state = job.get("state") or payload.get("state")

            if state == "done":
                result_url = job.get("resultJsonUrl")
                if not result_url:
                    result_url = (job.get("resultUrl") or {}).get("jsonUrl")
                if not result_url:
                    msg = f"PaddleOCR result URL not found in response: {payload}"
                    raise ValueError(msg)
                return self._fetch_result(result_url)

            if state == "failed":
                msg = f"PaddleOCR job failed: {payload}"
                raise RuntimeError(msg)

            # Never sleep past the deadline.
            time.sleep(min(wait, max(deadline - time.monotonic(), 0)))
            wait = min(wait * self.POLL_MULTIPLIER, self.MAX_POLL_INTERVAL)

        msg = f"PaddleOCR job {job_id} timed out."
        raise TimeoutError(msg)
309
+
310
def _fetch_result(self, result_url: str) -> list[dict[str, Any]]:
    """Download the job result and return it as a list of JSON objects.

    The body is first parsed as a single JSON document; if that fails it is
    parsed line by line (JSONL), skipping blank lines. A single object is
    wrapped in a list, a list is returned as is, and an empty body or any
    other JSON value yields an empty list.

    Raises:
        httpx.HTTPStatusError: If the download returns an error status.
    """
    # The URL comes from the remote job-status response rather than from the
    # operator, so it is SSRF-validated before fetching: a rogue endpoint
    # could otherwise aim it at internal or cloud-metadata addresses. With
    # SSRF protection disabled (the default) validation is a no-op and yields
    # no pinned IPs; when enabled it blocks internal targets and pins DNS to
    # the validated IPs.
    _validated_url, validated_ips = validate_and_resolve_url(result_url)

    with self._build_client(result_url, validated_ips) as client:
        response = client.get(result_url)
        response.raise_for_status()

        body = response.text.strip()
        if not body:
            return []

        try:
            payload = response.json()
        except ValueError:
            return [json.loads(row) for row in body.splitlines() if row.strip()]

        if isinstance(payload, dict):
            return [payload]
        return payload if isinstance(payload, list) else []
338
+
339
def _build_client(self, url: str, validated_ips: list[str]) -> httpx.Client:
    """Return the HTTP client to use for ``url``.

    Shared by the submit, poll, and result-fetch requests. A DNS-pinning
    client bound to ``validated_ips`` is returned only when SSRF protection is
    enabled, validated IPs are available, and a hostname can be extracted
    from ``url``; in every other case a standard client is returned. Both
    variants use ``REQUEST_TIMEOUT`` as their timeout.
    """
    pin_dns = is_ssrf_protection_enabled() and validated_ips
    hostname = httpx.URL(url).host if pin_dns else None
    if hostname:
        return create_ssrf_protected_sync_client(
            hostname=hostname, validated_ips=validated_ips, timeout=self.REQUEST_TIMEOUT
        )
    return httpx.Client(timeout=self.REQUEST_TIMEOUT)
355
+
356
+ def _build_ocr_options(self) -> dict[str, Any]:
357
+ return self._collect_options(
358
+ [
359
+ "use_doc_orientation_classify",
360
+ "use_doc_unwarping",
361
+ "use_textline_orientation",
362
+ "text_det_thresh",
363
+ "text_det_box_thresh",
364
+ "text_det_unclip_ratio",
365
+ "text_rec_score_thresh",
366
+ ]
367
+ )
368
+
369
+ def _build_document_parsing_options(self) -> dict[str, Any]:
370
+ return self._collect_options(
371
+ [
372
+ "use_doc_orientation_classify",
373
+ "use_doc_unwarping",
374
+ "use_table_recognition",
375
+ "use_formula_recognition",
376
+ "use_chart_recognition",
377
+ "use_seal_recognition",
378
+ "prettify_markdown",
379
+ "temperature",
380
+ "top_p",
381
+ "visualize",
382
+ ]
383
+ )
384
+
385
+ def _collect_options(self, option_names: list[str]) -> dict[str, Any]:
386
+ options: dict[str, Any] = {}
387
+ for name in option_names:
388
+ value = getattr(self, name, None)
389
+ if value is not None:
390
+ options[name] = value
391
+ return options
392
+
393
def _ocr_result_to_data(self, job_id: str, jsonl_data: list[dict[str, Any]], file_path: Path) -> Data:
    """Convert OCR job output into a plain-text ``Data`` record.

    Args:
        job_id: Identifier of the finished job; stored in the result.
        jsonl_data: Parsed result rows. Each row either wraps its content in a
            ``"result"`` object or is the content itself.
        file_path: Source file; stored under ``SERVER_FILE_PATH_FIELDNAME``.

    Returns:
        A ``Data`` whose text is the recognized lines of each page joined by
        newlines, with pages separated by a blank line, plus one entry in
        ``"pages"`` per OCR result item.
    """
    pages_payload: list[dict[str, Any]] = []
    text_parts: list[str] = []

    for line_obj in jsonl_data:
        # Rows that are not JSON objects carry nothing usable; skip them
        # instead of failing on ``.get``.
        if not isinstance(line_obj, dict):
            continue
        result = line_obj.get("result", line_obj)
        # ``"result": null`` (or any non-object) is treated like an empty
        # result, matching the ``or {}`` / ``or []`` guards used below.
        if not isinstance(result, dict):
            continue
        for item in result.get("ocrResults", []) or []:
            pruned_result = item.get("prunedResult", {}) or {}
            rec_texts = pruned_result.get("rec_texts", []) or []
            if rec_texts:
                text_parts.append("\n".join(str(text) for text in rec_texts))
            pages_payload.append(
                {
                    "pruned_result": pruned_result,
                    "ocr_image_url": item.get("ocrImage"),
                }
            )

    text = "\n\n".join(part for part in text_parts if part)
    return Data(
        text=text,
        data={
            self.SERVER_FILE_PATH_FIELDNAME: str(file_path),
            "text": text,
            "task_type": "ocr",
            "output_format": "plain_text",
            "model": self.model,
            "job_id": job_id,
            "pages": pages_payload,
        },
    )
424
+
425
def _document_result_to_data(self, job_id: str, jsonl_data: list[dict[str, Any]], file_path: Path) -> Data:
    """Convert document-parsing job output into a Markdown ``Data`` record.

    For each result row, layout-parsing results are used when present;
    otherwise the row's plain OCR results are used as a fallback.

    Args:
        job_id: Identifier of the finished job; stored in the result.
        jsonl_data: Parsed result rows. Each row either wraps its content in a
            ``"result"`` object or is the content itself.
        file_path: Source file; stored under ``SERVER_FILE_PATH_FIELDNAME``.

    Returns:
        A ``Data`` whose text is the non-empty page texts joined by blank
        lines, plus one entry in ``"pages"`` per page.
    """
    pages_payload: list[dict[str, Any]] = []
    text_parts: list[str] = []

    for line_obj in jsonl_data:
        # Rows that are not JSON objects carry nothing usable; skip them
        # instead of failing on ``.get``.
        if not isinstance(line_obj, dict):
            continue
        result = line_obj.get("result", line_obj)
        # ``"result": null`` (or any non-object) is treated like an empty
        # result, matching the ``or []`` guards used below.
        if not isinstance(result, dict):
            continue
        layout_results = result.get("layoutParsingResults", []) or []
        if layout_results:
            self._append_layout_results(layout_results, pages_payload, text_parts)
            continue
        self._append_ocr_fallback_results(result.get("ocrResults", []) or [], pages_payload, text_parts)

    markdown_text = "\n\n".join(part for part in text_parts if part)
    return Data(
        text=markdown_text,
        data={
            self.SERVER_FILE_PATH_FIELDNAME: str(file_path),
            "text": markdown_text,
            "task_type": "document_parsing",
            "output_format": "markdown",
            "model": self.model,
            "job_id": job_id,
            "pages": pages_payload,
        },
    )
450
+
451
+ def _append_layout_results(
452
+ self,
453
+ layout_results: list[dict[str, Any]],
454
+ pages_payload: list[dict[str, Any]],
455
+ text_parts: list[str],
456
+ ) -> None:
457
+ for item in layout_results:
458
+ markdown = item.get("markdown", {}) or {}
459
+ markdown_text = markdown.get("text") or item.get("markdown_text") or ""
460
+ if markdown_text:
461
+ text_parts.append(str(markdown_text))
462
+ pages_payload.append(
463
+ {
464
+ "markdown_text": markdown_text,
465
+ "markdown_images": markdown.get("images", {}) or {},
466
+ "output_images": item.get("outputImages", {}) or {},
467
+ }
468
+ )
469
+
470
+ def _append_ocr_fallback_results(
471
+ self,
472
+ ocr_results: list[dict[str, Any]],
473
+ pages_payload: list[dict[str, Any]],
474
+ text_parts: list[str],
475
+ ) -> None:
476
+ for item in ocr_results:
477
+ pruned_result = item.get("prunedResult", {}) or {}
478
+ rec_texts = pruned_result.get("rec_texts", []) or []
479
+ text = "\n".join(str(text) for text in rec_texts)
480
+ if text:
481
+ text_parts.append(text)
482
+ pages_payload.append(
483
+ {
484
+ "markdown_text": text,
485
+ "markdown_images": {},
486
+ "output_images": {},
487
+ "pruned_result": pruned_result,
488
+ "ocr_image_url": item.get("ocrImage"),
489
+ }
490
+ )
491
+
492
def _format_paddleocr_error(self, error: Exception) -> str:
    """Turn an exception raised during processing into a user-facing message.

    Checks run from most to least specific: HTTP status errors (with a
    dedicated message for 401/403), then timeouts, then other HTTP-level
    errors, and finally a generic fallback.
    """
    if isinstance(error, httpx.HTTPStatusError):
        response = error.response
        if response.status_code in {401, 403}:
            return "PaddleOCR authentication failed. Please check the AI Studio Access Token."
        return f"PaddleOCR API error ({response.status_code}): {response.text}"

    # Covers both request-level timeouts and the polling deadline.
    if isinstance(error, (httpx.TimeoutException, TimeoutError)):
        return "PaddleOCR job timed out. Increase the timeout or try again later."

    prefix = "PaddleOCR network error" if isinstance(error, httpx.HTTPError) else "PaddleOCR failed"
    return f"{prefix}: {error}"
@@ -0,0 +1,16 @@
1
+ {
2
+ "$schema": "https://schemas.langflow.org/extension/v1.json",
3
+ "id": "lfx-paddle",
4
+ "version": "0.1.0",
5
+ "name": "PaddleOCR",
6
+ "description": "PaddleOCR component (OCR and layout-aware document parsing via the AI Studio async Job API) as a standalone Langflow Extension Bundle.",
7
+ "lfx": {
8
+ "compat": ["1"]
9
+ },
10
+ "bundles": [
11
+ {
12
+ "name": "paddle",
13
+ "path": "components/paddle"
14
+ }
15
+ ]
16
+ }
@@ -0,0 +1,54 @@
1
+ Metadata-Version: 2.4
2
+ Name: lfx-paddle
3
+ Version: 0.1.0
4
+ Summary: PaddleOCR component (OCR and layout-aware document parsing via the AI Studio async Job API) as a standalone Langflow Extension Bundle.
5
+ Project-URL: Homepage, https://github.com/langflow-ai/langflow
6
+ Project-URL: Documentation, https://docs.langflow.org/extensions
7
+ Project-URL: Repository, https://github.com/langflow-ai/langflow
8
+ Author-email: Langflow <contact@langflow.org>
9
+ License: MIT
10
+ Keywords: bundle,extension,langflow,lfx,ocr,paddle,paddleocr
11
+ Requires-Python: <3.15,>=3.10
12
+ Requires-Dist: httpx<1.0.0,>=0.24.0
13
+ Requires-Dist: lfx<2.0.0,>=1.11.0.dev0
14
+ Description-Content-Type: text/markdown
15
+
16
+ # lfx-paddle
17
+
18
+ PaddleOCR as a standalone Langflow Extension Bundle.
19
+
20
+ Ships the **PaddleOCR** component, which performs either layout-aware document
21
+ parsing into Markdown (`PP-StructureV3`, `PaddleOCR-VL-1.6`) or plain OCR text
22
+ recognition (`PP-OCRv5`, `PP-OCRv6`). It talks to the PaddleOCR
23
+ [AI Studio async Job HTTP API](https://paddlepaddle.github.io/PaddleOCR/latest/en/version3.x/paddleocr_and_ppstructure.html)
24
+ (`submit -> poll -> fetch`) directly via `httpx`, so it does **not** require the
25
+ `paddleocr` Python SDK (whose transitive `pyyaml` constraint conflicts with
26
+ Langflow's dependency tree).
27
+
28
+ ## Install
29
+
30
+ ```bash
31
+ pip install lfx-paddle
32
+ ```
33
+
34
+ The bundle is registered automatically via the `langflow.extensions`
35
+ entry point. After installation, restart your Langflow server; the component will
36
+ appear in the palette under the `paddle` group.
37
+
38
+ You will need an AI Studio access token
39
+ (<https://aistudio.baidu.com/account/accessToken>) to run the component.
40
+
41
+ ## Develop
42
+
43
+ ```bash
44
+ cd src/bundles/paddle
45
+ pip install -e .
46
+ lfx extension validate src/lfx_paddle
47
+ ```
48
+
49
+ ## Migration
50
+
51
+ Saved flows referencing the legacy class name or the old import paths under
52
+ `lfx.components.paddle.*` are rewritten to the new namespaced ID
53
+ `ext:paddle:PaddleOCRComponent@official` by the migration table in
54
+ `src/lfx/src/lfx/extension/migration/migration_table.json`.
@@ -0,0 +1,8 @@
1
+ lfx_paddle/__init__.py,sha256=WjKSh_Oi7xqE2lGfnxrt3P9etv3ps1rEN2DZ46XjNp0,380
2
+ lfx_paddle/extension.json,sha256=6-XV3PlmbFgKLRhaUM0Txi-tkJFOZb1xzK8zcujKhkg,414
3
+ lfx_paddle/components/paddle/__init__.py,sha256=c73ZHRS90szobz4AArj3l9tmNeh759ZYf0bRZd-e_8M,309
4
+ lfx_paddle/components/paddle/paddleocr.py,sha256=Njvs-9x4AKfYZBv3GjkSOpkmNGy3v2aotCtjC7bilus,20044
5
+ lfx_paddle-0.1.0.dist-info/METADATA,sha256=vmyn7kFSGuqar41TrW3ZwgonS9qXw-mukt9PA420Fuw,2000
6
+ lfx_paddle-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
7
+ lfx_paddle-0.1.0.dist-info/entry_points.txt,sha256=YWCXw3eNS9iNZ9Y8BtDYK_zoIOxk6wQf--pLSZDB05Y,46
8
+ lfx_paddle-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [langflow.extensions]
2
+ lfx-paddle = lfx_paddle