paddleocr-haystack 1.0.0__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ # Changelog
2
+
3
+ ## [integrations/paddleocr-v1.0.0] - 2026-01-12
4
+
5
+ ### 🧹 Chores
6
+
7
+ - Make fmt command more forgiving (#2671)
8
+ - [**breaking**] Paddleocr - drop Python 3.9 and use X|Y typing (#2714)
9
+
10
+ ### 🌀 Miscellaneous
11
+
12
+ - Feat: Add 'Client-Platform' header for server processing for PaddleOCR (#2657)
13
+
14
+ ## [integrations/paddleocr-v0.1.0] - 2025-12-10
15
+
16
+ ### 🚀 Features
17
+
18
+ - Add PaddleOCR-VL document converter (#2567)
19
+
20
+ <!-- generated by git-cliff -->
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: paddleocr-haystack
3
- Version: 1.0.0
3
+ Version: 1.1.0
4
4
  Summary: An integration of PaddleOCR with Haystack
5
5
  Project-URL: Documentation, https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/paddleocr#readme
6
6
  Project-URL: Issues, https://github.com/deepset-ai/haystack-core-integrations/issues
@@ -17,8 +17,8 @@ Classifier: Programming Language :: Python :: Implementation :: CPython
17
17
  Classifier: Programming Language :: Python :: Implementation :: PyPy
18
18
  Requires-Python: >=3.10
19
19
  Requires-Dist: haystack-ai>=2.22.0
20
- Requires-Dist: paddleocr>=3.3.2
21
- Requires-Dist: paddlex[serving]>=3.3.10
20
+ Requires-Dist: paddleocr>=3.4.0
21
+ Requires-Dist: paddlex[serving]>=3.4.0
22
22
  Requires-Dist: requests>=2.25.0
23
23
  Description-Content-Type: text/markdown
24
24
 
@@ -24,8 +24,8 @@ classifiers = [
24
24
  ]
25
25
  dependencies = [
26
26
  "haystack-ai>=2.22.0",
27
- "paddleocr>=3.3.2",
28
- "paddlex[serving]>=3.3.10",
27
+ "paddleocr>=3.4.0",
28
+ "paddlex[serving]>=3.4.0",
29
29
  "requests>=2.25.0",
30
30
  ]
31
31
 
@@ -146,14 +146,17 @@ class PaddleOCRVLDocumentConverter:
146
146
  api_url: str,
147
147
  access_token: Secret = Secret.from_env_var("AISTUDIO_ACCESS_TOKEN"),
148
148
  file_type: FileTypeInput = None,
149
- use_doc_orientation_classify: bool | None = None,
150
- use_doc_unwarping: bool | None = None,
149
+ use_doc_orientation_classify: bool | None = False,
150
+ use_doc_unwarping: bool | None = False,
151
151
  use_layout_detection: bool | None = None,
152
152
  use_chart_recognition: bool | None = None,
153
+ use_seal_recognition: bool | None = None,
154
+ use_ocr_for_image_block: bool | None = None,
153
155
  layout_threshold: float | dict | None = None,
154
156
  layout_nms: bool | None = None,
155
157
  layout_unclip_ratio: float | tuple[float, float] | dict | None = None,
156
158
  layout_merge_bboxes_mode: str | dict | None = None,
159
+ layout_shape_mode: str | None = None,
157
160
  prompt_label: str | None = None,
158
161
  format_block_content: bool | None = None,
159
162
  repetition_penalty: float | None = None,
@@ -161,8 +164,15 @@ class PaddleOCRVLDocumentConverter:
161
164
  top_p: float | None = None,
162
165
  min_pixels: int | None = None,
163
166
  max_pixels: int | None = None,
167
+ max_new_tokens: int | None = None,
168
+ merge_layout_blocks: bool | None = None,
169
+ markdown_ignore_labels: list[str] | None = None,
170
+ vlm_extra_args: dict | None = None,
164
171
  prettify_markdown: bool | None = None,
165
172
  show_formula_number: bool | None = None,
173
+ restructure_pages: bool | None = None,
174
+ merge_tables: bool | None = None,
175
+ relevel_titles: bool | None = None,
166
176
  visualize: bool | None = None,
167
177
  additional_params: dict[str, Any] | None = None,
168
178
  ):
@@ -171,10 +181,9 @@ class PaddleOCRVLDocumentConverter:
171
181
 
172
182
  :param api_url:
173
183
  API URL. To obtain the API URL, visit the [PaddleOCR official
174
- website](https://aistudio.baidu.com/paddleocr/task), click the
175
- **API** button in the upper-left corner, choose the example code
176
- for **Large Model document parsing(PaddleOCR-VL)**, and copy the
177
- `API_URL`.
184
+ website](https://aistudio.baidu.com/paddleocr), click the
185
+ **API** button, choose the example code for PaddleOCR-VL, and copy
186
+ the `API_URL`.
178
187
  :param access_token:
179
188
  AI Studio access token. You can obtain it from [this
180
189
  page](https://aistudio.baidu.com/account/accessToken).
@@ -193,6 +202,10 @@ class PaddleOCRVLDocumentConverter:
193
202
  Whether to enable the layout detection function.
194
203
  :param use_chart_recognition:
195
204
  Whether to enable the chart recognition function.
205
+ :param use_seal_recognition:
206
+ Whether to enable the seal recognition function.
207
+ :param use_ocr_for_image_block:
208
+ Whether to recognize text in image blocks.
196
209
  :param layout_threshold:
197
210
  Layout detection threshold. Can be a float or a dict with
198
211
  page-specific thresholds.
@@ -204,9 +217,11 @@ class PaddleOCRVLDocumentConverter:
204
217
  dict with page-specific values.
205
218
  :param layout_merge_bboxes_mode:
206
219
  Layout merge bounding boxes mode. Can be a string or a dict.
220
+ :param layout_shape_mode:
221
+ Layout shape mode.
207
222
  :param prompt_label:
208
223
  Prompt type for the VLM. Possible values are "ocr", "formula",
209
- "table", and "chart".
224
+ "table", "chart", "seal", and "spotting".
210
225
  :param format_block_content:
211
226
  Whether to format block content.
212
227
  :param repetition_penalty:
@@ -219,10 +234,25 @@ class PaddleOCRVLDocumentConverter:
219
234
  Minimum number of pixels allowed during VLM preprocessing.
220
235
  :param max_pixels:
221
236
  Maximum number of pixels allowed during VLM preprocessing.
237
+ :param max_new_tokens:
238
+ Maximum number of tokens generated by the VLM.
239
+ :param merge_layout_blocks:
240
+ Whether to merge the layout detection boxes for cross-column or
241
+ staggered top and bottom columns.
242
+ :param markdown_ignore_labels:
243
+ Layout labels that need to be ignored in Markdown.
244
+ :param vlm_extra_args:
245
+ Additional configuration parameters for the VLM.
222
246
  :param prettify_markdown:
223
247
  Whether to prettify the output Markdown text.
224
248
  :param show_formula_number:
225
249
  Whether to include formula numbers in the output markdown text.
250
+ :param restructure_pages:
251
+ Whether to restructure results across multiple pages.
252
+ :param merge_tables:
253
+ Whether to merge tables across pages.
254
+ :param relevel_titles:
255
+ Whether to relevel titles.
226
256
  :param visualize:
227
257
  Whether to return visualization results.
228
258
  :param additional_params:
@@ -235,10 +265,13 @@ class PaddleOCRVLDocumentConverter:
235
265
  self.use_doc_unwarping = use_doc_unwarping
236
266
  self.use_layout_detection = use_layout_detection
237
267
  self.use_chart_recognition = use_chart_recognition
268
+ self.use_seal_recognition = use_seal_recognition
269
+ self.use_ocr_for_image_block = use_ocr_for_image_block
238
270
  self.layout_threshold = layout_threshold
239
271
  self.layout_nms = layout_nms
240
272
  self.layout_unclip_ratio = layout_unclip_ratio
241
273
  self.layout_merge_bboxes_mode = layout_merge_bboxes_mode
274
+ self.layout_shape_mode = layout_shape_mode
242
275
  self.prompt_label = prompt_label
243
276
  self.format_block_content = format_block_content
244
277
  self.repetition_penalty = repetition_penalty
@@ -246,8 +279,15 @@ class PaddleOCRVLDocumentConverter:
246
279
  self.top_p = top_p
247
280
  self.min_pixels = min_pixels
248
281
  self.max_pixels = max_pixels
282
+ self.max_new_tokens = max_new_tokens
283
+ self.merge_layout_blocks = merge_layout_blocks
284
+ self.markdown_ignore_labels = markdown_ignore_labels
285
+ self.vlm_extra_args = vlm_extra_args
249
286
  self.prettify_markdown = prettify_markdown
250
287
  self.show_formula_number = show_formula_number
288
+ self.restructure_pages = restructure_pages
289
+ self.merge_tables = merge_tables
290
+ self.relevel_titles = relevel_titles
251
291
  self.visualize = visualize
252
292
  self.additional_params = additional_params
253
293
 
@@ -267,10 +307,13 @@ class PaddleOCRVLDocumentConverter:
267
307
  use_doc_unwarping=self.use_doc_unwarping,
268
308
  use_layout_detection=self.use_layout_detection,
269
309
  use_chart_recognition=self.use_chart_recognition,
310
+ use_seal_recognition=self.use_seal_recognition,
311
+ use_ocr_for_image_block=self.use_ocr_for_image_block,
270
312
  layout_threshold=self.layout_threshold,
271
313
  layout_nms=self.layout_nms,
272
314
  layout_unclip_ratio=self.layout_unclip_ratio,
273
315
  layout_merge_bboxes_mode=self.layout_merge_bboxes_mode,
316
+ layout_shape_mode=self.layout_shape_mode,
274
317
  prompt_label=self.prompt_label,
275
318
  format_block_content=self.format_block_content,
276
319
  repetition_penalty=self.repetition_penalty,
@@ -278,8 +321,15 @@ class PaddleOCRVLDocumentConverter:
278
321
  top_p=self.top_p,
279
322
  min_pixels=self.min_pixels,
280
323
  max_pixels=self.max_pixels,
324
+ max_new_tokens=self.max_new_tokens,
325
+ merge_layout_blocks=self.merge_layout_blocks,
326
+ markdown_ignore_labels=self.markdown_ignore_labels,
327
+ vlm_extra_args=self.vlm_extra_args,
281
328
  prettify_markdown=self.prettify_markdown,
282
329
  show_formula_number=self.show_formula_number,
330
+ restructure_pages=self.restructure_pages,
331
+ merge_tables=self.merge_tables,
332
+ relevel_titles=self.relevel_titles,
283
333
  visualize=self.visualize,
284
334
  additional_params=self.additional_params,
285
335
  )
@@ -331,6 +381,10 @@ class PaddleOCRVLDocumentConverter:
331
381
  request_data["useLayoutDetection"] = self.use_layout_detection
332
382
  if self.use_chart_recognition is not None:
333
383
  request_data["useChartRecognition"] = self.use_chart_recognition
384
+ if self.use_seal_recognition is not None:
385
+ request_data["useSealRecognition"] = self.use_seal_recognition
386
+ if self.use_ocr_for_image_block is not None:
387
+ request_data["useOcrForImageBlock"] = self.use_ocr_for_image_block
334
388
  if self.layout_threshold is not None:
335
389
  request_data["layoutThreshold"] = self.layout_threshold
336
390
  if self.layout_nms is not None:
@@ -339,6 +393,8 @@ class PaddleOCRVLDocumentConverter:
339
393
  request_data["layoutUnclipRatio"] = self.layout_unclip_ratio
340
394
  if self.layout_merge_bboxes_mode is not None:
341
395
  request_data["layoutMergeBboxesMode"] = self.layout_merge_bboxes_mode
396
+ if self.layout_shape_mode is not None:
397
+ request_data["layoutShapeMode"] = self.layout_shape_mode
342
398
  if self.prompt_label is not None:
343
399
  request_data["promptLabel"] = self.prompt_label
344
400
  if self.format_block_content is not None:
@@ -353,10 +409,24 @@ class PaddleOCRVLDocumentConverter:
353
409
  request_data["minPixels"] = self.min_pixels
354
410
  if self.max_pixels is not None:
355
411
  request_data["maxPixels"] = self.max_pixels
412
+ if self.max_new_tokens is not None:
413
+ request_data["maxNewTokens"] = self.max_new_tokens
414
+ if self.merge_layout_blocks is not None:
415
+ request_data["mergeLayoutBlocks"] = self.merge_layout_blocks
416
+ if self.markdown_ignore_labels is not None:
417
+ request_data["markdownIgnoreLabels"] = self.markdown_ignore_labels
418
+ if self.vlm_extra_args is not None:
419
+ request_data["vlmExtraArgs"] = self.vlm_extra_args
356
420
  if self.prettify_markdown is not None:
357
421
  request_data["prettifyMarkdown"] = self.prettify_markdown
358
422
  if self.show_formula_number is not None:
359
423
  request_data["showFormulaNumber"] = self.show_formula_number
424
+ if self.restructure_pages is not None:
425
+ request_data["restructurePages"] = self.restructure_pages
426
+ if self.merge_tables is not None:
427
+ request_data["mergeTables"] = self.merge_tables
428
+ if self.relevel_titles is not None:
429
+ request_data["relevelTitles"] = self.relevel_titles
360
430
  if self.visualize is not None:
361
431
  request_data["visualize"] = self.visualize
362
432
  if self.additional_params is not None:
@@ -42,8 +42,8 @@ class TestPaddleOCRVLDocumentConverter:
42
42
  assert converter.access_token == Secret.from_env_var("AISTUDIO_ACCESS_TOKEN")
43
43
  assert converter.api_url == "http://test-api-url.com"
44
44
  assert converter.file_type is None
45
- assert converter.use_doc_orientation_classify is None
46
- assert converter.use_doc_unwarping is None
45
+ assert converter.use_doc_orientation_classify is False
46
+ assert converter.use_doc_unwarping is False
47
47
  assert converter.use_layout_detection is None
48
48
  assert converter.use_chart_recognition is None
49
49
  assert converter.layout_threshold is None
@@ -126,14 +126,17 @@ class TestPaddleOCRVLDocumentConverter:
126
126
  "type": "env_var",
127
127
  },
128
128
  "file_type": None,
129
- "use_doc_orientation_classify": None,
130
- "use_doc_unwarping": None,
129
+ "use_doc_orientation_classify": False,
130
+ "use_doc_unwarping": False,
131
131
  "use_layout_detection": None,
132
132
  "use_chart_recognition": None,
133
+ "use_seal_recognition": None,
134
+ "use_ocr_for_image_block": None,
133
135
  "layout_threshold": None,
134
136
  "layout_nms": None,
135
137
  "layout_unclip_ratio": None,
136
138
  "layout_merge_bboxes_mode": None,
139
+ "layout_shape_mode": None,
137
140
  "prompt_label": None,
138
141
  "format_block_content": None,
139
142
  "repetition_penalty": None,
@@ -141,8 +144,15 @@ class TestPaddleOCRVLDocumentConverter:
141
144
  "top_p": None,
142
145
  "min_pixels": None,
143
146
  "max_pixels": None,
147
+ "max_new_tokens": None,
148
+ "merge_layout_blocks": None,
149
+ "markdown_ignore_labels": None,
150
+ "vlm_extra_args": None,
144
151
  "prettify_markdown": None,
145
152
  "show_formula_number": None,
153
+ "restructure_pages": None,
154
+ "merge_tables": None,
155
+ "relevel_titles": None,
146
156
  "visualize": None,
147
157
  "additional_params": None,
148
158
  },
@@ -158,10 +168,13 @@ class TestPaddleOCRVLDocumentConverter:
158
168
  use_doc_unwarping=False,
159
169
  use_layout_detection=True,
160
170
  use_chart_recognition=False,
171
+ use_seal_recognition=None,
172
+ use_ocr_for_image_block=None,
161
173
  layout_threshold=0.7,
162
174
  layout_nms=False,
163
175
  layout_unclip_ratio=2.0,
164
176
  layout_merge_bboxes_mode="separate",
177
+ layout_shape_mode=None,
165
178
  prompt_label="formula",
166
179
  format_block_content=False,
167
180
  repetition_penalty=1.2,
@@ -169,8 +182,15 @@ class TestPaddleOCRVLDocumentConverter:
169
182
  top_p=0.95,
170
183
  min_pixels=200,
171
184
  max_pixels=2000,
185
+ max_new_tokens=None,
186
+ merge_layout_blocks=None,
187
+ markdown_ignore_labels=None,
188
+ vlm_extra_args=None,
172
189
  prettify_markdown=True,
173
190
  show_formula_number=True,
191
+ restructure_pages=None,
192
+ merge_tables=None,
193
+ relevel_titles=None,
174
194
  visualize=False,
175
195
  additional_params={},
176
196
  )
@@ -190,10 +210,13 @@ class TestPaddleOCRVLDocumentConverter:
190
210
  "use_doc_unwarping": False,
191
211
  "use_layout_detection": True,
192
212
  "use_chart_recognition": False,
213
+ "use_seal_recognition": None,
214
+ "use_ocr_for_image_block": None,
193
215
  "layout_threshold": 0.7,
194
216
  "layout_nms": False,
195
217
  "layout_unclip_ratio": 2.0,
196
218
  "layout_merge_bboxes_mode": "separate",
219
+ "layout_shape_mode": None,
197
220
  "prompt_label": "formula",
198
221
  "format_block_content": False,
199
222
  "repetition_penalty": 1.2,
@@ -201,8 +224,15 @@ class TestPaddleOCRVLDocumentConverter:
201
224
  "top_p": 0.95,
202
225
  "min_pixels": 200,
203
226
  "max_pixels": 2000,
227
+ "max_new_tokens": None,
228
+ "merge_layout_blocks": None,
229
+ "markdown_ignore_labels": None,
230
+ "vlm_extra_args": None,
204
231
  "prettify_markdown": True,
205
232
  "show_formula_number": True,
233
+ "restructure_pages": None,
234
+ "merge_tables": None,
235
+ "relevel_titles": None,
206
236
  "visualize": False,
207
237
  "additional_params": {},
208
238
  },
@@ -1,9 +0,0 @@
1
- # Changelog
2
-
3
- ## [integrations/paddleocr-v0.1.0] - 2025-12-10
4
-
5
- ### 🚀 Features
6
-
7
- - Add PaddleOCR-VL document converter (#2567)
8
-
9
- <!-- generated by git-cliff -->