paddlex-3.0.1-py3-none-any.whl → paddlex-3.0.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. paddlex/.version +1 -1
  2. paddlex/inference/models/base/predictor/base_predictor.py +2 -0
  3. paddlex/inference/models/common/static_infer.py +20 -14
  4. paddlex/inference/models/common/ts/funcs.py +19 -8
  5. paddlex/inference/models/formula_recognition/predictor.py +1 -1
  6. paddlex/inference/models/formula_recognition/processors.py +2 -2
  7. paddlex/inference/models/text_recognition/result.py +1 -1
  8. paddlex/inference/pipelines/layout_parsing/layout_objects.py +859 -0
  9. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +144 -205
  10. paddlex/inference/pipelines/layout_parsing/result_v2.py +13 -272
  11. paddlex/inference/pipelines/layout_parsing/setting.py +1 -0
  12. paddlex/inference/pipelines/layout_parsing/utils.py +108 -312
  13. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +302 -247
  14. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +156 -104
  15. paddlex/inference/pipelines/ocr/result.py +2 -2
  16. paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +1 -1
  17. paddlex/inference/serving/basic_serving/_app.py +47 -13
  18. paddlex/inference/serving/infra/utils.py +22 -17
  19. paddlex/inference/utils/hpi.py +60 -25
  20. paddlex/inference/utils/hpi_model_info_collection.json +627 -204
  21. paddlex/inference/utils/misc.py +20 -0
  22. paddlex/inference/utils/mkldnn_blocklist.py +36 -2
  23. paddlex/inference/utils/official_models.py +126 -5
  24. paddlex/inference/utils/pp_option.py +81 -21
  25. paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +12 -2
  26. paddlex/ops/__init__.py +6 -3
  27. paddlex/utils/deps.py +2 -2
  28. paddlex/utils/device.py +4 -19
  29. paddlex/utils/download.py +10 -7
  30. paddlex/utils/flags.py +9 -0
  31. paddlex/utils/subclass_register.py +2 -2
  32. {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/METADATA +307 -162
  33. {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/RECORD +37 -35
  34. {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/WHEEL +1 -1
  35. {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/entry_points.txt +1 -0
  36. {paddlex-3.0.1.dist-info/licenses → paddlex-3.0.3.dist-info}/LICENSE +0 -0
  37. {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/top_level.txt +0 -0
@@ -240,7 +240,7 @@ def calculate_overlap_ratio(
240
240
  inter_width = max(0, x_max_inter - x_min_inter)
241
241
  inter_height = max(0, y_max_inter - y_min_inter)
242
242
 
243
- inter_area = inter_width * inter_height
243
+ inter_area = float(inter_width) * float(inter_height)
244
244
 
245
245
  bbox1_area = caculate_bbox_area(bbox1)
246
246
  bbox2_area = caculate_bbox_area(bbox2)
@@ -262,76 +262,6 @@ def calculate_overlap_ratio(
262
262
  return inter_area / ref_area
263
263
 
264
264
 
265
- def group_boxes_into_lines(ocr_rec_res, line_height_iou_threshold):
266
- rec_boxes = ocr_rec_res["boxes"]
267
- rec_texts = ocr_rec_res["rec_texts"]
268
- rec_labels = ocr_rec_res["rec_labels"]
269
-
270
- text_boxes = [
271
- rec_boxes[i] for i in range(len(rec_boxes)) if rec_labels[i] == "text"
272
- ]
273
- text_orientation = calculate_text_orientation(text_boxes)
274
-
275
- match_direction = "vertical" if text_orientation == "horizontal" else "horizontal"
276
-
277
- line_start_index = 1 if text_orientation == "horizontal" else 0
278
- line_end_index = 3 if text_orientation == "horizontal" else 2
279
-
280
- spans = list(zip(rec_boxes, rec_texts, rec_labels))
281
- sort_index = 1
282
- reverse = False
283
- if text_orientation == "vertical":
284
- sort_index = 0
285
- reverse = True
286
- spans.sort(key=lambda span: span[0][sort_index], reverse=reverse)
287
- spans = [list(span) for span in spans]
288
-
289
- lines = []
290
- line = [spans[0]]
291
- line_region_box = spans[0][0].copy()
292
- line_heights = []
293
- # merge line
294
- for span in spans[1:]:
295
- rec_bbox = span[0]
296
- if (
297
- calculate_projection_overlap_ratio(
298
- line_region_box, rec_bbox, match_direction, mode="small"
299
- )
300
- >= line_height_iou_threshold
301
- ):
302
- line.append(span)
303
- line_region_box[line_start_index] = min(
304
- line_region_box[line_start_index], rec_bbox[line_start_index]
305
- )
306
- line_region_box[line_end_index] = max(
307
- line_region_box[line_end_index], rec_bbox[line_end_index]
308
- )
309
- else:
310
- line_heights.append(
311
- line_region_box[line_end_index] - line_region_box[line_start_index]
312
- )
313
- lines.append(line)
314
- line = [span]
315
- line_region_box = rec_bbox.copy()
316
-
317
- lines.append(line)
318
- line_heights.append(
319
- line_region_box[line_end_index] - line_region_box[line_start_index]
320
- )
321
-
322
- min_height = min(line_heights) if line_heights else 0
323
- max_height = max(line_heights) if line_heights else 0
324
-
325
- if max_height > min_height * 2 and text_orientation == "vertical":
326
- line_heights = np.array(line_heights)
327
- min_height_num = np.sum(line_heights < min_height * 1.1)
328
- if min_height_num < len(lines) * 0.4:
329
- condition = line_heights > min_height * 1.1
330
- lines = [value for value, keep in zip(lines, condition) if keep]
331
-
332
- return lines, text_orientation, np.mean(line_heights)
333
-
334
-
335
265
  def calculate_minimum_enclosing_bbox(bboxes):
336
266
  """
337
267
  Calculate the minimum enclosing bounding box for a list of bounding boxes.
@@ -355,260 +285,44 @@ def calculate_minimum_enclosing_bbox(bboxes):
355
285
  max_y = np.max(bboxes_array[:, 3])
356
286
 
357
287
  # Return the minimum enclosing bounding box
358
- return [min_x, min_y, max_x, max_y]
359
-
360
-
361
- def calculate_text_orientation(
362
- bboxes: List[List[int]], orientation_ratio: float = 1.5
363
- ) -> bool:
364
- """
365
- Calculate the orientation of the text based on the bounding boxes.
366
-
367
- Args:
368
- bboxes (list): A list of bounding boxes.
369
- orientation_ratio (float): Ratio for determining orientation. Default is 1.5.
370
-
371
- Returns:
372
- str: "horizontal" or "vertical".
373
- """
374
-
375
- horizontal_box_num = 0
376
- for bbox in bboxes:
377
- if len(bbox) != 4:
378
- raise ValueError(
379
- "Invalid bounding box format. Expected a list of length 4."
380
- )
381
- x1, y1, x2, y2 = bbox
382
- width = x2 - x1
383
- height = y2 - y1
384
- horizontal_box_num += 1 if width * orientation_ratio >= height else 0
385
-
386
- return "horizontal" if horizontal_box_num >= len(bboxes) * 0.5 else "vertical"
288
+ return np.array([min_x, min_y, max_x, max_y])
387
289
 
388
290
 
389
291
  def is_english_letter(char):
292
+ """check if the char is english letter"""
390
293
  return bool(re.match(r"^[A-Za-z]$", char))
391
294
 
392
295
 
393
296
  def is_numeric(char):
394
- return bool(re.match(r"^[\d.]+$", char))
297
+ """check if the char is numeric"""
298
+ return bool(re.match(r"^[\d]+$", char))
395
299
 
396
300
 
397
301
  def is_non_breaking_punctuation(char):
398
302
  """
399
- 判断一个字符是否是不需要换行的标点符号,包括全角和半角的符号。
400
-
401
- :param char: str, 单个字符
402
- :return: bool, 如果字符是不需要换行的标点符号,返回True,否则返回False
403
- """
404
- non_breaking_punctuations = {
405
- ",", # 半角逗号
406
- ",", # 全角逗号
407
- "、", # 顿号
408
- ";", # 半角分号
409
- ";", # 全角分号
410
- ":", # 半角冒号
411
- ":", # 全角冒号
412
- "-", # 连字符
413
- }
414
-
415
- return char in non_breaking_punctuations
416
-
417
-
418
- def format_line(
419
- line: List[List[Union[List[int], str]]],
420
- text_direction: int,
421
- block_width: int,
422
- block_start_coordinate: int,
423
- block_stop_coordinate: int,
424
- line_gap_limit: int = 10,
425
- block_label: str = "text",
426
- ) -> None:
427
- """
428
- Format a line of text spans based on layout constraints.
303
+ check if the char is non-breaking punctuation
429
304
 
430
305
  Args:
431
- line (list): A list of spans, where each span is a list containing a bounding box and text.
432
- block_left_coordinate (int): The text line directional minimum coordinate of the layout bounding box.
433
- block_stop_coordinate (int): The text line directional maximum x-coordinate of the layout bounding box.
434
- first_line_span_limit (int): The limit for the number of pixels before the first span that should be considered part of the first line. Default is 10.
435
- line_gap_limit (int): The limit for the number of pixels after the last span that should be considered part of the last line. Default is 10.
436
- block_label (str): The label associated with the entire block. Default is 'text'.
437
- Returns:
438
- None: The function modifies the line in place.
439
- """
440
- first_span_box = line[0][0]
441
- last_span_box = line[-1][0]
442
-
443
- for span in line:
444
- if span[2] == "formula" and block_label != "formula":
445
- formula_rec = span[1]
446
- if not formula_rec.startswith("$") and not formula_rec.endswith("$"):
447
- if len(line) > 1:
448
- span[1] = f"${span[1]}$"
449
- else:
450
- span[1] = f"\n${span[1]}$"
451
-
452
- line_text = ""
453
- for span in line:
454
- _, text, label = span
455
- line_text += text
456
- if len(text) > 0 and is_english_letter(line_text[-1]) or label == "formula":
457
- line_text += " "
458
-
459
- if text_direction == "horizontal":
460
- text_start_index = 0
461
- text_stop_index = 2
462
- else:
463
- text_start_index = 1
464
- text_stop_index = 3
465
-
466
- need_new_line = False
467
- if (
468
- len(line_text) > 0
469
- and not is_english_letter(line_text[-1])
470
- and not is_non_breaking_punctuation(line_text[-1])
471
- ):
472
- if (
473
- text_direction == "horizontal"
474
- and block_stop_coordinate - last_span_box[text_stop_index] > line_gap_limit
475
- ) or (
476
- text_direction == "vertical"
477
- and (
478
- block_stop_coordinate - last_span_box[text_stop_index] > line_gap_limit
479
- or first_span_box[1] - block_start_coordinate > line_gap_limit
480
- )
481
- ):
482
- need_new_line = True
483
-
484
- if line_text.endswith("-"):
485
- line_text = line_text[:-1]
486
- elif (
487
- len(line_text) > 0 and is_english_letter(line_text[-1])
488
- ) or line_text.endswith("$"):
489
- line_text += " "
490
- elif (
491
- len(line_text) > 0
492
- and not is_english_letter(line_text[-1])
493
- and not is_non_breaking_punctuation(line_text[-1])
494
- and not is_numeric(line_text[-1])
495
- ) or text_direction == "vertical":
496
- if block_stop_coordinate - last_span_box[text_stop_index] > block_width * 0.4:
497
- line_text += "\n"
498
- if (
499
- first_span_box[text_start_index] - block_start_coordinate
500
- > block_width * 0.4
501
- ):
502
- line_text = "\n" + line_text
503
-
504
- return line_text, need_new_line
306
+ char (str): character to check
505
307
 
506
-
507
- def split_boxes_by_projection(spans: List[List[int]], direction, offset=1e-5):
508
- """
509
- Check if there is any complete containment in the x-direction
510
- between the bounding boxes and split the containing box accordingly.
511
-
512
- Args:
513
- spans (list of lists): Each element is a list containing an ndarray of length 4, a text string, and a label.
514
- direction: 'horizontal' or 'vertical', indicating whether the spans are arranged horizontally or vertically.
515
- offset (float): A small offset value to ensure that the split boxes are not too close to the original boxes.
516
308
  Returns:
517
- A new list of boxes, including split boxes, with the same `rec_text` and `label` attributes.
309
+ bool: True if the char is non-breaking punctuation
518
310
  """
311
+ non_breaking_punctuations = {
312
+ ",",
313
+ ",",
314
+ "、",
315
+ ";",
316
+ ";",
317
+ ":",
318
+ ":",
319
+ "-",
320
+ "'",
321
+ '"',
322
+ "“",
323
+ }
519
324
 
520
- def is_projection_contained(box_a, box_b, start_idx, end_idx):
521
- """Check if box_a completely contains box_b in the x-direction."""
522
- return box_a[start_idx] <= box_b[start_idx] and box_a[end_idx] >= box_b[end_idx]
523
-
524
- new_boxes = []
525
- if direction == "horizontal":
526
- projection_start_index, projection_end_index = 0, 2
527
- else:
528
- projection_start_index, projection_end_index = 1, 3
529
-
530
- for i in range(len(spans)):
531
- span = spans[i]
532
- is_split = False
533
- for j in range(i, len(spans)):
534
- box_b = spans[j][0]
535
- box_a, text, label = span
536
- if is_projection_contained(
537
- box_a, box_b, projection_start_index, projection_end_index
538
- ):
539
- is_split = True
540
- # Split box_a based on the x-coordinates of box_b
541
- if box_a[projection_start_index] < box_b[projection_start_index]:
542
- w = (
543
- box_b[projection_start_index]
544
- - offset
545
- - box_a[projection_start_index]
546
- )
547
- if w > 1:
548
- new_bbox = box_a.copy()
549
- new_bbox[projection_end_index] = (
550
- box_b[projection_start_index] - offset
551
- )
552
- new_boxes.append(
553
- [
554
- np.array(new_bbox),
555
- text,
556
- label,
557
- ]
558
- )
559
- if box_a[projection_end_index] > box_b[projection_end_index]:
560
- w = (
561
- box_a[projection_end_index]
562
- - box_b[projection_end_index]
563
- + offset
564
- )
565
- if w > 1:
566
- box_a[projection_start_index] = (
567
- box_b[projection_end_index] + offset
568
- )
569
- span = [
570
- np.array(box_a),
571
- text,
572
- label,
573
- ]
574
- if j == len(spans) - 1 and is_split:
575
- new_boxes.append(span)
576
- if not is_split:
577
- new_boxes.append(span)
578
-
579
- return new_boxes
580
-
581
-
582
- def remove_extra_space(input_text: str) -> str:
583
- """
584
- Process the input text to handle spaces.
585
-
586
- The function removes multiple consecutive spaces between Chinese characters and ensures that
587
- only a single space is retained between Chinese and non-Chinese characters.
588
-
589
- Args:
590
- input_text (str): The text to be processed.
591
-
592
- Returns:
593
- str: The processed text with properly formatted spaces.
594
- """
595
-
596
- # Remove spaces between Chinese characters
597
- text_without_spaces = re.sub(
598
- r"(?<=[\u4e00-\u9fff])\s+(?=[\u4e00-\u9fff])", "", input_text
599
- )
600
-
601
- # Ensure single space between Chinese and non-Chinese characters
602
- text_with_single_spaces = re.sub(
603
- r"(?<=[\u4e00-\u9fff])\s+(?=[^\u4e00-\u9fff])|(?<=[^\u4e00-\u9fff])\s+(?=[\u4e00-\u9fff])",
604
- " ",
605
- text_without_spaces,
606
- )
607
-
608
- # Reduce any remaining consecutive spaces to a single space
609
- final_text = re.sub(r"\s+", " ", text_with_single_spaces).strip()
610
-
611
- return final_text
325
+ return char in non_breaking_punctuations
612
326
 
613
327
 
614
328
  def gather_imgs(original_img, layout_det_objs):
@@ -794,7 +508,7 @@ def shrink_supplement_region_bbox(
794
508
  (x2 - x2_prime) / image_width,
795
509
  (y2 - y2_prime) / image_height,
796
510
  ]
797
- edge_distance_list_tmp = edge_distance_list[:]
511
+ edge_distance_list_tmp = deepcopy(edge_distance_list)
798
512
  min_distance = min(edge_distance_list)
799
513
  src_index = index_conversion_map[edge_distance_list.index(min_distance)]
800
514
  if len(block_idxes_set) == 0:
@@ -847,15 +561,14 @@ def shrink_supplement_region_bbox(
847
561
  supplement_region_bbox = calculate_minimum_enclosing_bbox(matched_bboxes)
848
562
  break
849
563
  else:
850
- edge_distance_list_tmp = [
851
- x for x in edge_distance_list_tmp if x != min_distance
852
- ]
564
+ edge_distance_list_tmp.remove(min_distance)
853
565
  min_distance = min(edge_distance_list_tmp)
854
566
  src_index = index_conversion_map[edge_distance_list.index(min_distance)]
855
567
  return supplement_region_bbox, iner_block_idxes
856
568
 
857
569
 
858
570
  def update_region_box(bbox, region_box):
571
+ """Update region box with bbox"""
859
572
  if region_box is None:
860
573
  return bbox
861
574
 
@@ -873,6 +586,14 @@ def update_region_box(bbox, region_box):
873
586
 
874
587
 
875
588
  def convert_formula_res_to_ocr_format(formula_res_list: List, ocr_res: dict):
589
+ """Convert formula result to OCR result format
590
+
591
+ Args:
592
+ formula_res_list (List): Formula results
593
+ ocr_res (dict): OCR result
594
+ Returns:
595
+ ocr_res (dict): Updated OCR result
596
+ """
876
597
  for formula_res in formula_res_list:
877
598
  x_min, y_min, x_max, y_max = list(map(int, formula_res["dt_polys"]))
878
599
  poly_points = [
@@ -896,11 +617,86 @@ def convert_formula_res_to_ocr_format(formula_res_list: List, ocr_res: dict):
896
617
 
897
618
 
898
619
  def caculate_bbox_area(bbox):
620
+ """Calculate bounding box area"""
899
621
  x1, y1, x2, y2 = map(float, bbox)
900
622
  area = abs((x2 - x1) * (y2 - y1))
901
623
  return area
902
624
 
903
625
 
626
+ def caculate_euclidean_dist(point1, point2):
627
+ """Calculate euclidean distance between two points"""
628
+ x1, y1 = point1
629
+ x2, y2 = point2
630
+ return ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5
631
+
632
+
633
+ def get_seg_flag(block, prev_block):
634
+ """Get segment start flag and end flag based on previous block
635
+
636
+ Args:
637
+ block (Block): Current block
638
+ prev_block (Block): Previous block
639
+
640
+ Returns:
641
+ seg_start_flag (bool): Segment start flag
642
+ seg_end_flag (bool): Segment end flag
643
+ """
644
+
645
+ seg_start_flag = True
646
+ seg_end_flag = True
647
+
648
+ context_left_coordinate = block.start_coordinate
649
+ context_right_coordinate = block.end_coordinate
650
+ seg_start_coordinate = block.seg_start_coordinate
651
+ seg_end_coordinate = block.seg_end_coordinate
652
+
653
+ if prev_block is not None:
654
+ num_of_prev_lines = prev_block.num_of_lines
655
+ pre_block_seg_end_coordinate = prev_block.seg_end_coordinate
656
+ prev_end_space_small = (
657
+ abs(prev_block.end_coordinate - pre_block_seg_end_coordinate) < 10
658
+ )
659
+ prev_lines_more_than_one = num_of_prev_lines > 1
660
+
661
+ overlap_blocks = (
662
+ context_left_coordinate < prev_block.end_coordinate
663
+ and context_right_coordinate > prev_block.start_coordinate
664
+ )
665
+
666
+ # update context_left_coordinate and context_right_coordinate
667
+ if overlap_blocks:
668
+ context_left_coordinate = min(
669
+ prev_block.start_coordinate, context_left_coordinate
670
+ )
671
+ context_right_coordinate = max(
672
+ prev_block.end_coordinate, context_right_coordinate
673
+ )
674
+ prev_end_space_small = (
675
+ abs(context_right_coordinate - pre_block_seg_end_coordinate) < 10
676
+ )
677
+ edge_distance = 0
678
+ else:
679
+ edge_distance = abs(block.start_coordinate - prev_block.end_coordinate)
680
+
681
+ current_start_space_small = seg_start_coordinate - context_left_coordinate < 10
682
+
683
+ if (
684
+ prev_end_space_small
685
+ and current_start_space_small
686
+ and prev_lines_more_than_one
687
+ and edge_distance < max(prev_block.width, block.width)
688
+ ):
689
+ seg_start_flag = False
690
+ else:
691
+ if seg_start_coordinate - context_left_coordinate < 10:
692
+ seg_start_flag = False
693
+
694
+ if context_right_coordinate - seg_end_coordinate < 10:
695
+ seg_end_flag = False
696
+
697
+ return seg_start_flag, seg_end_flag
698
+
699
+
904
700
  def get_show_color(label: str, order_label=False) -> Tuple:
905
701
  if order_label:
906
702
  label_colors = {