PyPI - openocr-python - Versions diffs - 0.0.9__py3-none-any.whl → 0.1.0.dev0__py3-none-any.whl - Mend

openocr-python 0.0.9py3-none-any.whl → 0.1.0.dev0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

openocr/__init__.py +35 -1
openocr/configs/dataset/rec/evaluation.yaml +41 -0
openocr/configs/dataset/rec/ltb.yaml +9 -0
openocr/configs/dataset/rec/mjsynth.yaml +11 -0
openocr/configs/dataset/rec/openvino.yaml +25 -0
openocr/configs/dataset/rec/ost.yaml +17 -0
openocr/configs/dataset/rec/synthtext.yaml +7 -0
openocr/configs/dataset/rec/test.yaml +77 -0
openocr/configs/dataset/rec/textocr.yaml +13 -0
openocr/configs/dataset/rec/textocr_horizontal.yaml +13 -0
openocr/configs/dataset/rec/union14m_b.yaml +47 -0
openocr/configs/dataset/rec/union14m_l_filtered.yaml +35 -0
openocr/configs/rec/cmer/cmer.yml +127 -0
openocr/configs/rec/mdiff4str/svtrv2_mdiffdecoder_base.yml +152 -0
openocr/configs/rec/mdiff4str/svtrv2_mdiffdecoder_small.yml +152 -0
openocr/configs/rec/unirec/focalsvtr_ardecoder_unirec.yml +114 -0
openocr/configs/rec/unirec/opendoc_pipeline.yml +105 -0
openocr/demo_gradio.py +28 -8
openocr/demo_opendoc.py +572 -0
openocr/demo_unirec.py +392 -0
openocr/opendet/losses/__init__.py +5 -7
openocr/opendet/preprocess/crop_resize.py +2 -1
openocr/openocr.py +685 -0
openocr/openrec/losses/__init__.py +8 -3
openocr/openrec/losses/cmer_loss.py +12 -0
openocr/openrec/losses/mdiff_loss.py +11 -0
openocr/openrec/losses/unirec_loss.py +12 -0
openocr/openrec/metrics/__init__.py +4 -1
openocr/openrec/metrics/rec_metric_cmer.py +328 -0
openocr/openrec/modeling/cmer_modeling/modeling_cmer.py +643 -0
openocr/openrec/modeling/decoders/__init__.py +1 -0
openocr/openrec/modeling/decoders/ctc_decoder.py +1 -1
openocr/openrec/modeling/decoders/dan_decoder.py +4 -4
openocr/openrec/modeling/decoders/dptr_parseq_clip_b_decoder.py +1563 -1398
openocr/openrec/modeling/decoders/mdiff_decoder.py +587 -0
openocr/openrec/modeling/decoders/smtr_decoder.py +99 -48
openocr/openrec/modeling/unirec_modeling/configuration_unirec.py +166 -0
openocr/openrec/modeling/unirec_modeling/modeling_unirec.py +433 -0
openocr/openrec/optimizer/__init__.py +4 -3
openocr/openrec/optimizer/lr.py +49 -0
openocr/openrec/postprocess/__init__.py +2 -0
openocr/openrec/postprocess/abinet_postprocess.py +1 -1
openocr/openrec/postprocess/ar_postprocess.py +1 -1
openocr/openrec/postprocess/cmer_postprocess.py +86 -0
openocr/openrec/postprocess/cppd_postprocess.py +1 -1
openocr/openrec/postprocess/igtr_postprocess.py +1 -1
openocr/openrec/postprocess/lister_postprocess.py +1 -1
openocr/openrec/postprocess/mgp_postprocess.py +1 -1
openocr/openrec/postprocess/nrtr_postprocess.py +2 -2
openocr/openrec/postprocess/smtr_postprocess.py +1 -1
openocr/openrec/postprocess/srn_postprocess.py +1 -1
openocr/openrec/postprocess/unirec_postprocess.py +58 -0
openocr/openrec/postprocess/visionlan_postprocess.py +1 -1
openocr/openrec/preprocess/__init__.py +5 -0
openocr/openrec/preprocess/ce_label_encode.py +1 -1
openocr/openrec/preprocess/cmer_label_encode.py +1025 -0
openocr/openrec/preprocess/ctc_label_encode.py +1 -1
openocr/openrec/preprocess/dptr_label_encode.py +177 -157
openocr/openrec/preprocess/igtr_label_encode.py +4 -2
openocr/openrec/preprocess/mdiff_label_encode.py +312 -0
openocr/openrec/preprocess/rec_aug.py +128 -2
openocr/openrec/preprocess/resize.py +57 -0
openocr/openrec/preprocess/unirec_label_encode.py +62 -0
openocr/tools/data/__init__.py +78 -55
openocr/tools/data/cmer_web_dataset.py +310 -0
openocr/tools/data/native_size_dataset.py +753 -0
openocr/tools/data/native_size_sampler.py +158 -0
openocr/tools/data/ratio_dataset_tvresize.py +2 -0
openocr/tools/data/ratio_sampler.py +2 -1
openocr/tools/download/download_dataset.py +38 -0
openocr/tools/download/utils.py +28 -0
openocr/tools/download_example_images.py +236 -0
openocr/tools/engine/trainer.py +155 -39
openocr/tools/eval_rec_all_ch.py +2 -2
openocr/tools/infer_det.py +20 -2
openocr/tools/infer_doc.py +898 -0
openocr/tools/infer_doc_onnx.py +1172 -0
openocr/tools/infer_e2e.py +27 -10
openocr/tools/infer_rec.py +64 -15
openocr/tools/infer_unirec_onnx.py +730 -0
openocr/tools/to_markdown.py +468 -0
openocr/tools/utils/ckpt.py +17 -5
openocr/tools/utils/opendoc_onnx_utils/utils.py +1052 -0
openocr_python-0.1.0.dev0.dist-info/METADATA +324 -0
{openocr_python-0.0.9.dist-info → openocr_python-0.1.0.dev0.dist-info}/RECORD +89 -45
{openocr_python-0.0.9.dist-info → openocr_python-0.1.0.dev0.dist-info}/WHEEL +1 -1
openocr_python-0.1.0.dev0.dist-info/entry_points.txt +2 -0
openocr_python-0.0.9.dist-info/METADATA +0 -149
/openocr_python-0.0.9.dist-info/LICENCE → /openocr_python-0.1.0.dev0.dist-info/licenses/LICENSE +0 -0
{openocr_python-0.0.9.dist-info → openocr_python-0.1.0.dev0.dist-info}/top_level.txt +0 -0

openocr/configs/rec/mdiff4str/svtrv2_mdiffdecoder_small.yml ADDED Viewed

@@ -0,0 +1,152 @@
+Global:
+  device: gpu
+  epoch_num: 40
+  log_smooth_window: 20
+  print_batch_step: 10
+  output_dir: ./output/rec/u14m_filter/svtrv2_mdiffdecoder_small/
+  save_epoch_step: [30, 1]
+  # evaluation is run every 500 iterations (see eval_batch_step below)
+  eval_batch_step: [0, 500]
+  eval_epoch_step: [0, 1]
+  cal_metric_during_train: False
+  pretrained_model:
+  checkpoints:
+  use_tensorboard: false
+  infer_img:
+  # for data or label process
+  character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
+  # ./tools/utils/ppocr_keys_v1.txt  # ch
+  max_text_length: &max_text_length 25
+  use_space_char: &use_space_char False
+  save_res_path: ./output/rec/u14m_filter/predicts_svtrv2_mdiffdecoder_small.txt
+  use_amp: True
+  grad_clip_val: 20.0
+Optimizer:
+  name: AdamW
+  lr: 0.0005 # for 8gpus bs128/gpu
+  weight_decay: 0.05
+  filter_bias_and_bn: True
+LRScheduler:
+  name: OneCycleLR
+  warmup_epoch: 1.5 # pct_start 0.075*20 = 1.5ep
+  cycle_momentum: False
+Architecture:
+  model_type: rec
+  algorithm: MDiff4STR
+  in_channels: 3
+  Transform:
+  Encoder:
+    name: SVTRv2LNConvTwo33
+    use_pos_embed: False
+    dims: [128, 256, 384]
+    depths: [3, 6, 3]
+    num_heads: [4, 8, 12]
+    mixer: [['Conv','Conv','Conv'],['Conv','Conv','Conv','FGlobal','Global','Global'],['Global','Global','Global']]
+    local_k: [[5, 5], [5, 5], [-1, -1]]
+    sub_k: [[1, 1], [2, 1], [-1, -1]]
+    last_stage: false
+    feat2d: False
+  Decoder:
+    name: MDiffDecoder
+    num_decoder_layers: 3
+    nhead: 6
+    max_len: *max_text_length
+    parallel_decoding: False
+    autoregressive_decoding: False
+    low_confidence_decoding: False
+    random_mask_decoding: False
+    semi_autoregressive_decoding: False
+    cloze_mask_decoding: False
+    sampler_step: 3
+    sample_k: &sample_k 3
+    temperature: 1.0
+Loss:
+  name: MDiffLoss
+PostProcess:
+  name: ARLabelDecode
+  character_dict_path: *character_dict_path
+  use_space_char: *use_space_char
+Metric:
+  name: RecMetric
+  main_indicator: acc
+  is_filter: True
+Train:
+  dataset:
+    name: RatioDataSetTVResize
+    ds_width: True
+    padding: false
+    data_dir_list: ['../Union14M-L-LMDB-Filtered/filter_train_challenging',
+    '../Union14M-L-LMDB-Filtered/filter_train_hard',
+    '../Union14M-L-LMDB-Filtered/filter_train_medium',
+    '../Union14M-L-LMDB-Filtered/filter_train_normal',
+    '../Union14M-L-LMDB-Filtered/filter_train_easy',
+    ]
+    transforms:
+      - DecodeImagePIL: # load image
+          img_mode: RGB
+      - PARSeqAugPIL:
+      - MDiffLabelEncode: # Class handling label
+          character_dict_path: *character_dict_path
+          use_space_char: *use_space_char
+          max_text_length: *max_text_length
+          sample_num: *sample_k
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'reflect_ids', 'noisy_batch', 'masked_indices', 'p_mask', 'length'] # dataloader will return list in this order
+  sampler:
+    name: RatioSampler
+    scales: [[128, 32]] # w, h
+    # divided_factor: ensures the width and height are divisible by the downsampling multiple
+    first_bs: &bs 256
+    fix_bs: false
+    divided_factor: [4, 16] # w, h
+    is_training: True
+  loader:
+    shuffle: True
+    batch_size_per_card: *bs
+    drop_last: True
+    max_ratio: &max_ratio 4
+    num_workers: 4
+Eval:
+  dataset:
+    name: RatioDataSetTVResize
+    ds_width: True
+    padding: False
+    data_dir_list: [
+      '../evaluation/CUTE80',
+      '../evaluation/IC13_857',
+      '../evaluation/IC15_1811',
+      '../evaluation/IIIT5k_3000',
+      '../evaluation/SVT',
+      '../evaluation/SVTP',
+      ]
+    transforms:
+      - DecodeImagePIL: # load image
+          img_mode: RGB
+      - ARLabelEncode: # Class handling label
+          character_dict_path: *character_dict_path
+          use_space_char: *use_space_char
+          max_text_length: *max_text_length
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  sampler:
+    name: RatioSampler
+    scales: [[128, 32]] # w, h
+    # divided_factor: ensures the width and height are divisible by the downsampling multiple
+    first_bs: *bs
+    fix_bs: false
+    divided_factor: [4, 16] # w, h
+    is_training: False
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: *bs
+    max_ratio: *max_ratio
+    num_workers: 4

openocr/configs/rec/unirec/focalsvtr_ardecoder_unirec.yml ADDED Viewed

@@ -0,0 +1,114 @@
+Global:
+  device: gpu
+  epoch_num: 10
+  log_smooth_window: 20
+  print_batch_step: 20
+  output_dir: ./output/rec/unirec-0.1b/
+  save_epoch_step: [0, 1] # save every 1 epoch, starting from epoch 0
+  save_iter_step: [0, 2000]
+  # evaluation is run every 4000 iterations once iteration 2000000 is reached (see eval_batch_step below)
+  eval_batch_step: [2000000, 4000]
+  eval_epoch_step: [150, 1]
+  cal_metric_during_train: False
+  pretrained_model: ./unirec-0.1b/model.pth
+  checkpoints:
+  resume_from_iter: False
+  use_tensorboard: false
+  infer_img: ../crop_img_hand
+  # for data or label process
+  character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt # 96en
+  # ./tools/utils/ppocr_keys_v1.txt  # ch
+  max_text_length: &max_text_length 2048
+  use_space_char: &use_space_char False
+  save_res_path: ./output/rec/unirec-0.1b/predicts_unirec-0.1b.txt
+  use_amp: True
+  use_ema: False
+  use_transformers: True
+  grad_clip_val: 1.0
+  vlm_ocr_config: &vlm_ocr_config ./configs/rec/unirec/unirec-0.1b
+Optimizer:
+  name: AdamW
+  lr: 0.0001 # for 4gpus bs256/gpu
+  weight_decay: 0.01
+  filter_bias_and_bn: True
+LRScheduler:
+  name: OneCycleLR
+  warmup_epoch: 0.3 # pct_start 0.03*10 = 0.3ep
+  cycle_momentum: False
+Architecture:
+  model_type: rec
+  algorithm: UniRec
+  in_channels: 3
+  Transform:
+  Encoder:
+  Decoder:
+    out_channels: -1 # for inference, set to -1
+Loss:
+  name: UniRecLoss
+PostProcess:
+  name: UniRecLabelDecode
+  lower: False
+  tokenizer_path: *vlm_ocr_config
+Metric:
+  name: RecMetric
+  main_indicator: acc
+  is_filter: False
+Train:
+  dataset:
+    name: NaSizeDataSet
+    divided_factor: &divided_factor [64, 64] # w, h
+    max_side: &max_side [960, 1408] # w, h; [64*15, 64*22]
+    root_path: path/to/UniRec40M
+    add_return: True
+    zoom_min_factor: 4
+    use_zoom: True
+    # all_data: True
+    test_data: True
+    use_aug: false
+    use_linedata: False
+    transforms:
+      - UniRecLabelEncode: # Class handling label
+          max_text_length: *max_text_length
+          vlmocr: True
+          tokenizer_path: *vlm_ocr_config # path to tokenizer, e.g. 'vocab.json', 'merges.txt'
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  sampler:
+    name: NaSizeSampler
+    # divided_factor: ensures the width and height are divisible by the downsampling multiple
+    min_bs: 1
+    max_bs: 24
+  loader:
+    shuffle: True
+    batch_size_per_card: 64
+    drop_last: True
+    num_workers: 8
+Eval:
+  dataset:
+    name: LMDBDataSet
+    data_dir: ../evaluation
+    transforms:
+      - DecodeImagePIL: # load image
+          img_mode: RGB
+      - UniRecLabelEncode: # Class handling label
+          max_text_length: *max_text_length
+          vlmocr: True
+          tokenizer_path: *vlm_ocr_config # path to tokenizer, e.g. 'vocab.json', 'merges.txt'
+      - NaSizeResize:
+          max_side: *max_side
+          divided_factor: *divided_factor
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1
+    num_workers: 1

openocr/configs/rec/unirec/opendoc_pipeline.yml ADDED Viewed

@@ -0,0 +1,105 @@
+pipeline_name: PaddleOCR-VL
+batch_size: 64
+use_queues: False
+use_doc_preprocessor: False
+use_layout_detection: True
+use_chart_recognition: False
+format_block_content: False
+merge_layout_blocks: True
+markdown_ignore_labels:
+  - number
+  - footnote
+  - header
+  - header_image
+  - footer
+  - footer_image
+  - aside_text
+SubModules:
+  LayoutDetection:
+    module_name: layout_detection
+    model_name: PP-DocLayoutV2
+    model_dir: null
+    batch_size: 8
+    threshold:
+      0: 0.5 # abstract
+      1: 0.5 # algorithm
+      2: 0.5 # aside_text
+      3: 0.5 # chart
+      4: 0.5 # content
+      5: 0.4 # display_formula
+      6: 0.4 # doc_title
+      7: 0.5 # figure_title
+      8: 0.5 # footer
+      9: 0.5 # footer_image
+      10: 0.5 # footnote
+      11: 0.5 # formula_number
+      12: 0.5 # header
+      13: 0.5 # header_image
+      14: 0.5 # image
+      15: 0.4 # inline_formula
+      16: 0.5 # number
+      17: 0.4 # paragraph_title
+      18: 0.5 # reference
+      19: 0.5 # reference_content
+      20: 0.45 # seal
+      21: 0.5 # table
+      22: 0.4 # text
+      23: 0.4 # text
+      24: 0.5 # vision_footnote
+    layout_nms: True
+    layout_unclip_ratio: [1.0, 1.0]
+    layout_merge_bboxes_mode:
+      0: "union" # abstract
+      1: "union" # algorithm
+      2: "union" # aside_text
+      3: "large" # chart
+      4: "union" # content
+      5: "large" # display_formula
+      6: "large" # doc_title
+      7: "union" # figure_title
+      8: "union" # footer
+      9: "union" # footer_image
+      10: "union" # footnote
+      11: "union" # formula_number
+      12: "union" # header
+      13: "union" # header_image
+      14: "union" # image
+      15: "large" # inline_formula
+      16: "union" # number
+      17: "large" # paragraph_title
+      18: "union" # reference
+      19: "union" # reference_content
+      20: "union" # seal
+      21: "union" # table
+      22: "union" # text
+      23: "union" # text
+      24: "union" # vision_footnote
+  VLRecognition:
+    module_name: vl_recognition
+    model_name: PaddleOCR-VL-0.9B
+    model_dir: null
+    batch_size: 4096
+    genai_config:
+      backend: native
+SubPipelines:
+  DocPreprocessor:
+    pipeline_name: doc_preprocessor
+    batch_size: 8
+    use_doc_orientation_classify: True
+    use_doc_unwarping: True
+    SubModules:
+      DocOrientationClassify:
+        module_name: doc_text_orientation
+        model_name: PP-LCNet_x1_0_doc_ori
+        model_dir: null
+        batch_size: 8
+      DocUnwarping:
+        module_name: image_unwarping
+        model_name: UVDoc
+        model_dir: null

openocr/demo_gradio.py CHANGED Viewed

@@ -9,11 +9,12 @@ import numpy as np
 import json
 import time
 from PIL import Image
-from tools.infer_e2e import OpenOCR, check_and_download_font, draw_ocr_box_txt
+from tools.infer_e2e import OpenOCRE2E, check_and_download_font, draw_ocr_box_txt
+from tools.download_example_images import get_example_images_path
 def initialize_ocr(model_type, drop_score):
-    return OpenOCR(mode=model_type, drop_score=drop_score)
+    return OpenOCRE2E(mode=model_type, drop_score=drop_score, backend='onnx')
 # Default model type
@@ -108,12 +109,25 @@ def find_file_in_current_dir_and_subdirs(file_name):
             return relative_path
-e2e_img_example = list_image_paths('./OCR_e2e_img')
+# Get example images path and download if necessary
+example_img_dir = get_example_images_path(demo_type='ocr')
+e2e_img_example = list_image_paths(example_img_dir)
-if __name__ == '__main__':
+def launch_demo(share=False, server_port=7860, server_name='0.0.0.0'):
+    """Launch OpenOCR Gradio demo with default configuration.
+    Args:
+        share: Whether to create a public share link (default: False)
+        server_port: Server port (default: 7860)
+        server_name: Server name (default: '0.0.0.0')
+    Returns:
+        gr.Blocks: Gradio demo instance
+    """
     css = '.image-container img { width: 100%; max-height: 320px;}'
-    with gr.Blocks(css=css) as demo:
+    with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
         gr.HTML("""
                 <h1 style='text-align: center;'><a href="https://github.com/Topdu/OpenOCR">OpenOCR</a></h1>
                 <p style='text-align: center;'>准确高效的通用 OCR 系统 （由<a href="https://fvl.fudan.edu.cn">FVL实验室</a> <a href="https://github.com/Topdu/OpenOCR">OCR Team</a> 创建） <a href="https://github.com/Topdu/OpenOCR/tree/main?tab=readme-ov-file#quick-start">[本地快速部署]</a></p>"""
@@ -126,7 +140,7 @@ if __name__ == '__main__':
                 examples = gr.Examples(examples=e2e_img_example,
                                        inputs=input_image,
                                        label='Examples')
-                downstream = gr.Button('Run')
+                downstream = gr.Button('🚀 运行识别', variant='primary')
                 # 添加参数调节组件
                 with gr.Column():
@@ -139,7 +153,8 @@ if __name__ == '__main__':
                             ['slow', 'fast'],
                             value='slow',
                             label='Detection Score Mode',
-                            info='文本框的置信度计算模式，默认为 slow。slow 模式计算速度较慢，但准确度较高。fast 模式计算速度较快，但准确度较低。'
+                            info=
+                            '文本框的置信度计算模式，默认为 slow。slow 模式计算速度较慢，但准确度较高。fast 模式计算速度较快，但准确度较低。'
                         )
                     with gr.Row():
                         rec_drop_score_slider = gr.Slider(
@@ -204,4 +219,9 @@ if __name__ == '__main__':
                                  img_mask,
                              ])
-    demo.launch(share=True)
+    demo.launch(share=share, server_port=server_port, server_name=server_name)
+    return demo
+if __name__ == '__main__':
+    launch_demo(share=False)

openocr-python 0.0.9__py3-none-any.whl → 0.1.0.dev0__py3-none-any.whl

openocr-python 0.0.9py3-none-any.whl → 0.1.0.dev0py3-none-any.whl