PyPI - openocr-python - Versions diffs - 0.0.9__py3-none-any.whl → 0.1.0.dev0__py3-none-any.whl - Mend

openocr-python 0.0.9__py3-none-any.whl → 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

openocr/__init__.py +35 -1
openocr/configs/dataset/rec/evaluation.yaml +41 -0
openocr/configs/dataset/rec/ltb.yaml +9 -0
openocr/configs/dataset/rec/mjsynth.yaml +11 -0
openocr/configs/dataset/rec/openvino.yaml +25 -0
openocr/configs/dataset/rec/ost.yaml +17 -0
openocr/configs/dataset/rec/synthtext.yaml +7 -0
openocr/configs/dataset/rec/test.yaml +77 -0
openocr/configs/dataset/rec/textocr.yaml +13 -0
openocr/configs/dataset/rec/textocr_horizontal.yaml +13 -0
openocr/configs/dataset/rec/union14m_b.yaml +47 -0
openocr/configs/dataset/rec/union14m_l_filtered.yaml +35 -0
openocr/configs/rec/cmer/cmer.yml +127 -0
openocr/configs/rec/mdiff4str/svtrv2_mdiffdecoder_base.yml +152 -0
openocr/configs/rec/mdiff4str/svtrv2_mdiffdecoder_small.yml +152 -0
openocr/configs/rec/unirec/focalsvtr_ardecoder_unirec.yml +114 -0
openocr/configs/rec/unirec/opendoc_pipeline.yml +105 -0
openocr/demo_gradio.py +28 -8
openocr/demo_opendoc.py +572 -0
openocr/demo_unirec.py +392 -0
openocr/opendet/losses/__init__.py +5 -7
openocr/opendet/preprocess/crop_resize.py +2 -1
openocr/openocr.py +685 -0
openocr/openrec/losses/__init__.py +8 -3
openocr/openrec/losses/cmer_loss.py +12 -0
openocr/openrec/losses/mdiff_loss.py +11 -0
openocr/openrec/losses/unirec_loss.py +12 -0
openocr/openrec/metrics/__init__.py +4 -1
openocr/openrec/metrics/rec_metric_cmer.py +328 -0
openocr/openrec/modeling/cmer_modeling/modeling_cmer.py +643 -0
openocr/openrec/modeling/decoders/__init__.py +1 -0
openocr/openrec/modeling/decoders/ctc_decoder.py +1 -1
openocr/openrec/modeling/decoders/dan_decoder.py +4 -4
openocr/openrec/modeling/decoders/dptr_parseq_clip_b_decoder.py +1563 -1398
openocr/openrec/modeling/decoders/mdiff_decoder.py +587 -0
openocr/openrec/modeling/decoders/smtr_decoder.py +99 -48
openocr/openrec/modeling/unirec_modeling/configuration_unirec.py +166 -0
openocr/openrec/modeling/unirec_modeling/modeling_unirec.py +433 -0
openocr/openrec/optimizer/__init__.py +4 -3
openocr/openrec/optimizer/lr.py +49 -0
openocr/openrec/postprocess/__init__.py +2 -0
openocr/openrec/postprocess/abinet_postprocess.py +1 -1
openocr/openrec/postprocess/ar_postprocess.py +1 -1
openocr/openrec/postprocess/cmer_postprocess.py +86 -0
openocr/openrec/postprocess/cppd_postprocess.py +1 -1
openocr/openrec/postprocess/igtr_postprocess.py +1 -1
openocr/openrec/postprocess/lister_postprocess.py +1 -1
openocr/openrec/postprocess/mgp_postprocess.py +1 -1
openocr/openrec/postprocess/nrtr_postprocess.py +2 -2
openocr/openrec/postprocess/smtr_postprocess.py +1 -1
openocr/openrec/postprocess/srn_postprocess.py +1 -1
openocr/openrec/postprocess/unirec_postprocess.py +58 -0
openocr/openrec/postprocess/visionlan_postprocess.py +1 -1
openocr/openrec/preprocess/__init__.py +5 -0
openocr/openrec/preprocess/ce_label_encode.py +1 -1
openocr/openrec/preprocess/cmer_label_encode.py +1025 -0
openocr/openrec/preprocess/ctc_label_encode.py +1 -1
openocr/openrec/preprocess/dptr_label_encode.py +177 -157
openocr/openrec/preprocess/igtr_label_encode.py +4 -2
openocr/openrec/preprocess/mdiff_label_encode.py +312 -0
openocr/openrec/preprocess/rec_aug.py +128 -2
openocr/openrec/preprocess/resize.py +57 -0
openocr/openrec/preprocess/unirec_label_encode.py +62 -0
openocr/tools/data/__init__.py +78 -55
openocr/tools/data/cmer_web_dataset.py +310 -0
openocr/tools/data/native_size_dataset.py +753 -0
openocr/tools/data/native_size_sampler.py +158 -0
openocr/tools/data/ratio_dataset_tvresize.py +2 -0
openocr/tools/data/ratio_sampler.py +2 -1
openocr/tools/download/download_dataset.py +38 -0
openocr/tools/download/utils.py +28 -0
openocr/tools/download_example_images.py +236 -0
openocr/tools/engine/trainer.py +155 -39
openocr/tools/eval_rec_all_ch.py +2 -2
openocr/tools/infer_det.py +20 -2
openocr/tools/infer_doc.py +898 -0
openocr/tools/infer_doc_onnx.py +1172 -0
openocr/tools/infer_e2e.py +27 -10
openocr/tools/infer_rec.py +64 -15
openocr/tools/infer_unirec_onnx.py +730 -0
openocr/tools/to_markdown.py +468 -0
openocr/tools/utils/ckpt.py +17 -5
openocr/tools/utils/opendoc_onnx_utils/utils.py +1052 -0
openocr_python-0.1.0.dev0.dist-info/METADATA +324 -0
{openocr_python-0.0.9.dist-info → openocr_python-0.1.0.dev0.dist-info}/RECORD +89 -45
{openocr_python-0.0.9.dist-info → openocr_python-0.1.0.dev0.dist-info}/WHEEL +1 -1
openocr_python-0.1.0.dev0.dist-info/entry_points.txt +2 -0
openocr_python-0.0.9.dist-info/METADATA +0 -149
/openocr_python-0.0.9.dist-info/LICENCE → /openocr_python-0.1.0.dev0.dist-info/licenses/LICENSE +0 -0
{openocr_python-0.0.9.dist-info → openocr_python-0.1.0.dev0.dist-info}/top_level.txt +0 -0

openocr/tools/data/native_size_sampler.py ADDED Viewed

@@ -0,0 +1,158 @@
+import numpy as np
+import torch
+from torch.utils.data import Sampler
def resize_image(original_width, original_height, max_width, max_height):
    """Fit an image size inside a bounding box while keeping its aspect ratio.

    A size that already fits inside ``max_width`` x ``max_height`` is returned
    unchanged (nothing is ever scaled up). Otherwise the side that reaches its
    limit first is pinned to that limit and the other side is scaled by the
    aspect ratio and truncated to an integer.

    Args:
        original_width: Source width.
        original_height: Source height; must be non-zero.
        max_width: Largest allowed width.
        max_height: Largest allowed height.

    Returns:
        Tuple ``(new_width, new_height)``.

    Raises:
        ZeroDivisionError: If ``original_height`` is 0, or ``max_height`` is 0
            for a size that needs shrinking.
    """
    aspect_ratio = original_width / original_height

    # Already within both limits: no adjustment needed.
    if original_width <= max_width and original_height <= max_height:
        return original_width, original_height

    if (max_width / max_height) >= aspect_ratio:
        # The box is relatively wider than the image: height is the limit.
        new_height = max_height
        # Truncation can reach 0 for very tall images; keep at least 1 pixel.
        new_width = max(1, int(new_height * aspect_ratio))
    else:
        # The image is relatively wider than the box: width is the limit.
        new_width = max_width
        # Truncation can reach 0 for very wide images; keep at least 1 pixel.
        new_height = max(1, int(new_width / aspect_ratio))
    return new_width, new_height
class NaSizeSampler(Sampler):
    """Batch sampler that builds batches from same-size buckets of samples.

    Each yielded batch is a list of ``[img_id, w_r, h_r, zoom_time]`` entries
    (plus a trailing resume flag when resuming, see ``resume_iter``). All
    entries of one batch come from the same ``'<w>_<h>'`` bucket of
    ``data_source.img_label_pair_list`` and share one ``zoom_time`` value.
    """

    def __init__(
            self,
            data_source,
            max_side=(64 * 15, 64 * 22),  # w,h
            min_bs=1,
            max_bs=1024,
            resume_iter=0,
            scale_ratio=2,
            seed=None):
        """Pre-compute every batch for one epoch.

        Args:
            data_source: Dataset-like object. This sampler reads its ``seed``,
                ``img_label_pair_list`` (mapping of ``'<w>_<h>'`` keys to
                sized collections), ``do_shuffle``, ``mode`` and ``max_side``
                attributes.
            max_side: Ignored; ``data_source.max_side`` (``[w, h]``) is always
                used instead. Kept for interface compatibility.
            min_bs: Multiplier applied to the area-derived batch size.
            max_bs: Upper bound on the batch size of any bucket.
            resume_iter: When greater than 0 (and batches are shuffled), every
                entry gets an extra trailing flag: 1 for batches at shuffled
                positions ``0..resume_iter`` inclusive, 0 for later batches.
                NOTE(review): the flag is presumably used by the dataset to
                skip work for already-consumed batches - confirm there.
            scale_ratio: Additional multiplier applied to the batch size.
            seed: Ignored; ``data_source.seed`` seeds the batch shuffle.

        Batch size per bucket is
        ``(max_w * max_h) // (w * h) * min_bs * scale_ratio``, capped by
        ``max_bs`` and the bucket length and never below 1. In training mode a
        bucket that does not divide evenly is padded with randomly re-drawn
        indices; otherwise the remainder is emitted as one smaller batch.
        """
        self.data_source = data_source
        self.seed = data_source.seed
        self.img_label_pair_list = data_source.img_label_pair_list
        self.shuffle = data_source.do_shuffle
        self.is_training = data_source.mode == 'train'
        max_side = data_source.max_side
        max_area = max_side[0] * max_side[1]

        batch_list = []
        # Visit buckets from the smallest image area to the largest.
        sorted_keys = sorted(
            self.img_label_pair_list.keys(),
            key=lambda k: int(k.split('_')[0]) * int(k.split('_')[1]))
        for key in sorted_keys:
            w_r, h_r = key.split('_')
            w_r = int(w_r)
            h_r = int(h_r)
            num_samples = len(self.img_label_pair_list[key])
            if num_samples == 0:
                # Nothing to batch; also avoids a zero batch size below.
                continue
            current_bs = int((max_area // (w_r * h_r)) * min_bs * scale_ratio)
            # A bucket larger than max_side would otherwise get batch size 0
            # and crash the integer division below.
            current_bs = max(1, min(current_bs, max_bs, num_samples))
            batch_num = num_samples // current_bs
            all_indices = np.arange(num_samples, dtype=np.int64)
            drop = num_samples - current_bs * batch_num

            if self.is_training and drop > 0:
                # Pad the last batch to full size with randomly re-drawn samples.
                padding = np.random.choice(all_indices,
                                           current_bs - drop,
                                           replace=True)
                full_indices = np.append(all_indices, padding)
            else:
                full_indices = all_indices[:batch_num * current_bs]
            batch_list.extend(
                self._pack_batches(full_indices, current_bs, w_r, h_r))

            if not self.is_training and drop > 0:
                # Outside training, keep the leftovers as one smaller batch.
                batch_list.extend(
                    self._pack_batches(all_indices[batch_num * current_bs:],
                                       drop, w_r, h_r))

        self.fix_cobatch = 4
        self.batch_list = batch_list  # [[[img_id, w_r, h_r, zoom_time], ...], ...]
        self.length = len(self.batch_list)
        self.batchs_id_sort = list(range(self.length))
        self.batchs_in_one_epoch_id = self.batchs_id_sort.copy()
        self.is_shuffled = False
        self.resume_iter = resume_iter
        if self.shuffle or self.is_training:
            g = torch.Generator()
            g.manual_seed(self.seed)  # same seed so every process gets the same order
            random_indices = torch.randperm(len(self.batchs_in_one_epoch_id),
                                            generator=g).tolist()
            self.batchs_in_one_epoch_id = [
                self.batchs_in_one_epoch_id[i] for i in random_indices
            ]
            if self.resume_iter > 0:
                # Tag every entry with a resume flag based on the position of
                # its batch in the shuffled order.
                for position, batch_id in enumerate(
                        self.batchs_in_one_epoch_id):
                    flag = 1 if position <= self.resume_iter else 0
                    for entry in self.batch_list[batch_id]:
                        entry.append(flag)
                self.resume_iter = 0

    @staticmethod
    def _pack_batches(indices, batch_size, w_r, h_r):
        """Split ``indices`` into batches of ``[img_id, w_r, h_r, zoom_time]``."""
        ids = indices.reshape(-1, batch_size, 1)
        widths = np.full_like(ids, w_r)
        heights = np.full_like(ids, h_r)
        # One random integer in [-5, 50) per batch, repeated for each entry.
        zoom = np.random.randint(-5, 50, [ids.shape[0], 1, 1])
        zoom = np.tile(zoom, (1, ids.shape[1], 1))
        return np.concatenate([ids, widths, heights, zoom], axis=-1).tolist()

    def __iter__(self):
        """Yield the pre-computed batches in the order fixed at construction."""
        for batch_tuple_id in self.batchs_in_one_epoch_id:
            yield self.batch_list[batch_tuple_id]

    def set_epoch(self, epoch: int):
        """Record the current epoch; the batch order is not changed by it."""
        self.epoch = epoch

    def __len__(self):
        """Return the number of batches per epoch."""
        return self.length

openocr/tools/data/ratio_dataset_tvresize.py CHANGED Viewed

@@ -167,6 +167,8 @@ class RatioDataSetTVResize(Dataset):
         valid_ratio = min(1.0, float(resized_w / imgW))
         data['image'] = img
         data['valid_ratio'] = valid_ratio
+        r = float(w) / float(h)
+        data['real_ratio'] = max(1, round(r))
         return data
     def get_lmdb_sample_info(self, txn, index):

openocr/tools/data/ratio_sampler.py CHANGED Viewed

@@ -56,7 +56,8 @@ class RatioSampler(Sampler):
         self.base_im_w = base_im_w
         # Get the GPU and node related information
-        num_replicas = torch.cuda.device_count() if torch.cuda.is_available() else 1
+        num_replicas = torch.cuda.device_count() if torch.cuda.is_available(
+        ) else 1
         # rank = dist.get_rank()
         rank = (int(os.environ['LOCAL_RANK'])
                 if 'LOCAL_RANK' in os.environ else 0)

openocr/tools/download/download_dataset.py ADDED Viewed

@@ -0,0 +1,38 @@
+import os
+import sys
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
+sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..', '..')))
+from engine import Config
+from utility import ArgsParser
+import download.utils
+from torchvision.datasets.utils import extract_archive
def main(cfg):
    """Download every file listed in ``cfg`` and unpack the archives.

    Args:
        cfg: Config mapping. ``download.utils.get_dataset_info`` reads the
            URLs, target file paths and certificate-check flag from it, and
            ``cfg['root']`` is the extraction directory.

    Each file is fetched with ``download.utils.urlretrieve``. Files ending in
    ``.mdb`` are kept as downloaded; anything else is passed to
    ``extract_archive`` with ``remove_finished=True``.
    """
    urls, filename_paths, check_validity = download.utils.get_dataset_info(cfg)
    extract_root = cfg['root'] if False else None  # placeholder removed below
    del extract_root
    for url, filename_path in zip(urls, filename_paths):
        print(f'Downloading {filename_path} from {url} . . .')
        download.utils.urlretrieve(url=url,
                                   filename=filename_path,
                                   check_validity=check_validity)
        if filename_path.endswith('.mdb'):
            # `.mdb` files are not archives; nothing to extract.
            continue
        extract_archive(from_path=filename_path,
                        to_path=cfg['root'],
                        remove_finished=True)
    print('Downloads finished!')
if __name__ == '__main__':
    # Parse the command line, load the config file it names, then layer the
    # remaining flags and the `opt` overrides on top before downloading.
    FLAGS = vars(ArgsParser().parse_args())
    cfg = Config(FLAGS['config'])
    opt = FLAGS.pop('opt')
    cfg.merge_dict(FLAGS)
    cfg.merge_dict(opt)
    main(cfg.cfg)

openocr/tools/download/utils.py ADDED Viewed

@@ -0,0 +1,28 @@
+import urllib
+import ssl
+from tqdm import tqdm
+import os
def get_dataset_info(cfg):
    """Extract the download settings from a config mapping.

    Args:
        cfg: Mapping with the keys ``'download_links'``, ``'filenames'`` and
            ``'check_validity'``.

    Returns:
        Tuple ``(download_links, filenames, check_validity)`` with the values
        stored under those keys.

    Raises:
        KeyError: If any of the three keys is missing.
    """
    return cfg['download_links'], cfg['filenames'], cfg['check_validity']
+# Modified from torchvision as some datasets cant pass the certificate validity check:
+# https://github.com/pytorch/vision/blob/868a3b42f4bffe29e4414ad7e4c7d9d0b4690ecb/torchvision/datasets/utils.py#L27C1-L32C40
def urlretrieve(url, filename, chunk_size=1024 * 32, check_validity=True):
    """Stream ``url`` to ``filename`` with a progress bar.

    Args:
        url: Address to download.
        filename: Destination path; its parent directory is created if needed.
        chunk_size: Number of bytes read per iteration.
        check_validity: When False, TLS hostname checking and certificate
            verification are both disabled. SECURITY: only disable this for
            hosts you trust, since the download can then be tampered with.

    Raises:
        urllib.error.URLError: If the request fails.
        OSError: If the destination cannot be created or written.
    """
    # `import urllib` alone does not load the `request` submodule, so import
    # it explicitly instead of relying on another module having done so.
    import urllib.request

    target_dir = os.path.dirname(filename)
    if target_dir:
        # os.makedirs('') raises, so skip it for bare file names.
        os.makedirs(target_dir, exist_ok=True)

    ctx = ssl.create_default_context()
    if not check_validity:
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE

    request = urllib.request.Request(url)
    with urllib.request.urlopen(request, context=ctx) as response:
        # Not every response type exposes `length`; None means unknown total.
        total = getattr(response, 'length', None)
        with open(filename, 'wb') as fh, tqdm(total=total,
                                              unit='B',
                                              unit_scale=True) as pbar:
            while chunk := response.read(chunk_size):
                fh.write(chunk)
                pbar.update(len(chunk))

openocr/tools/download_example_images.py ADDED Viewed

@@ -0,0 +1,236 @@
+"""Download example images from ModelScope dataset for demo purposes."""
+import os
+from pathlib import Path
+import shutil
_EXAMPLE_IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')


def _example_subdirs(root):
    """Return the per-demo example folders expected under ``root``."""
    root = Path(root)
    return {
        'ocr': root / 'ocr',
        'doc': root / 'doc',
        'unirec': root / 'unirec'
    }


def _count_example_images(folder):
    """Count regular files directly inside ``folder`` with an image suffix."""
    return sum(1 for f in folder.glob('*')
               if f.is_file() and f.suffix.lower() in _EXAMPLE_IMAGE_SUFFIXES)


def _verify_example_folders(subdirs):
    """Report on each folder; return True only if all exist and hold images."""
    all_folders_valid = True
    for folder_name, folder_path in subdirs.items():
        if not folder_path.exists():
            print(f'  ⚠️  {folder_name} folder not found')
            all_folders_valid = False
            continue
        img_count = _count_example_images(folder_path)
        if img_count > 0:
            print(f'  ✓ Found {folder_name} folder with {img_count} images')
        else:
            print(f'  ⚠️  {folder_name} folder exists but has no images')
            all_folders_valid = False
    return all_folders_valid


def download_example_images():
    """Download the demo example images, trying several sources in turn.

    Sources are tried in this order until one succeeds:

    1. ModelScope dataset ``topdktu/openocr_test_images``.
    2. HuggingFace dataset ``topdu/openocr_test_images``.
    3. A GitHub release tarball that only contains the OCR examples; empty
       ``doc`` and ``unirec`` folders are created next to it.

    Everything is stored under ``~/.cache/openocr``. Failures are printed, not
    raised, and a summary of the resulting folders is always printed.

    Returns:
        Dict mapping ``'ocr'``, ``'doc'`` and ``'unirec'`` to directory paths
        (as strings). The dict is empty when no source produced any folders.
    """
    subdirs = {}
    print('📥 Downloading example images...')
    download_success = False
    cache_root = str(Path.home() / '.cache' / 'openocr')
    try:
        # Try ModelScope first (default).
        print('🌐 Trying ModelScope (China mirror) first...')
        try:
            from modelscope.hub.snapshot_download import snapshot_download
            cache_path = snapshot_download(
                repo_id='topdktu/openocr_test_images',
                repo_type='dataset',
                cache_dir=cache_root
            )
            print(f'✅ Dataset downloaded from ModelScope to {cache_path}')
            subdirs = _example_subdirs(cache_path)
            if _verify_example_folders(subdirs):
                download_success = True
            else:
                print('⚠️  ModelScope download incomplete, trying HuggingFace...')
                subdirs = {}
        except ImportError:
            # Must come before the generic handler, which would otherwise
            # swallow it and hide the install hint.
            print('⚠️  modelscope not installed. Install with: pip install modelscope')
            print('   Trying HuggingFace...')
        except Exception as e:
            print(f'⚠️  ModelScope snapshot download failed: {e}')
            print('   Trying HuggingFace...')

        if not download_success:
            print('🌐 Using HuggingFace...')
            try:
                from huggingface_hub import snapshot_download
                dataset_path = snapshot_download(
                    repo_id='topdu/openocr_test_images',
                    repo_type='dataset',
                    cache_dir=cache_root
                )
                print(f'✅ Dataset downloaded to {dataset_path}')
                subdirs = _example_subdirs(dataset_path)
                if _verify_example_folders(subdirs):
                    download_success = True
            except ImportError:
                print('⚠️  huggingface_hub not installed. Install with: pip install huggingface_hub')
            except Exception as e:
                print(f'⚠️  HuggingFace download failed: {e}')

        # GitHub releases are the last resort and only provide OCR examples.
        if not download_success:
            print('🌐 Trying GitHub releases as fallback for OCR examples...')
            try:
                import urllib.request
                import tarfile
                import tempfile
                ocr_url = 'https://github.com/Topdu/OpenOCR/releases/download/develop0.0.1/OCR_e2e_img.tar'
                with tempfile.TemporaryDirectory() as temp_dir:
                    temp_path = Path(temp_dir)
                    tar_path = temp_path / 'OCR_e2e_img.tar'
                    print(f'  Downloading from {ocr_url}...')
                    urllib.request.urlretrieve(ocr_url, str(tar_path))
                    print('  Extracting...')
                    with tarfile.open(str(tar_path), 'r') as tar:
                        if hasattr(tarfile, 'data_filter'):
                            # Refuse members that would escape the target
                            # directory, where the runtime supports filters.
                            tar.extractall(path=str(temp_path), filter='data')
                        else:
                            tar.extractall(path=str(temp_path))
                    cache_base = Path.home() / '.cache' / 'openocr' / 'openocr_examples'
                    cache_base.mkdir(parents=True, exist_ok=True)
                    # Replace any previous copy of the OCR examples.
                    ocr_source = temp_path / 'OCR_e2e_img'
                    ocr_target = cache_base / 'ocr'
                    if ocr_source.exists():
                        if ocr_target.exists():
                            shutil.rmtree(str(ocr_target))
                        shutil.copytree(str(ocr_source), str(ocr_target))
                    subdirs = {
                        'ocr': ocr_target,
                        'doc': cache_base / 'doc',
                        'unirec': cache_base / 'unirec'
                    }
                    # The tarball has no doc/unirec examples; create empty
                    # folders so callers still get valid paths.
                    for key in ['doc', 'unirec']:
                        subdirs[key].mkdir(parents=True, exist_ok=True)
                    print('  ✓ OCR example images downloaded from GitHub to cache')
                    download_success = True
            except Exception as e:
                print(f'⚠️  GitHub download failed: {e}')

        if download_success:
            print('✅ Example images ready!')
        else:
            print('⚠️  Could not download example images automatically.')
    except Exception as e:
        print(f'❌ Download failed: {e}')
    finally:
        # Always summarise what ended up on disk.
        if subdirs:
            print('\n📝 Example image directories:')
            for name, subdir in subdirs.items():
                if not subdir.exists():
                    print(f'   ⚠️  {name}: Directory not found at {subdir}')
                elif not any(subdir.iterdir()):
                    print(f'   ⚠️  {name}: No images found in {subdir}')
                    print('      You can manually add example images to this directory.')
                else:
                    # Same counting rule as the verification step above.
                    img_count = _count_example_images(subdir)
                    print(f'   ✓ {name}: {img_count} images found in {subdir}')
        else:
            print('\n⚠️  No example image directories available')
    return {k: str(v) for k, v in subdirs.items()}
def get_example_images_path(demo_type='ocr'):
    """Return the example-image directory for one demo.

    Calls ``download_example_images()`` on every invocation and looks the
    requested demo up in the mapping it returns.

    Args:
        demo_type: Demo name: ``'ocr'``, ``'doc'`` or ``'unirec'``.

    Returns:
        Directory path for ``demo_type``. If that key is missing, a warning is
        printed and the ``'ocr'`` path is returned instead, or ``''`` when
        that is missing too.
    """
    print(f'Getting example images for {demo_type}...')
    paths = download_example_images()
    try:
        return paths[demo_type]
    except KeyError:
        print(f'⚠️  Unknown demo type: {demo_type}')
        return paths.get('ocr', '')
if __name__ == '__main__':
    # Manual smoke test: run the download and list the resulting folders.
    import argparse

    # No options are defined; parsing still provides --help and rejects
    # unexpected arguments.
    parser = argparse.ArgumentParser(
        description='Download example images for OpenOCR demos')
    args = parser.parse_args()

    paths = download_example_images()
    print('\n📁 Example image directories:')
    for demo_type, path in paths.items():
        print(f'  {demo_type}: {path}')

openocr-python 0.0.9__py3-none-any.whl → 0.1.0.dev0__py3-none-any.whl

openocr-python 0.0.9__py3-none-any.whl → 0.1.0.dev0__py3-none-any.whl