PyPI - doc-page-extractor - Versions diffs - 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl - Mend

doc-page-extractor 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of doc-page-extractor might be problematic. Click here for more details.

Files changed (23) hide show

doc_page_extractor/__init__.py +1 -1
doc_page_extractor/downloader.py +4 -1
doc_page_extractor/extractor.py +7 -13
doc_page_extractor/ocr.py +110 -58
doc_page_extractor/ocr_corrector.py +3 -3
doc_page_extractor/onnxocr/__init__.py +1 -0
doc_page_extractor/onnxocr/cls_postprocess.py +26 -0
doc_page_extractor/onnxocr/db_postprocess.py +246 -0
doc_page_extractor/onnxocr/imaug.py +32 -0
doc_page_extractor/onnxocr/operators.py +187 -0
doc_page_extractor/onnxocr/predict_base.py +52 -0
doc_page_extractor/onnxocr/predict_cls.py +89 -0
doc_page_extractor/onnxocr/predict_det.py +120 -0
doc_page_extractor/onnxocr/predict_rec.py +321 -0
doc_page_extractor/onnxocr/predict_system.py +97 -0
doc_page_extractor/onnxocr/rec_postprocess.py +896 -0
doc_page_extractor/onnxocr/utils.py +71 -0
{doc_page_extractor-0.0.5.dist-info → doc_page_extractor-0.0.7.dist-info}/METADATA +17 -5
doc_page_extractor-0.0.7.dist-info/RECORD +33 -0
doc_page_extractor-0.0.5.dist-info/RECORD +0 -21
{doc_page_extractor-0.0.5.dist-info → doc_page_extractor-0.0.7.dist-info}/LICENSE +0 -0
{doc_page_extractor-0.0.5.dist-info → doc_page_extractor-0.0.7.dist-info}/WHEEL +0 -0
{doc_page_extractor-0.0.5.dist-info → doc_page_extractor-0.0.7.dist-info}/top_level.txt +0 -0

doc_page_extractor/onnxocr/operators.py ADDED Viewed

@@ -0,0 +1,187 @@
+import numpy as np
+import cv2
+import sys
+import math
+class NormalizeImage(object):
+  """Normalize an image: multiply by a scale, subtract the mean, divide by the std.
+  Args:
+    scale: multiplier applied to the pixels. A string is evaluated as a
+      Python expression; None selects 1/255.
+    mean: per-channel mean; None selects [0.485, 0.456, 0.406].
+    std: per-channel std; None selects [0.229, 0.224, 0.225].
+    order: 'chw' stores mean/std with shape (3, 1, 1); any other value
+      stores them with shape (1, 1, 3).
+  """
+  def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs):
+    # NOTE(review): eval() executes arbitrary code; only trusted strings
+    # (such as "1./255." from the detector config) should reach this point.
+    if isinstance(scale, str):
+      scale = eval(scale)
+    if scale is None:
+      scale = 1.0 / 255.0
+    self.scale = np.float32(scale)
+    if mean is None:
+      mean = [0.485, 0.456, 0.406]
+    if std is None:
+      std = [0.229, 0.224, 0.225]
+    if order == 'chw':
+      stat_shape = (3, 1, 1)
+    else:
+      stat_shape = (1, 1, 3)
+    self.mean = np.array(mean).reshape(stat_shape).astype('float32')
+    self.std = np.array(std).reshape(stat_shape).astype('float32')
+  def __call__(self, data):
+    """Replace data['image'] with its normalized float32 version and return data."""
+    img = data['image']
+    from PIL import Image
+    # PIL images are converted to arrays first; anything else must already be an ndarray.
+    if isinstance(img, Image.Image):
+      img = np.array(img)
+    assert isinstance(img,
+              np.ndarray), "invalid input 'img' in NormalizeImage"
+    scaled = img.astype('float32') * self.scale
+    data['image'] = (scaled - self.mean) / self.std
+    return data
+class DetResizeForTest(object):
+  """Resize an image for the text detector and record the resize ratios.
+  The constructor keywords select one of three strategies:
+    image_shape (optionally keep_ratio): resize to a fixed (h, w); resize_type 1.
+    limit_side_len (optionally limit_type): bound one side and snap both
+      sides to multiples of 32; resize_type 0.
+    resize_long: scale the long side, then round both sides up to
+      multiples of 128; resize_type 2.
+  With none of these keywords, limit_side_len=736 and limit_type='min' are used.
+  """
+  def __init__(self, **kwargs):
+    super(DetResizeForTest, self).__init__()
+    self.resize_type = 0
+    self.keep_ratio = False
+    if 'image_shape' in kwargs:
+      self.image_shape = kwargs['image_shape']
+      self.resize_type = 1
+      if 'keep_ratio' in kwargs:
+        self.keep_ratio = kwargs['keep_ratio']
+    elif 'limit_side_len' in kwargs:
+      self.limit_side_len = kwargs['limit_side_len']
+      self.limit_type = kwargs.get('limit_type', 'min')
+    elif 'resize_long' in kwargs:
+      self.resize_type = 2
+      self.resize_long = kwargs.get('resize_long', 960)
+    else:
+      self.limit_side_len = 736
+      self.limit_type = 'min'
+  def __call__(self, data):
+    """Resize data['image'] and store [src_h, src_w, ratio_h, ratio_w] in data['shape']."""
+    img = data['image']
+    src_h, src_w, _ = img.shape
+    # Very small inputs are zero-padded to at least 32x32 before resizing.
+    # src_h/src_w keep the unpadded size.
+    if src_h + src_w < 64:
+      img = self.image_padding(img)
+    if self.resize_type == 0:
+      img, [ratio_h, ratio_w] = self.resize_image_type0(img)
+    elif self.resize_type == 2:
+      img, [ratio_h, ratio_w] = self.resize_image_type2(img)
+    else:
+      img, [ratio_h, ratio_w] = self.resize_image_type1(img)
+    data['image'] = img
+    data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
+    return data
+  def image_padding(self, im, value=0):
+    """Return a uint8 canvas of at least 32x32 filled with value, with im in the top-left corner."""
+    h, w, c = im.shape
+    im_pad = np.zeros((max(32, h), max(32, w), c), np.uint8) + value
+    im_pad[:h, :w, :] = im
+    return im_pad
+  def resize_image_type1(self, img):
+    """Resize to self.image_shape; with keep_ratio the width follows the aspect ratio, rounded up to a multiple of 32."""
+    resize_h, resize_w = self.image_shape
+    ori_h, ori_w = img.shape[:2]  # (h, w, c)
+    if self.keep_ratio is True:
+      resize_w = ori_w * resize_h / ori_h
+      resize_w = math.ceil(resize_w / 32) * 32
+    ratio_h = float(resize_h) / ori_h
+    ratio_w = float(resize_w) / ori_w
+    img = cv2.resize(img, (int(resize_w), int(resize_h)))
+    return img, [ratio_h, ratio_w]
+  def resize_image_type0(self, img):
+    """
+    resize image to a size multiple of 32 which is required by the network
+    args:
+      img(array): array with shape [h, w, c]
+    return(tuple):
+      img, [ratio_h, ratio_w]
+    raises:
+      Exception: if self.limit_type is not 'max', 'min' or 'resize_long'
+      RuntimeError: if cv2.resize rejects the image
+    """
+    limit_side_len = self.limit_side_len
+    h, w, _ = img.shape
+    if self.limit_type == 'max':
+      # Shrink only when the longer side exceeds the limit.
+      longest = max(h, w)
+      ratio = float(limit_side_len) / longest if longest > limit_side_len else 1.
+    elif self.limit_type == 'min':
+      # Enlarge only when the shorter side is below the limit.
+      shortest = min(h, w)
+      ratio = float(limit_side_len) / shortest if shortest < limit_side_len else 1.
+    elif self.limit_type == 'resize_long':
+      ratio = float(limit_side_len) / max(h, w)
+    else:
+      raise Exception('not support limit type, image ')
+    resize_h = int(h * ratio)
+    resize_w = int(w * ratio)
+    # Snap to the nearest multiple of 32, never below 32.
+    resize_h = max(int(round(resize_h / 32) * 32), 32)
+    resize_w = max(int(round(resize_w / 32) * 32), 32)
+    try:
+      resized = cv2.resize(img, (resize_w, resize_h))
+    except cv2.error as err:
+      # Report the failure to the caller instead of exiting the interpreter.
+      raise RuntimeError(
+        'cv2.resize failed for image shape {} with target size ({}, {})'.format(
+          img.shape, resize_w, resize_h)) from err
+    ratio_h = resize_h / float(h)
+    ratio_w = resize_w / float(w)
+    return resized, [ratio_h, ratio_w]
+  def resize_image_type2(self, img):
+    """Scale the long side to self.resize_long, then round both sides up to multiples of 128."""
+    h, w, _ = img.shape
+    ratio = float(self.resize_long) / (h if h > w else w)
+    max_stride = 128
+    resize_h = (int(h * ratio) + max_stride - 1) // max_stride * max_stride
+    resize_w = (int(w * ratio) + max_stride - 1) // max_stride * max_stride
+    img = cv2.resize(img, (int(resize_w), int(resize_h)))
+    ratio_h = resize_h / float(h)
+    ratio_w = resize_w / float(w)
+    return img, [ratio_h, ratio_w]
+class ToCHWImage(object):
+  """Reorder an image from (h, w, c) to (c, h, w)."""
+  def __init__(self, **kwargs):
+    pass
+  def __call__(self, data):
+    """Transpose data['image'] with axes (2, 0, 1) in place of the original and return data."""
+    img = data['image']
+    from PIL import Image
+    # PIL images have no ndarray-style transpose, so convert them first.
+    if isinstance(img, Image.Image):
+      img = np.array(img)
+    chw = img.transpose((2, 0, 1))
+    data['image'] = chw
+    return data
+class KeepKeys(object):
+  """Reduce a data dict to a list of selected values, in keep_keys order."""
+  def __init__(self, keep_keys, **kwargs):
+    self.keep_keys = keep_keys
+  def __call__(self, data):
+    """Return [data[key] for each key in keep_keys]; a missing key raises KeyError."""
+    return [data[key] for key in self.keep_keys]

doc_page_extractor/onnxocr/predict_base.py ADDED Viewed

@@ -0,0 +1,52 @@
+import onnxruntime
+class PredictBase(object):
+  """Shared ONNX Runtime helpers used by the OCR predictor classes."""
+  def __init__(self):
+    pass
+  def get_onnx_session(self, model_dir, use_gpu):
+    """
+    Create an inference session for the model at model_dir.
+    :param model_dir: model path handed to onnxruntime.InferenceSession
+    :param use_gpu: truthy selects CUDAExecutionProvider, otherwise CPUExecutionProvider
+    :return: the onnxruntime.InferenceSession
+    """
+    # Use the GPU when requested, otherwise run on the CPU.
+    provider = 'CUDAExecutionProvider' if use_gpu else 'CPUExecutionProvider'
+    return onnxruntime.InferenceSession(model_dir, None, providers=[provider])
+  def get_output_name(self, onnx_session):
+    """
+    Collect the names of every output of the session.
+    :param onnx_session:
+    :return: list of output names
+    """
+    return [node.name for node in onnx_session.get_outputs()]
+  def get_input_name(self, onnx_session):
+    """
+    Collect the names of every input of the session.
+    :param onnx_session:
+    :return: list of input names
+    """
+    return [node.name for node in onnx_session.get_inputs()]
+  def get_input_feed(self, input_name, image_numpy):
+    """
+    Build the feed dict, mapping every input name to the same array.
+    :param input_name: iterable of input names
+    :param image_numpy: value fed to each input
+    :return: dict of name -> image_numpy
+    """
+    return {name: image_numpy for name in input_name}

doc_page_extractor/onnxocr/predict_cls.py ADDED Viewed

@@ -0,0 +1,89 @@
+import cv2
+import copy
+import numpy as np
+import math
+from .cls_postprocess import ClsPostProcess
+from .predict_base import PredictBase
+class TextClassifier(PredictBase):
+  """Text-line classifier that runs an ONNX model and rotates lines labelled "180".
+  args must provide cls_image_shape, cls_batch_num, cls_thresh, label_list,
+  cls_model_dir and use_gpu.
+  """
+  def __init__(self, args):
+    self.cls_image_shape = args.cls_image_shape
+    self.cls_batch_num = args.cls_batch_num
+    self.cls_thresh = args.cls_thresh
+    self.postprocess_op = ClsPostProcess(label_list=args.label_list)
+    # Initialize the model
+    self.cls_onnx_session = self.get_onnx_session(args.cls_model_dir, args.use_gpu)
+    self.cls_input_name = self.get_input_name(self.cls_onnx_session)
+    self.cls_output_name = self.get_output_name(self.cls_onnx_session)
+  def resize_norm_img(self, img):
+    """Resize img to the classifier height, scale pixels to [-1, 1] and right-pad with zeros.
+    Returns a float32 array of shape cls_image_shape (imgC, imgH, imgW).
+    """
+    imgC, imgH, imgW = self.cls_image_shape
+    h, w = img.shape[0], img.shape[1]
+    ratio = w / float(h)
+    # Keep the aspect ratio but never exceed the model input width.
+    resized_w = min(imgW, int(math.ceil(imgH * ratio)))
+    resized_image = cv2.resize(img, (resized_w, imgH))
+    resized_image = resized_image.astype("float32")
+    if self.cls_image_shape[0] == 1:
+      # Single-channel model: add the channel axis in front.
+      resized_image = resized_image / 255
+      resized_image = resized_image[np.newaxis, :]
+    else:
+      resized_image = resized_image.transpose((2, 0, 1)) / 255
+    resized_image -= 0.5
+    resized_image /= 0.5
+    padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
+    padding_im[:, :, 0:resized_w] = resized_image
+    return padding_im
+  def __call__(self, img_list):
+    """Classify every image and rotate those labelled "180" above cls_thresh.
+    The input list is deep-copied, so the caller's images are not modified.
+    Returns (img_list, cls_res) where cls_res[i] is [label, score] for image i.
+    """
+    img_list = copy.deepcopy(img_list)
+    img_num = len(img_list)
+    # Aspect ratio (w / h) of every text line.
+    width_list = [img.shape[1] / float(img.shape[0]) for img in img_list]
+    # Sorting can speed up the cls process
+    indices = np.argsort(np.array(width_list))
+    # One independent [label, score] slot per image.
+    cls_res = [["", 0.0] for _ in range(img_num)]
+    batch_num = self.cls_batch_num
+    for beg_img_no in range(0, img_num, batch_num):
+      end_img_no = min(img_num, beg_img_no + batch_num)
+      norm_img_batch = np.concatenate([
+        self.resize_norm_img(img_list[indices[ino]])[np.newaxis, :]
+        for ino in range(beg_img_no, end_img_no)
+      ])
+      input_feed = self.get_input_feed(self.cls_input_name, norm_img_batch)
+      outputs = self.cls_onnx_session.run(
+        self.cls_output_name, input_feed=input_feed
+      )
+      prob_out = outputs[0]
+      cls_result = self.postprocess_op(prob_out)
+      for rno, (label, score) in enumerate(cls_result):
+        # Map the position in the sorted batch back to the original index.
+        target = indices[beg_img_no + rno]
+        cls_res[target] = [label, score]
+        if "180" in label and score > self.cls_thresh:
+          img_list[target] = cv2.rotate(img_list[target], cv2.ROTATE_180)
+    return img_list, cls_res

doc_page_extractor/onnxocr/predict_det.py ADDED Viewed

@@ -0,0 +1,120 @@
+import numpy as np
+from .imaug import transform, create_operators
+from .db_postprocess import DBPostProcess
+from .predict_base import PredictBase
+class TextDetector(PredictBase):
+  """Text detector: preprocesses an image, runs an ONNX model and post-processes with DBPostProcess.
+  args must provide det_algorithm, det_limit_side_len, det_limit_type,
+  det_db_thresh, det_db_box_thresh, det_db_unclip_ratio, use_dilation,
+  det_db_score_mode, det_box_type, det_model_dir and use_gpu.
+  """
+  def __init__(self, args):
+    self.args = args
+    self.det_algorithm = args.det_algorithm
+    pre_process_list = [
+      {
+        "DetResizeForTest": {
+          "limit_side_len": args.det_limit_side_len,
+          "limit_type": args.det_limit_type,
+        }
+      },
+      {
+        "NormalizeImage": {
+          "std": [0.229, 0.224, 0.225],
+          "mean": [0.485, 0.456, 0.406],
+          "scale": "1./255.",
+          "order": "hwc",
+        }
+      },
+      {"ToCHWImage": None},
+      {"KeepKeys": {"keep_keys": ["image", "shape"]}},
+    ]
+    postprocess_params = {
+      "name": "DBPostProcess",
+      "thresh": args.det_db_thresh,
+      "box_thresh": args.det_db_box_thresh,
+      "max_candidates": 1000,
+      "unclip_ratio": args.det_db_unclip_ratio,
+      "use_dilation": args.use_dilation,
+      "score_mode": args.det_db_score_mode,
+      "box_type": args.det_box_type,
+    }
+    # Instantiate the preprocessing operators
+    self.preprocess_op = create_operators(pre_process_list)
+    # Instantiate the postprocessing operator
+    self.postprocess_op = DBPostProcess(**postprocess_params)
+    # Initialize the model
+    self.det_onnx_session = self.get_onnx_session(args.det_model_dir, args.use_gpu)
+    self.det_input_name = self.get_input_name(self.det_onnx_session)
+    self.det_output_name = self.get_output_name(self.det_onnx_session)
+  def order_points_clockwise(self, pts):
+    """Return the four points as a float32 (4, 2) array in a fixed order.
+    rect[0] has the smallest x + y, rect[2] the largest; of the two remaining
+    points rect[1] has the smallest y - x and rect[3] the largest.
+    """
+    rect = np.zeros((4, 2), dtype="float32")
+    s = pts.sum(axis=1)
+    rect[0] = pts[np.argmin(s)]
+    rect[2] = pts[np.argmax(s)]
+    tmp = np.delete(pts, (np.argmin(s), np.argmax(s)), axis=0)
+    diff = np.diff(np.array(tmp), axis=1)
+    rect[1] = tmp[np.argmin(diff)]
+    rect[3] = tmp[np.argmax(diff)]
+    return rect
+  def clip_det_res(self, points, img_height, img_width):
+    """Clamp every point into the image bounds, in place, and return points."""
+    for pno in range(points.shape[0]):
+      points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1))
+      points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1))
+    return points
+  def filter_tag_det_res(self, dt_boxes, image_shape):
+    """Order and clip each box, dropping boxes whose width or height is 3 pixels or less."""
+    img_height, img_width = image_shape[0:2]
+    dt_boxes_new = []
+    for box in dt_boxes:
+      if isinstance(box, list):
+        box = np.array(box)
+      box = self.order_points_clockwise(box)
+      box = self.clip_det_res(box, img_height, img_width)
+      rect_width = int(np.linalg.norm(box[0] - box[1]))
+      rect_height = int(np.linalg.norm(box[0] - box[3]))
+      if rect_width <= 3 or rect_height <= 3:
+        continue
+      dt_boxes_new.append(box)
+    return np.array(dt_boxes_new)
+  def filter_tag_det_res_only_clip(self, dt_boxes, image_shape):
+    """Clip each box to the image bounds without reordering or size filtering."""
+    img_height, img_width = image_shape[0:2]
+    dt_boxes_new = []
+    for box in dt_boxes:
+      if isinstance(box, list):
+        box = np.array(box)
+      dt_boxes_new.append(self.clip_det_res(box, img_height, img_width))
+    return np.array(dt_boxes_new)
+  def __call__(self, img):
+    """Detect text boxes in img and return them as an array clipped to the image."""
+    ori_im = img.copy()
+    data = transform({"image": img}, self.preprocess_op)
+    img, shape_list = data
+    if img is None:
+      # NOTE(review): this path returns a (None, 0) tuple while the normal
+      # path returns only the boxes; confirm callers handle both shapes.
+      return None, 0
+    # Add the batch axis expected by the model and the post-processor.
+    img = np.expand_dims(img, axis=0)
+    shape_list = np.expand_dims(shape_list, axis=0)
+    img = img.copy()
+    input_feed = self.get_input_feed(self.det_input_name, img)
+    outputs = self.det_onnx_session.run(self.det_output_name, input_feed=input_feed)
+    preds = {"maps": outputs[0]}
+    post_result = self.postprocess_op(preds, shape_list)
+    dt_boxes = post_result[0]["points"]
+    if self.args.det_box_type == "poly":
+      dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes, ori_im.shape)
+    else:
+      dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)
+    return dt_boxes

doc-page-extractor 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl

Potentially problematic release.

doc-page-extractor 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl