PyPI - ocrany - Versions diffs - 0.1.0__py3-none-any.whl - Mend

ocrany 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

__init__.py +9 -0
extractor.py +26 -0
hard_ocr.py +23 -0
light_ocr.py +39 -0
ocrany-0.1.0.dist-info/METADATA +15 -0
ocrany-0.1.0.dist-info/RECORD +8 -0
ocrany-0.1.0.dist-info/WHEEL +5 -0
ocrany-0.1.0.dist-info/top_level.txt +4 -0

__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+from src.light_ocr import LightWeightOCR
+from src.hard_ocr import HardWeightOCR
+from src.extractor import Extractor
+# Public API of the package: the names exported by a star-import.
+__all__ = [
+    'LightWeightOCR',
+    'HardWeightOCR',
+    'Extractor'
+]

extractor.py ADDED Viewed

@@ -0,0 +1,26 @@
+import pypdfium2 as pdfium
+from chandra.input import load_pdf_images
+from ocr.src.light_ocr import LightWeightOCR
+from ocr.src.hard_ocr import HardWeightOCR
+class Extractor:
+    def __init__(
+            self,
+            light_model_name_or_path: str,
+            light_vllm_api_url: str,
+            hard_ocr_mode: str = "vllm",
+            hard_vllm_api_url: str = None
+    ):
+        self.light_ocr = LightWeightOCR(light_model_name_or_path, light_vllm_api_url)
+        self.hard_ocr = HardWeightOCR(hard_ocr_mode, hard_vllm_api_url)
+    def extract(self, file_path: str, max_tokens: int, temperature: float) -> str:
+        pdf = pdfium.PdfDocument(file_path)
+        if len(pdf) == 1:
+            return self.light_ocr.extract(pdf[0], max_tokens, temperature)
+        images = load_pdf_images(file_path, page_range=[i for i in range(min(40, len(pdf)))])
+        return self.hard_ocr.extract(images)

hard_ocr.py ADDED Viewed

@@ -0,0 +1,23 @@
+from chandra.model import InferenceManager
+from chandra.model.schema import BatchInputItem
+class HardWeightOCR:
+    def __init__(self, hard_ocr_mode: str = "vllm", hard_vllm_api_url: str = None):
+        if hard_ocr_mode not in ["vllm", "hf"]:
+            raise ValueError("Only supports `vllm` or `hf` inference mode.")
+        if hard_ocr_mode == "vllm" and not hard_vllm_api_url:
+            raise ValueError("Need to provide vllm API url for vllm mode.")
+        self.ocr = InferenceManager(method=hard_ocr_mode)
+        self.hard_vllm_api_url = hard_vllm_api_url
+    def process_images(self, images: list) -> str:
+        batch_images = [BatchInputItem(image=image, prompt_type="ocr_layout") for image in images]
+        responses = self.ocr.generate(batch_images, vllm_api_base=self.hard_vllm_api_url)
+        return "\n\n".join(res.markdown.strip() for res in responses)
+    def extract(self, images: list) -> str:
+        return self.process_images(images)

light_ocr.py ADDED Viewed

@@ -0,0 +1,39 @@
+import base64
+import io
+import requests
+from PIL import Image
+class LightWeightOCR:
+    def __init__(
+            self,
+            light_model_name_or_path: str,
+            light_vllm_api_url: str,
+    ) -> None:
+        self.light_model_name_or_path = light_model_name_or_path
+        self.light_vllm_api_url = light_vllm_api_url
+    def extract(self, image: Image.Image, max_tokens: int, temperature: float) -> str:
+        pil_image = image.render(scale=2.77).to_pil()
+        buffer = io.BytesIO()
+        pil_image.save(buffer, format="PNG")
+        image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
+        payload = {
+            "model": self.light_model_name_or_path,
+            "messages": [{
+                "role": "user",
+                "content": [{
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/png;base64,{image_base64}"}
+                }]
+            }],
+            "max_tokens": max_tokens,
+            "temperature": temperature,
+        }
+        response = requests.post(self.light_vllm_api_url, json=payload)
+        response = response.json()['choices'][0]['message']['content']
+        return response

ocrany-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,15 @@
+Metadata-Version: 2.4
+Name: ocrany
+Version: 0.1.0
+Summary: A lightweight and hardware-accelerated OCR pipeline
+Author-email: Đặng Phương Nam <phuongnamdpn2k2@gmail.com>
+Project-URL: Homepage, https://github.com/phuongnam2002/OCRANY
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+Requires-Dist: chandra_ocr
+Requires-Dist: json_repair
+Requires-Dist: openai
+Requires-Dist: Pillow
+Requires-Dist: pypdfium2
+Requires-Dist: python-dotenv
+Requires-Dist: tqdm

ocrany-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,8 @@
+__init__.py,sha256=ZzLRUfW4GTKHOBvkUDuydIzOK0d_zpgCmsEacq4DpoQ,199
+extractor.py,sha256=vtYz43aFVtdeiEda_2j1ry0sh81EKHEGUFSYsc8J0xw,937
+hard_ocr.py,sha256=-ySU7hQvZRzxM75RjitDw2DhNq2FhJWyX25gULgaZR8,995
+light_ocr.py,sha256=nQYiNxt4NqtzaFUmZ40TffBzr7XLlZpH85OFhCpt3lI,1244
+ocrany-0.1.0.dist-info/METADATA,sha256=VlPh1MRe9wZAh-dgU_PD67j1znq_gaFXmjaIcS68lqA,472
+ocrany-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
+ocrany-0.1.0.dist-info/top_level.txt,sha256=CM2uG3Xs3bwLOo1dogbg1QSKVa056xkkYbmxbdrCKO0,38
+ocrany-0.1.0.dist-info/RECORD,,

ocrany-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (82.0.1)
+Root-Is-Purelib: true
+Tag: py3-none-any

ocrany-0.1.0.dist-info/top_level.txt ADDED Viewed

@@ -0,0 +1,4 @@
+__init__
+extractor
+hard_ocr
+light_ocr