PyPI - monocr - Versions diffs - 0.1.0__tar.gz → 0.1.1__tar.gz - Mend

monocr 0.1.0.tar.gz → 0.1.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of monocr might be problematic. Click here for more details.

Files changed (11)

{monocr-0.1.0 → monocr-0.1.1}/PKG-INFO +22 -2
{monocr-0.1.0 → monocr-0.1.1}/README.md +21 -1
{monocr-0.1.0 → monocr-0.1.1}/pyproject.toml +1 -1
monocr-0.1.1/src/monocr/__init__.py +55 -0
{monocr-0.1.0 → monocr-0.1.1}/src/monocr/cli.py +26 -26
{monocr-0.1.0 → monocr-0.1.1}/src/monocr/crnn_model.py +25 -20
monocr-0.1.1/src/monocr/inference.py +80 -0
{monocr-0.1.0 → monocr-0.1.1}/src/monocr/ocr.py +32 -84
monocr-0.1.0/src/monocr/__init__.py +0 -90
monocr-0.1.0/src/monocr/inference.py +0 -117
{monocr-0.1.0 → monocr-0.1.1}/src/monocr/models/monocr_v1_best.pt +0 -0

{monocr-0.1.0 → monocr-0.1.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: monocr
-Version: 0.1.0
+Version: 0.1.1
 Summary: Optical Character Recognition for Mon text
 Keywords: mon,ocr,text-recognition
 Author: janakhpon
@@ -62,6 +62,26 @@ monocr read image.png
 monocr batch images/ --output results.json
 ```
+## Dev Setup
+```bash
+git clone git@github.com:janakhpon/monocr.git
+cd monocr
+uv sync --dev
+# Release workflow
+uv version --bump patch
+git add .
+git commit -m "bump version"
+git tag v0.1.5
+git push origin main --tags
+```
+## Related tools
+- [mon_tokenizer](https://github.com/Code-Yay-Mal/mon_tokenizer)
+- [hugging face mon_tokenizer model](https://huggingface.co/janakhpon/mon_tokenizer)
+- [Mon corpus collection in unicode](https://github.com/MonDevHub/MonCorpusCollection)
 ## License
-MIT License
+MIT - do whatever you want with it.

{monocr-0.1.0 → monocr-0.1.1}/README.md RENAMED Viewed

@@ -33,6 +33,26 @@ monocr read image.png
 monocr batch images/ --output results.json
 ```
+## Dev Setup
+```bash
+git clone git@github.com:janakhpon/monocr.git
+cd monocr
+uv sync --dev
+# Release workflow
+uv version --bump patch
+git add .
+git commit -m "bump version"
+git tag v0.1.5
+git push origin main --tags
+```
+## Related tools
+- [mon_tokenizer](https://github.com/Code-Yay-Mal/mon_tokenizer)
+- [hugging face mon_tokenizer model](https://huggingface.co/janakhpon/mon_tokenizer)
+- [Mon corpus collection in unicode](https://github.com/MonDevHub/MonCorpusCollection)
 ## License
-MIT License
+MIT - do whatever you want with it.

{monocr-0.1.0 → monocr-0.1.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "monocr"
-version = "0.1.0"
+version = "0.1.1"
 description = "Optical Character Recognition for Mon text"
 readme = "README.md"
 requires-python = ">=3.11"

monocr-0.1.1/src/monocr/__init__.py ADDED Viewed

@@ -0,0 +1,55 @@
+"""
+mon ocr - optical character recognition for mon text
+"""
+import os
+from pathlib import Path
+from .ocr import MonOCR
+from .inference import MonOCRInference
+__version__ = "0.1.0"
+__author__ = "janakhpon"
+__email__ = "jnovaxer@gmail.com"
+__all__ = ["MonOCR", "MonOCRInference", "read_text", "read_image", "read_folder"]
+def get_default_model_path():
+    """get bundled model path"""
+    package_dir = Path(__file__).parent
+    model_path = package_dir / "models" / "monocr_v1_best.pt"
+    return str(model_path)
+# global ocr instance for simple api
+_ocr_instance = None
+def _get_ocr():
+    """get or create global ocr instance"""
+    global _ocr_instance
+    if _ocr_instance is None:
+        _ocr_instance = MonOCR()
+    return _ocr_instance
+def read_text(image_path):
+    """read text from single image"""
+    return _get_ocr().read_text(image_path)
+def read_image(image_path):
+    """alias for read_text"""
+    return read_text(image_path)
+def read_folder(folder_path, extensions=None):
+    """read text from all images in folder"""
+    return _get_ocr().read_from_folder(folder_path, extensions)
+def load_ocr(model_path=None, model_type="crnn"):
+    """load ocr model with custom settings"""
+    if model_path is None:
+        model_path = get_default_model_path()
+    return MonOCR(model_path, model_type)

{monocr-0.1.0 → monocr-0.1.1}/src/monocr/cli.py RENAMED Viewed

@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-Command Line Interface for Mon OCR
+command line interface for mon ocr
 """
 import click
@@ -15,25 +15,25 @@ from . import get_default_model_path
 @click.group()
 @click.version_option()
 def main():
-    """Mon OCR - Optical Character Recognition for Mon text"""
+    """mon ocr - optical character recognition for mon text"""
     pass
 @main.command()
 @click.argument('image_path', type=click.Path(exists=True))
-@click.option('--model', '-m', help='Path to trained model file (default: uses bundled model)')
-@click.option('--model-type', type=click.Choice(['crnn', 'trocr']), default='crnn', help='Type of model to use')
-@click.option('--output', '-o', help='Output file to save results')
+@click.option('--model', '-m', help='path to trained model file (default: uses bundled model)')
+@click.option('--model-type', type=click.Choice(['crnn', 'trocr']), default='crnn', help='type of model to use')
+@click.option('--output', '-o', help='output file to save results')
 def read(image_path: str, model: str, model_type: str, output: str):
-    """Read text from a single image"""
+    """read text from a single image"""
     try:
         if model is None:
             model = get_default_model_path()
         ocr = MonOCR(model, model_type)
-        print("Processing image...")
+        print("processing image...")
         text = ocr.read_text(image_path)
-        print(f"\nExtracted text:")
+        print(f"\nextracted text:")
         print(text)
         if output:
@@ -44,30 +44,30 @@ def read(image_path: str, model: str, model_type: str, output: str):
             }
             with open(output, 'w', encoding='utf-8') as f:
                 json.dump(result, f, ensure_ascii=False, indent=2)
-            print(f"\nResults saved to: {output}")
+            print(f"\nresults saved to: {output}")
     except Exception as e:
-        print(f"Error: {e}")
+        print(f"error: {e}")
         raise click.Abort()
 @main.command()
 @click.argument('folder_path', type=click.Path(exists=True, file_okay=False))
-@click.option('--model', '-m', help='Path to trained model file (default: uses bundled model)')
-@click.option('--model-type', type=click.Choice(['crnn', 'trocr']), default='crnn', help='Type of model to use')
-@click.option('--output', '-o', help='Output file to save results')
-@click.option('--extensions', default='png,jpg,jpeg', help='File extensions to process (comma-separated)')
+@click.option('--model', '-m', help='path to trained model file (default: uses bundled model)')
+@click.option('--model-type', type=click.Choice(['crnn', 'trocr']), default='crnn', help='type of model to use')
+@click.option('--output', '-o', help='output file to save results')
+@click.option('--extensions', default='png,jpg,jpeg', help='file extensions to process (comma-separated)')
 def batch(folder_path: str, model: str, model_type: str, output: str, extensions: str):
-    """Read text from all images in a folder"""
+    """read text from all images in a folder"""
     try:
         if model is None:
             model = get_default_model_path()
         ocr = MonOCR(model, model_type)
         ext_list = [f'.{ext.strip()}' for ext in extensions.split(',')]
-        print("Processing folder...")
+        print("processing folder...")
         results = ocr.read_from_folder(folder_path, ext_list)
-        print("\nOCR Results:")
+        print("\nocr results:")
         print("-" * 40)
         for filename, text in results.items():
             print(f"{filename}: {text}")
@@ -75,30 +75,30 @@ def batch(folder_path: str, model: str, model_type: str, output: str, extensions
         if output:
             with open(output, 'w', encoding='utf-8') as f:
                 json.dump(results, f, ensure_ascii=False, indent=2)
-            print(f"\nResults saved to: {output}")
+            print(f"\nresults saved to: {output}")
     except Exception as e:
-        print(f"Error: {e}")
+        print(f"error: {e}")
         raise click.Abort()
 @main.command()
 @click.argument('image_path', type=click.Path(exists=True))
-@click.option('--model', '-m', help='Path to trained model file (default: uses bundled model)')
-@click.option('--model-type', type=click.Choice(['crnn', 'trocr']), default='crnn', help='Type of model to use')
+@click.option('--model', '-m', help='path to trained model file (default: uses bundled model)')
+@click.option('--model-type', type=click.Choice(['crnn', 'trocr']), default='crnn', help='type of model to use')
 def confidence(image_path: str, model: str, model_type: str):
-    """Read text with confidence score"""
+    """read text with confidence score"""
     try:
         ocr = MonOCRInference(model, model_type)
-        print("Processing image...")
+        print("processing image...")
         result = ocr.predict_with_confidence(image_path)
-        print(f"\nExtracted text:")
+        print(f"\nextracted text:")
         print(result['text'])
-        print(f"\nConfidence: {result['confidence']:.2%}")
+        print(f"\nconfidence: {result['confidence']:.2%}")
     except Exception as e:
-        print(f"Error: {e}")
+        print(f"error: {e}")
         raise click.Abort()
 if __name__ == '__main__':

{monocr-0.1.0 → monocr-0.1.1}/src/monocr/crnn_model.py RENAMED Viewed

@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
-CRNN Model Architecture for Mon OCR
+crnn model architecture for mon ocr
 """
 import torch
@@ -12,11 +12,11 @@ import os
 from typing import List
 class CRNN(nn.Module):
-    """CRNN model for Mon OCR - matches the trained model architecture"""
+    """crnn model for mon ocr"""
     def __init__(self, num_classes):
         super(CRNN, self).__init__()
-        # Enhanced CNN architecture for better capacity
+        # cnn architecture
         self.cnn = nn.Sequential(
             nn.Conv2d(1, 64, 3, 1, 1),
             nn.ReLU(),
@@ -42,10 +42,10 @@ class CRNN(nn.Module):
             nn.Conv2d(512, 512, (4, 1), 1, 0),  # 4->1
             nn.ReLU(),
         )
-        # Two LSTM layers for better sequence modeling
+        # lstm layers
         self.lstm1 = nn.LSTM(512, 256, bidirectional=True, batch_first=True)
         self.lstm2 = nn.LSTM(512, 256, bidirectional=True, batch_first=True)
-        self.dropout = nn.Dropout(0.1)  # add dropout to prevent overfitting
+        self.dropout = nn.Dropout(0.1)
         self.fc = nn.Linear(512, num_classes)
     def forward(self, x):
@@ -54,29 +54,34 @@ class CRNN(nn.Module):
         assert h == 1, "CNN height must be 1"
         conv = conv.squeeze(2).permute(0, 2, 1)  # [B, W, C]
-        # Two LSTM layers for better sequence modeling
+        # lstm layers
         recurrent, _ = self.lstm1(conv)
         recurrent, _ = self.lstm2(recurrent)
-        # Apply dropout before final classification
+        # dropout and final classification
         recurrent = self.dropout(recurrent)
         out = self.fc(recurrent)
         return out  # [B, W, num_classes]
 def build_charset(corpus_dir: str) -> str:
-    """Build charset from corpus files"""
+    """build charset from corpus files"""
     charset = set()
-    txt_files = glob.glob(os.path.join(corpus_dir, "**/*.txt"), recursive=True)
-    for fpath in txt_files:
-        if os.path.getsize(fpath) == 0:
-            continue
-        try:
-            with open(fpath, encoding="utf-8") as f:
-                for line in f:
-                    charset.update(line.strip())
-        except Exception:
-            continue
+    # search for text files in corpus directory
+    for ext in ['*.txt']:
+        pattern = os.path.join(corpus_dir, '**', ext)
+        for file_path in glob.glob(pattern, recursive=True):
+            try:
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    content = f.read()
+                    charset.update(content)
+            except:
+                continue
+    # remove whitespace and control characters
+    charset = {c for c in charset if c.strip() and ord(c) >= 32}
-    charset_str = "".join(sorted(list(charset)))
-    return charset_str
+    # sort for consistent ordering
+    charset_str = ''.join(sorted(charset))
+    return charset_str

monocr-0.1.1/src/monocr/inference.py ADDED Viewed

@@ -0,0 +1,80 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+advanced inference utilities for mon ocr
+"""
+import os
+import torch
+import numpy as np
+from PIL import Image
+from pathlib import Path
+import json
+from typing import List, Dict, Optional, Union
+from .ocr import MonOCR
+class MonOCRInference:
+    """advanced mon ocr inference with additional utilities"""
+    def __init__(self, model_path: Optional[str] = None, model_type: str = "crnn"):
+        """initialize advanced mon ocr inference"""
+        self.ocr = MonOCR(model_path, model_type)
+    def predict_with_confidence(self, image: Union[str, Image.Image]) -> Dict[str, Union[str, float]]:
+        """predict text with confidence score"""
+        if isinstance(image, str):
+            image = Image.open(image).convert("L")
+        elif not isinstance(image, Image.Image):
+            raise ValueError("Image must be a file path or PIL Image")
+        # get prediction
+        predicted_text = self.ocr.predict(image)
+        # calculate confidence (simplified)
+        confidence = self._calculate_confidence(image, predicted_text)
+        return {
+            'text': predicted_text,
+            'confidence': confidence
+        }
+    def _calculate_confidence(self, image: Image.Image, text: str) -> float:
+        """calculate confidence score (simplified implementation)"""
+        # simple confidence based on text length and image size
+        if not text:
+            return 0.0
+        # normalize confidence based on text length and image dimensions
+        text_length = len(text)
+        image_area = image.width * image.height
+        # simple heuristic: longer text on larger images = higher confidence
+        confidence = min(1.0, (text_length * 100) / image_area)
+        return max(0.0, min(1.0, confidence))
+    def batch_predict_with_confidence(self, images: List[Union[str, Image.Image]]) -> List[Dict[str, Union[str, float]]]:
+        """predict text with confidence for multiple images"""
+        results = []
+        for image in images:
+            try:
+                result = self.predict_with_confidence(image)
+                results.append(result)
+            except Exception as e:
+                results.append({
+                    'text': '',
+                    'confidence': 0.0
+                })
+        return results
+    def save_results(self, results: List[Dict[str, Union[str, float]]], output_path: str):
+        """save prediction results to json file"""
+        with open(output_path, 'w', encoding='utf-8') as f:
+            json.dump(results, f, ensure_ascii=False, indent=2)
+    def load_results(self, input_path: str) -> List[Dict[str, Union[str, float]]]:
+        """load prediction results from json file"""
+        with open(input_path, 'r', encoding='utf-8') as f:
+            return json.load(f)

{monocr-0.1.0 → monocr-0.1.1}/src/monocr/ocr.py RENAMED Viewed

@@ -1,8 +1,7 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
-Main Mon OCR class - Production-ready OCR for Mon text
-Supports both CRNN and TrOCR models
+main mon ocr class
 """
 import os
@@ -12,11 +11,10 @@ import numpy as np
 from PIL import Image
 from pathlib import Path
 import json
-import logging
 from typing import List, Dict, Optional, Union
 from torchvision import transforms
-# TrOCR imports (optional)
+# trocr imports (optional)
 try:
     from transformers import TrOCRProcessor, VisionEncoderDecoderModel
     TROCR_AVAILABLE = True
@@ -24,23 +22,17 @@ except ImportError:
     TROCR_AVAILABLE = False
 class MonOCR:
-    """Production-ready Mon OCR class supporting both CRNN and TrOCR models"""
+    """mon ocr class supporting crnn and trocr models"""
     def __init__(self, model_path: Optional[str] = None, model_type: str = "crnn"):
-        """
-        Initialize Mon OCR
-        Args:
-            model_path: Path to trained model file (if None, uses bundled model)
-            model_type: Type of model ("crnn" or "trocr")
-        """
+        """initialize mon ocr"""
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.model_type = model_type.lower()
         self.model = None
         self.processor = None
         self.charset = None
-        # Load model - use bundled model if no path provided
+        # load model - use bundled model if no path provided
         if model_path is None:
             from . import get_default_model_path
             model_path = get_default_model_path()
@@ -48,7 +40,7 @@ class MonOCR:
         self.load_model(model_path)
     def load_model(self, model_path: str):
-        """Load trained model from file"""
+        """load trained model from file"""
         if not os.path.exists(model_path):
             raise FileNotFoundError(f"Model file not found: {model_path}")
@@ -60,24 +52,23 @@ class MonOCR:
             raise ValueError(f"Unsupported model type: {self.model_type}")
     def _load_crnn_model(self, model_path: str):
-        """Load CRNN model"""
-        # Import CRNN model class (this would need to be included in the package)
+        """load crnn model"""
         from .crnn_model import CRNN, build_charset
-        # Load model state
+        # load model state
         checkpoint = torch.load(model_path, map_location=self.device)
-        # Extract charset from checkpoint or build from corpus
+        # extract charset from checkpoint or build from corpus
         if 'charset' in checkpoint:
             self.charset = checkpoint['charset']
         else:
-            # Fallback: build charset from default corpus
+            # fallback: build charset from default corpus
             self.charset = build_charset("data/raw/corpus")
-        # Initialize model (add 1 for blank token)
+        # initialize model (add 1 for blank token)
         self.model = CRNN(num_classes=len(self.charset) + 1)
-        # Load weights
+        # load weights
         if 'model_state_dict' in checkpoint:
             self.model.load_state_dict(checkpoint['model_state_dict'])
         else:
@@ -87,7 +78,7 @@ class MonOCR:
         self.model.eval()
     def _load_trocr_model(self, model_path: str):
-        """Load TrOCR model"""
+        """load trocr model"""
         if not TROCR_AVAILABLE:
             raise ImportError("TrOCR dependencies not available. Install with: pip install transformers")
@@ -97,15 +88,7 @@ class MonOCR:
         self.model.eval()
     def predict(self, image: Union[str, Image.Image]) -> str:
-        """
-        Predict text from image
-        Args:
-            image: Path to image file or PIL Image object
-        Returns:
-            Predicted text string
-        """
+        """predict text from image"""
         if isinstance(image, str):
             image = Image.open(image).convert("L")
         elif not isinstance(image, Image.Image):
@@ -117,32 +100,32 @@ class MonOCR:
             return self._predict_trocr(image)
     def _predict_crnn(self, image: Image.Image) -> str:
-        """Predict using CRNN model"""
+        """predict using crnn model"""
         if self.model is None:
             raise ValueError("Model not loaded. Call load_model() first.")
-        # Preprocess image - match simple_inference.py exactly
+        # preprocess image - match simple_inference.py exactly
         if isinstance(image, str):
             image = Image.open(image).convert('L')
         elif isinstance(image, Image.Image):
             image = image.convert('L')
-        # Resize image - target_size is (height, width) for the model
-        # PIL resize expects (width, height), so we need to swap
+        # resize image - target_size is (height, width) for the model
+        # pil resize expects (width, height), so we need to swap
         image = image.resize((256, 64), Image.Resampling.LANCZOS)
-        # Convert to tensor and normalize
+        # convert to tensor and normalize
         image_array = np.array(image, dtype=np.float32) / 255.0
         image_tensor = torch.from_numpy(image_array).unsqueeze(0).unsqueeze(0)  # [1, 1, H, W]
-        # Apply the same transform as training
+        # apply the same transform as training
         transform = transforms.Compose([
             transforms.Normalize(mean=[0.5], std=[0.5])
         ])
         image_tensor = transform(image_tensor)
         image_tensor = image_tensor.to(self.device)
-        # Predict
+        # predict
         with torch.no_grad():
             outputs = self.model(image_tensor)
             predicted_text = self._decode_crnn_output(outputs)
@@ -150,14 +133,14 @@ class MonOCR:
         return predicted_text
     def _predict_trocr(self, image: Image.Image) -> str:
-        """Predict using TrOCR model"""
+        """predict using trocr model"""
         if self.model is None or self.processor is None:
             raise ValueError("Model not loaded. Call load_model() first.")
-        # Preprocess image
+        # preprocess image
         pixel_values = self.processor(image, return_tensors="pt").pixel_values.to(self.device)
-        # Predict
+        # predict
         with torch.no_grad():
             generated_ids = self.model.generate(pixel_values)
             predicted_text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
@@ -165,14 +148,14 @@ class MonOCR:
         return predicted_text
     def _decode_crnn_output(self, output: torch.Tensor) -> str:
-        """Decode CRNN output to text - match simple_inference.py exactly"""
+        """decode crnn output to text - match simple_inference.py exactly"""
         if self.charset is None:
             raise ValueError("Charset not loaded")
-        # Get predictions - same as working version
+        # get predictions - same as working version
         preds = output.softmax(2).argmax(2).squeeze(0)  # [seq_len]
-        # CTC decoding - exact same logic as working simple_inference.py
+        # ctc decoding - exact same logic as working simple_inference.py
         decoded = []
         prev_char = None
@@ -188,61 +171,27 @@ class MonOCR:
         return ''.join(decoded)
     def batch_predict(self, images: List[Union[str, Image.Image]]) -> List[str]:
-        """
-        Predict text from multiple images
-        Args:
-            images: List of image paths or PIL Image objects
-        Returns:
-            List of predicted text strings
-        """
+        """predict text from multiple images"""
         results = []
         for image in images:
             try:
                 result = self.predict(image)
                 results.append(result)
             except Exception as e:
-                logging.warning(f"Error processing image: {e}")
                 results.append("")
         return results
     def read_text(self, image: Union[str, Image.Image]) -> str:
-        """
-        Read text from image (alias for predict method)
-        Args:
-            image: Path to image file or PIL Image object
-        Returns:
-            Extracted text string
-        """
+        """read text from image (alias for predict method)"""
         return self.predict(image)
     def read_multiple(self, images: List[Union[str, Image.Image]]) -> List[str]:
-        """
-        Read text from multiple images (alias for batch_predict method)
-        Args:
-            images: List of image paths or PIL Image objects
-        Returns:
-            List of extracted text strings
-        """
+        """read text from multiple images (alias for batch_predict method)"""
         return self.batch_predict(images)
     def read_from_folder(self, folder_path: str, extensions: List[str] = None) -> dict:
-        """
-        Read text from all images in a folder
-        Args:
-            folder_path: Path to folder containing images
-            extensions: List of file extensions to process (default: ['.png', '.jpg', '.jpeg'])
-        Returns:
-            Dictionary mapping filename to extracted text
-        """
+        """read text from all images in a folder"""
         if extensions is None:
             extensions = ['.png', '.jpg', '.jpeg', '.bmp', '.tiff']
@@ -262,7 +211,6 @@ class MonOCR:
                 text = self.read_text(str(image_file))
                 results[image_file.name] = text
             except Exception as e:
-                print(f"Error processing {image_file.name}: {e}")
                 results[image_file.name] = ""
-        return results
+        return results

monocr-0.1.0/src/monocr/__init__.py DELETED Viewed

@@ -1,90 +0,0 @@
-"""
-Mon OCR - Optical Character Recognition for Mon text
-A production-ready OCR package for Mon script text recognition
-"""
-import os
-from pathlib import Path
-from .ocr import MonOCR
-from .inference import MonOCRInference
-__version__ = "0.1.0"
-__author__ = "janakhpon"
-__email__ = "jnovaxer@gmail.com"
-__all__ = ["MonOCR", "MonOCRInference", "read_text", "read_image", "read_folder"]
-def get_default_model_path():
-    """Get the path to the bundled default model"""
-    package_dir = Path(__file__).parent
-    model_path = package_dir / "models" / "monocr_v1_best.pt"
-    return str(model_path)
-# Global OCR instance for simple API
-_ocr_instance = None
-def _get_ocr():
-    """Get or create the global OCR instance"""
-    global _ocr_instance
-    if _ocr_instance is None:
-        _ocr_instance = MonOCR()
-    return _ocr_instance
-def read_text(image_path):
-    """
-    Read text from a single image - Simple API
-    Args:
-        image_path: Path to image file
-    Returns:
-        Extracted text string
-    """
-    return _get_ocr().read_text(image_path)
-def read_image(image_path):
-    """
-    Alias for read_text - Read text from a single image
-    Args:
-        image_path: Path to image file
-    Returns:
-        Extracted text string
-    """
-    return read_text(image_path)
-def read_folder(folder_path, extensions=None):
-    """
-    Read text from all images in a folder - Simple API
-    Args:
-        folder_path: Path to folder containing images
-        extensions: List of file extensions to process (default: ['.png', '.jpg', '.jpeg'])
-    Returns:
-        Dictionary mapping filename to extracted text
-    """
-    return _get_ocr().read_from_folder(folder_path, extensions)
-def load_ocr(model_path=None, model_type="crnn"):
-    """
-    Load OCR model with default settings (Advanced API)
-    Args:
-        model_path: Path to trained model file (if None, uses bundled model)
-        model_type: Type of model ("crnn" or "trocr")
-    Returns:
-        MonOCR instance
-    """
-    if model_path is None:
-        model_path = get_default_model_path()
-    return MonOCR(model_path, model_type)

monocr-0.1.0/src/monocr/inference.py DELETED Viewed

@@ -1,117 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Advanced inference utilities for Mon OCR
-"""
-import os
-import torch
-import numpy as np
-from PIL import Image
-from pathlib import Path
-import json
-import logging
-from typing import List, Dict, Optional, Union
-from .ocr import MonOCR
-class MonOCRInference:
-    """Advanced Mon OCR inference with additional utilities"""
-    def __init__(self, model_path: Optional[str] = None, model_type: str = "crnn"):
-        """
-        Initialize advanced Mon OCR inference
-        Args:
-            model_path: Path to trained model file
-            model_type: Type of model ("crnn" or "trocr")
-        """
-        self.ocr = MonOCR(model_path, model_type)
-        self.logger = logging.getLogger(__name__)
-    def predict_with_confidence(self, image: Union[str, Image.Image]) -> Dict[str, Union[str, float]]:
-        """
-        Predict text with confidence score
-        Args:
-            image: Path to image file or PIL Image object
-        Returns:
-            Dictionary with 'text' and 'confidence' keys
-        """
-        try:
-            text = self.ocr.predict(image)
-            # For now, return a placeholder confidence score
-            # In a full implementation, you'd calculate actual confidence
-            confidence = 0.95  # Placeholder
-            return {
-                'text': text,
-                'confidence': confidence
-            }
-        except Exception as e:
-            self.logger.error(f"Error in prediction: {e}")
-            return {
-                'text': "",
-                'confidence': 0.0
-            }
-    def batch_predict_with_confidence(self, images: List[Union[str, Image.Image]]) -> List[Dict[str, Union[str, float]]]:
-        """
-        Predict text with confidence for multiple images
-        Args:
-            images: List of image paths or PIL Image objects
-        Returns:
-            List of dictionaries with 'text' and 'confidence' keys
-        """
-        results = []
-        for image in images:
-            result = self.predict_with_confidence(image)
-            results.append(result)
-        return results
-    def process_document(self, image_path: str, output_path: Optional[str] = None) -> Dict[str, str]:
-        """
-        Process a document image and save results
-        Args:
-            image_path: Path to document image
-            output_path: Path to save results (optional)
-        Returns:
-            Dictionary with processing results
-        """
-        try:
-            # Load and process image
-            image = Image.open(image_path)
-            text = self.ocr.predict(image)
-            results = {
-                'image_path': image_path,
-                'extracted_text': text,
-                'status': 'success'
-            }
-            # Save results if output path provided
-            if output_path:
-                with open(output_path, 'w', encoding='utf-8') as f:
-                    json.dump(results, f, ensure_ascii=False, indent=2)
-            return results
-        except Exception as e:
-            error_result = {
-                'image_path': image_path,
-                'extracted_text': "",
-                'status': 'error',
-                'error': str(e)
-            }
-            if output_path:
-                with open(output_path, 'w', encoding='utf-8') as f:
-                    json.dump(error_result, f, ensure_ascii=False, indent=2)
-            return error_result

{monocr-0.1.0 → monocr-0.1.1}/src/monocr/models/monocr_v1_best.pt RENAMED Viewed

File without changes

monocr 0.1.0__tar.gz → 0.1.1__tar.gz

Potentially problematic release.

monocr 0.1.0.tar.gz → 0.1.1.tar.gz