monocr 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of monocr might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: monocr
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Optical Character Recognition for Mon text
5
5
  Keywords: mon,ocr,text-recognition
6
6
  Author: janakhpon
@@ -62,6 +62,26 @@ monocr read image.png
62
62
  monocr batch images/ --output results.json
63
63
  ```
64
64
 
65
+ ## Dev Setup
66
+
67
+ ```bash
68
+ git clone git@github.com:janakhpon/monocr.git
69
+ cd monocr
70
+ uv sync --dev
71
+
72
+ # Release workflow
73
+ uv version --bump patch
74
+ git add .
75
+ git commit -m "bump version"
76
+ git tag v0.1.5
77
+ git push origin main --tags
78
+ ```
79
+
80
+ ## Related tools
81
+ - [mon_tokenizer](https://github.com/Code-Yay-Mal/mon_tokenizer)
82
+ - [hugging face mon_tokenizer model](https://huggingface.co/janakhpon/mon_tokenizer)
83
+ - [Mon corpus collection in unicode](https://github.com/MonDevHub/MonCorpusCollection)
84
+
65
85
  ## License
66
86
 
67
- MIT License
87
+ MIT - do whatever you want with it.
@@ -33,6 +33,26 @@ monocr read image.png
33
33
  monocr batch images/ --output results.json
34
34
  ```
35
35
 
36
+ ## Dev Setup
37
+
38
+ ```bash
39
+ git clone git@github.com:janakhpon/monocr.git
40
+ cd monocr
41
+ uv sync --dev
42
+
43
+ # Release workflow
44
+ uv version --bump patch
45
+ git add .
46
+ git commit -m "bump version"
47
+ git tag v0.1.5
48
+ git push origin main --tags
49
+ ```
50
+
51
+ ## Related tools
52
+ - [mon_tokenizer](https://github.com/Code-Yay-Mal/mon_tokenizer)
53
+ - [hugging face mon_tokenizer model](https://huggingface.co/janakhpon/mon_tokenizer)
54
+ - [Mon corpus collection in unicode](https://github.com/MonDevHub/MonCorpusCollection)
55
+
36
56
  ## License
37
57
 
38
- MIT License
58
+ MIT - do whatever you want with it.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "monocr"
3
- version = "0.1.0"
3
+ version = "0.1.1"
4
4
  description = "Optical Character Recognition for Mon text"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -0,0 +1,55 @@
1
+ """
2
+ mon ocr - optical character recognition for mon text
3
+ """
4
+
5
+ import os
6
+ from pathlib import Path
7
+ from .ocr import MonOCR
8
+ from .inference import MonOCRInference
9
+
10
+ __version__ = "0.1.0"
11
+ __author__ = "janakhpon"
12
+ __email__ = "jnovaxer@gmail.com"
13
+
14
+ __all__ = ["MonOCR", "MonOCRInference", "read_text", "read_image", "read_folder"]
15
+
16
+
17
+ def get_default_model_path():
18
+ """get bundled model path"""
19
+ package_dir = Path(__file__).parent
20
+ model_path = package_dir / "models" / "monocr_v1_best.pt"
21
+ return str(model_path)
22
+
23
+
24
+ # global ocr instance for simple api
25
+ _ocr_instance = None
26
+
27
+ def _get_ocr():
28
+ """get or create global ocr instance"""
29
+ global _ocr_instance
30
+ if _ocr_instance is None:
31
+ _ocr_instance = MonOCR()
32
+ return _ocr_instance
33
+
34
+
35
+ def read_text(image_path):
36
+ """read text from single image"""
37
+ return _get_ocr().read_text(image_path)
38
+
39
+
40
+ def read_image(image_path):
41
+ """alias for read_text"""
42
+ return read_text(image_path)
43
+
44
+
45
+ def read_folder(folder_path, extensions=None):
46
+ """read text from all images in folder"""
47
+ return _get_ocr().read_from_folder(folder_path, extensions)
48
+
49
+
50
+ def load_ocr(model_path=None, model_type="crnn"):
51
+ """load ocr model with custom settings"""
52
+ if model_path is None:
53
+ model_path = get_default_model_path()
54
+
55
+ return MonOCR(model_path, model_type)
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- Command Line Interface for Mon OCR
3
+ command line interface for mon ocr
4
4
  """
5
5
 
6
6
  import click
@@ -15,25 +15,25 @@ from . import get_default_model_path
15
15
  @click.group()
16
16
  @click.version_option()
17
17
  def main():
18
- """Mon OCR - Optical Character Recognition for Mon text"""
18
+ """mon ocr - optical character recognition for mon text"""
19
19
  pass
20
20
 
21
21
  @main.command()
22
22
  @click.argument('image_path', type=click.Path(exists=True))
23
- @click.option('--model', '-m', help='Path to trained model file (default: uses bundled model)')
24
- @click.option('--model-type', type=click.Choice(['crnn', 'trocr']), default='crnn', help='Type of model to use')
25
- @click.option('--output', '-o', help='Output file to save results')
23
+ @click.option('--model', '-m', help='path to trained model file (default: uses bundled model)')
24
+ @click.option('--model-type', type=click.Choice(['crnn', 'trocr']), default='crnn', help='type of model to use')
25
+ @click.option('--output', '-o', help='output file to save results')
26
26
  def read(image_path: str, model: str, model_type: str, output: str):
27
- """Read text from a single image"""
27
+ """read text from a single image"""
28
28
  try:
29
29
  if model is None:
30
30
  model = get_default_model_path()
31
31
  ocr = MonOCR(model, model_type)
32
32
 
33
- print("Processing image...")
33
+ print("processing image...")
34
34
  text = ocr.read_text(image_path)
35
35
 
36
- print(f"\nExtracted text:")
36
+ print(f"\nextracted text:")
37
37
  print(text)
38
38
 
39
39
  if output:
@@ -44,30 +44,30 @@ def read(image_path: str, model: str, model_type: str, output: str):
44
44
  }
45
45
  with open(output, 'w', encoding='utf-8') as f:
46
46
  json.dump(result, f, ensure_ascii=False, indent=2)
47
- print(f"\nResults saved to: {output}")
47
+ print(f"\nresults saved to: {output}")
48
48
 
49
49
  except Exception as e:
50
- print(f"Error: {e}")
50
+ print(f"error: {e}")
51
51
  raise click.Abort()
52
52
 
53
53
  @main.command()
54
54
  @click.argument('folder_path', type=click.Path(exists=True, file_okay=False))
55
- @click.option('--model', '-m', help='Path to trained model file (default: uses bundled model)')
56
- @click.option('--model-type', type=click.Choice(['crnn', 'trocr']), default='crnn', help='Type of model to use')
57
- @click.option('--output', '-o', help='Output file to save results')
58
- @click.option('--extensions', default='png,jpg,jpeg', help='File extensions to process (comma-separated)')
55
+ @click.option('--model', '-m', help='path to trained model file (default: uses bundled model)')
56
+ @click.option('--model-type', type=click.Choice(['crnn', 'trocr']), default='crnn', help='type of model to use')
57
+ @click.option('--output', '-o', help='output file to save results')
58
+ @click.option('--extensions', default='png,jpg,jpeg', help='file extensions to process (comma-separated)')
59
59
  def batch(folder_path: str, model: str, model_type: str, output: str, extensions: str):
60
- """Read text from all images in a folder"""
60
+ """read text from all images in a folder"""
61
61
  try:
62
62
  if model is None:
63
63
  model = get_default_model_path()
64
64
  ocr = MonOCR(model, model_type)
65
65
  ext_list = [f'.{ext.strip()}' for ext in extensions.split(',')]
66
66
 
67
- print("Processing folder...")
67
+ print("processing folder...")
68
68
  results = ocr.read_from_folder(folder_path, ext_list)
69
69
 
70
- print("\nOCR Results:")
70
+ print("\nocr results:")
71
71
  print("-" * 40)
72
72
  for filename, text in results.items():
73
73
  print(f"{filename}: {text}")
@@ -75,30 +75,30 @@ def batch(folder_path: str, model: str, model_type: str, output: str, extensions
75
75
  if output:
76
76
  with open(output, 'w', encoding='utf-8') as f:
77
77
  json.dump(results, f, ensure_ascii=False, indent=2)
78
- print(f"\nResults saved to: {output}")
78
+ print(f"\nresults saved to: {output}")
79
79
 
80
80
  except Exception as e:
81
- print(f"Error: {e}")
81
+ print(f"error: {e}")
82
82
  raise click.Abort()
83
83
 
84
84
  @main.command()
85
85
  @click.argument('image_path', type=click.Path(exists=True))
86
- @click.option('--model', '-m', help='Path to trained model file (default: uses bundled model)')
87
- @click.option('--model-type', type=click.Choice(['crnn', 'trocr']), default='crnn', help='Type of model to use')
86
+ @click.option('--model', '-m', help='path to trained model file (default: uses bundled model)')
87
+ @click.option('--model-type', type=click.Choice(['crnn', 'trocr']), default='crnn', help='type of model to use')
88
88
  def confidence(image_path: str, model: str, model_type: str):
89
- """Read text with confidence score"""
89
+ """read text with confidence score"""
90
90
  try:
91
91
  ocr = MonOCRInference(model, model_type)
92
92
 
93
- print("Processing image...")
93
+ print("processing image...")
94
94
  result = ocr.predict_with_confidence(image_path)
95
95
 
96
- print(f"\nExtracted text:")
96
+ print(f"\nextracted text:")
97
97
  print(result['text'])
98
- print(f"\nConfidence: {result['confidence']:.2%}")
98
+ print(f"\nconfidence: {result['confidence']:.2%}")
99
99
 
100
100
  except Exception as e:
101
- print(f"Error: {e}")
101
+ print(f"error: {e}")
102
102
  raise click.Abort()
103
103
 
104
104
  if __name__ == '__main__':
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  # -*- coding: utf-8 -*-
3
3
  """
4
- CRNN Model Architecture for Mon OCR
4
+ crnn model architecture for mon ocr
5
5
  """
6
6
 
7
7
  import torch
@@ -12,11 +12,11 @@ import os
12
12
  from typing import List
13
13
 
14
14
  class CRNN(nn.Module):
15
- """CRNN model for Mon OCR - matches the trained model architecture"""
15
+ """crnn model for mon ocr"""
16
16
 
17
17
  def __init__(self, num_classes):
18
18
  super(CRNN, self).__init__()
19
- # Enhanced CNN architecture for better capacity
19
+ # cnn architecture
20
20
  self.cnn = nn.Sequential(
21
21
  nn.Conv2d(1, 64, 3, 1, 1),
22
22
  nn.ReLU(),
@@ -42,10 +42,10 @@ class CRNN(nn.Module):
42
42
  nn.Conv2d(512, 512, (4, 1), 1, 0), # 4->1
43
43
  nn.ReLU(),
44
44
  )
45
- # Two LSTM layers for better sequence modeling
45
+ # lstm layers
46
46
  self.lstm1 = nn.LSTM(512, 256, bidirectional=True, batch_first=True)
47
47
  self.lstm2 = nn.LSTM(512, 256, bidirectional=True, batch_first=True)
48
- self.dropout = nn.Dropout(0.1) # add dropout to prevent overfitting
48
+ self.dropout = nn.Dropout(0.1)
49
49
  self.fc = nn.Linear(512, num_classes)
50
50
 
51
51
  def forward(self, x):
@@ -54,29 +54,34 @@ class CRNN(nn.Module):
54
54
  assert h == 1, "CNN height must be 1"
55
55
  conv = conv.squeeze(2).permute(0, 2, 1) # [B, W, C]
56
56
 
57
- # Two LSTM layers for better sequence modeling
57
+ # lstm layers
58
58
  recurrent, _ = self.lstm1(conv)
59
59
  recurrent, _ = self.lstm2(recurrent)
60
60
 
61
- # Apply dropout before final classification
61
+ # dropout and final classification
62
62
  recurrent = self.dropout(recurrent)
63
63
  out = self.fc(recurrent)
64
64
  return out # [B, W, num_classes]
65
65
 
66
+
66
67
  def build_charset(corpus_dir: str) -> str:
67
- """Build charset from corpus files"""
68
+ """build charset from corpus files"""
68
69
  charset = set()
69
- txt_files = glob.glob(os.path.join(corpus_dir, "**/*.txt"), recursive=True)
70
70
 
71
- for fpath in txt_files:
72
- if os.path.getsize(fpath) == 0:
73
- continue
74
- try:
75
- with open(fpath, encoding="utf-8") as f:
76
- for line in f:
77
- charset.update(line.strip())
78
- except Exception:
79
- continue
71
+ # search for text files in corpus directory
72
+ for ext in ['*.txt']:
73
+ pattern = os.path.join(corpus_dir, '**', ext)
74
+ for file_path in glob.glob(pattern, recursive=True):
75
+ try:
76
+ with open(file_path, 'r', encoding='utf-8') as f:
77
+ content = f.read()
78
+ charset.update(content)
79
+ except:
80
+ continue
81
+
82
+ # remove whitespace and control characters
83
+ charset = {c for c in charset if c.strip() and ord(c) >= 32}
80
84
 
81
- charset_str = "".join(sorted(list(charset)))
82
- return charset_str
85
+ # sort for consistent ordering
86
+ charset_str = ''.join(sorted(charset))
87
+ return charset_str
@@ -0,0 +1,80 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ advanced inference utilities for mon ocr
5
+ """
6
+
7
+ import os
8
+ import torch
9
+ import numpy as np
10
+ from PIL import Image
11
+ from pathlib import Path
12
+ import json
13
+ from typing import List, Dict, Optional, Union
14
+
15
+ from .ocr import MonOCR
16
+
17
+ class MonOCRInference:
18
+ """advanced mon ocr inference with additional utilities"""
19
+
20
+ def __init__(self, model_path: Optional[str] = None, model_type: str = "crnn"):
21
+ """initialize advanced mon ocr inference"""
22
+ self.ocr = MonOCR(model_path, model_type)
23
+
24
+ def predict_with_confidence(self, image: Union[str, Image.Image]) -> Dict[str, Union[str, float]]:
25
+ """predict text with confidence score"""
26
+ if isinstance(image, str):
27
+ image = Image.open(image).convert("L")
28
+ elif not isinstance(image, Image.Image):
29
+ raise ValueError("Image must be a file path or PIL Image")
30
+
31
+ # get prediction
32
+ predicted_text = self.ocr.predict(image)
33
+
34
+ # calculate confidence (simplified)
35
+ confidence = self._calculate_confidence(image, predicted_text)
36
+
37
+ return {
38
+ 'text': predicted_text,
39
+ 'confidence': confidence
40
+ }
41
+
42
+ def _calculate_confidence(self, image: Image.Image, text: str) -> float:
43
+ """calculate confidence score (simplified implementation)"""
44
+ # simple confidence based on text length and image size
45
+ if not text:
46
+ return 0.0
47
+
48
+ # normalize confidence based on text length and image dimensions
49
+ text_length = len(text)
50
+ image_area = image.width * image.height
51
+
52
+ # simple heuristic: longer text on larger images = higher confidence
53
+ confidence = min(1.0, (text_length * 100) / image_area)
54
+
55
+ return max(0.0, min(1.0, confidence))
56
+
57
+ def batch_predict_with_confidence(self, images: List[Union[str, Image.Image]]) -> List[Dict[str, Union[str, float]]]:
58
+ """predict text with confidence for multiple images"""
59
+ results = []
60
+ for image in images:
61
+ try:
62
+ result = self.predict_with_confidence(image)
63
+ results.append(result)
64
+ except Exception as e:
65
+ results.append({
66
+ 'text': '',
67
+ 'confidence': 0.0
68
+ })
69
+
70
+ return results
71
+
72
+ def save_results(self, results: List[Dict[str, Union[str, float]]], output_path: str):
73
+ """save prediction results to json file"""
74
+ with open(output_path, 'w', encoding='utf-8') as f:
75
+ json.dump(results, f, ensure_ascii=False, indent=2)
76
+
77
+ def load_results(self, input_path: str) -> List[Dict[str, Union[str, float]]]:
78
+ """load prediction results from json file"""
79
+ with open(input_path, 'r', encoding='utf-8') as f:
80
+ return json.load(f)
@@ -1,8 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  # -*- coding: utf-8 -*-
3
3
  """
4
- Main Mon OCR class - Production-ready OCR for Mon text
5
- Supports both CRNN and TrOCR models
4
+ main mon ocr class
6
5
  """
7
6
 
8
7
  import os
@@ -12,11 +11,10 @@ import numpy as np
12
11
  from PIL import Image
13
12
  from pathlib import Path
14
13
  import json
15
- import logging
16
14
  from typing import List, Dict, Optional, Union
17
15
  from torchvision import transforms
18
16
 
19
- # TrOCR imports (optional)
17
+ # trocr imports (optional)
20
18
  try:
21
19
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
22
20
  TROCR_AVAILABLE = True
@@ -24,23 +22,17 @@ except ImportError:
24
22
  TROCR_AVAILABLE = False
25
23
 
26
24
  class MonOCR:
27
- """Production-ready Mon OCR class supporting both CRNN and TrOCR models"""
25
+ """mon ocr class supporting crnn and trocr models"""
28
26
 
29
27
  def __init__(self, model_path: Optional[str] = None, model_type: str = "crnn"):
30
- """
31
- Initialize Mon OCR
32
-
33
- Args:
34
- model_path: Path to trained model file (if None, uses bundled model)
35
- model_type: Type of model ("crnn" or "trocr")
36
- """
28
+ """initialize mon ocr"""
37
29
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
38
30
  self.model_type = model_type.lower()
39
31
  self.model = None
40
32
  self.processor = None
41
33
  self.charset = None
42
34
 
43
- # Load model - use bundled model if no path provided
35
+ # load model - use bundled model if no path provided
44
36
  if model_path is None:
45
37
  from . import get_default_model_path
46
38
  model_path = get_default_model_path()
@@ -48,7 +40,7 @@ class MonOCR:
48
40
  self.load_model(model_path)
49
41
 
50
42
  def load_model(self, model_path: str):
51
- """Load trained model from file"""
43
+ """load trained model from file"""
52
44
  if not os.path.exists(model_path):
53
45
  raise FileNotFoundError(f"Model file not found: {model_path}")
54
46
 
@@ -60,24 +52,23 @@ class MonOCR:
60
52
  raise ValueError(f"Unsupported model type: {self.model_type}")
61
53
 
62
54
  def _load_crnn_model(self, model_path: str):
63
- """Load CRNN model"""
64
- # Import CRNN model class (this would need to be included in the package)
55
+ """load crnn model"""
65
56
  from .crnn_model import CRNN, build_charset
66
57
 
67
- # Load model state
58
+ # load model state
68
59
  checkpoint = torch.load(model_path, map_location=self.device)
69
60
 
70
- # Extract charset from checkpoint or build from corpus
61
+ # extract charset from checkpoint or build from corpus
71
62
  if 'charset' in checkpoint:
72
63
  self.charset = checkpoint['charset']
73
64
  else:
74
- # Fallback: build charset from default corpus
65
+ # fallback: build charset from default corpus
75
66
  self.charset = build_charset("data/raw/corpus")
76
67
 
77
- # Initialize model (add 1 for blank token)
68
+ # initialize model (add 1 for blank token)
78
69
  self.model = CRNN(num_classes=len(self.charset) + 1)
79
70
 
80
- # Load weights
71
+ # load weights
81
72
  if 'model_state_dict' in checkpoint:
82
73
  self.model.load_state_dict(checkpoint['model_state_dict'])
83
74
  else:
@@ -87,7 +78,7 @@ class MonOCR:
87
78
  self.model.eval()
88
79
 
89
80
  def _load_trocr_model(self, model_path: str):
90
- """Load TrOCR model"""
81
+ """load trocr model"""
91
82
  if not TROCR_AVAILABLE:
92
83
  raise ImportError("TrOCR dependencies not available. Install with: pip install transformers")
93
84
 
@@ -97,15 +88,7 @@ class MonOCR:
97
88
  self.model.eval()
98
89
 
99
90
  def predict(self, image: Union[str, Image.Image]) -> str:
100
- """
101
- Predict text from image
102
-
103
- Args:
104
- image: Path to image file or PIL Image object
105
-
106
- Returns:
107
- Predicted text string
108
- """
91
+ """predict text from image"""
109
92
  if isinstance(image, str):
110
93
  image = Image.open(image).convert("L")
111
94
  elif not isinstance(image, Image.Image):
@@ -117,32 +100,32 @@ class MonOCR:
117
100
  return self._predict_trocr(image)
118
101
 
119
102
  def _predict_crnn(self, image: Image.Image) -> str:
120
- """Predict using CRNN model"""
103
+ """predict using crnn model"""
121
104
  if self.model is None:
122
105
  raise ValueError("Model not loaded. Call load_model() first.")
123
106
 
124
- # Preprocess image - match simple_inference.py exactly
107
+ # preprocess image - match simple_inference.py exactly
125
108
  if isinstance(image, str):
126
109
  image = Image.open(image).convert('L')
127
110
  elif isinstance(image, Image.Image):
128
111
  image = image.convert('L')
129
112
 
130
- # Resize image - target_size is (height, width) for the model
131
- # PIL resize expects (width, height), so we need to swap
113
+ # resize image - target_size is (height, width) for the model
114
+ # pil resize expects (width, height), so we need to swap
132
115
  image = image.resize((256, 64), Image.Resampling.LANCZOS)
133
116
 
134
- # Convert to tensor and normalize
117
+ # convert to tensor and normalize
135
118
  image_array = np.array(image, dtype=np.float32) / 255.0
136
119
  image_tensor = torch.from_numpy(image_array).unsqueeze(0).unsqueeze(0) # [1, 1, H, W]
137
120
 
138
- # Apply the same transform as training
121
+ # apply the same transform as training
139
122
  transform = transforms.Compose([
140
123
  transforms.Normalize(mean=[0.5], std=[0.5])
141
124
  ])
142
125
  image_tensor = transform(image_tensor)
143
126
  image_tensor = image_tensor.to(self.device)
144
127
 
145
- # Predict
128
+ # predict
146
129
  with torch.no_grad():
147
130
  outputs = self.model(image_tensor)
148
131
  predicted_text = self._decode_crnn_output(outputs)
@@ -150,14 +133,14 @@ class MonOCR:
150
133
  return predicted_text
151
134
 
152
135
  def _predict_trocr(self, image: Image.Image) -> str:
153
- """Predict using TrOCR model"""
136
+ """predict using trocr model"""
154
137
  if self.model is None or self.processor is None:
155
138
  raise ValueError("Model not loaded. Call load_model() first.")
156
139
 
157
- # Preprocess image
140
+ # preprocess image
158
141
  pixel_values = self.processor(image, return_tensors="pt").pixel_values.to(self.device)
159
142
 
160
- # Predict
143
+ # predict
161
144
  with torch.no_grad():
162
145
  generated_ids = self.model.generate(pixel_values)
163
146
  predicted_text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
@@ -165,14 +148,14 @@ class MonOCR:
165
148
  return predicted_text
166
149
 
167
150
  def _decode_crnn_output(self, output: torch.Tensor) -> str:
168
- """Decode CRNN output to text - match simple_inference.py exactly"""
151
+ """decode crnn output to text - match simple_inference.py exactly"""
169
152
  if self.charset is None:
170
153
  raise ValueError("Charset not loaded")
171
154
 
172
- # Get predictions - same as working version
155
+ # get predictions - same as working version
173
156
  preds = output.softmax(2).argmax(2).squeeze(0) # [seq_len]
174
157
 
175
- # CTC decoding - exact same logic as working simple_inference.py
158
+ # ctc decoding - exact same logic as working simple_inference.py
176
159
  decoded = []
177
160
  prev_char = None
178
161
 
@@ -188,61 +171,27 @@ class MonOCR:
188
171
  return ''.join(decoded)
189
172
 
190
173
  def batch_predict(self, images: List[Union[str, Image.Image]]) -> List[str]:
191
- """
192
- Predict text from multiple images
193
-
194
- Args:
195
- images: List of image paths or PIL Image objects
196
-
197
- Returns:
198
- List of predicted text strings
199
- """
174
+ """predict text from multiple images"""
200
175
  results = []
201
176
  for image in images:
202
177
  try:
203
178
  result = self.predict(image)
204
179
  results.append(result)
205
180
  except Exception as e:
206
- logging.warning(f"Error processing image: {e}")
207
181
  results.append("")
208
182
 
209
183
  return results
210
184
 
211
185
  def read_text(self, image: Union[str, Image.Image]) -> str:
212
- """
213
- Read text from image (alias for predict method)
214
-
215
- Args:
216
- image: Path to image file or PIL Image object
217
-
218
- Returns:
219
- Extracted text string
220
- """
186
+ """read text from image (alias for predict method)"""
221
187
  return self.predict(image)
222
188
 
223
189
  def read_multiple(self, images: List[Union[str, Image.Image]]) -> List[str]:
224
- """
225
- Read text from multiple images (alias for batch_predict method)
226
-
227
- Args:
228
- images: List of image paths or PIL Image objects
229
-
230
- Returns:
231
- List of extracted text strings
232
- """
190
+ """read text from multiple images (alias for batch_predict method)"""
233
191
  return self.batch_predict(images)
234
192
 
235
193
  def read_from_folder(self, folder_path: str, extensions: List[str] = None) -> dict:
236
- """
237
- Read text from all images in a folder
238
-
239
- Args:
240
- folder_path: Path to folder containing images
241
- extensions: List of file extensions to process (default: ['.png', '.jpg', '.jpeg'])
242
-
243
- Returns:
244
- Dictionary mapping filename to extracted text
245
- """
194
+ """read text from all images in a folder"""
246
195
  if extensions is None:
247
196
  extensions = ['.png', '.jpg', '.jpeg', '.bmp', '.tiff']
248
197
 
@@ -262,7 +211,6 @@ class MonOCR:
262
211
  text = self.read_text(str(image_file))
263
212
  results[image_file.name] = text
264
213
  except Exception as e:
265
- print(f"Error processing {image_file.name}: {e}")
266
214
  results[image_file.name] = ""
267
215
 
268
- return results
216
+ return results
@@ -1,90 +0,0 @@
1
- """
2
- Mon OCR - Optical Character Recognition for Mon text
3
- A production-ready OCR package for Mon script text recognition
4
- """
5
-
6
- import os
7
- from pathlib import Path
8
- from .ocr import MonOCR
9
- from .inference import MonOCRInference
10
-
11
- __version__ = "0.1.0"
12
- __author__ = "janakhpon"
13
- __email__ = "jnovaxer@gmail.com"
14
-
15
- __all__ = ["MonOCR", "MonOCRInference", "read_text", "read_image", "read_folder"]
16
-
17
-
18
- def get_default_model_path():
19
- """Get the path to the bundled default model"""
20
- package_dir = Path(__file__).parent
21
- model_path = package_dir / "models" / "monocr_v1_best.pt"
22
- return str(model_path)
23
-
24
-
25
- # Global OCR instance for simple API
26
- _ocr_instance = None
27
-
28
- def _get_ocr():
29
- """Get or create the global OCR instance"""
30
- global _ocr_instance
31
- if _ocr_instance is None:
32
- _ocr_instance = MonOCR()
33
- return _ocr_instance
34
-
35
-
36
- def read_text(image_path):
37
- """
38
- Read text from a single image - Simple API
39
-
40
- Args:
41
- image_path: Path to image file
42
-
43
- Returns:
44
- Extracted text string
45
- """
46
- return _get_ocr().read_text(image_path)
47
-
48
-
49
- def read_image(image_path):
50
- """
51
- Alias for read_text - Read text from a single image
52
-
53
- Args:
54
- image_path: Path to image file
55
-
56
- Returns:
57
- Extracted text string
58
- """
59
- return read_text(image_path)
60
-
61
-
62
- def read_folder(folder_path, extensions=None):
63
- """
64
- Read text from all images in a folder - Simple API
65
-
66
- Args:
67
- folder_path: Path to folder containing images
68
- extensions: List of file extensions to process (default: ['.png', '.jpg', '.jpeg'])
69
-
70
- Returns:
71
- Dictionary mapping filename to extracted text
72
- """
73
- return _get_ocr().read_from_folder(folder_path, extensions)
74
-
75
-
76
- def load_ocr(model_path=None, model_type="crnn"):
77
- """
78
- Load OCR model with default settings (Advanced API)
79
-
80
- Args:
81
- model_path: Path to trained model file (if None, uses bundled model)
82
- model_type: Type of model ("crnn" or "trocr")
83
-
84
- Returns:
85
- MonOCR instance
86
- """
87
- if model_path is None:
88
- model_path = get_default_model_path()
89
-
90
- return MonOCR(model_path, model_type)
@@ -1,117 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- """
4
- Advanced inference utilities for Mon OCR
5
- """
6
-
7
- import os
8
- import torch
9
- import numpy as np
10
- from PIL import Image
11
- from pathlib import Path
12
- import json
13
- import logging
14
- from typing import List, Dict, Optional, Union
15
-
16
- from .ocr import MonOCR
17
-
18
- class MonOCRInference:
19
- """Advanced Mon OCR inference with additional utilities"""
20
-
21
- def __init__(self, model_path: Optional[str] = None, model_type: str = "crnn"):
22
- """
23
- Initialize advanced Mon OCR inference
24
-
25
- Args:
26
- model_path: Path to trained model file
27
- model_type: Type of model ("crnn" or "trocr")
28
- """
29
- self.ocr = MonOCR(model_path, model_type)
30
- self.logger = logging.getLogger(__name__)
31
-
32
- def predict_with_confidence(self, image: Union[str, Image.Image]) -> Dict[str, Union[str, float]]:
33
- """
34
- Predict text with confidence score
35
-
36
- Args:
37
- image: Path to image file or PIL Image object
38
-
39
- Returns:
40
- Dictionary with 'text' and 'confidence' keys
41
- """
42
- try:
43
- text = self.ocr.predict(image)
44
- # For now, return a placeholder confidence score
45
- # In a full implementation, you'd calculate actual confidence
46
- confidence = 0.95 # Placeholder
47
-
48
- return {
49
- 'text': text,
50
- 'confidence': confidence
51
- }
52
- except Exception as e:
53
- self.logger.error(f"Error in prediction: {e}")
54
- return {
55
- 'text': "",
56
- 'confidence': 0.0
57
- }
58
-
59
- def batch_predict_with_confidence(self, images: List[Union[str, Image.Image]]) -> List[Dict[str, Union[str, float]]]:
60
- """
61
- Predict text with confidence for multiple images
62
-
63
- Args:
64
- images: List of image paths or PIL Image objects
65
-
66
- Returns:
67
- List of dictionaries with 'text' and 'confidence' keys
68
- """
69
- results = []
70
- for image in images:
71
- result = self.predict_with_confidence(image)
72
- results.append(result)
73
-
74
- return results
75
-
76
- def process_document(self, image_path: str, output_path: Optional[str] = None) -> Dict[str, str]:
77
- """
78
- Process a document image and save results
79
-
80
- Args:
81
- image_path: Path to document image
82
- output_path: Path to save results (optional)
83
-
84
- Returns:
85
- Dictionary with processing results
86
- """
87
- try:
88
- # Load and process image
89
- image = Image.open(image_path)
90
- text = self.ocr.predict(image)
91
-
92
- results = {
93
- 'image_path': image_path,
94
- 'extracted_text': text,
95
- 'status': 'success'
96
- }
97
-
98
- # Save results if output path provided
99
- if output_path:
100
- with open(output_path, 'w', encoding='utf-8') as f:
101
- json.dump(results, f, ensure_ascii=False, indent=2)
102
-
103
- return results
104
-
105
- except Exception as e:
106
- error_result = {
107
- 'image_path': image_path,
108
- 'extracted_text': "",
109
- 'status': 'error',
110
- 'error': str(e)
111
- }
112
-
113
- if output_path:
114
- with open(output_path, 'w', encoding='utf-8') as f:
115
- json.dump(error_result, f, ensure_ascii=False, indent=2)
116
-
117
- return error_result