rapid-ocr-tool 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rapid_ocr_tool-1.0.0/CLAUDE.md +145 -0
- rapid_ocr_tool-1.0.0/LICENSE +21 -0
- rapid_ocr_tool-1.0.0/MANIFEST.in +9 -0
- rapid_ocr_tool-1.0.0/PKG-INFO +199 -0
- rapid_ocr_tool-1.0.0/README.md +182 -0
- rapid_ocr_tool-1.0.0/config/config.yaml +48 -0
- rapid_ocr_tool-1.0.0/docs/命令参考.md +220 -0
- rapid_ocr_tool-1.0.0/docs/文档索引.md +83 -0
- rapid_ocr_tool-1.0.0/docs/贡献指南.md +358 -0
- rapid_ocr_tool-1.0.0/docs/运维手册.md +546 -0
- rapid_ocr_tool-1.0.0/examples.py +165 -0
- rapid_ocr_tool-1.0.0/main.py +252 -0
- rapid_ocr_tool-1.0.0/pyproject.toml +17 -0
- rapid_ocr_tool-1.0.0/rapid_ocr_tool.egg-info/PKG-INFO +199 -0
- rapid_ocr_tool-1.0.0/rapid_ocr_tool.egg-info/SOURCES.txt +29 -0
- rapid_ocr_tool-1.0.0/rapid_ocr_tool.egg-info/dependency_links.txt +1 -0
- rapid_ocr_tool-1.0.0/rapid_ocr_tool.egg-info/top_level.txt +1 -0
- rapid_ocr_tool-1.0.0/requirements.txt +19 -0
- rapid_ocr_tool-1.0.0/setup.cfg +4 -0
- rapid_ocr_tool-1.0.0/setup.py +62 -0
- rapid_ocr_tool-1.0.0/src/__init__.py +115 -0
- rapid_ocr_tool-1.0.0/src/batch_processor.py +450 -0
- rapid_ocr_tool-1.0.0/src/constants.py +54 -0
- rapid_ocr_tool-1.0.0/src/image_processor.py +281 -0
- rapid_ocr_tool-1.0.0/src/ocr_engine.py +205 -0
- rapid_ocr_tool-1.0.0/src/output_formatter.py +366 -0
- rapid_ocr_tool-1.0.0/tests/__init__.py +1 -0
- rapid_ocr_tool-1.0.0/tests/test_constants.py +50 -0
- rapid_ocr_tool-1.0.0/tests/test_image_processor.py +111 -0
- rapid_ocr_tool-1.0.0/tests/test_ocr_engine.py +113 -0
- rapid_ocr_tool-1.0.0/tests/test_output_formatter.py +148 -0
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## Project Overview
|
|
6
|
+
|
|
7
|
+
This is a lightweight offline OCR (Optical Character Recognition) tool built with RapidOCR and ONNX Runtime. It runs 4-5x faster than PaddleOCR and uses 200MB+ less memory while maintaining 99%+ accuracy.
|
|
8
|
+
|
|
9
|
+
**Core Technology Stack:**
|
|
10
|
+
- OCR Engine: RapidOCR (optimized PaddleOCR)
|
|
11
|
+
- Inference: ONNX Runtime
|
|
12
|
+
- Model: PP-OCRv3 lightweight model (12.3MB)
|
|
13
|
+
- Language: Python 3.8+
|
|
14
|
+
|
|
15
|
+
## Essential Commands
|
|
16
|
+
|
|
17
|
+
### Testing
|
|
18
|
+
```bash
|
|
19
|
+
# Run all tests
|
|
20
|
+
pytest tests/ -v
|
|
21
|
+
|
|
22
|
+
# Run specific test file
|
|
23
|
+
pytest tests/test_ocr_engine.py -v
|
|
24
|
+
|
|
25
|
+
# Run with coverage report
|
|
26
|
+
pytest tests/ --cov=src --cov-report=html
|
|
27
|
+
|
|
28
|
+
# Run single test
|
|
29
|
+
pytest tests/test_output_formatter.py::TestOutputFormatter::test_format_text_simple -v
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### Running the Tool
|
|
33
|
+
```bash
|
|
34
|
+
# Single image recognition
|
|
35
|
+
python main.py image <image_path>
|
|
36
|
+
|
|
37
|
+
# Batch processing
|
|
38
|
+
python main.py batch <folder_path>
|
|
39
|
+
|
|
40
|
+
# With specific output format
|
|
41
|
+
python main.py image <image_path> --format json --coords
|
|
42
|
+
|
|
43
|
+
# Disable emoji output
|
|
44
|
+
python main.py image <image_path> --no-emoji
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### Installation
|
|
48
|
+
```bash
|
|
49
|
+
# Install dependencies
|
|
50
|
+
pip install -r requirements.txt
|
|
51
|
+
|
|
52
|
+
# Run with virtual environment (recommended)
|
|
53
|
+
python -m venv venv
|
|
54
|
+
source venv/bin/activate # Linux/Mac
|
|
55
|
+
venv\Scripts\activate # Windows
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Architecture
|
|
59
|
+
|
|
60
|
+
### Module Structure
|
|
61
|
+
|
|
62
|
+
The codebase follows a modular architecture with clear separation of concerns:
|
|
63
|
+
|
|
64
|
+
**Core Modules (`src/`)**:
|
|
65
|
+
1. **`ocr_engine.py`** - Wraps RapidOCR, handles single/batch recognition, returns structured results with optional coordinates and confidence scores
|
|
66
|
+
2. **`image_processor.py`** - Preprocessing pipeline: validation, resize (max 2048px), grayscale, contrast enhancement, denoising
|
|
67
|
+
3. **`batch_processor.py`** - Multi-threaded batch processing using ThreadPoolExecutor, handles progress tracking and error recovery
|
|
68
|
+
4. **`output_formatter.py`** - Formats OCR results to multiple output types (TXT, JSON, Markdown, HTML, CSV)
|
|
69
|
+
5. **`constants.py`** - Centralized configuration constants (max file size, supported formats, thresholds)
|
|
70
|
+
|
|
71
|
+
**Entry Point**:
|
|
72
|
+
- **`main.py`** - Click-based CLI with emoji output control (global `USE_EMOJI` flag)
|
|
73
|
+
|
|
74
|
+
### Data Flow
|
|
75
|
+
|
|
76
|
+
```
|
|
77
|
+
Image Input → ImageProcessor (optional preprocessing) → OCREngine → Raw OCR Result → OutputFormatter → File Output
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
**OCR Result Format**: List of lists where each item is either:
|
|
81
|
+
- `[text]` - Simple text only
|
|
82
|
+
- `[bbox, text, confidence]` - With bounding box (a list of four `[x, y]` corner points) and confidence
|
|
83
|
+
|
|
84
|
+
Example: `[[[[0,0], [10,0], [10,10], [0,10]], "Hello", 0.99], ["World"]]`
|
|
85
|
+
|
|
86
|
+
### Configuration System
|
|
87
|
+
|
|
88
|
+
Two-layer configuration:
|
|
89
|
+
1. **Default config** in `src/__init__.py:get_default_config()`
|
|
90
|
+
2. **YAML override** in `config/config.yaml` (loaded via `load_config()`)
|
|
91
|
+
|
|
92
|
+
Config merging is recursive - custom values override defaults at any depth.
|
|
93
|
+
|
|
94
|
+
### Key Design Patterns
|
|
95
|
+
|
|
96
|
+
**Factory Functions**: Each module exports a `create_*()` function for convenient instantiation:
|
|
97
|
+
- `create_ocr_engine(lang, use_gpu)`
|
|
98
|
+
- `create_image_processor(max_size, auto_rotate)`
|
|
99
|
+
- `create_batch_processor(ocr, threads)`
|
|
100
|
+
- `create_formatter(indent)`
|
|
101
|
+
|
|
102
|
+
**Text Extraction**: `OutputFormatter` uses helper methods to avoid code duplication:
|
|
103
|
+
- `_extract_text(line)` - Extract text from single OCR result line
|
|
104
|
+
- `_extract_texts(result)` - Extract all text lines using list comprehension with walrus operator
|
|
105
|
+
|
|
106
|
+
**Batch Processing**: `BatchProcessor.process_single()` was refactored from 88 lines into 6 smaller methods:
|
|
107
|
+
- `_init_result()` - Initialize result dictionary
|
|
108
|
+
- `_should_skip()` - Check if output exists (for skip_exists mode)
|
|
109
|
+
- `_load_and_process()` - Load and OCR image
|
|
110
|
+
- `_save_result()` - Save to file
|
|
111
|
+
- `_process_error()` - Handle errors
|
|
112
|
+
- Main method orchestrates the workflow
|
|
113
|
+
|
|
114
|
+
## Code Quality Standards
|
|
115
|
+
|
|
116
|
+
**Testing**: TDD methodology with RED→GREEN→REFACTOR cycle. Current test coverage: 51% (37 tests).
|
|
117
|
+
|
|
118
|
+
**Refactoring**: Code underwent TDD refactoring to eliminate duplication. Example: `OutputFormatter` reduced from 330 to 280 lines by extracting common text extraction logic.
|
|
119
|
+
|
|
120
|
+
**Error Handling**: Avoid naked `except:` clauses. Use specific exception types like `(TypeError, ValueError, AttributeError)`.
|
|
121
|
+
|
|
122
|
+
**Emoji Control**: Global `USE_EMOJI` flag in `main.py` controls emoji output. Use `_emoji()` helper function. Pass `--no-emoji` CLI flag to disable.
|
|
123
|
+
|
|
124
|
+
**Constants**: All magic numbers centralized in `src/constants.py`:
|
|
125
|
+
- `MAX_FILE_SIZE = 50 * 1024 * 1024` (50MB)
|
|
126
|
+
- `DEFAULT_MAX_IMAGE_SIZE = 2048`
|
|
127
|
+
- `CONFIDENCE_THRESHOLD = 0.8`
|
|
128
|
+
|
|
129
|
+
## Important Constraints
|
|
130
|
+
|
|
131
|
+
- **First Run**: Auto-downloads the ONNX models (~12MB) to the `./models/` directory
|
|
132
|
+
- **Image Size**: Automatically resized to max 2048px on longest side
|
|
133
|
+
- **Supported Formats**: .jpg, .jpeg, .png, .bmp, .tiff, .tif
|
|
134
|
+
- **Thread Safety**: BatchProcessor uses ThreadPoolExecutor with configurable thread count (default: 4)
|
|
135
|
+
- **File Encoding**: All file I/O uses UTF-8 encoding
|
|
136
|
+
|
|
137
|
+
## Troubleshooting
|
|
138
|
+
|
|
139
|
+
**Import Errors**: Ensure virtual environment is activated and dependencies installed via `requirements.txt`.
|
|
140
|
+
|
|
141
|
+
**Model Download Failures**: Check the internet connection on first run. Models are downloaded from the RapidOCR repository.
|
|
142
|
+
|
|
143
|
+
**Memory Issues**: Reduce `max_size` in config or `ImageProcessor` initialization. Lower thread count in batch processing.
|
|
144
|
+
|
|
145
|
+
**Low Accuracy**: Enable image preprocessing (contrast enhancement, denoising) or preprocess images externally before OCR.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 OCR Tool Team
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: rapid-ocr-tool
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: 本地轻量级OCR识别工具
|
|
5
|
+
Home-page: https://github.com/yourusername/ocr-tool
|
|
6
|
+
Author: OCR Tool Team
|
|
7
|
+
Author-email: your-email@example.com
|
|
8
|
+
License: MIT
|
|
9
|
+
Requires-Python: >=3.8
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Dynamic: author
|
|
13
|
+
Dynamic: author-email
|
|
14
|
+
Dynamic: home-page
|
|
15
|
+
Dynamic: license-file
|
|
16
|
+
Dynamic: requires-python
|
|
17
|
+
|
|
18
|
+
# 本地轻量级OCR识别工具
|
|
19
|
+
|
|
20
|
+
<div align="center">
|
|
21
|
+
|
|
22
|
+
**基于 RapidOCR + ONNX Runtime**
|
|
23
|
+
|
|
24
|
+
🚀 极速识别 (~1秒/张) | 💻 低资源占用 (<300MB) | 🔒 完全离线
|
|
25
|
+
|
|
26
|
+
[Python](https://www.python.org/) >= 3.8 | [Windows/Linux/Mac](https://github.com/RapidAI/RapidOCR)
|
|
27
|
+
|
|
28
|
+
</div>
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## 项目简介
|
|
33
|
+
|
|
34
|
+
这是一个轻量级的本地OCR文字识别工具,基于[RapidOCR](https://github.com/RapidAI/RapidOCR)实现,采用[ONNX Runtime](https://onnxruntime.ai/)推理框架。相比PaddleOCR,速度提升4-5倍,内存占用降低200MB+,同时保持99%+的识别准确率。
|
|
35
|
+
|
|
36
|
+
- ✅ 完全离线运行,保护隐私
|
|
37
|
+
- ✅ 低资源占用(内存<300MB)
|
|
38
|
+
- ✅ 识别速度快(~1秒/张)
|
|
39
|
+
- ✅ 高准确率(99.5%中文,99%+英文)
|
|
40
|
+
- ✅ 支持批量处理
|
|
41
|
+
- ✅ 多种输出格式
|
|
42
|
+
- ✅ 跨平台支持(Windows/Linux/Mac)
|
|
43
|
+
|
|
44
|
+
## 安装
|
|
45
|
+
|
|
46
|
+
### 1. 创建虚拟环境(推荐)
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
# Windows
|
|
50
|
+
python -m venv venv
|
|
51
|
+
venv\Scripts\activate
|
|
52
|
+
|
|
53
|
+
# Linux/Mac
|
|
54
|
+
python3 -m venv venv
|
|
55
|
+
source venv/bin/activate
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### 2. 安装依赖
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
pip install -r requirements.txt
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## 使用方法
|
|
65
|
+
|
|
66
|
+
### 单张图片识别
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
python main.py image 图片路径.jpg
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### 批量处理文件夹
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
python main.py batch 文件夹路径
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### 指定输出格式
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
# 输出为文本
|
|
82
|
+
python main.py image 图片.jpg --format txt
|
|
83
|
+
|
|
84
|
+
# 输出为JSON(加 --coords 可包含坐标信息)
|
|
85
|
+
python main.py image 图片.jpg --format json
|
|
86
|
+
|
|
87
|
+
# 输出为Markdown
|
|
88
|
+
python main.py image 图片.jpg --format md
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### 高级选项
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
# 保存到文件
|
|
95
|
+
python main.py image 图片.jpg --output result.txt
|
|
96
|
+
|
|
97
|
+
# 指定识别语言(ch/en)
|
|
98
|
+
python main.py image 图片.jpg --lang ch
|
|
99
|
+
|
|
100
|
+
# 显示详细信息
|
|
101
|
+
python main.py image 图片.jpg --verbose
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## 支持的图片格式
|
|
105
|
+
|
|
106
|
+
- PNG
|
|
107
|
+
- JPG/JPEG
|
|
108
|
+
- BMP
|
|
109
|
+
- TIFF
|
|
110
|
+
|
|
111
|
+
## 性能指标
|
|
112
|
+
|
|
113
|
+
| 指标 | 数值 |
|
|
114
|
+
|------|------|
|
|
115
|
+
| 单张识别速度 | ~1秒(CPU) |
|
|
116
|
+
| 内存占用 | <300MB |
|
|
117
|
+
| 中文准确率 | 99.5% |
|
|
118
|
+
| 英文准确率 | 99%+ |
|
|
119
|
+
| 支持分辨率 | 最高4096px |
|
|
120
|
+
|
|
121
|
+
## 项目结构
|
|
122
|
+
|
|
123
|
+
```
|
|
124
|
+
ocr/
|
|
125
|
+
├── src/
|
|
126
|
+
│ ├── ocr_engine.py # OCR引擎核心
|
|
127
|
+
│ ├── image_processor.py # 图像预处理
|
|
128
|
+
│ ├── batch_processor.py # 批量处理
|
|
129
|
+
│ └── output_formatter.py # 输出格式化
|
|
130
|
+
├── models/ # 模型文件(自动下载)
|
|
131
|
+
├── output/ # 默认输出目录
|
|
132
|
+
├── config/
|
|
133
|
+
│ └── config.yaml # 配置文件
|
|
134
|
+
├── main.py # 主入口
|
|
135
|
+
├── requirements.txt # 依赖列表
|
|
136
|
+
└── README.md # 使用文档
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## 📚 文档
|
|
140
|
+
|
|
141
|
+
- **[命令参考](docs/命令参考.md)** - 完整的CLI命令文档及示例
|
|
142
|
+
- **[贡献指南](docs/贡献指南.md)** - 开发环境设置、编码规范、工作流程
|
|
143
|
+
- **[运维手册](docs/运维手册.md)** - 部署、监控、故障排除指南
|
|
144
|
+
- **[文档索引](docs/文档索引.md)** - 所有文档的导航索引
|
|
145
|
+
|
|
146
|
+
## 常见问题
|
|
147
|
+
|
|
148
|
+
### 1. 首次运行慢?
|
|
149
|
+
首次运行会自动下载模型文件(约12MB),请耐心等待。
|
|
150
|
+
|
|
151
|
+
### 2. 识别结果不准确?
|
|
152
|
+
- 确保图片清晰,分辨率足够
|
|
153
|
+
- 尝试调整图片对比度
|
|
154
|
+
- 对于复杂背景,先进行图像预处理
|
|
155
|
+
|
|
156
|
+
### 3. 批量处理中断?
|
|
157
|
+
已处理的结果会自动保存,可以重新运行继续处理。
|
|
158
|
+
|
|
159
|
+
## 技术栈
|
|
160
|
+
|
|
161
|
+
- **OCR引擎**: [RapidOCR](https://github.com/RapidAI/RapidOCR) (基于PaddleOCR优化)
|
|
162
|
+
- **推理框架**: [ONNX Runtime](https://onnxruntime.ai/)
|
|
163
|
+
- **模型**: PP-OCRv3 轻量级模型 (12.3MB)
|
|
164
|
+
- **图像处理**: OpenCV + Pillow
|
|
165
|
+
- **CLI框架**: Click + Rich
|
|
166
|
+
|
|
167
|
+
## 详细目录结构
|
|
168
|
+
|
|
169
|
+
```
|
|
170
|
+
ocr/
|
|
171
|
+
├── src/
|
|
172
|
+
│ ├── __init__.py # 模块初始化
|
|
173
|
+
│ ├── ocr_engine.py # OCR引擎核心
|
|
174
|
+
│ ├── image_processor.py # 图像预处理
|
|
175
|
+
│ ├── batch_processor.py # 批量处理
|
|
176
|
+
│ └── output_formatter.py # 输出格式化
|
|
177
|
+
├── config/
|
|
178
|
+
│ └── config.yaml # 配置文件
|
|
179
|
+
├── models/ # 模型文件(自动下载)
|
|
180
|
+
├── output/ # 默认输出目录
|
|
181
|
+
├── main.py # CLI入口
|
|
182
|
+
├── examples.py # 使用示例
|
|
183
|
+
├── test_install.py # 安装测试
|
|
184
|
+
├── requirements.txt # 依赖列表
|
|
185
|
+
├── QUICKSTART.md # 快速开始指南
|
|
186
|
+
└── README.md # 项目文档
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
## 开源协议
|
|
190
|
+
|
|
191
|
+
MIT License
|
|
192
|
+
|
|
193
|
+
---
|
|
194
|
+
|
|
195
|
+
## 致谢
|
|
196
|
+
|
|
197
|
+
- [RapidOCR](https://github.com/RapidAI/RapidOCR) - 高性能OCR引擎
|
|
198
|
+
- [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR) - 飞桨OCR工具
|
|
199
|
+
- [ONNX Runtime](https://onnxruntime.ai/) - 跨平台推理框架
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
# 本地轻量级OCR识别工具
|
|
2
|
+
|
|
3
|
+
<div align="center">
|
|
4
|
+
|
|
5
|
+
**基于 RapidOCR + ONNX Runtime**
|
|
6
|
+
|
|
7
|
+
🚀 极速识别 (~1秒/张) | 💻 低资源占用 (<300MB) | 🔒 完全离线
|
|
8
|
+
|
|
9
|
+
[Python](https://www.python.org/) >= 3.8 | [Windows/Linux/Mac](https://github.com/RapidAI/RapidOCR)
|
|
10
|
+
|
|
11
|
+
</div>
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## 项目简介
|
|
16
|
+
|
|
17
|
+
这是一个轻量级的本地OCR文字识别工具,基于[RapidOCR](https://github.com/RapidAI/RapidOCR)实现,采用[ONNX Runtime](https://onnxruntime.ai/)推理框架。相比PaddleOCR,速度提升4-5倍,内存占用降低200MB+,同时保持99%+的识别准确率。
|
|
18
|
+
|
|
19
|
+
- ✅ 完全离线运行,保护隐私
|
|
20
|
+
- ✅ 低资源占用(内存<300MB)
|
|
21
|
+
- ✅ 识别速度快(~1秒/张)
|
|
22
|
+
- ✅ 高准确率(99.5%中文,99%+英文)
|
|
23
|
+
- ✅ 支持批量处理
|
|
24
|
+
- ✅ 多种输出格式
|
|
25
|
+
- ✅ 跨平台支持(Windows/Linux/Mac)
|
|
26
|
+
|
|
27
|
+
## 安装
|
|
28
|
+
|
|
29
|
+
### 1. 创建虚拟环境(推荐)
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
# Windows
|
|
33
|
+
python -m venv venv
|
|
34
|
+
venv\Scripts\activate
|
|
35
|
+
|
|
36
|
+
# Linux/Mac
|
|
37
|
+
python3 -m venv venv
|
|
38
|
+
source venv/bin/activate
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### 2. 安装依赖
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
pip install -r requirements.txt
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## 使用方法
|
|
48
|
+
|
|
49
|
+
### 单张图片识别
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
python main.py image 图片路径.jpg
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### 批量处理文件夹
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
python main.py batch 文件夹路径
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### 指定输出格式
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
# 输出为文本
|
|
65
|
+
python main.py image 图片.jpg --format txt
|
|
66
|
+
|
|
67
|
+
# 输出为JSON(加 --coords 可包含坐标信息)
|
|
68
|
+
python main.py image 图片.jpg --format json
|
|
69
|
+
|
|
70
|
+
# 输出为Markdown
|
|
71
|
+
python main.py image 图片.jpg --format md
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### 高级选项
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
# 保存到文件
|
|
78
|
+
python main.py image 图片.jpg --output result.txt
|
|
79
|
+
|
|
80
|
+
# 指定识别语言(ch/en)
|
|
81
|
+
python main.py image 图片.jpg --lang ch
|
|
82
|
+
|
|
83
|
+
# 显示详细信息
|
|
84
|
+
python main.py image 图片.jpg --verbose
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## 支持的图片格式
|
|
88
|
+
|
|
89
|
+
- PNG
|
|
90
|
+
- JPG/JPEG
|
|
91
|
+
- BMP
|
|
92
|
+
- TIFF
|
|
93
|
+
|
|
94
|
+
## 性能指标
|
|
95
|
+
|
|
96
|
+
| 指标 | 数值 |
|
|
97
|
+
|------|------|
|
|
98
|
+
| 单张识别速度 | ~1秒(CPU) |
|
|
99
|
+
| 内存占用 | <300MB |
|
|
100
|
+
| 中文准确率 | 99.5% |
|
|
101
|
+
| 英文准确率 | 99%+ |
|
|
102
|
+
| 支持分辨率 | 最高4096px |
|
|
103
|
+
|
|
104
|
+
## 项目结构
|
|
105
|
+
|
|
106
|
+
```
|
|
107
|
+
ocr/
|
|
108
|
+
├── src/
|
|
109
|
+
│ ├── ocr_engine.py # OCR引擎核心
|
|
110
|
+
│ ├── image_processor.py # 图像预处理
|
|
111
|
+
│ ├── batch_processor.py # 批量处理
|
|
112
|
+
│ └── output_formatter.py # 输出格式化
|
|
113
|
+
├── models/ # 模型文件(自动下载)
|
|
114
|
+
├── output/ # 默认输出目录
|
|
115
|
+
├── config/
|
|
116
|
+
│ └── config.yaml # 配置文件
|
|
117
|
+
├── main.py # 主入口
|
|
118
|
+
├── requirements.txt # 依赖列表
|
|
119
|
+
└── README.md # 使用文档
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## 📚 文档
|
|
123
|
+
|
|
124
|
+
- **[命令参考](docs/命令参考.md)** - 完整的CLI命令文档及示例
|
|
125
|
+
- **[贡献指南](docs/贡献指南.md)** - 开发环境设置、编码规范、工作流程
|
|
126
|
+
- **[运维手册](docs/运维手册.md)** - 部署、监控、故障排除指南
|
|
127
|
+
- **[文档索引](docs/文档索引.md)** - 所有文档的导航索引
|
|
128
|
+
|
|
129
|
+
## 常见问题
|
|
130
|
+
|
|
131
|
+
### 1. 首次运行慢?
|
|
132
|
+
首次运行会自动下载模型文件(约12MB),请耐心等待。
|
|
133
|
+
|
|
134
|
+
### 2. 识别结果不准确?
|
|
135
|
+
- 确保图片清晰,分辨率足够
|
|
136
|
+
- 尝试调整图片对比度
|
|
137
|
+
- 对于复杂背景,先进行图像预处理
|
|
138
|
+
|
|
139
|
+
### 3. 批量处理中断?
|
|
140
|
+
已处理的结果会自动保存,可以重新运行继续处理。
|
|
141
|
+
|
|
142
|
+
## 技术栈
|
|
143
|
+
|
|
144
|
+
- **OCR引擎**: [RapidOCR](https://github.com/RapidAI/RapidOCR) (基于PaddleOCR优化)
|
|
145
|
+
- **推理框架**: [ONNX Runtime](https://onnxruntime.ai/)
|
|
146
|
+
- **模型**: PP-OCRv3 轻量级模型 (12.3MB)
|
|
147
|
+
- **图像处理**: OpenCV + Pillow
|
|
148
|
+
- **CLI框架**: Click + Rich
|
|
149
|
+
|
|
150
|
+
## 详细目录结构
|
|
151
|
+
|
|
152
|
+
```
|
|
153
|
+
ocr/
|
|
154
|
+
├── src/
|
|
155
|
+
│ ├── __init__.py # 模块初始化
|
|
156
|
+
│ ├── ocr_engine.py # OCR引擎核心
|
|
157
|
+
│ ├── image_processor.py # 图像预处理
|
|
158
|
+
│ ├── batch_processor.py # 批量处理
|
|
159
|
+
│ └── output_formatter.py # 输出格式化
|
|
160
|
+
├── config/
|
|
161
|
+
│ └── config.yaml # 配置文件
|
|
162
|
+
├── models/ # 模型文件(自动下载)
|
|
163
|
+
├── output/ # 默认输出目录
|
|
164
|
+
├── main.py # CLI入口
|
|
165
|
+
├── examples.py # 使用示例
|
|
166
|
+
├── test_install.py # 安装测试
|
|
167
|
+
├── requirements.txt # 依赖列表
|
|
168
|
+
├── QUICKSTART.md # 快速开始指南
|
|
169
|
+
└── README.md # 项目文档
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
## 开源协议
|
|
173
|
+
|
|
174
|
+
MIT License
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## 致谢
|
|
179
|
+
|
|
180
|
+
- [RapidOCR](https://github.com/RapidAI/RapidOCR) - 高性能OCR引擎
|
|
181
|
+
- [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR) - 飞桨OCR工具
|
|
182
|
+
- [ONNX Runtime](https://onnxruntime.ai/) - 跨平台推理框架
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# OCR配置文件
|
|
2
|
+
|
|
3
|
+
# 模型设置
|
|
4
|
+
model:
|
|
5
|
+
# 模型类型: ch(中文), en(英文)
|
|
6
|
+
lang: ch
|
|
7
|
+
# 是否使用GPU(需要CUDA)
|
|
8
|
+
use_gpu: false
|
|
9
|
+
# 模型下载路径
|
|
10
|
+
download_path: ./models
|
|
11
|
+
|
|
12
|
+
# 图像处理设置
|
|
13
|
+
image:
|
|
14
|
+
# 最大图片尺寸(长边)
|
|
15
|
+
max_size: 2048
|
|
16
|
+
# 是否自动旋转
|
|
17
|
+
auto_rotate: true
|
|
18
|
+
# 是否增强对比度
|
|
19
|
+
enhance_contrast: false
|
|
20
|
+
|
|
21
|
+
# 输出设置
|
|
22
|
+
output:
|
|
23
|
+
# 默认输出目录
|
|
24
|
+
default_dir: ./output
|
|
25
|
+
# 默认输出格式: txt/json/md
|
|
26
|
+
default_format: txt
|
|
27
|
+
# 是否包含坐标信息
|
|
28
|
+
include_coords: false
|
|
29
|
+
# JSON缩进
|
|
30
|
+
json_indent: 2
|
|
31
|
+
|
|
32
|
+
# 批量处理设置
|
|
33
|
+
batch:
|
|
34
|
+
# 并发线程数
|
|
35
|
+
threads: 4
|
|
36
|
+
# 失败重试次数
|
|
37
|
+
retry: 2
|
|
38
|
+
# 是否跳过已存在
|
|
39
|
+
skip_exists: false
|
|
40
|
+
|
|
41
|
+
# 日志设置
|
|
42
|
+
logging:
|
|
43
|
+
# 日志级别: DEBUG/INFO/WARNING/ERROR
|
|
44
|
+
level: INFO
|
|
45
|
+
# 是否输出到文件
|
|
46
|
+
to_file: true
|
|
47
|
+
# 日志文件路径
|
|
48
|
+
file_path: ./logs/ocr.log
|