markitdown-paddleocr 0.2.0__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {markitdown_paddleocr-0.2.0 → markitdown_paddleocr-0.2.3}/PKG-INFO +58 -19
- {markitdown_paddleocr-0.2.0 → markitdown_paddleocr-0.2.3}/README.md +57 -18
- markitdown_paddleocr-0.2.3/src/markitdown_paddleocr/__about__.py +1 -0
- {markitdown_paddleocr-0.2.0 → markitdown_paddleocr-0.2.3}/src/markitdown_paddleocr/_config.py +3 -2
- {markitdown_paddleocr-0.2.0 → markitdown_paddleocr-0.2.3}/src/markitdown_paddleocr/_converter.py +9 -5
- {markitdown_paddleocr-0.2.0 → markitdown_paddleocr-0.2.3}/src/markitdown_paddleocr/_dual_converter.py +11 -4
- {markitdown_paddleocr-0.2.0 → markitdown_paddleocr-0.2.3}/src/markitdown_paddleocr/_plugin.py +1 -1
- markitdown_paddleocr-0.2.0/src/markitdown_paddleocr/__about__.py +0 -1
- {markitdown_paddleocr-0.2.0 → markitdown_paddleocr-0.2.3}/.gitignore +0 -0
- {markitdown_paddleocr-0.2.0 → markitdown_paddleocr-0.2.3}/pyproject.toml +0 -0
- {markitdown_paddleocr-0.2.0 → markitdown_paddleocr-0.2.3}/src/markitdown_paddleocr/__init__.py +0 -0
- {markitdown_paddleocr-0.2.0 → markitdown_paddleocr-0.2.3}/src/markitdown_paddleocr/_paddle_client.py +0 -0
|
{markitdown_paddleocr-0.2.0 → markitdown_paddleocr-0.2.3}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: markitdown-paddleocr
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: Intelligent PDF/Image to Markdown converter using PaddleOCR cloud API
|
|
5
5
|
Project-URL: Documentation, https://github.com/microsoft/markitdown#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/microsoft/markitdown/issues
|
|
@@ -51,7 +51,7 @@ pip install markitdown-paddleocr
|
|
|
51
51
|
export BAIDU_PADDLE_TOKEN="your-paddle-token"
|
|
52
52
|
|
|
53
53
|
# 可选
|
|
54
|
-
export PADDLE_OCR_MODEL="PaddleOCR-VL-1.
|
|
54
|
+
export PADDLE_OCR_MODEL="PaddleOCR-VL-1.6" # 模型名称
|
|
55
55
|
```
|
|
56
56
|
|
|
57
57
|
### 配置优先级
|
|
@@ -129,7 +129,7 @@ print(markdown)
|
|
|
129
129
|
| 参数 | 类型 | 默认值 | 说明 |
|
|
130
130
|
|------|------|--------|------|
|
|
131
131
|
| `token` | str | 环境变量 `BAIDU_PADDLE_TOKEN` | PaddleOCR Token |
|
|
132
|
-
| `model` | str | `PaddleOCR-VL-1.
|
|
132
|
+
| `model` | str | `PaddleOCR-VL-1.6` | OCR 模型名称 |
|
|
133
133
|
| `poll_interval` | float | 2.0 | 轮询间隔(秒) |
|
|
134
134
|
| `poll_timeout` | float | 300.0 | 轮询超时(秒) |
|
|
135
135
|
| `force_ai` | bool | False | 强制所有页面使用 OCR |
|
|
@@ -142,7 +142,7 @@ print(markdown)
|
|
|
142
142
|
| 变量 | 说明 | 示例 |
|
|
143
143
|
|------|------|------|
|
|
144
144
|
| `BAIDU_PADDLE_TOKEN` | Token(必需) | `7963b85a...` |
|
|
145
|
-
| `PADDLE_OCR_MODEL` | 模型名称 | `PaddleOCR-VL-1.
|
|
145
|
+
| `PADDLE_OCR_MODEL` | 模型名称 | `PaddleOCR-VL-1.6` |
|
|
146
146
|
|
|
147
147
|
## 工作原理
|
|
148
148
|
|
|
@@ -182,39 +182,78 @@ PaddleOcrConverter.convert()
|
|
|
182
182
|
|
|
183
183
|
### 前置条件
|
|
184
184
|
|
|
185
|
-
|
|
185
|
+
1. 安装构建工具:
|
|
186
186
|
|
|
187
187
|
```bash
|
|
188
|
-
pip install build twine
|
|
188
|
+
pip install build twine hatch
|
|
189
189
|
```
|
|
190
190
|
|
|
191
|
-
|
|
191
|
+
2. 配置 PyPI API Token(Windows 用户环境变量):
|
|
192
192
|
|
|
193
|
+
```powershell
|
|
194
|
+
# PowerShell 设置用户环境变量
|
|
195
|
+
[System.Environment]::SetEnvironmentVariable('PYPI_API_TOKEN', 'pypi-...', 'User')
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
或在 Bash/Zsh 中:
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
export PYPI_API_TOKEN="pypi-..."
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### 快速发布(推荐)
|
|
205
|
+
|
|
206
|
+
项目根目录提供了上传脚本,可一键发布两个插件:
|
|
207
|
+
|
|
208
|
+
**Bash / Git Bash:**
|
|
193
209
|
```bash
|
|
194
|
-
|
|
210
|
+
# 构建两个插件
|
|
211
|
+
cd packages/markitdown-glmocr && hatch build
|
|
212
|
+
|
|
213
|
+
cd ../markitdown-paddleocr && hatch build
|
|
214
|
+
|
|
215
|
+
# 上传(自动上传所有构建的版本)
|
|
216
|
+
cd ../..
|
|
217
|
+
./scripts/pypi-upload.sh
|
|
218
|
+
|
|
219
|
+
# 或指定版本号
|
|
220
|
+
./scripts/pypi-upload.sh 0.2.0
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
**PowerShell:**
|
|
224
|
+
```powershell
|
|
225
|
+
# 构建两个插件
|
|
226
|
+
cd packages/markitdown-glmocr; hatch build
|
|
227
|
+
cd ../markitdown-paddleocr; hatch build
|
|
228
|
+
|
|
229
|
+
# 上传
|
|
230
|
+
cd ../..
|
|
231
|
+
.\scripts\pypi-upload.ps1
|
|
232
|
+
|
|
233
|
+
# 或指定版本号
|
|
234
|
+
.\scripts\pypi-upload.ps1 -Version "0.2.0"
|
|
195
235
|
```
|
|
196
236
|
|
|
197
|
-
###
|
|
237
|
+
### 手动发布
|
|
198
238
|
|
|
199
239
|
```bash
|
|
200
|
-
# 1.
|
|
240
|
+
# 1. 进入项目目录
|
|
201
241
|
cd packages/markitdown-paddleocr
|
|
202
242
|
|
|
203
|
-
# 2.
|
|
204
|
-
|
|
243
|
+
# 2. 构建
|
|
244
|
+
hatch build
|
|
205
245
|
|
|
206
|
-
# 3.
|
|
246
|
+
# 3. 检查
|
|
207
247
|
twine check dist/*
|
|
208
248
|
|
|
209
|
-
# 4.
|
|
210
|
-
twine upload
|
|
249
|
+
# 4. 上传
|
|
250
|
+
twine upload --username __token__ --password "$PYPI_API_TOKEN" --disable-progress-bar dist/*
|
|
211
251
|
```
|
|
212
252
|
|
|
213
253
|
### 发布到 TestPyPI(测试)
|
|
214
254
|
|
|
215
255
|
```bash
|
|
216
|
-
|
|
217
|
-
twine upload --repository testpypi dist/* -u __token__ -p "$PyPI_API_Token"
|
|
256
|
+
twine upload --repository testpypi --username __token__ --password "$PYPI_API_TOKEN" --disable-progress-bar dist/*
|
|
218
257
|
|
|
219
258
|
# 从 TestPyPI 安装验证
|
|
220
259
|
pip install --index-url https://test.pypi.org/simple/ markitdown-paddleocr
|
|
@@ -222,9 +261,9 @@ pip install --index-url https://test.pypi.org/simple/ markitdown-paddleocr
|
|
|
222
261
|
|
|
223
262
|
### 注意事项
|
|
224
263
|
|
|
225
|
-
- 发布前确保 `
|
|
264
|
+
- 发布前确保 `src/markitdown_paddleocr/__about__.py` 中的版本号已更新
|
|
226
265
|
- 同一版本号不能重复上传,如需修正必须 bump 版本号
|
|
227
|
-
- `
|
|
266
|
+
- `PYPI_API_TOKEN` 切勿提交到代码仓库
|
|
228
267
|
|
|
229
268
|
## 许可证
|
|
230
269
|
|
|
{markitdown_paddleocr-0.2.0 → markitdown_paddleocr-0.2.3}/README.md
RENAMED
|
@@ -25,7 +25,7 @@ pip install markitdown-paddleocr
|
|
|
25
25
|
export BAIDU_PADDLE_TOKEN="your-paddle-token"
|
|
26
26
|
|
|
27
27
|
# 可选
|
|
28
|
-
export PADDLE_OCR_MODEL="PaddleOCR-VL-1.
|
|
28
|
+
export PADDLE_OCR_MODEL="PaddleOCR-VL-1.6" # 模型名称
|
|
29
29
|
```
|
|
30
30
|
|
|
31
31
|
### 配置优先级
|
|
@@ -103,7 +103,7 @@ print(markdown)
|
|
|
103
103
|
| 参数 | 类型 | 默认值 | 说明 |
|
|
104
104
|
|------|------|--------|------|
|
|
105
105
|
| `token` | str | 环境变量 `BAIDU_PADDLE_TOKEN` | PaddleOCR Token |
|
|
106
|
-
| `model` | str | `PaddleOCR-VL-1.
|
|
106
|
+
| `model` | str | `PaddleOCR-VL-1.6` | OCR 模型名称 |
|
|
107
107
|
| `poll_interval` | float | 2.0 | 轮询间隔(秒) |
|
|
108
108
|
| `poll_timeout` | float | 300.0 | 轮询超时(秒) |
|
|
109
109
|
| `force_ai` | bool | False | 强制所有页面使用 OCR |
|
|
@@ -116,7 +116,7 @@ print(markdown)
|
|
|
116
116
|
| 变量 | 说明 | 示例 |
|
|
117
117
|
|------|------|------|
|
|
118
118
|
| `BAIDU_PADDLE_TOKEN` | Token(必需) | `7963b85a...` |
|
|
119
|
-
| `PADDLE_OCR_MODEL` | 模型名称 | `PaddleOCR-VL-1.
|
|
119
|
+
| `PADDLE_OCR_MODEL` | 模型名称 | `PaddleOCR-VL-1.6` |
|
|
120
120
|
|
|
121
121
|
## 工作原理
|
|
122
122
|
|
|
@@ -156,39 +156,78 @@ PaddleOcrConverter.convert()
|
|
|
156
156
|
|
|
157
157
|
### 前置条件
|
|
158
158
|
|
|
159
|
-
|
|
159
|
+
1. 安装构建工具:
|
|
160
160
|
|
|
161
161
|
```bash
|
|
162
|
-
pip install build twine
|
|
162
|
+
pip install build twine hatch
|
|
163
163
|
```
|
|
164
164
|
|
|
165
|
-
|
|
165
|
+
2. 配置 PyPI API Token(Windows 用户环境变量):
|
|
166
166
|
|
|
167
|
+
```powershell
|
|
168
|
+
# PowerShell 设置用户环境变量
|
|
169
|
+
[System.Environment]::SetEnvironmentVariable('PYPI_API_TOKEN', 'pypi-...', 'User')
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
或在 Bash/Zsh 中:
|
|
173
|
+
|
|
174
|
+
```bash
|
|
175
|
+
export PYPI_API_TOKEN="pypi-..."
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### 快速发布(推荐)
|
|
179
|
+
|
|
180
|
+
项目根目录提供了上传脚本,可一键发布两个插件:
|
|
181
|
+
|
|
182
|
+
**Bash / Git Bash:**
|
|
167
183
|
```bash
|
|
168
|
-
|
|
184
|
+
# 构建两个插件
|
|
185
|
+
cd packages/markitdown-glmocr && hatch build
|
|
186
|
+
|
|
187
|
+
cd ../markitdown-paddleocr && hatch build
|
|
188
|
+
|
|
189
|
+
# 上传(自动上传所有构建的版本)
|
|
190
|
+
cd ../..
|
|
191
|
+
./scripts/pypi-upload.sh
|
|
192
|
+
|
|
193
|
+
# 或指定版本号
|
|
194
|
+
./scripts/pypi-upload.sh 0.2.0
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
**PowerShell:**
|
|
198
|
+
```powershell
|
|
199
|
+
# 构建两个插件
|
|
200
|
+
cd packages/markitdown-glmocr; hatch build
|
|
201
|
+
cd ../markitdown-paddleocr; hatch build
|
|
202
|
+
|
|
203
|
+
# 上传
|
|
204
|
+
cd ../..
|
|
205
|
+
.\scripts\pypi-upload.ps1
|
|
206
|
+
|
|
207
|
+
# 或指定版本号
|
|
208
|
+
.\scripts\pypi-upload.ps1 -Version "0.2.0"
|
|
169
209
|
```
|
|
170
210
|
|
|
171
|
-
###
|
|
211
|
+
### 手动发布
|
|
172
212
|
|
|
173
213
|
```bash
|
|
174
|
-
# 1.
|
|
214
|
+
# 1. 进入项目目录
|
|
175
215
|
cd packages/markitdown-paddleocr
|
|
176
216
|
|
|
177
|
-
# 2.
|
|
178
|
-
|
|
217
|
+
# 2. 构建
|
|
218
|
+
hatch build
|
|
179
219
|
|
|
180
|
-
# 3.
|
|
220
|
+
# 3. 检查
|
|
181
221
|
twine check dist/*
|
|
182
222
|
|
|
183
|
-
# 4.
|
|
184
|
-
twine upload
|
|
223
|
+
# 4. 上传
|
|
224
|
+
twine upload --username __token__ --password "$PYPI_API_TOKEN" --disable-progress-bar dist/*
|
|
185
225
|
```
|
|
186
226
|
|
|
187
227
|
### 发布到 TestPyPI(测试)
|
|
188
228
|
|
|
189
229
|
```bash
|
|
190
|
-
|
|
191
|
-
twine upload --repository testpypi dist/* -u __token__ -p "$PyPI_API_Token"
|
|
230
|
+
twine upload --repository testpypi --username __token__ --password "$PYPI_API_TOKEN" --disable-progress-bar dist/*
|
|
192
231
|
|
|
193
232
|
# 从 TestPyPI 安装验证
|
|
194
233
|
pip install --index-url https://test.pypi.org/simple/ markitdown-paddleocr
|
|
@@ -196,9 +235,9 @@ pip install --index-url https://test.pypi.org/simple/ markitdown-paddleocr
|
|
|
196
235
|
|
|
197
236
|
### 注意事项
|
|
198
237
|
|
|
199
|
-
- 发布前确保 `
|
|
238
|
+
- 发布前确保 `src/markitdown_paddleocr/__about__.py` 中的版本号已更新
|
|
200
239
|
- 同一版本号不能重复上传,如需修正必须 bump 版本号
|
|
201
|
-
- `
|
|
240
|
+
- `PYPI_API_TOKEN` 切勿提交到代码仓库
|
|
202
241
|
|
|
203
242
|
## 许可证
|
|
204
243
|
|
|
markitdown_paddleocr-0.2.3/src/markitdown_paddleocr/__about__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.2.3"
|
{markitdown_paddleocr-0.2.0 → markitdown_paddleocr-0.2.3}/src/markitdown_paddleocr/_config.py
RENAMED
|
@@ -12,6 +12,7 @@ class ScanDetectionMode(str, Enum):
|
|
|
12
12
|
- FIRST_PAGE_HINT: 首页是扫描件则全文档使用OCR
|
|
13
13
|
- SAMPLING: 抽样前N页,多数是扫描件则全部OCR
|
|
14
14
|
"""
|
|
15
|
+
|
|
15
16
|
PAGE_BY_PAGE = "page_by_page"
|
|
16
17
|
FIRST_PAGE_HINT = "first_page_hint"
|
|
17
18
|
SAMPLING = "sampling"
|
|
@@ -31,7 +32,7 @@ class PaddleOcrConfig:
|
|
|
31
32
|
token: str = "" # Reads from BAIDU_PADDLE_TOKEN by default
|
|
32
33
|
|
|
33
34
|
# OCR model
|
|
34
|
-
model: str = "PaddleOCR-VL-1.
|
|
35
|
+
model: str = "PaddleOCR-VL-1.6"
|
|
35
36
|
|
|
36
37
|
# API endpoint
|
|
37
38
|
job_url: str = "https://paddleocr.aistudio-app.com/api/v2/ocr/jobs"
|
|
@@ -58,7 +59,7 @@ class PaddleOcrConfig:
|
|
|
58
59
|
"""Create config from environment variables with optional overrides."""
|
|
59
60
|
defaults = {
|
|
60
61
|
"token": os.environ.get("BAIDU_PADDLE_TOKEN", ""),
|
|
61
|
-
"model": os.environ.get("PADDLE_OCR_MODEL", "PaddleOCR-VL-1.
|
|
62
|
+
"model": os.environ.get("PADDLE_OCR_MODEL", "PaddleOCR-VL-1.6"),
|
|
62
63
|
}
|
|
63
64
|
defaults.update(overrides)
|
|
64
65
|
return cls(**defaults)
|
{markitdown_paddleocr-0.2.0 → markitdown_paddleocr-0.2.3}/src/markitdown_paddleocr/_converter.py
RENAMED
|
@@ -51,7 +51,7 @@ class PaddleOcrConverter(DocumentConverter):
|
|
|
51
51
|
def __init__(
|
|
52
52
|
self,
|
|
53
53
|
token: Optional[str] = None,
|
|
54
|
-
model: str = "PaddleOCR-VL-1.
|
|
54
|
+
model: str = "PaddleOCR-VL-1.6",
|
|
55
55
|
poll_interval: float = 2.0,
|
|
56
56
|
poll_timeout: float = 300.0,
|
|
57
57
|
force_ai: bool = False,
|
|
@@ -67,7 +67,7 @@ class PaddleOcrConverter(DocumentConverter):
|
|
|
67
67
|
|
|
68
68
|
Args:
|
|
69
69
|
token: Baidu PaddleOCR token (reads from BAIDU_PADDLE_TOKEN env var if not provided)
|
|
70
|
-
model: OCR model name (default: PaddleOCR-VL-1.
|
|
70
|
+
model: OCR model name (default: PaddleOCR-VL-1.6)
|
|
71
71
|
poll_interval: Seconds between status polls (default: 2.0)
|
|
72
72
|
poll_timeout: Max seconds to wait for job completion (default: 300.0)
|
|
73
73
|
force_ai: Force all pages to use OCR (default: False)
|
|
@@ -82,7 +82,7 @@ class PaddleOcrConverter(DocumentConverter):
|
|
|
82
82
|
# Build config from explicit params or provided config
|
|
83
83
|
if config:
|
|
84
84
|
self.token = token or config.token
|
|
85
|
-
self.model = model if model != "PaddleOCR-VL-1.
|
|
85
|
+
self.model = model if model != "PaddleOCR-VL-1.6" else config.model
|
|
86
86
|
self.poll_interval = (
|
|
87
87
|
poll_interval if poll_interval != 2.0 else config.poll_interval
|
|
88
88
|
)
|
|
@@ -126,7 +126,9 @@ class PaddleOcrConverter(DocumentConverter):
|
|
|
126
126
|
if scan_detection_mode is not None
|
|
127
127
|
else ScanDetectionMode.SAMPLING
|
|
128
128
|
)
|
|
129
|
-
self.scan_sample_pages =
|
|
129
|
+
self.scan_sample_pages = (
|
|
130
|
+
scan_sample_pages if scan_sample_pages is not None else 3
|
|
131
|
+
)
|
|
130
132
|
self.scan_text_threshold = (
|
|
131
133
|
scan_text_threshold if scan_text_threshold is not None else 50
|
|
132
134
|
)
|
|
@@ -357,7 +359,9 @@ class PaddleOcrConverter(DocumentConverter):
|
|
|
357
359
|
Returns:
|
|
358
360
|
Markdown text from all pages.
|
|
359
361
|
"""
|
|
360
|
-
logger.info(
|
|
362
|
+
logger.info(
|
|
363
|
+
"PaddleOcrConverter: 批量上传PDF到OCR API, 大小=%d bytes", len(pdf_bytes)
|
|
364
|
+
)
|
|
361
365
|
markdown = self._get_client().ocr(
|
|
362
366
|
file_bytes=pdf_bytes,
|
|
363
367
|
filename="document.pdf",
|
|
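{markitdown_paddleocr-0.2.0 → markitdown_paddleocr-0.2.3}/src/markitdown_paddleocr/_dual_converter.py
RENAMED
|
@@ -1,10 +1,14 @@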
|
|
|
1
1
|
"""DualOcrConverter - glmocr (primary) → paddleocr (fallback) automatic degradation."""
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
from typing import Optional
|
|
4
|
+
from typing import Any, BinaryIO, Optional
|
|
5
5
|
|
|
6
|
-
from markitdown import
|
|
7
|
-
|
|
6
|
+
from markitdown import (
|
|
7
|
+
DocumentConverter,
|
|
8
|
+
DocumentConverterResult,
|
|
9
|
+
MarkItDown,
|
|
10
|
+
StreamInfo,
|
|
11
|
+
)
|
|
8
12
|
|
|
9
13
|
logger = logging.getLogger(__name__)
|
|
10
14
|
|
|
@@ -28,7 +32,7 @@ class DualOcrConverter(DocumentConverter):
|
|
|
28
32
|
glmocr_force_ai: bool = False,
|
|
29
33
|
# paddleocr kwargs
|
|
30
34
|
paddleocr_token: Optional[str] = None,
|
|
31
|
-
paddleocr_model: str = "PaddleOCR-VL-1.
|
|
35
|
+
paddleocr_model: str = "PaddleOCR-VL-1.6",
|
|
32
36
|
paddleocr_poll_interval: float = 2.0,
|
|
33
37
|
paddleocr_poll_timeout: float = 300.0,
|
|
34
38
|
paddleocr_force_ai: bool = False,
|
|
@@ -61,6 +65,7 @@ class DualOcrConverter(DocumentConverter):
|
|
|
61
65
|
"""Lazily init both converters."""
|
|
62
66
|
try:
|
|
63
67
|
from markitdown_glmocr import GlmOcrConverter
|
|
68
|
+
|
|
64
69
|
# Filter out None values
|
|
65
70
|
kwargs = {k: v for k, v in self.glmocr_kwargs.items() if v is not None}
|
|
66
71
|
self._primary = GlmOcrConverter(**kwargs)
|
|
@@ -71,6 +76,7 @@ class DualOcrConverter(DocumentConverter):
|
|
|
71
76
|
|
|
72
77
|
try:
|
|
73
78
|
from markitdown_paddleocr import PaddleOcrConverter
|
|
79
|
+
|
|
74
80
|
kwargs = {k: v for k, v in self.paddleocr_kwargs.items() if v is not None}
|
|
75
81
|
self._fallback = PaddleOcrConverter(**kwargs)
|
|
76
82
|
logger.info("paddleocr converter initialized (fallback)")
|
|
@@ -155,6 +161,7 @@ class DualOcrConverter(DocumentConverter):
|
|
|
155
161
|
def io_bytes(data: bytes):
|
|
156
162
|
"""Create a seekable BytesIO from bytes."""
|
|
157
163
|
import io
|
|
164
|
+
|
|
158
165
|
buf = io.BytesIO(data)
|
|
159
166
|
buf.seek(0)
|
|
160
167
|
return buf
|
{markitdown_paddleocr-0.2.0 → markitdown_paddleocr-0.2.3}/src/markitdown_paddleocr/_plugin.py
RENAMED
|
@@ -28,7 +28,7 @@ def register_converters(markitdown: MarkItDown, **kwargs: Any) -> None:
|
|
|
28
28
|
try:
|
|
29
29
|
converter = PaddleOcrConverter(
|
|
30
30
|
token=kwargs.get("token"),
|
|
31
|
-
model=kwargs.get("model", "PaddleOCR-VL-1.
|
|
31
|
+
model=kwargs.get("model", "PaddleOCR-VL-1.6"),
|
|
32
32
|
poll_interval=kwargs.get("poll_interval", 2.0),
|
|
33
33
|
poll_timeout=kwargs.get("poll_timeout", 300.0),
|
|
34
34
|
force_ai=kwargs.get("force_ai", False),
|
|
markitdown_paddleocr-0.2.0/src/markitdown_paddleocr/__about__.py
REMOVED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.2.0"
|
|
File without changes
|
|
File without changes
|
{markitdown_paddleocr-0.2.0 → markitdown_paddleocr-0.2.3}/src/markitdown_paddleocr/__init__.py
RENAMED
|
File without changes
|
{markitdown_paddleocr-0.2.0 → markitdown_paddleocr-0.2.3}/src/markitdown_paddleocr/_paddle_client.py
RENAMED
|
File without changes
|