paddleocr-skills 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +220 -220
- package/bin/paddleocr-skills.js +33 -20
- package/lib/copy.js +39 -39
- package/lib/installer.js +76 -70
- package/lib/prompts.js +67 -67
- package/lib/python.js +75 -75
- package/lib/verify.js +121 -121
- package/package.json +42 -42
- package/templates/.env.example +12 -12
- package/templates/{paddleocr-vl/references/paddleocr-vl → paddleocr-vl-1.5/references/paddleocr-vl-1.5}/layout_schema.md +64 -64
- package/templates/{paddleocr-vl/references/paddleocr-vl → paddleocr-vl-1.5/references/paddleocr-vl-1.5}/output_format.md +154 -154
- package/templates/{paddleocr-vl/references/paddleocr-vl → paddleocr-vl-1.5/references/paddleocr-vl-1.5}/vl_model_spec.md +157 -157
- package/templates/{paddleocr-vl/scripts/paddleocr-vl → paddleocr-vl-1.5/scripts/paddleocr-vl-1.5}/_lib.py +780 -780
- package/templates/{paddleocr-vl/scripts/paddleocr-vl → paddleocr-vl-1.5/scripts/paddleocr-vl-1.5}/configure.py +270 -270
- package/templates/{paddleocr-vl/scripts/paddleocr-vl → paddleocr-vl-1.5/scripts/paddleocr-vl-1.5}/optimize_file.py +226 -226
- package/templates/{paddleocr-vl/scripts/paddleocr-vl → paddleocr-vl-1.5/scripts/paddleocr-vl-1.5}/requirements-optimize.txt +8 -8
- package/templates/{paddleocr-vl/scripts/paddleocr-vl → paddleocr-vl-1.5/scripts/paddleocr-vl-1.5}/requirements.txt +7 -7
- package/templates/{paddleocr-vl/scripts/paddleocr-vl → paddleocr-vl-1.5/scripts/paddleocr-vl-1.5}/smoke_test.py +199 -199
- package/templates/{paddleocr-vl/scripts/paddleocr-vl → paddleocr-vl-1.5/scripts/paddleocr-vl-1.5}/vl_caller.py +232 -232
- package/templates/{paddleocr-vl/skills/paddleocr-vl → paddleocr-vl-1.5/skills/paddleocr-vl-1.5}/SKILL.md +481 -481
- package/templates/ppocrv5/references/ppocrv5/agent_policy.md +258 -258
- package/templates/ppocrv5/references/ppocrv5/normalized_schema.md +257 -257
- package/templates/ppocrv5/references/ppocrv5/provider_api.md +140 -140
- package/templates/ppocrv5/scripts/ppocrv5/_lib.py +635 -635
- package/templates/ppocrv5/scripts/ppocrv5/configure.py +346 -346
- package/templates/ppocrv5/scripts/ppocrv5/ocr_caller.py +684 -684
- package/templates/ppocrv5/scripts/ppocrv5/requirements.txt +4 -4
- package/templates/ppocrv5/scripts/ppocrv5/smoke_test.py +139 -139
- package/templates/ppocrv5/skills/ppocrv5/SKILL.md +272 -272
package/lib/verify.js
CHANGED
|
@@ -1,121 +1,121 @@
|
|
|
1
|
-
const { spawn } = require('child_process');
|
|
2
|
-
const path = require('path');
|
|
3
|
-
const chalk = require('chalk');
|
|
4
|
-
const ora = require('ora');
|
|
5
|
-
const { promptForCredentials } = require('./prompts');
|
|
6
|
-
|
|
7
|
-
/**
|
|
8
|
-
* Run configuration wizard for selected skills
|
|
9
|
-
*/
|
|
10
|
-
async function runConfigurationWizard(skills, targetDir) {
|
|
11
|
-
for (const skill of skills) {
|
|
12
|
-
console.log(chalk.bold(`\nConfiguring ${skill}...`));
|
|
13
|
-
|
|
14
|
-
// Prompt for credentials
|
|
15
|
-
const { apiUrl, token } = await promptForCredentials(skill);
|
|
16
|
-
|
|
17
|
-
// Run configure.py script
|
|
18
|
-
const configScript = path.join(targetDir, 'scripts', skill, 'configure.py');
|
|
19
|
-
const spinner = ora('Saving configuration...').start();
|
|
20
|
-
|
|
21
|
-
try {
|
|
22
|
-
await runConfigureScript(configScript, apiUrl, token);
|
|
23
|
-
spinner.succeed(`${skill} configured successfully`);
|
|
24
|
-
} catch (error) {
|
|
25
|
-
spinner.fail(`Failed to configure ${skill}`);
|
|
26
|
-
throw error;
|
|
27
|
-
}
|
|
28
|
-
}
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
/**
|
|
32
|
-
* Run configure.py script with credentials
|
|
33
|
-
*/
|
|
34
|
-
function runConfigureScript(scriptPath, apiUrl, token) {
|
|
35
|
-
return new Promise((resolve, reject) => {
|
|
36
|
-
const python = spawn('python', [
|
|
37
|
-
scriptPath,
|
|
38
|
-
'--api-url', apiUrl,
|
|
39
|
-
'--token', token,
|
|
40
|
-
'--quiet'
|
|
41
|
-
]);
|
|
42
|
-
|
|
43
|
-
let stdout = '';
|
|
44
|
-
let stderr = '';
|
|
45
|
-
|
|
46
|
-
python.stdout.on('data', (data) => {
|
|
47
|
-
stdout += data.toString();
|
|
48
|
-
});
|
|
49
|
-
|
|
50
|
-
python.stderr.on('data', (data) => {
|
|
51
|
-
stderr += data.toString();
|
|
52
|
-
});
|
|
53
|
-
|
|
54
|
-
python.on('close', (code) => {
|
|
55
|
-
if (code === 0) {
|
|
56
|
-
resolve();
|
|
57
|
-
} else {
|
|
58
|
-
reject(new Error(stderr || stdout || `Configuration failed with exit code ${code}`));
|
|
59
|
-
}
|
|
60
|
-
});
|
|
61
|
-
|
|
62
|
-
python.on('error', (error) => {
|
|
63
|
-
reject(error);
|
|
64
|
-
});
|
|
65
|
-
});
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
/**
|
|
69
|
-
* Run verification tests for selected skills
|
|
70
|
-
*/
|
|
71
|
-
async function runVerification(skills, targetDir) {
|
|
72
|
-
for (const skill of skills) {
|
|
73
|
-
const smokeTestScript = path.join(targetDir, 'scripts', skill, 'smoke_test.py');
|
|
74
|
-
const spinner = ora(`Testing ${skill}...`).start();
|
|
75
|
-
|
|
76
|
-
try {
|
|
77
|
-
await runSmokeTest(smokeTestScript);
|
|
78
|
-
spinner.succeed(`${skill} verification passed`);
|
|
79
|
-
} catch (error) {
|
|
80
|
-
spinner.warn(`${skill} verification skipped (API test requires credentials)`);
|
|
81
|
-
console.log(chalk.gray(` You can test manually: python ${smokeTestScript}`));
|
|
82
|
-
}
|
|
83
|
-
}
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
/**
|
|
87
|
-
* Run smoke_test.py script
|
|
88
|
-
*/
|
|
89
|
-
function runSmokeTest(scriptPath) {
|
|
90
|
-
return new Promise((resolve, reject) => {
|
|
91
|
-
const python = spawn('python', [scriptPath]);
|
|
92
|
-
|
|
93
|
-
let stdout = '';
|
|
94
|
-
let stderr = '';
|
|
95
|
-
|
|
96
|
-
python.stdout.on('data', (data) => {
|
|
97
|
-
stdout += data.toString();
|
|
98
|
-
});
|
|
99
|
-
|
|
100
|
-
python.stderr.on('data', (data) => {
|
|
101
|
-
stderr += data.toString();
|
|
102
|
-
});
|
|
103
|
-
|
|
104
|
-
python.on('close', (code) => {
|
|
105
|
-
if (code === 0) {
|
|
106
|
-
resolve();
|
|
107
|
-
} else {
|
|
108
|
-
reject(new Error(stderr || stdout || `Smoke test failed with exit code ${code}`));
|
|
109
|
-
}
|
|
110
|
-
});
|
|
111
|
-
|
|
112
|
-
python.on('error', (error) => {
|
|
113
|
-
reject(error);
|
|
114
|
-
});
|
|
115
|
-
});
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
module.exports = {
|
|
119
|
-
runConfigurationWizard,
|
|
120
|
-
runVerification
|
|
121
|
-
};
|
|
1
|
+
const { spawn } = require('child_process');
|
|
2
|
+
const path = require('path');
|
|
3
|
+
const chalk = require('chalk');
|
|
4
|
+
const ora = require('ora');
|
|
5
|
+
const { promptForCredentials } = require('./prompts');
|
|
6
|
+
|
|
7
|
+
/**
 * Run the interactive configuration wizard for each selected skill.
 *
 * Skills are processed one at a time, in order: the user is prompted for
 * credentials, then that skill's `configure.py`
 * (`<targetDir>/scripts/<skill>/configure.py`) is run with them.
 *
 * @param {string[]} skills - Skill names; each is also used as the name of the
 *   skill's sub-directory under `scripts`.
 * @param {string} targetDir - Directory that contains the `scripts` directory.
 * @returns {Promise<void>} Resolves once every skill has been configured.
 * @throws {Error} If the prompt yields no usable credentials, or if running
 *   `configure.py` fails (that error is rethrown unchanged after the spinner
 *   is marked as failed). Skills after the failing one are not processed.
 */
async function runConfigurationWizard(skills, targetDir) {
  for (const skill of skills) {
    console.log(chalk.bold(`\nConfiguring ${skill}...`));

    // Prompt for credentials. `|| {}` keeps a missing result from surfacing as
    // an opaque destructuring TypeError.
    // NOTE(review): whether promptForCredentials can actually return nothing
    // (e.g. when the user cancels) is not visible from here — confirm.
    const answers = (await promptForCredentials(skill)) || {};
    const { apiUrl, token } = answers;

    // Stop before anything is written: both values are forwarded verbatim as
    // command-line arguments to configure.py, so a non-string value here could
    // only produce a broken configuration or a confusing failure.
    if (typeof apiUrl !== 'string' || typeof token !== 'string') {
      throw new Error(`No credentials were provided for ${skill}; configuration was not saved`);
    }

    // Run configure.py script
    const configScript = path.join(targetDir, 'scripts', skill, 'configure.py');
    const spinner = ora('Saving configuration...').start();

    try {
      await runConfigureScript(configScript, apiUrl, token);
      spinner.succeed(`${skill} configured successfully`);
    } catch (error) {
      // Mark the spinner as failed, then let the caller decide how to report.
      spinner.fail(`Failed to configure ${skill}`);
      throw error;
    }
  }
}
|
|
30
|
+
|
|
31
|
+
/**
 * Run a skill's `configure.py` with the given credentials.
 *
 * The script is started as
 * `<pythonCommand> <scriptPath> --api-url <apiUrl> --token <token> --quiet`
 * and its stdout/stderr are captured so they can be reported on failure.
 *
 * NOTE(review): the token is passed as a command-line argument, which other
 * local processes may be able to observe. Moving it to stdin or the
 * environment would need matching support in configure.py — confirm.
 *
 * @param {string} scriptPath - Path to the `configure.py` script.
 * @param {string} apiUrl - Value passed after `--api-url`.
 * @param {string} token - Value passed after `--token`.
 * @param {string} [pythonCommand='python'] - Interpreter executable to launch.
 * @returns {Promise<void>} Resolves when the script exits with code 0.
 *   Rejects with an `Error` whose message is the trimmed stderr, else the
 *   trimmed stdout, else a generic message naming the exit code or signal.
 *   Rejects with the spawn error if the interpreter cannot be started.
 */
function runConfigureScript(scriptPath, apiUrl, token, pythonCommand = 'python') {
  return new Promise((resolve, reject) => {
    const child = spawn(pythonCommand, [
      scriptPath,
      '--api-url', apiUrl,
      '--token', token,
      '--quiet',
    ]);

    let stdout = '';
    let stderr = '';

    child.stdout.on('data', (chunk) => {
      stdout += chunk.toString();
    });

    child.stderr.on('data', (chunk) => {
      stderr += chunk.toString();
    });

    // Emitted when the process could not be spawned (e.g. interpreter missing).
    child.on('error', reject);

    child.on('close', (code, signal) => {
      if (code === 0) {
        resolve();
        return;
      }

      // `code` is null when the process was terminated by a signal.
      const outcome = code === null ? `signal ${signal}` : `exit code ${code}`;

      // Trim so that whitespace-only output does not hide the more useful
      // fallback, and so the message carries no trailing newline.
      const message = stderr.trim() || stdout.trim() || `Configuration failed with ${outcome}`;
      reject(new Error(message));
    });
  });
}
|
|
67
|
+
|
|
68
|
+
/**
 * Run the smoke test of each selected skill and report the outcome.
 *
 * Verification is best-effort: a failing smoke test is reported as a warning,
 * together with the reason and the command to re-run it manually, and the
 * remaining skills are still tested. This function does not reject because of
 * a failing smoke test.
 *
 * @param {string[]} skills - Skill names; each is also used as the name of the
 *   skill's sub-directory under `scripts`.
 * @param {string} targetDir - Directory that contains the `scripts` directory.
 * @returns {Promise<void>} Resolves after every skill has been attempted.
 */
async function runVerification(skills, targetDir) {
  for (const skill of skills) {
    const smokeTestScript = path.join(targetDir, 'scripts', skill, 'smoke_test.py');
    const spinner = ora(`Testing ${skill}...`).start();

    try {
      await runSmokeTest(smokeTestScript);
      spinner.succeed(`${skill} verification passed`);
    } catch (error) {
      spinner.warn(`${skill} verification skipped (API test requires credentials)`);

      // Surface the real cause instead of discarding it: the failure may be
      // unrelated to credentials (missing interpreter, missing script, ...).
      // Only the last non-empty line is shown, since multi-line output such as
      // a Python traceback typically ends with the actual error.
      const detailLines = String((error && error.message) || '')
        .split(/\r?\n/)
        .map((line) => line.trim())
        .filter(Boolean);
      const reason = detailLines[detailLines.length - 1];
      if (reason) {
        console.log(chalk.gray(` Reason: ${reason}`));
      }

      console.log(chalk.gray(` You can test manually: python ${smokeTestScript}`));
    }
  }
}
|
|
85
|
+
|
|
86
|
+
/**
 * Run a skill's `smoke_test.py`.
 *
 * The script is started as `<pythonCommand> <scriptPath>` and its
 * stdout/stderr are captured so they can be reported on failure.
 *
 * @param {string} scriptPath - Path to the `smoke_test.py` script.
 * @param {string} [pythonCommand='python'] - Interpreter executable to launch.
 * @returns {Promise<void>} Resolves when the script exits with code 0.
 *   Rejects with an `Error` whose message is the trimmed stderr, else the
 *   trimmed stdout, else a generic message naming the exit code or signal.
 *   Rejects with the spawn error if the interpreter cannot be started.
 */
function runSmokeTest(scriptPath, pythonCommand = 'python') {
  return new Promise((resolve, reject) => {
    const child = spawn(pythonCommand, [scriptPath]);

    let stdout = '';
    let stderr = '';

    child.stdout.on('data', (chunk) => {
      stdout += chunk.toString();
    });

    child.stderr.on('data', (chunk) => {
      stderr += chunk.toString();
    });

    // Emitted when the process could not be spawned (e.g. interpreter missing).
    child.on('error', reject);

    child.on('close', (code, signal) => {
      if (code === 0) {
        resolve();
        return;
      }

      // `code` is null when the process was terminated by a signal.
      const outcome = code === null ? `signal ${signal}` : `exit code ${code}`;

      // Trim so that whitespace-only output does not hide the more useful
      // fallback, and so the message carries no trailing newline.
      const message = stderr.trim() || stdout.trim() || `Smoke test failed with ${outcome}`;
      reject(new Error(message));
    });
  });
}
|
|
117
|
+
|
|
118
|
+
module.exports = {
|
|
119
|
+
runConfigurationWizard,
|
|
120
|
+
runVerification
|
|
121
|
+
};
|
package/package.json
CHANGED
|
@@ -1,42 +1,42 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "paddleocr-skills",
|
|
3
|
-
"version": "1.0.0",
|
|
4
|
-
"description": "PaddleOCR Skills - Install PP-OCRv5 and PaddleOCR-VL for Claude Code",
|
|
5
|
-
"main": "lib/installer.js",
|
|
6
|
-
"bin": {
|
|
7
|
-
"paddleocr-skills": "bin/paddleocr-skills.js"
|
|
8
|
-
},
|
|
9
|
-
"keywords": [
|
|
10
|
-
"paddle-ocr",
|
|
11
|
-
"ocr",
|
|
12
|
-
"claude-code",
|
|
13
|
-
"skill",
|
|
14
|
-
"ai-studio",
|
|
15
|
-
"ppocrv5",
|
|
16
|
-
"paddleocr-vl"
|
|
17
|
-
],
|
|
18
|
-
"author": "Aidenwu0209 <wujinhe0209@aliyun.com>",
|
|
19
|
-
"license": "MIT",
|
|
20
|
-
"engines": {
|
|
21
|
-
"node": ">=14.0.0"
|
|
22
|
-
},
|
|
23
|
-
"dependencies": {
|
|
24
|
-
"prompts": "^2.4.2",
|
|
25
|
-
"chalk": "^4.1.2",
|
|
26
|
-
"fs-extra": "^11.2.0",
|
|
27
|
-
"ora": "^5.4.1"
|
|
28
|
-
},
|
|
29
|
-
"repository": {
|
|
30
|
-
"type": "git",
|
|
31
|
-
"url": "https://github.com/Aidenwu0209/
|
|
32
|
-
},
|
|
33
|
-
"homepage": "https://github.com/Aidenwu0209/
|
|
34
|
-
"bugs": {
|
|
35
|
-
"url": "https://github.com/Aidenwu0209/
|
|
36
|
-
},
|
|
37
|
-
"files": [
|
|
38
|
-
"bin",
|
|
39
|
-
"lib",
|
|
40
|
-
"templates"
|
|
41
|
-
]
|
|
42
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"name": "paddleocr-skills",
|
|
3
|
+
"version": "1.1.0",
|
|
4
|
+
"description": "PaddleOCR Skills - Install PP-OCRv5 and PaddleOCR-VL 1.5 for Claude Code",
|
|
5
|
+
"main": "lib/installer.js",
|
|
6
|
+
"bin": {
|
|
7
|
+
"paddleocr-skills": "bin/paddleocr-skills.js"
|
|
8
|
+
},
|
|
9
|
+
"keywords": [
|
|
10
|
+
"paddle-ocr",
|
|
11
|
+
"ocr",
|
|
12
|
+
"claude-code",
|
|
13
|
+
"skill",
|
|
14
|
+
"ai-studio",
|
|
15
|
+
"ppocrv5",
|
|
16
|
+
"paddleocr-vl-1.5"
|
|
17
|
+
],
|
|
18
|
+
"author": "Aidenwu0209 <wujinhe0209@aliyun.com>",
|
|
19
|
+
"license": "MIT",
|
|
20
|
+
"engines": {
|
|
21
|
+
"node": ">=14.0.0"
|
|
22
|
+
},
|
|
23
|
+
"dependencies": {
|
|
24
|
+
"prompts": "^2.4.2",
|
|
25
|
+
"chalk": "^4.1.2",
|
|
26
|
+
"fs-extra": "^11.2.0",
|
|
27
|
+
"ora": "^5.4.1"
|
|
28
|
+
},
|
|
29
|
+
"repository": {
|
|
30
|
+
"type": "git",
|
|
31
|
+
"url": "https://github.com/Aidenwu0209/PaddleOCR-Skills.git"
|
|
32
|
+
},
|
|
33
|
+
"homepage": "https://github.com/Aidenwu0209/PaddleOCR-Skills#readme",
|
|
34
|
+
"bugs": {
|
|
35
|
+
"url": "https://github.com/Aidenwu0209/PaddleOCR-Skills/issues"
|
|
36
|
+
},
|
|
37
|
+
"files": [
|
|
38
|
+
"bin",
|
|
39
|
+
"lib",
|
|
40
|
+
"templates"
|
|
41
|
+
]
|
|
42
|
+
}
|
package/templates/.env.example
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
# PP-OCRv5 Configuration
|
|
2
|
-
# Get your API credentials at: https://aistudio.baidu.com/paddleocr/task
|
|
3
|
-
API_URL=
|
|
4
|
-
TOKEN=
|
|
5
|
-
|
|
6
|
-
# PaddleOCR-VL Configuration
|
|
7
|
-
VL_API_URL=
|
|
8
|
-
VL_TOKEN=
|
|
9
|
-
|
|
10
|
-
# Optional: Maximum file size for local files (in MB)
|
|
11
|
-
# Set to 0 for unlimited size
|
|
12
|
-
# VL_MAX_FILE_SIZE_MB=20
|
|
1
|
+
# PP-OCRv5 Configuration
|
|
2
|
+
# Get your API credentials at: https://aistudio.baidu.com/paddleocr/task
|
|
3
|
+
API_URL=
|
|
4
|
+
TOKEN=
|
|
5
|
+
|
|
6
|
+
# PaddleOCR-VL 1.5 Configuration
|
|
7
|
+
VL_API_URL=
|
|
8
|
+
VL_TOKEN=
|
|
9
|
+
|
|
10
|
+
# Optional: Maximum file size for local files (in MB)
|
|
11
|
+
# Set to 0 for unlimited size
|
|
12
|
+
# VL_MAX_FILE_SIZE_MB=20
|
|
@@ -1,64 +1,64 @@
|
|
|
1
|
-
# PaddleOCR-VL Layout Schema
|
|
2
|
-
|
|
3
|
-
## Layout Detection Overview
|
|
4
|
-
|
|
5
|
-
PaddleOCR-VL uses PP-DocLayoutV2 for automatic layout analysis, detecting semantic regions and determining reading order.
|
|
6
|
-
|
|
7
|
-
## Region Types
|
|
8
|
-
|
|
9
|
-
### Text Regions
|
|
10
|
-
- **paragraph**: Regular text paragraphs
|
|
11
|
-
- **title**: Headings and titles
|
|
12
|
-
- **caption**: Image/table captions
|
|
13
|
-
- **footnote**: Footnotes and references
|
|
14
|
-
|
|
15
|
-
### Non-Text Regions
|
|
16
|
-
- **table**: Tabular data
|
|
17
|
-
- **figure**: Images, charts, diagrams
|
|
18
|
-
- **formula**: Mathematical formulas
|
|
19
|
-
- **header**: Page headers
|
|
20
|
-
- **footer**: Page footers
|
|
21
|
-
|
|
22
|
-
## Layout Structure
|
|
23
|
-
|
|
24
|
-
```json
|
|
25
|
-
{
|
|
26
|
-
"layout": {
|
|
27
|
-
"regions": [
|
|
28
|
-
{
|
|
29
|
-
"id": 0,
|
|
30
|
-
"type": "title",
|
|
31
|
-
"bbox": [x1, y1, x2, y2],
|
|
32
|
-
"confidence": 0.95
|
|
33
|
-
},
|
|
34
|
-
{
|
|
35
|
-
"id": 1,
|
|
36
|
-
"type": "paragraph",
|
|
37
|
-
"bbox": [x1, y1, x2, y2],
|
|
38
|
-
"confidence": 0.92
|
|
39
|
-
}
|
|
40
|
-
],
|
|
41
|
-
"reading_order": [0, 1, 2, 3],
|
|
42
|
-
"page_number": 1
|
|
43
|
-
}
|
|
44
|
-
}
|
|
45
|
-
```
|
|
46
|
-
|
|
47
|
-
## Reading Order Algorithm
|
|
48
|
-
|
|
49
|
-
The model automatically determines the correct reading order based on:
|
|
50
|
-
- Spatial layout (top-to-bottom, left-to-right)
|
|
51
|
-
- Document structure (titles before content)
|
|
52
|
-
- Column detection (multi-column layouts)
|
|
53
|
-
- Semantic relationships
|
|
54
|
-
|
|
55
|
-
## Bounding Box Format
|
|
56
|
-
|
|
57
|
-
All bounding boxes use the format: `[x1, y1, x2, y2]`
|
|
58
|
-
- (x1, y1): Top-left corner
|
|
59
|
-
- (x2, y2): Bottom-right corner
|
|
60
|
-
- Coordinates are absolute pixel positions
|
|
61
|
-
|
|
62
|
-
---
|
|
63
|
-
|
|
64
|
-
*This is a placeholder document. Full layout schema specifications will be added when integration is complete.*
|
|
1
|
+
# PaddleOCR-VL 1.5 Layout Schema
|
|
2
|
+
|
|
3
|
+
## Layout Detection Overview
|
|
4
|
+
|
|
5
|
+
PaddleOCR-VL 1.5 uses PP-DocLayoutV2 for automatic layout analysis, detecting semantic regions and determining reading order.
|
|
6
|
+
|
|
7
|
+
## Region Types
|
|
8
|
+
|
|
9
|
+
### Text Regions
|
|
10
|
+
- **paragraph**: Regular text paragraphs
|
|
11
|
+
- **title**: Headings and titles
|
|
12
|
+
- **caption**: Image/table captions
|
|
13
|
+
- **footnote**: Footnotes and references
|
|
14
|
+
|
|
15
|
+
### Non-Text Regions
|
|
16
|
+
- **table**: Tabular data
|
|
17
|
+
- **figure**: Images, charts, diagrams
|
|
18
|
+
- **formula**: Mathematical formulas
|
|
19
|
+
- **header**: Page headers
|
|
20
|
+
- **footer**: Page footers
|
|
21
|
+
|
|
22
|
+
## Layout Structure
|
|
23
|
+
|
|
24
|
+
```json
|
|
25
|
+
{
|
|
26
|
+
"layout": {
|
|
27
|
+
"regions": [
|
|
28
|
+
{
|
|
29
|
+
"id": 0,
|
|
30
|
+
"type": "title",
|
|
31
|
+
"bbox": [x1, y1, x2, y2],
|
|
32
|
+
"confidence": 0.95
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
"id": 1,
|
|
36
|
+
"type": "paragraph",
|
|
37
|
+
"bbox": [x1, y1, x2, y2],
|
|
38
|
+
"confidence": 0.92
|
|
39
|
+
}
|
|
40
|
+
],
|
|
41
|
+
"reading_order": [0, 1, 2, 3],
|
|
42
|
+
"page_number": 1
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Reading Order Algorithm
|
|
48
|
+
|
|
49
|
+
The model automatically determines the correct reading order based on:
|
|
50
|
+
- Spatial layout (top-to-bottom, left-to-right)
|
|
51
|
+
- Document structure (titles before content)
|
|
52
|
+
- Column detection (multi-column layouts)
|
|
53
|
+
- Semantic relationships
|
|
54
|
+
|
|
55
|
+
## Bounding Box Format
|
|
56
|
+
|
|
57
|
+
All bounding boxes use the format: `[x1, y1, x2, y2]`
|
|
58
|
+
- (x1, y1): Top-left corner
|
|
59
|
+
- (x2, y2): Bottom-right corner
|
|
60
|
+
- Coordinates are absolute pixel positions
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
|
|
64
|
+
*This is a placeholder document. Full layout schema specifications will be added when integration is complete.*
|