paddleocr-skills 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +220 -220
- package/bin/paddleocr-skills.js +33 -20
- package/lib/copy.js +39 -39
- package/lib/installer.js +76 -70
- package/lib/prompts.js +67 -67
- package/lib/python.js +75 -75
- package/lib/verify.js +121 -121
- package/package.json +42 -42
- package/templates/.env.example +12 -12
- package/templates/{paddleocr-vl/references/paddleocr-vl → paddleocr-vl-1.5/references/paddleocr-vl-1.5}/layout_schema.md +64 -64
- package/templates/{paddleocr-vl/references/paddleocr-vl → paddleocr-vl-1.5/references/paddleocr-vl-1.5}/output_format.md +154 -154
- package/templates/{paddleocr-vl/references/paddleocr-vl → paddleocr-vl-1.5/references/paddleocr-vl-1.5}/vl_model_spec.md +157 -157
- package/templates/{paddleocr-vl/scripts/paddleocr-vl → paddleocr-vl-1.5/scripts/paddleocr-vl-1.5}/_lib.py +780 -780
- package/templates/{paddleocr-vl/scripts/paddleocr-vl → paddleocr-vl-1.5/scripts/paddleocr-vl-1.5}/configure.py +270 -270
- package/templates/{paddleocr-vl/scripts/paddleocr-vl → paddleocr-vl-1.5/scripts/paddleocr-vl-1.5}/optimize_file.py +226 -226
- package/templates/{paddleocr-vl/scripts/paddleocr-vl → paddleocr-vl-1.5/scripts/paddleocr-vl-1.5}/requirements-optimize.txt +8 -8
- package/templates/{paddleocr-vl/scripts/paddleocr-vl → paddleocr-vl-1.5/scripts/paddleocr-vl-1.5}/requirements.txt +7 -7
- package/templates/{paddleocr-vl/scripts/paddleocr-vl → paddleocr-vl-1.5/scripts/paddleocr-vl-1.5}/smoke_test.py +199 -199
- package/templates/{paddleocr-vl/scripts/paddleocr-vl → paddleocr-vl-1.5/scripts/paddleocr-vl-1.5}/vl_caller.py +232 -232
- package/templates/{paddleocr-vl/skills/paddleocr-vl → paddleocr-vl-1.5/skills/paddleocr-vl-1.5}/SKILL.md +481 -481
- package/templates/ppocrv5/references/ppocrv5/agent_policy.md +258 -258
- package/templates/ppocrv5/references/ppocrv5/normalized_schema.md +257 -257
- package/templates/ppocrv5/references/ppocrv5/provider_api.md +140 -140
- package/templates/ppocrv5/scripts/ppocrv5/_lib.py +635 -635
- package/templates/ppocrv5/scripts/ppocrv5/configure.py +346 -346
- package/templates/ppocrv5/scripts/ppocrv5/ocr_caller.py +684 -684
- package/templates/ppocrv5/scripts/ppocrv5/requirements.txt +4 -4
- package/templates/ppocrv5/scripts/ppocrv5/smoke_test.py +139 -139
- package/templates/ppocrv5/skills/ppocrv5/SKILL.md +272 -272
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Runtime dependencies for PP-OCRv5 API Skill
|
|
2
|
-
|
|
3
|
-
httpx>=0.24.0
|
|
4
|
-
python-dotenv>=0.19.0
|
|
1
|
+
# Runtime dependencies for PP-OCRv5 API Skill
|
|
2
|
+
|
|
3
|
+
httpx>=0.24.0
|
|
4
|
+
python-dotenv>=0.19.0
|
|
@@ -1,139 +1,139 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
Smoke Test for PP-OCRv5 API Skill
|
|
4
|
-
Verifies that AISTUDIO_HOST and PADDLE_OCR_TOKEN are correctly configured
|
|
5
|
-
and that the provider API is accessible.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
import json
|
|
9
|
-
import os
|
|
10
|
-
import subprocess
|
|
11
|
-
import sys
|
|
12
|
-
from pathlib import Path
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def main():
|
|
16
|
-
print("=" * 60)
|
|
17
|
-
print("PP-OCRv5 API Skill - Smoke Test")
|
|
18
|
-
print("=" * 60)
|
|
19
|
-
|
|
20
|
-
# Check configuration (all sources)
|
|
21
|
-
print("\n[1/3] Checking configuration...")
|
|
22
|
-
|
|
23
|
-
# Add scripts dir to path for imports
|
|
24
|
-
script_dir = Path(__file__).parent
|
|
25
|
-
sys.path.insert(0, str(script_dir))
|
|
26
|
-
|
|
27
|
-
from _lib import Config
|
|
28
|
-
|
|
29
|
-
try:
|
|
30
|
-
# Try to get config from .env file
|
|
31
|
-
api_url = Config.get_api_url()
|
|
32
|
-
token = Config.get_token()
|
|
33
|
-
except ValueError as e:
|
|
34
|
-
print(f"\nConfiguration error: {e}")
|
|
35
|
-
sys.exit(1)
|
|
36
|
-
|
|
37
|
-
test_file_url = os.getenv("TEST_FILE_URL", "").strip()
|
|
38
|
-
|
|
39
|
-
if not test_file_url:
|
|
40
|
-
print("WARNING: TEST_FILE_URL is not set, using default test image")
|
|
41
|
-
# Use a default public test image (Chinese text)
|
|
42
|
-
test_file_url = "https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.7/doc/imgs/11.jpg"
|
|
43
|
-
|
|
44
|
-
print(f" API_URL: {api_url}")
|
|
45
|
-
print(f" PADDLE_OCR_TOKEN: {'*' * 8}{token[-4:] if len(token) > 4 else '****'}")
|
|
46
|
-
print(f" TEST_FILE_URL: {test_file_url}")
|
|
47
|
-
|
|
48
|
-
# Run ocr_caller.py
|
|
49
|
-
print("\n[2/3] Running OCR on test file...")
|
|
50
|
-
|
|
51
|
-
script_dir = Path(__file__).parent
|
|
52
|
-
ocr_caller = script_dir / "ocr_caller.py"
|
|
53
|
-
|
|
54
|
-
cmd = [
|
|
55
|
-
sys.executable,
|
|
56
|
-
str(ocr_caller),
|
|
57
|
-
"--mode", "auto",
|
|
58
|
-
"--file-url", test_file_url,
|
|
59
|
-
"--max-attempts", "2",
|
|
60
|
-
"--budget-ms", "20000"
|
|
61
|
-
]
|
|
62
|
-
|
|
63
|
-
try:
|
|
64
|
-
result = subprocess.run(
|
|
65
|
-
cmd,
|
|
66
|
-
capture_output=True,
|
|
67
|
-
text=True,
|
|
68
|
-
timeout=30
|
|
69
|
-
)
|
|
70
|
-
except subprocess.TimeoutExpired:
|
|
71
|
-
print("ERROR: OCR call timed out after 30 seconds")
|
|
72
|
-
sys.exit(4)
|
|
73
|
-
except Exception as e:
|
|
74
|
-
print(f"ERROR: Failed to run ocr_caller.py: {e}")
|
|
75
|
-
sys.exit(4)
|
|
76
|
-
|
|
77
|
-
# Parse output
|
|
78
|
-
print("\n[3/3] Validating response...")
|
|
79
|
-
|
|
80
|
-
if result.returncode != 0:
|
|
81
|
-
print(f"ERROR: ocr_caller.py exited with code {result.returncode}")
|
|
82
|
-
print("\nStderr:")
|
|
83
|
-
print(result.stderr)
|
|
84
|
-
print("\nStdout:")
|
|
85
|
-
print(result.stdout)
|
|
86
|
-
sys.exit(result.returncode)
|
|
87
|
-
|
|
88
|
-
try:
|
|
89
|
-
response = json.loads(result.stdout)
|
|
90
|
-
except json.JSONDecodeError as e:
|
|
91
|
-
print(f"ERROR: Failed to parse JSON response: {e}")
|
|
92
|
-
print("\nStdout:")
|
|
93
|
-
print(result.stdout)
|
|
94
|
-
sys.exit(4)
|
|
95
|
-
|
|
96
|
-
# Validate response structure
|
|
97
|
-
if not response.get("ok"):
|
|
98
|
-
error = response.get("error", {})
|
|
99
|
-
print(f"ERROR: OCR failed with error code: {error.get('code')}")
|
|
100
|
-
print(f"Message: {error.get('message')}")
|
|
101
|
-
print(f"\nFull response:\n{json.dumps(response, indent=2, ensure_ascii=False)}")
|
|
102
|
-
sys.exit(3)
|
|
103
|
-
|
|
104
|
-
# Check for content
|
|
105
|
-
result_data = response.get("result", {})
|
|
106
|
-
full_text = result_data.get("full_text", "")
|
|
107
|
-
pages = result_data.get("pages", [])
|
|
108
|
-
|
|
109
|
-
total_items = sum(len(page.get("items", [])) for page in pages)
|
|
110
|
-
|
|
111
|
-
if not full_text and total_items == 0:
|
|
112
|
-
print("WARNING: OCR succeeded but returned no text. This may indicate:")
|
|
113
|
-
print(" - The test image is blank or unreadable")
|
|
114
|
-
print(" - Provider API is working but returned empty results")
|
|
115
|
-
print(f"\nFull response:\n{json.dumps(response, indent=2, ensure_ascii=False)}")
|
|
116
|
-
# Still pass, as API is working
|
|
117
|
-
else:
|
|
118
|
-
print(f"SUCCESS: OCR completed")
|
|
119
|
-
print(f" - Total text items: {total_items}")
|
|
120
|
-
print(f" - Quality score: {response.get('quality', {}).get('quality_score', 0):.4f}")
|
|
121
|
-
print(f" - Avg confidence: {response.get('quality', {}).get('avg_rec_score', 0):.4f}")
|
|
122
|
-
print(f" - Mode: {response.get('agent_trace', {}).get('mode')}")
|
|
123
|
-
print(f" - Attempts: {len(response.get('agent_trace', {}).get('attempts', []))}")
|
|
124
|
-
|
|
125
|
-
# Print first 200 chars of text
|
|
126
|
-
if full_text:
|
|
127
|
-
preview = full_text[:200].replace("\n", " ")
|
|
128
|
-
if len(full_text) > 200:
|
|
129
|
-
preview += "..."
|
|
130
|
-
print(f"\n Preview: {preview}")
|
|
131
|
-
|
|
132
|
-
print("\n" + "=" * 60)
|
|
133
|
-
print("Smoke test PASSED")
|
|
134
|
-
print("=" * 60)
|
|
135
|
-
sys.exit(0)
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
if __name__ == "__main__":
|
|
139
|
-
main()
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Smoke Test for PP-OCRv5 API Skill
|
|
4
|
+
Verifies that AISTUDIO_HOST and PADDLE_OCR_TOKEN are correctly configured
|
|
5
|
+
and that the provider API is accessible.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
import subprocess
|
|
11
|
+
import sys
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def main():
|
|
16
|
+
print("=" * 60)
|
|
17
|
+
print("PP-OCRv5 API Skill - Smoke Test")
|
|
18
|
+
print("=" * 60)
|
|
19
|
+
|
|
20
|
+
# Check configuration (all sources)
|
|
21
|
+
print("\n[1/3] Checking configuration...")
|
|
22
|
+
|
|
23
|
+
# Add scripts dir to path for imports
|
|
24
|
+
script_dir = Path(__file__).parent
|
|
25
|
+
sys.path.insert(0, str(script_dir))
|
|
26
|
+
|
|
27
|
+
from _lib import Config
|
|
28
|
+
|
|
29
|
+
try:
|
|
30
|
+
# Try to get config from .env file
|
|
31
|
+
api_url = Config.get_api_url()
|
|
32
|
+
token = Config.get_token()
|
|
33
|
+
except ValueError as e:
|
|
34
|
+
print(f"\nConfiguration error: {e}")
|
|
35
|
+
sys.exit(1)
|
|
36
|
+
|
|
37
|
+
test_file_url = os.getenv("TEST_FILE_URL", "").strip()
|
|
38
|
+
|
|
39
|
+
if not test_file_url:
|
|
40
|
+
print("WARNING: TEST_FILE_URL is not set, using default test image")
|
|
41
|
+
# Use a default public test image (Chinese text)
|
|
42
|
+
test_file_url = "https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.7/doc/imgs/11.jpg"
|
|
43
|
+
|
|
44
|
+
print(f" API_URL: {api_url}")
|
|
45
|
+
print(f" PADDLE_OCR_TOKEN: {'*' * 8}{token[-4:] if len(token) > 4 else '****'}")
|
|
46
|
+
print(f" TEST_FILE_URL: {test_file_url}")
|
|
47
|
+
|
|
48
|
+
# Run ocr_caller.py
|
|
49
|
+
print("\n[2/3] Running OCR on test file...")
|
|
50
|
+
|
|
51
|
+
script_dir = Path(__file__).parent
|
|
52
|
+
ocr_caller = script_dir / "ocr_caller.py"
|
|
53
|
+
|
|
54
|
+
cmd = [
|
|
55
|
+
sys.executable,
|
|
56
|
+
str(ocr_caller),
|
|
57
|
+
"--mode", "auto",
|
|
58
|
+
"--file-url", test_file_url,
|
|
59
|
+
"--max-attempts", "2",
|
|
60
|
+
"--budget-ms", "20000"
|
|
61
|
+
]
|
|
62
|
+
|
|
63
|
+
try:
|
|
64
|
+
result = subprocess.run(
|
|
65
|
+
cmd,
|
|
66
|
+
capture_output=True,
|
|
67
|
+
text=True,
|
|
68
|
+
timeout=30
|
|
69
|
+
)
|
|
70
|
+
except subprocess.TimeoutExpired:
|
|
71
|
+
print("ERROR: OCR call timed out after 30 seconds")
|
|
72
|
+
sys.exit(4)
|
|
73
|
+
except Exception as e:
|
|
74
|
+
print(f"ERROR: Failed to run ocr_caller.py: {e}")
|
|
75
|
+
sys.exit(4)
|
|
76
|
+
|
|
77
|
+
# Parse output
|
|
78
|
+
print("\n[3/3] Validating response...")
|
|
79
|
+
|
|
80
|
+
if result.returncode != 0:
|
|
81
|
+
print(f"ERROR: ocr_caller.py exited with code {result.returncode}")
|
|
82
|
+
print("\nStderr:")
|
|
83
|
+
print(result.stderr)
|
|
84
|
+
print("\nStdout:")
|
|
85
|
+
print(result.stdout)
|
|
86
|
+
sys.exit(result.returncode)
|
|
87
|
+
|
|
88
|
+
try:
|
|
89
|
+
response = json.loads(result.stdout)
|
|
90
|
+
except json.JSONDecodeError as e:
|
|
91
|
+
print(f"ERROR: Failed to parse JSON response: {e}")
|
|
92
|
+
print("\nStdout:")
|
|
93
|
+
print(result.stdout)
|
|
94
|
+
sys.exit(4)
|
|
95
|
+
|
|
96
|
+
# Validate response structure
|
|
97
|
+
if not response.get("ok"):
|
|
98
|
+
error = response.get("error", {})
|
|
99
|
+
print(f"ERROR: OCR failed with error code: {error.get('code')}")
|
|
100
|
+
print(f"Message: {error.get('message')}")
|
|
101
|
+
print(f"\nFull response:\n{json.dumps(response, indent=2, ensure_ascii=False)}")
|
|
102
|
+
sys.exit(3)
|
|
103
|
+
|
|
104
|
+
# Check for content
|
|
105
|
+
result_data = response.get("result", {})
|
|
106
|
+
full_text = result_data.get("full_text", "")
|
|
107
|
+
pages = result_data.get("pages", [])
|
|
108
|
+
|
|
109
|
+
total_items = sum(len(page.get("items", [])) for page in pages)
|
|
110
|
+
|
|
111
|
+
if not full_text and total_items == 0:
|
|
112
|
+
print("WARNING: OCR succeeded but returned no text. This may indicate:")
|
|
113
|
+
print(" - The test image is blank or unreadable")
|
|
114
|
+
print(" - Provider API is working but returned empty results")
|
|
115
|
+
print(f"\nFull response:\n{json.dumps(response, indent=2, ensure_ascii=False)}")
|
|
116
|
+
# Still pass, as API is working
|
|
117
|
+
else:
|
|
118
|
+
print(f"SUCCESS: OCR completed")
|
|
119
|
+
print(f" - Total text items: {total_items}")
|
|
120
|
+
print(f" - Quality score: {response.get('quality', {}).get('quality_score', 0):.4f}")
|
|
121
|
+
print(f" - Avg confidence: {response.get('quality', {}).get('avg_rec_score', 0):.4f}")
|
|
122
|
+
print(f" - Mode: {response.get('agent_trace', {}).get('mode')}")
|
|
123
|
+
print(f" - Attempts: {len(response.get('agent_trace', {}).get('attempts', []))}")
|
|
124
|
+
|
|
125
|
+
# Print first 200 chars of text
|
|
126
|
+
if full_text:
|
|
127
|
+
preview = full_text[:200].replace("\n", " ")
|
|
128
|
+
if len(full_text) > 200:
|
|
129
|
+
preview += "..."
|
|
130
|
+
print(f"\n Preview: {preview}")
|
|
131
|
+
|
|
132
|
+
print("\n" + "=" * 60)
|
|
133
|
+
print("Smoke test PASSED")
|
|
134
|
+
print("=" * 60)
|
|
135
|
+
sys.exit(0)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
if __name__ == "__main__":
|
|
139
|
+
main()
|