paddleocr-skills 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. package/README.md +220 -220
  2. package/bin/paddleocr-skills.js +33 -20
  3. package/lib/copy.js +39 -39
  4. package/lib/installer.js +76 -70
  5. package/lib/prompts.js +67 -67
  6. package/lib/python.js +75 -75
  7. package/lib/verify.js +121 -121
  8. package/package.json +42 -42
  9. package/templates/.env.example +12 -12
  10. package/templates/{paddleocr-vl/references/paddleocr-vl → paddleocr-vl-1.5/references/paddleocr-vl-1.5}/layout_schema.md +64 -64
  11. package/templates/{paddleocr-vl/references/paddleocr-vl → paddleocr-vl-1.5/references/paddleocr-vl-1.5}/output_format.md +154 -154
  12. package/templates/{paddleocr-vl/references/paddleocr-vl → paddleocr-vl-1.5/references/paddleocr-vl-1.5}/vl_model_spec.md +157 -157
  13. package/templates/{paddleocr-vl/scripts/paddleocr-vl → paddleocr-vl-1.5/scripts/paddleocr-vl-1.5}/_lib.py +780 -780
  14. package/templates/{paddleocr-vl/scripts/paddleocr-vl → paddleocr-vl-1.5/scripts/paddleocr-vl-1.5}/configure.py +270 -270
  15. package/templates/{paddleocr-vl/scripts/paddleocr-vl → paddleocr-vl-1.5/scripts/paddleocr-vl-1.5}/optimize_file.py +226 -226
  16. package/templates/{paddleocr-vl/scripts/paddleocr-vl → paddleocr-vl-1.5/scripts/paddleocr-vl-1.5}/requirements-optimize.txt +8 -8
  17. package/templates/{paddleocr-vl/scripts/paddleocr-vl → paddleocr-vl-1.5/scripts/paddleocr-vl-1.5}/requirements.txt +7 -7
  18. package/templates/{paddleocr-vl/scripts/paddleocr-vl → paddleocr-vl-1.5/scripts/paddleocr-vl-1.5}/smoke_test.py +199 -199
  19. package/templates/{paddleocr-vl/scripts/paddleocr-vl → paddleocr-vl-1.5/scripts/paddleocr-vl-1.5}/vl_caller.py +232 -232
  20. package/templates/{paddleocr-vl/skills/paddleocr-vl → paddleocr-vl-1.5/skills/paddleocr-vl-1.5}/SKILL.md +481 -481
  21. package/templates/ppocrv5/references/ppocrv5/agent_policy.md +258 -258
  22. package/templates/ppocrv5/references/ppocrv5/normalized_schema.md +257 -257
  23. package/templates/ppocrv5/references/ppocrv5/provider_api.md +140 -140
  24. package/templates/ppocrv5/scripts/ppocrv5/_lib.py +635 -635
  25. package/templates/ppocrv5/scripts/ppocrv5/configure.py +346 -346
  26. package/templates/ppocrv5/scripts/ppocrv5/ocr_caller.py +684 -684
  27. package/templates/ppocrv5/scripts/ppocrv5/requirements.txt +4 -4
  28. package/templates/ppocrv5/scripts/ppocrv5/smoke_test.py +139 -139
  29. package/templates/ppocrv5/skills/ppocrv5/SKILL.md +272 -272
@@ -1,4 +1,4 @@
1
- # Runtime dependencies for PP-OCRv5 API Skill
2
-
3
- httpx>=0.24.0
4
- python-dotenv>=0.19.0
1
+ # Runtime dependencies for PP-OCRv5 API Skill
2
+
3
+ httpx>=0.24.0
4
+ python-dotenv>=0.19.0
@@ -1,139 +1,139 @@
1
- #!/usr/bin/env python3
2
- """
3
- Smoke Test for PP-OCRv5 API Skill
4
- Verifies that AISTUDIO_HOST and PADDLE_OCR_TOKEN are correctly configured
5
- and that the provider API is accessible.
6
- """
7
-
8
- import json
9
- import os
10
- import subprocess
11
- import sys
12
- from pathlib import Path
13
-
14
-
15
- def main():
16
- print("=" * 60)
17
- print("PP-OCRv5 API Skill - Smoke Test")
18
- print("=" * 60)
19
-
20
- # Check configuration (all sources)
21
- print("\n[1/3] Checking configuration...")
22
-
23
- # Add scripts dir to path for imports
24
- script_dir = Path(__file__).parent
25
- sys.path.insert(0, str(script_dir))
26
-
27
- from _lib import Config
28
-
29
- try:
30
- # Try to get config from .env file
31
- api_url = Config.get_api_url()
32
- token = Config.get_token()
33
- except ValueError as e:
34
- print(f"\nConfiguration error: {e}")
35
- sys.exit(1)
36
-
37
- test_file_url = os.getenv("TEST_FILE_URL", "").strip()
38
-
39
- if not test_file_url:
40
- print("WARNING: TEST_FILE_URL is not set, using default test image")
41
- # Use a default public test image (Chinese text)
42
- test_file_url = "https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.7/doc/imgs/11.jpg"
43
-
44
- print(f" API_URL: {api_url}")
45
- print(f" PADDLE_OCR_TOKEN: {'*' * 8}{token[-4:] if len(token) > 4 else '****'}")
46
- print(f" TEST_FILE_URL: {test_file_url}")
47
-
48
- # Run ocr_caller.py
49
- print("\n[2/3] Running OCR on test file...")
50
-
51
- script_dir = Path(__file__).parent
52
- ocr_caller = script_dir / "ocr_caller.py"
53
-
54
- cmd = [
55
- sys.executable,
56
- str(ocr_caller),
57
- "--mode", "auto",
58
- "--file-url", test_file_url,
59
- "--max-attempts", "2",
60
- "--budget-ms", "20000"
61
- ]
62
-
63
- try:
64
- result = subprocess.run(
65
- cmd,
66
- capture_output=True,
67
- text=True,
68
- timeout=30
69
- )
70
- except subprocess.TimeoutExpired:
71
- print("ERROR: OCR call timed out after 30 seconds")
72
- sys.exit(4)
73
- except Exception as e:
74
- print(f"ERROR: Failed to run ocr_caller.py: {e}")
75
- sys.exit(4)
76
-
77
- # Parse output
78
- print("\n[3/3] Validating response...")
79
-
80
- if result.returncode != 0:
81
- print(f"ERROR: ocr_caller.py exited with code {result.returncode}")
82
- print("\nStderr:")
83
- print(result.stderr)
84
- print("\nStdout:")
85
- print(result.stdout)
86
- sys.exit(result.returncode)
87
-
88
- try:
89
- response = json.loads(result.stdout)
90
- except json.JSONDecodeError as e:
91
- print(f"ERROR: Failed to parse JSON response: {e}")
92
- print("\nStdout:")
93
- print(result.stdout)
94
- sys.exit(4)
95
-
96
- # Validate response structure
97
- if not response.get("ok"):
98
- error = response.get("error", {})
99
- print(f"ERROR: OCR failed with error code: {error.get('code')}")
100
- print(f"Message: {error.get('message')}")
101
- print(f"\nFull response:\n{json.dumps(response, indent=2, ensure_ascii=False)}")
102
- sys.exit(3)
103
-
104
- # Check for content
105
- result_data = response.get("result", {})
106
- full_text = result_data.get("full_text", "")
107
- pages = result_data.get("pages", [])
108
-
109
- total_items = sum(len(page.get("items", [])) for page in pages)
110
-
111
- if not full_text and total_items == 0:
112
- print("WARNING: OCR succeeded but returned no text. This may indicate:")
113
- print(" - The test image is blank or unreadable")
114
- print(" - Provider API is working but returned empty results")
115
- print(f"\nFull response:\n{json.dumps(response, indent=2, ensure_ascii=False)}")
116
- # Still pass, as API is working
117
- else:
118
- print(f"SUCCESS: OCR completed")
119
- print(f" - Total text items: {total_items}")
120
- print(f" - Quality score: {response.get('quality', {}).get('quality_score', 0):.4f}")
121
- print(f" - Avg confidence: {response.get('quality', {}).get('avg_rec_score', 0):.4f}")
122
- print(f" - Mode: {response.get('agent_trace', {}).get('mode')}")
123
- print(f" - Attempts: {len(response.get('agent_trace', {}).get('attempts', []))}")
124
-
125
- # Print first 200 chars of text
126
- if full_text:
127
- preview = full_text[:200].replace("\n", " ")
128
- if len(full_text) > 200:
129
- preview += "..."
130
- print(f"\n Preview: {preview}")
131
-
132
- print("\n" + "=" * 60)
133
- print("Smoke test PASSED")
134
- print("=" * 60)
135
- sys.exit(0)
136
-
137
-
138
- if __name__ == "__main__":
139
- main()
1
+ #!/usr/bin/env python3
2
+ """
3
+ Smoke Test for PP-OCRv5 API Skill
4
+ Verifies that AISTUDIO_HOST and PADDLE_OCR_TOKEN are correctly configured
5
+ and that the provider API is accessible.
6
+ """
7
+
8
+ import json
9
+ import os
10
+ import subprocess
11
+ import sys
12
+ from pathlib import Path
13
+
14
+
15
+ def main():
16
+ print("=" * 60)
17
+ print("PP-OCRv5 API Skill - Smoke Test")
18
+ print("=" * 60)
19
+
20
+ # Check configuration (all sources)
21
+ print("\n[1/3] Checking configuration...")
22
+
23
+ # Add scripts dir to path for imports
24
+ script_dir = Path(__file__).parent
25
+ sys.path.insert(0, str(script_dir))
26
+
27
+ from _lib import Config
28
+
29
+ try:
30
+ # Try to get config from .env file
31
+ api_url = Config.get_api_url()
32
+ token = Config.get_token()
33
+ except ValueError as e:
34
+ print(f"\nConfiguration error: {e}")
35
+ sys.exit(1)
36
+
37
+ test_file_url = os.getenv("TEST_FILE_URL", "").strip()
38
+
39
+ if not test_file_url:
40
+ print("WARNING: TEST_FILE_URL is not set, using default test image")
41
+ # Use a default public test image (Chinese text)
42
+ test_file_url = "https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.7/doc/imgs/11.jpg"
43
+
44
+ print(f" API_URL: {api_url}")
45
+ print(f" PADDLE_OCR_TOKEN: {'*' * 8}{token[-4:] if len(token) > 4 else '****'}")
46
+ print(f" TEST_FILE_URL: {test_file_url}")
47
+
48
+ # Run ocr_caller.py
49
+ print("\n[2/3] Running OCR on test file...")
50
+
51
+ script_dir = Path(__file__).parent
52
+ ocr_caller = script_dir / "ocr_caller.py"
53
+
54
+ cmd = [
55
+ sys.executable,
56
+ str(ocr_caller),
57
+ "--mode", "auto",
58
+ "--file-url", test_file_url,
59
+ "--max-attempts", "2",
60
+ "--budget-ms", "20000"
61
+ ]
62
+
63
+ try:
64
+ result = subprocess.run(
65
+ cmd,
66
+ capture_output=True,
67
+ text=True,
68
+ timeout=30
69
+ )
70
+ except subprocess.TimeoutExpired:
71
+ print("ERROR: OCR call timed out after 30 seconds")
72
+ sys.exit(4)
73
+ except Exception as e:
74
+ print(f"ERROR: Failed to run ocr_caller.py: {e}")
75
+ sys.exit(4)
76
+
77
+ # Parse output
78
+ print("\n[3/3] Validating response...")
79
+
80
+ if result.returncode != 0:
81
+ print(f"ERROR: ocr_caller.py exited with code {result.returncode}")
82
+ print("\nStderr:")
83
+ print(result.stderr)
84
+ print("\nStdout:")
85
+ print(result.stdout)
86
+ sys.exit(result.returncode)
87
+
88
+ try:
89
+ response = json.loads(result.stdout)
90
+ except json.JSONDecodeError as e:
91
+ print(f"ERROR: Failed to parse JSON response: {e}")
92
+ print("\nStdout:")
93
+ print(result.stdout)
94
+ sys.exit(4)
95
+
96
+ # Validate response structure
97
+ if not response.get("ok"):
98
+ error = response.get("error", {})
99
+ print(f"ERROR: OCR failed with error code: {error.get('code')}")
100
+ print(f"Message: {error.get('message')}")
101
+ print(f"\nFull response:\n{json.dumps(response, indent=2, ensure_ascii=False)}")
102
+ sys.exit(3)
103
+
104
+ # Check for content
105
+ result_data = response.get("result", {})
106
+ full_text = result_data.get("full_text", "")
107
+ pages = result_data.get("pages", [])
108
+
109
+ total_items = sum(len(page.get("items", [])) for page in pages)
110
+
111
+ if not full_text and total_items == 0:
112
+ print("WARNING: OCR succeeded but returned no text. This may indicate:")
113
+ print(" - The test image is blank or unreadable")
114
+ print(" - Provider API is working but returned empty results")
115
+ print(f"\nFull response:\n{json.dumps(response, indent=2, ensure_ascii=False)}")
116
+ # Still pass, as API is working
117
+ else:
118
+ print(f"SUCCESS: OCR completed")
119
+ print(f" - Total text items: {total_items}")
120
+ print(f" - Quality score: {response.get('quality', {}).get('quality_score', 0):.4f}")
121
+ print(f" - Avg confidence: {response.get('quality', {}).get('avg_rec_score', 0):.4f}")
122
+ print(f" - Mode: {response.get('agent_trace', {}).get('mode')}")
123
+ print(f" - Attempts: {len(response.get('agent_trace', {}).get('attempts', []))}")
124
+
125
+ # Print first 200 chars of text
126
+ if full_text:
127
+ preview = full_text[:200].replace("\n", " ")
128
+ if len(full_text) > 200:
129
+ preview += "..."
130
+ print(f"\n Preview: {preview}")
131
+
132
+ print("\n" + "=" * 60)
133
+ print("Smoke test PASSED")
134
+ print("=" * 60)
135
+ sys.exit(0)
136
+
137
+
138
+ if __name__ == "__main__":
139
+ main()