paddleocr-skills 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. package/README.md +220 -0
  2. package/bin/paddleocr-skills.js +20 -0
  3. package/lib/copy.js +39 -0
  4. package/lib/installer.js +70 -0
  5. package/lib/prompts.js +67 -0
  6. package/lib/python.js +75 -0
  7. package/lib/verify.js +121 -0
  8. package/package.json +42 -0
  9. package/templates/.env.example +12 -0
  10. package/templates/paddleocr-vl/references/paddleocr-vl/layout_schema.md +64 -0
  11. package/templates/paddleocr-vl/references/paddleocr-vl/output_format.md +154 -0
  12. package/templates/paddleocr-vl/references/paddleocr-vl/vl_model_spec.md +157 -0
  13. package/templates/paddleocr-vl/scripts/paddleocr-vl/_lib.py +780 -0
  14. package/templates/paddleocr-vl/scripts/paddleocr-vl/configure.py +270 -0
  15. package/templates/paddleocr-vl/scripts/paddleocr-vl/optimize_file.py +226 -0
  16. package/templates/paddleocr-vl/scripts/paddleocr-vl/requirements-optimize.txt +8 -0
  17. package/templates/paddleocr-vl/scripts/paddleocr-vl/requirements.txt +7 -0
  18. package/templates/paddleocr-vl/scripts/paddleocr-vl/smoke_test.py +199 -0
  19. package/templates/paddleocr-vl/scripts/paddleocr-vl/vl_caller.py +232 -0
  20. package/templates/paddleocr-vl/skills/paddleocr-vl/SKILL.md +481 -0
  21. package/templates/ppocrv5/references/ppocrv5/agent_policy.md +258 -0
  22. package/templates/ppocrv5/references/ppocrv5/normalized_schema.md +257 -0
  23. package/templates/ppocrv5/references/ppocrv5/provider_api.md +140 -0
  24. package/templates/ppocrv5/scripts/ppocrv5/_lib.py +635 -0
  25. package/templates/ppocrv5/scripts/ppocrv5/configure.py +346 -0
  26. package/templates/ppocrv5/scripts/ppocrv5/ocr_caller.py +684 -0
  27. package/templates/ppocrv5/scripts/ppocrv5/requirements.txt +4 -0
  28. package/templates/ppocrv5/scripts/ppocrv5/smoke_test.py +139 -0
  29. package/templates/ppocrv5/skills/ppocrv5/SKILL.md +272 -0
@@ -0,0 +1,346 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Configuration Wizard for PP-OCRv5 API Skill
4
+
5
+ Supports two modes:
6
+ 1. Interactive mode (default): python configure.py
7
+ 2. CLI mode: python configure.py --api-url URL --token TOKEN
8
+
9
+ Supports pasting Python code format (e.g., API_URL = "...")
10
+ """
11
+
12
+ import argparse
13
+ import re
14
+ import sys
15
+ from pathlib import Path
16
+
17
+ # Add parent directory to path for imports
18
+ sys.path.insert(0, str(Path(__file__).parent.parent))
19
+
20
+
21
def parse_input(user_input: str) -> str:
    """
    Extract the bare value from a typed or pasted configuration snippet.

    Accepted shapes (surrounding whitespace is ignored):
    - API_URL = "https://..."   (Python assignment, single or double quotes)
    - API_URL=https://...       (.env style, unquoted)
    - "https://..." or 'https://...'
    - https://...

    Args:
        user_input: Raw text entered by the user.

    Returns:
        The value with any leading ``KEY =`` prefix, one pair of matching
        quotes, and surrounding whitespace removed.
    """
    text = user_input.strip()

    # Drop a leading `KEY =` prefix. The value must not itself start with
    # `=`, so a bare token that merely ends in padding (e.g. "abcd==") is
    # not mistaken for an assignment.
    assignment = re.match(r'^\w+\s*=\s*(?!=)(\S.*)$', text)
    if assignment:
        text = assignment.group(1).strip()

    # Remove one pair of quotes; the backreference requires the closing
    # quote to be the same character as the opening one.
    quoted = re.match(r'^(["\'])(.*)\1$', text)
    if quoted:
        return quoted.group(2).strip()

    return text
45
+
46
+
47
def normalize_api_url(url: str) -> str:
    """
    Normalize an API URL to the canonical ``https://<host>/ocr`` form.

    Supports:
    - https://xxx.aistudio-app.com/ocr
    - https://xxx.aistudio-app.com
    - xxx.aistudio-app.com

    Any http/https scheme (matched case-insensitively), path, query string
    and fragment in the input are discarded; only the host part is kept.

    Args:
        url: URL or bare host name as entered by the user.

    Returns:
        https://xxx.aistudio-app.com/ocr

    Raises:
        ValueError: If the input uses a scheme other than http/https, has no
            host part, or the host contains whitespace.
    """
    host = url.strip()

    # Remove http:// or https:// (URL schemes are case-insensitive)
    host = re.sub(r'^https?://', '', host, flags=re.IGNORECASE)

    # Anything that still looks like "<scheme>://" is not an HTTP(S) URL
    if re.match(r'^[A-Za-z][A-Za-z0-9+.-]*://', host):
        raise ValueError(f"unsupported URL scheme in '{url.strip()}'")

    # Keep only the host: cut at the first path, query or fragment delimiter
    host = re.split(r'[/?#]', host, maxsplit=1)[0]

    if not host:
        raise ValueError("API URL has no host name")
    if re.search(r'\s', host):
        raise ValueError(f"host name '{host}' contains whitespace")

    # Return complete API URL
    return f"https://{host}/ocr"
67
+
68
+
69
def mask_token(token: str) -> str:
    """
    Mask a token for display, showing only its first and last 4 characters.

    Tokens shorter than 16 characters (the same length this script treats
    as "too short" when validating input) are hidden entirely, so the
    8 visible characters never make up more than half of the token.

    Args:
        token: The access token to mask.

    Returns:
        "****" for short tokens, otherwise "<first 4>...<last 4>".
    """
    if len(token) < 16:
        return "****"
    return f"{token[:4]}...{token[-4:]}"
74
+
75
+
76
def test_connection(api_url: str, token: str) -> bool:
    """
    Test API connection (optional) by POSTing a tiny base64 PNG.

    Args:
        api_url: Full endpoint URL to POST to.
        token: Access token, sent as ``Authorization: token <token>``.

    Returns:
        True when the request succeeds with ``errorCode == 0``, when the
        server answers 429 (quota exhausted but reachable), or when httpx
        is not installed and the test is skipped. False otherwise.
    """
    try:
        import httpx
    except ImportError:
        print("⚠ httpx not installed, skipping connection test")
        print(" Install with: pip install httpx")
        return True

    print("\nTesting connection...")

    # Simple test request (using a small base64 image)
    test_payload = {
        "file": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==",
        "fileType": 1,
        "visualize": False
    }

    headers = {
        "Authorization": f"token {token}",
        "Content-Type": "application/json"
    }

    try:
        # The context manager closes the client even if the request raises
        with httpx.Client(timeout=10.0) as client:
            resp = client.post(api_url, json=test_payload, headers=headers)
    except Exception as e:
        print(f"✗ Connection test failed: {e}")
        return False

    # Decide on the status code first: auth and rate-limit responses are not
    # guaranteed to carry a JSON body, so the body is parsed only afterwards.
    if resp.status_code in (401, 403):
        print("✗ Token verification failed, please check if the Token is correct")
        return False
    if resp.status_code == 429:
        print("⚠ API quota exhausted, but connection is working")
        return True

    try:
        resp_json = resp.json()
    except ValueError:
        # Body is not valid JSON (e.g. an HTML error page)
        resp_json = None

    if not isinstance(resp_json, dict):
        print(f"⚠ API returned error: HTTP {resp.status_code} (non-JSON response)")
        return False

    if resp.status_code == 200 and resp_json.get("errorCode") == 0:
        print("✓ API connection successful!")
        print("✓ OCR function is working!")
        return True

    print(f"⚠ API returned error: {resp_json.get('errorMsg', 'Unknown error')}")
    return False
123
+
124
+
125
def save_config(api_url: str, token: str, project_root: Path, quiet: bool = False) -> bool:
    """
    Save configuration to the .env file in ``project_root``.

    If the file already exists, the user is asked to confirm the overwrite
    (skipped when ``quiet`` is True). ``KEY=value`` entries other than
    API_URL and PADDLE_OCR_TOKEN are carried over into the new file;
    comment lines and blank lines from the old file are not.

    Args:
        api_url: Normalized API URL
        token: Access token
        project_root: Project root directory
        quiet: If True, suppress output messages and the overwrite prompt

    Returns:
        True if successful, False if the user declined the overwrite or the
        file could not be read or written
    """
    env_file = project_root / ".env"
    managed_keys = ("API_URL", "PADDLE_OCR_TOKEN")

    # Read existing configuration (if exists)
    existing_config = {}
    if env_file.exists():
        if not quiet:
            print(f"\nDetected existing configuration file: {env_file}")
            overwrite = input("Overwrite? [Y/n]: ").strip().lower()
            if overwrite in ('n', 'no'):
                print("Configuration cancelled")
                return False

        # Preserve other configuration items
        try:
            with open(env_file, 'r', encoding='utf-8') as f:
                for raw_line in f:
                    line = raw_line.strip()
                    if not line or line.startswith('#') or '=' not in line:
                        continue
                    key, value = line.split('=', 1)
                    key = key.strip()
                    # Compare the parsed key, so "API_URL = x" (with spaces
                    # around "=") is also recognised and not duplicated.
                    if key in managed_keys:
                        continue
                    existing_config[key] = value.strip()
        except (OSError, UnicodeError) as e:
            # Leave an unreadable file untouched rather than overwrite it
            print(f"\n✗ Failed to read existing configuration: {e}")
            return False

    # Write new configuration
    try:
        with open(env_file, 'w', encoding='utf-8') as f:
            f.write("# PP-OCRv5 API Configuration\n")
            f.write("# This file was automatically generated by configure.py\n\n")

            f.write(f"API_URL={api_url}\n")
            f.write(f"PADDLE_OCR_TOKEN={token}\n")

            # Write other preserved configurations
            if existing_config:
                f.write("\n# Other configurations\n")
                for key, value in existing_config.items():
                    f.write(f"{key}={value}\n")
    except (OSError, UnicodeError) as e:
        print(f"\n✗ Failed to save configuration: {e}")
        return False

    if not quiet:
        print(f"\n✓ Configuration saved to: {env_file}")
    return True
181
+
182
+
183
def _run_cli(args, project_root: Path) -> None:
    """Run non-interactive mode: validate, save, then exit with a status code."""
    try:
        # Normalize API URL
        api_url = normalize_api_url(parse_input(args.api_url))
        token = parse_input(args.token)

        # Validate
        if len(token) < 16:
            print("Error: Token seems too short. Please check and try again.")
            sys.exit(1)

        # Save configuration. quiet=True so this mode never blocks on the
        # overwrite prompt; the success message is printed here instead.
        if not save_config(api_url, token, project_root, quiet=True):
            sys.exit(1)

        if not args.quiet:
            print(f"\n✓ Configuration saved to: {project_root / '.env'}")
            print("\n✓ Configuration complete!")
            print(f" API_URL: {api_url}")
            print(f" TOKEN: {mask_token(token)}")
        sys.exit(0)

    except Exception as e:
        # sys.exit() raises SystemExit, which is not an Exception subclass,
        # so the exits above pass through this handler untouched.
        print(f"Error: {e}")
        sys.exit(1)


def _prompt_api_url() -> str:
    """Ask for the API URL until a value is entered and normalized."""
    print("[Step 1/2] Please enter your API URL")
    print('Tip: You can paste directly, for example:')
    print(' API_URL = "https://33a9f5e7p5r9bek9.aistudio-app.com/ocr"')
    print(' or: https://33a9f5e7p5r9bek9.aistudio-app.com/ocr')
    print()

    while True:
        api_url_input = input("> ").strip()

        if not api_url_input:
            print("Error: API URL cannot be empty, please enter again")
            continue

        # Parse input, then normalize
        try:
            api_url = normalize_api_url(parse_input(api_url_input))
        except Exception as e:
            print(f"Error: Cannot parse API URL: {e}")
            print("Please enter again\n")
            continue

        print(f"✓ Recognized: {api_url}\n")
        return api_url


def _prompt_token() -> str:
    """Ask for the access token until one is entered and, if short, confirmed."""
    print("[Step 2/2] Please enter your Access Token")
    print('Tip: You can paste directly, for example:')
    print(' TOKEN = "1505a1bd17e9b74004f9c7e54e0d707ddddca7dc"')
    print(' or: 1505a1bd17e9b74004f9c7e54e0d707ddddca7dc')
    print()

    while True:
        token_input = input("> ").strip()

        if not token_input:
            print("Error: Token cannot be empty, please enter again")
            continue

        # Parse input
        token = parse_input(token_input)

        # A short token is allowed only after explicit confirmation
        if len(token) < 16:
            print("⚠ Token length seems too short, please confirm if correct")
            confirm = input("Continue? [y/N]: ").strip().lower()
            if confirm != 'y':
                continue

        print(f"✓ Recognized: {mask_token(token)}\n")
        return token


def main():
    """
    Entry point for the configuration wizard.

    With both --api-url and --token the script runs non-interactively and
    exits with status 0 on success or 1 on failure. With only one of them
    it prints an error and exits with status 1. With neither it runs the
    interactive wizard: prompt for URL and token, save them to .env, and
    optionally test the connection.
    """
    # Parse command-line arguments
    parser = argparse.ArgumentParser(
        description='PP-OCRv5 API Configuration Tool',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Interactive mode
  python configure.py

  # CLI mode (non-interactive)
  python configure.py --api-url "https://xxx.aistudio-app.com/ocr" --token "your_token"
"""
    )
    parser.add_argument('--api-url', help='API URL (non-interactive mode)')
    parser.add_argument('--token', help='Access token (non-interactive mode)')
    parser.add_argument('--quiet', action='store_true', help='Suppress output messages')

    args = parser.parse_args()

    # Directory that receives the .env file: two levels above this script
    project_root = Path(__file__).parent.parent

    # ========================================
    # CLI Mode (non-interactive)
    # ========================================
    if args.api_url and args.token:
        _run_cli(args, project_root)  # always exits

    elif args.api_url or args.token:
        print("Error: Both --api-url and --token are required for CLI mode")
        print("Run without arguments for interactive mode")
        sys.exit(1)

    # ========================================
    # Interactive Mode
    # ========================================
    print("\n" + "=" * 60)
    print("PP-OCRv5 API Skill - Configuration Wizard")
    print("=" * 60)
    print("\nCopy your configuration from Paddle AI Studio\n")

    # Step 1: Get API URL
    api_url = _prompt_api_url()

    # Step 2: Get Token
    token = _prompt_token()

    # ========================================
    # Save configuration
    # ========================================
    print("=" * 60)
    print("Saving configuration...")
    print("=" * 60)

    if not save_config(api_url, token, project_root):
        sys.exit(1)

    # ========================================
    # Test connection (optional)
    # ========================================
    print("\n" + "=" * 60)
    test_choice = input("Test connection? [Y/n]: ").strip().lower()

    if test_choice != 'n':
        success = test_connection(api_url, token)
        if not success:
            print("\n⚠ Connection test failed, but configuration has been saved")
            print(" Please check if API URL and Token are correct")

    # ========================================
    # Complete
    # ========================================
    print("\n" + "=" * 60)
    print("Configuration complete!")
    print("=" * 60)
    print("\nYou can now use the OCR function:")
    print(f" cd {project_root}")
    print(' python scripts/ocr_caller.py --file-url "https://example.com/image.jpg"')
    print("\nTo reconfigure, run this script again.")
    print("=" * 60 + "\n")
336
+
337
+
338
# Script entry point: run the wizard and map failures to exit codes.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl+C is treated as a deliberate cancel, not an error
        print("\n\nConfiguration cancelled")
        sys.exit(0)
    except EOFError:
        # input() raises EOFError with an empty message when stdin is closed
        # or exhausted; say so explicitly instead of printing a bare "Error: ".
        print("\n\nError: Input ended unexpectedly (stdin closed). "
              "Use --api-url and --token for non-interactive mode.")
        sys.exit(1)
    except Exception as e:
        print(f"\n\nError: {e}")
        sys.exit(1)