@xiaotianxt/skills 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. package/EXCLUDED.md +42 -0
  2. package/LICENSE +21 -0
  3. package/README.md +165 -0
  4. package/SECURITY.md +23 -0
  5. package/SOURCES.md +45 -0
  6. package/bin/skills.mjs +241 -0
  7. package/package.json +38 -0
  8. package/skills/1password/SKILL.md +94 -0
  9. package/skills/1password/agents/openai.yaml +4 -0
  10. package/skills/1password/references/item-management.md +80 -0
  11. package/skills/1password/references/op-cli.md +107 -0
  12. package/skills/apple-calendar-event/SKILL.md +81 -0
  13. package/skills/apple-calendar-event/agents/openai.yaml +4 -0
  14. package/skills/apple-calendar-event/scripts/calendar_audit.py +201 -0
  15. package/skills/apple-calendar-event/scripts/calendar_event.py +164 -0
  16. package/skills/bro-browser/SKILL.md +118 -0
  17. package/skills/bro-browser/agents/openai.yaml +4 -0
  18. package/skills/bro-browser/references/tool-map.md +102 -0
  19. package/skills/bro-browser/references/workflows.md +146 -0
  20. package/skills/bro-browser/scripts/bro-call.mjs +189 -0
  21. package/skills/calendar/SKILL.md +182 -0
  22. package/skills/calendar/agents/openai.yaml +4 -0
  23. package/skills/calendar/references/operations.md +255 -0
  24. package/skills/calendar/scripts/calendar_list_review.py +157 -0
  25. package/skills/calendar/scripts/event_dedupe_preview.py +155 -0
  26. package/skills/canvas/SKILL.md +70 -0
  27. package/skills/canvas/agents/openai.yaml +4 -0
  28. package/skills/canvas/references/canvas-api.md +76 -0
  29. package/skills/course-exam-review-planner/SKILL.md +127 -0
  30. package/skills/cx/SKILL.md +25 -0
  31. package/skills/gh-fix-ci/LICENSE.txt +201 -0
  32. package/skills/gh-fix-ci/SKILL.md +81 -0
  33. package/skills/gh-fix-ci/agents/openai.yaml +6 -0
  34. package/skills/gh-fix-ci/assets/github-small.svg +3 -0
  35. package/skills/gh-fix-ci/assets/github.png +0 -0
  36. package/skills/gh-fix-ci/scripts/inspect_pr_checks.py +509 -0
  37. package/skills/gh-review-workflow/SKILL.md +61 -0
  38. package/skills/gh-review-workflow/agents/openai.yaml +4 -0
  39. package/skills/gh-review-workflow/references/workflow.md +48 -0
  40. package/skills/gh-review-workflow/scripts/fetch_review_state.py +222 -0
  41. package/skills/gh-review-workflow/scripts/resolve_review_threads.py +83 -0
  42. package/skills/github/SKILL.md +74 -0
  43. package/skills/github/agents/openai.yaml +6 -0
  44. package/skills/github/assets/github-small.svg +3 -0
  45. package/skills/github/assets/github.png +0 -0
  46. package/skills/gws-calendar/SKILL.md +126 -0
  47. package/skills/gws-calendar-agenda/SKILL.md +52 -0
  48. package/skills/gws-calendar-insert/SKILL.md +66 -0
  49. package/skills/gws-docs/SKILL.md +48 -0
  50. package/skills/gws-docs-write/SKILL.md +49 -0
  51. package/skills/gws-drive/SKILL.md +137 -0
  52. package/skills/gws-drive-upload/SKILL.md +52 -0
  53. package/skills/gws-gmail/SKILL.md +62 -0
  54. package/skills/gws-gmail-forward/SKILL.md +55 -0
  55. package/skills/gws-gmail-reply/SKILL.md +58 -0
  56. package/skills/gws-gmail-reply-all/SKILL.md +62 -0
  57. package/skills/gws-gmail-send/SKILL.md +57 -0
  58. package/skills/gws-gmail-triage/SKILL.md +50 -0
  59. package/skills/gws-gmail-watch/SKILL.md +58 -0
  60. package/skills/gws-shared/SKILL.md +27 -0
  61. package/skills/helium-browser-mcp/SKILL.md +137 -0
  62. package/skills/helium-browser-mcp/agents/openai.yaml +4 -0
  63. package/skills/helium-browser-mcp/scripts/obmcp.mjs +92 -0
  64. package/skills/helium-browser-mcp/scripts/openbrowsermcp-stdio-proxy.mjs +170 -0
  65. package/skills/learn/SKILL.md +122 -0
  66. package/skills/learn/agents/openai.yaml +7 -0
  67. package/skills/learn/assets/AGENTS.template.md +33 -0
  68. package/skills/learn/assets/errorlog.template.typ +61 -0
  69. package/skills/learn/assets/reading-sequence.template.md +23 -0
  70. package/skills/learn/assets/source-index.template.md +17 -0
  71. package/skills/learn/assets/tasklog.template.typ +57 -0
  72. package/skills/learn/assets/workbook.template.typ +60 -0
  73. package/skills/learn/references/learning-science.md +103 -0
  74. package/skills/learn/scripts/init_learning_workspace.py +70 -0
  75. package/skills/macos-messages/SKILL.md +258 -0
  76. package/skills/memory/SKILL.md +33 -0
  77. package/skills/memory/codex.md +186 -0
  78. package/skills/memory/opencode.md +164 -0
  79. package/skills/mimestreamctl/SKILL.md +170 -0
  80. package/skills/mimestreamctl/agents/openai.yaml +4 -0
  81. package/skills/mimestreamctl/scripts/mimestreamctl +33 -0
  82. package/skills/mon/SKILL.md +51 -0
  83. package/skills/mon/scripts/mon_spend_review.py +458 -0
  84. package/skills/ocr/SKILL.md +136 -0
  85. package/skills/ocr/agents/openai.yaml +4 -0
  86. package/skills/ocr/references/local-ocr-best-practices.md +297 -0
  87. package/skills/ocr/references/mineru-api.md +159 -0
  88. package/skills/ocr/scripts/ocr-router +22 -0
  89. package/skills/ocr/scripts/ocr_router.py +741 -0
  90. package/skills/panopto-mp4-bulk-download/SKILL.md +57 -0
  91. package/skills/panopto-mp4-bulk-download/agents/openai.yaml +4 -0
  92. package/skills/panopto-mp4-bulk-download/references/url-patterns.md +26 -0
  93. package/skills/panopto-mp4-bulk-download/scripts/panopto_bulk_mp4.sh +213 -0
  94. package/skills/rust-systems-style/SKILL.md +109 -0
  95. package/skills/rust-systems-style/agents/openai.yaml +4 -0
  96. package/skills/rust-systems-style/references/rust-review-checklist.md +77 -0
  97. package/skills/rust-systems-style/references/style-sources.md +68 -0
  98. package/skills/ship-ai-native-cli/SKILL.md +76 -0
  99. package/skills/ship-ai-native-cli/agents/openai.yaml +4 -0
  100. package/skills/ship-ai-native-cli/references/case-notes.md +83 -0
  101. package/skills/ship-ai-native-cli/references/product-method.md +82 -0
  102. package/skills/ship-ai-native-cli/references/release-checklist.md +147 -0
  103. package/skills/ship-ai-native-cli/references/rust-cli-shape.md +111 -0
  104. package/skills/telegram-mtproto-session/SKILL.md +125 -0
  105. package/skills/telegram-mtproto-session/agents/openai.yaml +4 -0
  106. package/skills/telegram-mtproto-session/scripts/telegram_session.py +687 -0
  107. package/skills/tg/SKILL.md +173 -0
  108. package/skills/things3-manager/SKILL.md +116 -0
  109. package/skills/things3-manager/scripts/things +42 -0
  110. package/skills/things3-manager/scripts/things_cli.py +514 -0
  111. package/skills/web-artifacts-builder/LICENSE.txt +202 -0
  112. package/skills/web-artifacts-builder/SKILL.md +74 -0
  113. package/skills/web-artifacts-builder/scripts/bundle-artifact.sh +54 -0
  114. package/skills/web-artifacts-builder/scripts/init-artifact.sh +379 -0
  115. package/skills/web-artifacts-builder/scripts/shadcn-components.tar.gz +0 -0
  116. package/skills/yeet/LICENSE.txt +201 -0
  117. package/skills/yeet/SKILL.md +71 -0
  118. package/skills/yeet/agents/openai.yaml +6 -0
  119. package/skills/yeet/assets/yeet-small.svg +3 -0
  120. package/skills/yeet/assets/yeet.png +0 -0
@@ -0,0 +1,741 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ import json
6
+ import os
7
+ import re
8
+ import shutil
9
+ import subprocess
10
+ import sys
11
+ import tempfile
12
+ import time
13
+ import urllib.error
14
+ import urllib.request
15
+ import uuid
16
+ import zipfile
17
+ from dataclasses import dataclass
18
+ from pathlib import Path
19
+ from typing import Any
20
+
21
+ try:
22
+ import fitz # PyMuPDF
23
+ except ImportError: # pragma: no cover - fallback path for minimal hosts
24
+ fitz = None
25
+
26
+ try:
27
+ import requests
28
+ except ImportError: # pragma: no cover - fallback path for minimal hosts
29
+ requests = None
30
+
31
+
32
# Root of the MinerU cloud service; endpoint paths are appended to it.
MINERU_BASE_URL = "https://mineru.net"
# (service, account) pair handed to keychain_secret() when no env token is set.
MINERU_TOKEN_KEYCHAIN = ("codex.mineru", "credential")

# Suffixes that auto-routing treats as images.
IMAGE_SUFFIXES = {".png", ".jpg", ".jpeg", ".tif", ".tiff", ".webp", ".bmp"}
# Suffixes that auto-routing treats as PDFs.
PDF_SUFFIXES = {".pdf"}
# Suffixes main() accepts for the explicit MinerU cloud engines.
CLOUD_DOC_SUFFIXES = {
    ".pdf", ".png", ".jpg", ".jpeg",
    ".doc", ".docx",
    ".ppt", ".pptx",
    ".xls", ".xlsx",
    ".html", ".htm",
}
50
+
51
+
52
@dataclass
class PdfProfile:
    """Statistics collected from a small sample of a PDF's pages."""

    path: Path
    size_bytes: int
    page_count: int | None
    sample_pages: list[int]
    sample_text_chars: int
    pages_with_text: int
    sample_math_hits: int
    image_count: int | None

    @property
    def text_page_ratio(self) -> float:
        """Fraction of sampled pages counted as having text (0.0 with no samples)."""
        sampled = len(self.sample_pages)
        return self.pages_with_text / sampled if sampled else 0.0

    @property
    def avg_text_chars(self) -> float:
        """Mean counted characters per sampled page (0.0 with no samples)."""
        sampled = len(self.sample_pages)
        return self.sample_text_chars / sampled if sampled else 0.0

    @property
    def has_good_text_layer(self) -> bool:
        """True when at least 75% of samples have text and they average 250+ chars."""
        if self.text_page_ratio < 0.75:
            return False
        return self.avg_text_chars >= 250
78
+
79
+
80
def eprint(*parts: object) -> None:
    """Print *parts* to standard error, space-separated and newline-terminated."""
    print(*parts, sep=" ", end="\n", file=sys.stderr)
82
+
83
+
84
def human_size(size: int) -> str:
    """Format a byte count as B (integer) or KB/MB/GB (one decimal place).

    Values of 1024 GB and above are still reported in GB.
    """
    amount = float(size)
    if amount < 1024:
        return f"{int(amount)}B"
    for label in ("KB", "MB"):
        amount /= 1024
        if amount < 1024:
            return f"{amount:.1f}{label}"
    # GB is the largest unit, so it absorbs everything that remains.
    return f"{amount / 1024:.1f}GB"
92
+
93
+
94
def require_tool(name: str) -> str:
    """Return the resolved path of *name* on PATH, or exit if it is missing.

    Raises:
        SystemExit: when ``shutil.which`` cannot locate the tool.
    """
    located = shutil.which(name)
    if located:
        return located
    raise SystemExit(f"Required tool not found on PATH: {name}")
99
+
100
+
101
def run_text(cmd: list[str], *, timeout: int = 120) -> str:
    """Run *cmd*, capturing output as text, and return its stdout.

    Raises:
        RuntimeError: when the command exits non-zero; the message carries
            stderr, or stdout when stderr is empty.
    """
    completed = subprocess.run(
        cmd,
        check=False,
        capture_output=True,
        text=True,
        timeout=timeout,
    )
    if completed.returncode == 0:
        return completed.stdout
    detail = completed.stderr.strip() or completed.stdout.strip()
    raise RuntimeError(f"{cmd[0]} failed: {detail}")
113
+
114
+
115
def text_char_count(value: str) -> int:
    """Count characters that are alphanumeric or lie in U+4E00..U+9FFF."""
    total = 0
    for ch in value:
        if ch.isalnum() or "\u4e00" <= ch <= "\u9fff":
            total += 1
    return total
117
+
118
+
119
def math_hit_count(value: str) -> int:
    """Return the total number of case-insensitive matches of the math patterns.

    The patterns cover math vocabulary, an operator followed by an operand,
    selected math/Greek symbols, and single-letter subscripts such as ``a_1``.
    """
    patterns = (
        r"\b(equation|formula|theorem|lemma|corollary)\b",
        r"[=+\-*/]\s*[A-Za-z0-9(]",
        r"[∑∫√≤≥≈≠∞σΣΔθλμ]",
        r"\b[A-Za-z]_[A-Za-z0-9]\b",
    )
    hits = 0
    for pattern in patterns:
        hits += len(re.findall(pattern, value, flags=re.IGNORECASE))
    return hits
127
+
128
+
129
def pdf_page_count(path: Path) -> int | None:
    """Return the page count of *path*, or None when it cannot be determined.

    PyMuPDF is tried first when it was imported; any failure there falls
    through to parsing the ``Pages:`` line of ``pdfinfo`` output.
    """
    if fitz is not None:
        try:
            with fitz.open(path) as doc:
                return len(doc)
        except Exception:
            pass  # fall through to pdfinfo
    try:
        report = run_text(["pdfinfo", str(path)])
    except Exception:
        return None
    pages_line = next((row for row in report.splitlines() if row.startswith("Pages:")), None)
    if pages_line is None:
        return None
    try:
        return int(pages_line.split(":", 1)[1].strip())
    except ValueError:
        return None
147
+
148
+
149
def sample_pages(page_count: int | None) -> list[int]:
    """Pick the 1-based pages to sample: first, second, middle and last.

    Returns ``[1]`` when the page count is unknown or not positive; otherwise
    the sorted, de-duplicated candidates that fall inside the document.
    """
    if not page_count or page_count <= 0:
        return [1]
    picks: set[int] = set()
    for number in (1, 2, max(1, page_count // 2), page_count):
        if 1 <= number <= page_count:
            picks.add(number)
    return sorted(picks)
154
+
155
+
156
def pdftotext_page(path: Path, page: int) -> str:
    """Return the layout-preserving UTF-8 text of one page via ``pdftotext``."""
    page_arg = str(page)
    command = [
        "pdftotext",
        "-f", page_arg,
        "-l", page_arg,
        "-layout",
        "-enc", "UTF-8",
        str(path),
        "-",
    ]
    return run_text(command, timeout=120)
161
+
162
+
163
def pdf_image_count(path: Path) -> int | None:
    """Return the number of images in *path*, or None when no tool can tell.

    PyMuPDF is preferred when imported. Otherwise the non-blank lines of
    ``pdfimages -list`` are counted and two are subtracted.
    """
    if fitz is not None:
        try:
            with fitz.open(path) as doc:
                total = 0
                for page in doc:
                    total += len(page.get_images(full=True))
                return total
        except Exception:
            pass  # fall through to pdfimages
    if shutil.which("pdfimages") is None:
        return None
    try:
        listing = run_text(["pdfimages", "-list", str(path)], timeout=120)
    except Exception:
        return None
    non_blank = sum(1 for row in listing.splitlines() if row.strip())
    # presumably the two subtracted lines are the listing's header rows — TODO confirm
    return max(0, non_blank - 2)
180
+
181
+
182
def inspect_pdf(path: Path) -> PdfProfile:
    """Sample a few pages of *path* and summarise its text layer.

    With PyMuPDF imported, page text comes from it; if opening the document
    fails, all text statistics are reported as zero. Without PyMuPDF,
    ``pdfinfo`` and ``pdftotext`` must be on PATH (SystemExit otherwise).
    A page counts as "with text" when it has at least 80 counted characters.
    """
    page_count = pdf_page_count(path)
    pages = sample_pages(page_count)
    counters = {"chars": 0, "text_pages": 0, "math": 0}

    def tally(text: str) -> None:
        # Fold one page's text into the running statistics.
        chars = text_char_count(text)
        counters["chars"] += chars
        counters["math"] += math_hit_count(text)
        if chars >= 80:
            counters["text_pages"] += 1

    if fitz is None:
        require_tool("pdfinfo")
        require_tool("pdftotext")
        for page in pages:
            try:
                text = pdftotext_page(path, page)
            except Exception:
                text = ""
            tally(text)
    else:
        try:
            with fitz.open(path) as doc:
                for page in pages:
                    try:
                        text = doc[page - 1].get_text("text")
                    except Exception:
                        text = ""
                    tally(text)
        except Exception:
            # A failure outside the per-page handler discards partial results.
            for key in counters:
                counters[key] = 0

    return PdfProfile(
        path=path,
        size_bytes=path.stat().st_size,
        page_count=page_count,
        sample_pages=pages,
        sample_text_chars=counters["chars"],
        pages_with_text=counters["text_pages"],
        sample_math_hits=counters["math"],
        image_count=pdf_image_count(path),
    )
228
+
229
+
230
def print_profile(profile: PdfProfile) -> None:
    """Print *profile* to stdout as indented JSON, keeping non-ASCII as-is."""
    ratio = round(profile.text_page_ratio, 3)
    average = round(profile.avg_text_chars, 1)
    report = {
        "path": str(profile.path),
        "size": human_size(profile.size_bytes),
        "page_count": profile.page_count,
        "sample_pages": profile.sample_pages,
        "text_page_ratio": ratio,
        "avg_text_chars_per_sample_page": average,
        "sample_math_hits": profile.sample_math_hits,
        "image_count": profile.image_count,
        "has_good_text_layer": profile.has_good_text_layer,
    }
    rendered = json.dumps(report, ensure_ascii=False, indent=2)
    print(rendered)
243
+
244
+
245
def default_out_dir(input_path: Path, engine: str) -> Path:
    """Return a sibling path of *input_path* named ``<stem>_ocr_<engine>``."""
    folder_name = f"{input_path.stem}_ocr_{engine}"
    return input_path.with_name(folder_name)
247
+
248
+
249
def confirm_cloud_upload(path_or_url: str, service: str, args: argparse.Namespace) -> None:
    """Gate an upload to *service* behind the cloud flags or an interactive prompt.

    Args:
        path_or_url: The input about to be uploaded; shown in the prompt.
        service: Human-readable name of the cloud service.
        args: Parsed CLI arguments; ``no_cloud`` and ``allow_cloud`` are read.

    Raises:
        SystemExit: when ``--no-cloud`` is set, when stdin is not a TTY and
            ``--allow-cloud`` was not given, or when the user does not type
            ``UPLOAD`` at the prompt.
    """
    # The prohibition is checked first so that passing both flags can never
    # result in an upload: the restrictive flag wins.
    if args.no_cloud:
        raise SystemExit(f"{service} requires uploading input to a cloud service, but --no-cloud was set.")
    if args.allow_cloud:
        return

    eprint("")
    eprint("Cloud upload confirmation required.")
    eprint(f"Service: {service}")
    eprint(f"Input: {path_or_url}")
    eprint("Only continue for non-confidential documents that you are allowed to upload.")
    if not sys.stdin.isatty():
        raise SystemExit("Refusing cloud upload in non-interactive mode. Re-run with --allow-cloud if this is intended.")
    try:
        answer = input("Type UPLOAD to continue: ").strip()
    except EOFError:
        # End-of-input at the prompt is treated as a refusal, not a crash.
        answer = ""
    if answer != "UPLOAD":
        raise SystemExit("Cloud upload cancelled.")
265
+
266
+
267
def keychain_secret(service: str, account: str) -> str | None:
    """Best-effort lookup of a secret through the ``keychain-secret`` helper.

    Runs ``keychain-secret get <service> <account>`` and returns the first
    line of its stdout with carriage returns removed.

    Returns:
        The secret, or None when the helper is not on PATH, cannot be
        started, times out after 10 seconds, exits non-zero, or prints an
        empty first line.
    """
    tool = shutil.which("keychain-secret")
    if not tool:
        return None
    try:
        proc = subprocess.run(
            [tool, "get", service, account],
            check=False,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            timeout=10,
        )
    except (subprocess.TimeoutExpired, OSError):
        # A hung or unlaunchable helper is the same as "no secret": the
        # lookup is optional, so it must not end in a traceback.
        return None
    if proc.returncode != 0:
        return None
    return proc.stdout.replace("\r", "").split("\n", 1)[0] or None
282
+
283
+
284
def mineru_token() -> str:
    """Return the MinerU API token from the environment or the keychain.

    ``MINERU_API_TOKEN`` is consulted first, then ``MINERU_TOKEN``, then the
    keychain entry named by ``MINERU_TOKEN_KEYCHAIN``.

    Raises:
        SystemExit: when none of the sources yields a non-empty token.
    """
    for variable in ("MINERU_API_TOKEN", "MINERU_TOKEN"):
        candidate = os.environ.get(variable)
        if candidate:
            return candidate
    stored = keychain_secret(*MINERU_TOKEN_KEYCHAIN)
    if stored:
        return stored
    raise SystemExit(
        "MinerU API token not found. Set MINERU_API_TOKEN or store it in Keychain as "
        "service codex.mineru account credential."
    )
295
+
296
+
297
def http_json(method: str, url: str, payload: dict[str, Any] | None, headers: dict[str, str], timeout: int = 60) -> dict[str, Any]:
    """Send a JSON request and return the decoded JSON response body.

    Uses ``requests`` when it was imported, otherwise ``urllib``. Both paths
    report every transport, HTTP-status and decoding failure as RuntimeError
    so callers see one error type regardless of the installed client.

    Args:
        method: HTTP method name.
        url: Absolute request URL.
        payload: JSON-serialisable body, or None for no body.
        headers: Extra request headers.
        timeout: Per-request timeout in seconds.

    Raises:
        RuntimeError: on connection failure, a status of 400 or above, or a
            response body that is not valid JSON.
    """
    if requests is not None:
        try:
            response = requests.request(method, url, json=payload, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            raise RuntimeError(f"HTTP request failed for {url}: {exc}") from exc
        if response.status_code >= 400:
            raise RuntimeError(f"HTTP {response.status_code} from {url}: {response.text[:800]}")
        try:
            return response.json()
        except ValueError as exc:
            # requests' JSON decode errors derive from ValueError.
            raise RuntimeError(f"Invalid JSON from {url}: {response.text[:800]}") from exc

    data = None if payload is None else json.dumps(payload).encode("utf-8")
    req_headers = {"Content-Type": "application/json", **headers}
    req = urllib.request.Request(url, data=data, headers=req_headers, method=method)
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            body = resp.read().decode("utf-8")
    except urllib.error.HTTPError as exc:
        # HTTPError must be handled before its OSError base class below.
        body = exc.read().decode("utf-8", errors="replace")
        raise RuntimeError(f"HTTP {exc.code} from {url}: {body[:800]}") from exc
    except OSError as exc:
        # Covers URLError (DNS failure, refused connection) and socket timeouts,
        # mirroring the message used by the requests branch.
        raise RuntimeError(f"HTTP request failed for {url}: {exc}") from exc
    try:
        return json.loads(body)
    except ValueError as exc:
        raise RuntimeError(f"Invalid JSON from {url}: {body[:800]}") from exc
317
+
318
+
319
def curl_upload_file(path: Path, url: str) -> None:
    """PUT the file at *path* to a signed *url*.

    Uses ``requests`` when it was imported, otherwise the ``curl`` binary.

    Raises:
        RuntimeError: when the server answers with status 400 or above, or
            curl exits non-zero.
        SystemExit: when curl is needed but not on PATH.
    """
    if requests is None:
        require_tool("curl")
        outcome = subprocess.run(["curl", "-sS", "-f", "-X", "PUT", "-T", str(path), url], check=False)
        if outcome.returncode != 0:
            raise RuntimeError("Signed URL upload failed.")
        return

    with path.open("rb") as handle:
        response = requests.put(url, data=handle, timeout=300)
    if response.status_code >= 400:
        raise RuntimeError(f"Signed URL upload failed: HTTP {response.status_code} {response.text[:500]}")
331
+
332
+
333
def curl_download(url: str, out: Path) -> None:
    """Download *url* into *out*, creating the parent directory as needed.

    Uses a streamed ``requests`` download in 1 MiB chunks when ``requests``
    was imported, otherwise ``curl -L``.

    Raises:
        RuntimeError: when the server answers with status 400 or above, or
            curl exits non-zero.
        SystemExit: when curl is needed but not on PATH.
    """
    if requests is None:
        require_tool("curl")
        out.parent.mkdir(parents=True, exist_ok=True)
        outcome = subprocess.run(["curl", "-L", "-sS", "-f", "-o", str(out), url], check=False)
        if outcome.returncode != 0:
            raise RuntimeError(f"Download failed: {url}")
        return

    out.parent.mkdir(parents=True, exist_ok=True)
    one_mib = 1024 * 1024
    with requests.get(url, stream=True, timeout=300) as response:
        if response.status_code >= 400:
            raise RuntimeError(f"Download failed: HTTP {response.status_code} {response.text[:500]}")
        with out.open("wb") as sink:
            for piece in response.iter_content(chunk_size=one_mib):
                if not piece:
                    continue
                sink.write(piece)
350
+
351
+
352
def extract_data(response: dict[str, Any]) -> dict[str, Any]:
    """Return the ``data`` object of a MinerU response.

    Raises:
        RuntimeError: when ``data`` is missing or is not a dict.
    """
    payload = response.get("data")
    if not isinstance(payload, dict):
        raise RuntimeError(f"Unexpected MinerU response: {response}")
    return payload
357
+
358
+
359
def first_url_value(value: Any, preferred_keys: tuple[str, ...], require_zip: bool = False) -> str | None:
    """Depth-first search of a JSON-like value for the first usable URL.

    A candidate is usable when it is a string starting with ``http://`` or
    ``https://`` (case-insensitive) and, with *require_zip*, also contains
    ``.zip``. Inside a dict, *preferred_keys* are checked in order before
    recursing into the dict's values; lists are scanned in order.

    Returns:
        The first usable URL, or None when the structure contains none.
    """

    def is_usable(candidate: Any) -> bool:
        if not isinstance(candidate, str):
            return False
        lowered = candidate.lower()
        # Require a scheme so that unrelated strings in the payload (file
        # names, ids, states) are never mistaken for a URL.
        if not lowered.startswith(("http://", "https://")):
            return False
        return not require_zip or ".zip" in lowered

    if is_usable(value):
        return value
    if isinstance(value, dict):
        for key in preferred_keys:
            candidate = value.get(key)
            if is_usable(candidate):
                return candidate
        for candidate in value.values():
            found = first_url_value(candidate, preferred_keys, require_zip=require_zip)
            if found:
                return found
    if isinstance(value, list):
        for candidate in value:
            found = first_url_value(candidate, preferred_keys, require_zip=require_zip)
            if found:
                return found
    return None
380
+
381
+
382
def run_native_text(input_path: Path, out_dir: Path) -> Path:
    """Dump the PDF's text layer to ``<out_dir>/full.md`` and return that path.

    ``pdftotext -layout`` is preferred when on PATH (pages split on form
    feeds); otherwise PyMuPDF is used if it was imported. Each page gets a
    ``PDF_PAGE`` marker comment and a heading; an empty final page is dropped.

    Raises:
        SystemExit: when neither pdftotext nor PyMuPDF is available.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    if shutil.which("pdftotext"):
        raw = run_text(["pdftotext", "-layout", "-enc", "UTF-8", str(input_path), "-"], timeout=900)
        pages = raw.split("\f")
        note = "Extracted from the PDF text layer with pdftotext -layout."
    elif fitz is not None:
        with fitz.open(input_path) as doc:
            pages = [page.get_text("text") for page in doc]
        note = "Extracted from the PDF text layer with PyMuPDF."
    else:
        raise SystemExit("native-text requires either pdftotext or PyMuPDF.")

    last_index = len(pages)
    lines = [f"# {input_path.stem}", "", f"<!-- {note} -->", ""]
    for idx, page_text in enumerate(pages, start=1):
        body = page_text.strip()
        if idx == last_index and not body:
            continue
        lines.extend([f"<!-- PDF_PAGE {idx:04d} -->", f"## PDF Page {idx}", "", body, ""])

    out_path = out_dir / "full.md"
    document = "\n".join(lines).rstrip() + "\n"
    out_path.write_text(document, encoding="utf-8")
    return out_path
406
+
407
+
408
def run_apple_vision(input_path: Path, out_dir: Path, language: str | None, tool: Path | None = None) -> Path:
    """Run the Apple Vision OCR command-line tool and save its stdout as text.

    Args:
        input_path: File passed to the OCR tool.
        out_dir: Directory (created if needed) that receives ``<stem>.txt``.
        language: Passed as ``--language`` unless empty or ``"auto"``.
        tool: Path of the OCR executable. When omitted, the
            ``OCR_ROUTER_APPLE_VISION_TOOL`` environment variable is used if
            set, otherwise the original default ``/Users/yupeit/bin/ocr``.

    Returns:
        Path of the written text file.

    Raises:
        SystemExit: when the OCR tool does not exist at the resolved path.
    """
    if tool is None:
        # The default is a machine-specific location, so allow relocating it
        # without editing this file.
        tool = Path(os.environ.get("OCR_ROUTER_APPLE_VISION_TOOL") or "/Users/yupeit/bin/ocr")
    if not tool.exists():
        raise SystemExit(f"Apple Vision OCR tool not found at {tool}")
    out_dir.mkdir(parents=True, exist_ok=True)
    cmd = [str(tool)]
    if language and language != "auto":
        cmd.extend(["--language", language])
    cmd.append(str(input_path))
    text = run_text(cmd, timeout=300)
    out_path = out_dir / f"{input_path.stem}.txt"
    out_path.write_text(text, encoding="utf-8")
    return out_path
421
+
422
+
423
def run_gemini_vlm(input_path: Path, out_dir: Path, args: argparse.Namespace, tool: Path | None = None) -> Path:
    """Run the Gemini VLM OCR script, writing Markdown to ``<out_dir>/full.md``.

    Args:
        input_path: File passed to the OCR script.
        out_dir: Directory (created if needed) that receives ``full.md``.
        args: Parsed CLI arguments; ``concurrent`` and ``vlm_model`` are
            forwarded, and the cloud flags gate the upload.
        tool: Path of the OCR script. When omitted, the
            ``OCR_ROUTER_GEMINI_TOOL`` environment variable is used if set,
            otherwise the original default ``/Users/yupeit/bin/ocr.py``.

    Returns:
        Path of the Markdown file the script was asked to write.

    Raises:
        SystemExit: when the script is missing, the upload is refused, or the
            script exits non-zero.
    """
    if tool is None:
        # The default is a machine-specific location, so allow relocating it
        # without editing this file.
        tool = Path(os.environ.get("OCR_ROUTER_GEMINI_TOOL") or "/Users/yupeit/bin/ocr.py")
    # Check for the script before prompting, so the user is never asked to
    # approve an upload that cannot happen.
    if not tool.exists():
        raise SystemExit(f"Gemini OCR script not found at {tool}")
    confirm_cloud_upload(str(input_path), f"Gemini VLM via {tool}", args)
    out_dir.mkdir(parents=True, exist_ok=True)
    out_path = out_dir / "full.md"
    cmd = [
        str(tool),
        str(input_path),
        "--output",
        str(out_path),
        "--concurrent",
        str(args.concurrent),
        "--model",
        args.vlm_model,
    ]
    proc = subprocess.run(cmd, check=False)
    if proc.returncode != 0:
        raise SystemExit(f"Gemini VLM OCR failed with exit code {proc.returncode}")
    return out_path
444
+
445
+
446
def run_local_mineru(input_path: Path, out_dir: Path, args: argparse.Namespace) -> Path:
    """Run MinerU locally through ``uvx`` and return the first Markdown output.

    The full tool output is written to ``<out_dir>/mineru-local.log``; only
    lines mentioning a completed batch, an error or a timeout are echoed to
    stdout. Timeout-related environment variables are given defaults without
    overriding values already set by the caller.

    Returns:
        The alphabetically first ``*.md`` under *out_dir*, or *out_dir* itself
        when no Markdown file exists.

    Raises:
        SystemExit: when ``uvx`` is missing or MinerU exits non-zero.
    """
    require_tool("uvx")
    out_dir.mkdir(parents=True, exist_ok=True)

    env = os.environ.copy()
    timeout_defaults = {
        "MINERU_TASK_RESULT_TIMEOUT_SECONDS": str(args.timeout_seconds),
        "MINERU_TASK_RESULT_DOWNLOAD_TIMEOUT_SECONDS": "1800",
        "MINERU_LOCAL_API_STARTUP_TIMEOUT_SECONDS": "600",
    }
    for variable, fallback in timeout_defaults.items():
        env.setdefault(variable, fallback)

    def as_flag(enabled: Any) -> str:
        # MinerU takes its switches as the literal words true/false.
        return "true" if enabled else "false"

    cmd = [
        "uvx", "mineru[all]",
        "-p", str(input_path),
        "-o", str(out_dir),
        "-b", args.mineru_backend,
        "-l", args.language,
        "-f", as_flag(args.enable_formula),
        "-t", as_flag(args.enable_table),
        "--image-analysis", as_flag(args.image_analysis),
    ]
    if args.mineru_method:
        cmd += ["-m", args.mineru_method]
    if args.start_page is not None:
        cmd += ["-s", str(args.start_page)]
    if args.end_page is not None:
        cmd += ["-e", str(args.end_page)]

    progress_markers = ("Completed batch", "Error:", "Timed out")
    log_path = out_dir / "mineru-local.log"
    with log_path.open("w", encoding="utf-8") as log:
        log.write("$ " + " ".join(cmd) + "\n\n")
        log.flush()
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, env=env)
        assert proc.stdout is not None
        for line in proc.stdout:
            log.write(line)
            log.flush()
            if any(marker in line for marker in progress_markers):
                print(line.rstrip(), flush=True)
        code = proc.wait()
    if code != 0:
        raise SystemExit(f"Local MinerU failed with exit code {code}; see {log_path}")

    markdowns = sorted(out_dir.rglob("*.md"))
    if not markdowns:
        return out_dir
    return markdowns[0]
495
+
496
+
497
def run_mineru_agent(input_path: Path, out_dir: Path, args: argparse.Namespace) -> Path:
    """Parse a small document with the MinerU agent API and save ``full.md``.

    Inputs larger than 10 MiB, or PDFs known to exceed 20 pages, are rejected
    before any upload confirmation is requested.

    Raises:
        SystemExit: when the input exceeds the size/page limit or the upload
            is refused.
        RuntimeError: when a response lacks the expected task id, upload URL
            or Markdown URL.
    """
    profile = None
    if input_path.suffix.lower() == ".pdf":
        profile = inspect_pdf(input_path)
    oversized = input_path.stat().st_size > 10 * 1024 * 1024
    too_many_pages = bool(profile and profile.page_count and profile.page_count > 20)
    if oversized or too_many_pages:
        raise SystemExit("MinerU agent API is limited to 10MB/20 pages. Use --engine mineru-api instead.")

    confirm_cloud_upload(str(input_path), "MinerU agent parse API", args)
    out_dir.mkdir(parents=True, exist_ok=True)

    created = http_json("POST", f"{MINERU_BASE_URL}/api/v1/agent/parse/file", {"file_name": input_path.name}, {})
    data = extract_data(created)
    task_id = data.get("task_id") or data.get("id")
    upload_url = data.get("file_url") or data.get("upload_url")
    if not (task_id and upload_url):
        raise RuntimeError(f"Unexpected MinerU agent create response: {created}")

    curl_upload_file(input_path, upload_url)
    result = poll_agent_task(str(task_id), args.poll_interval, args.timeout_seconds)
    md_url = result.get("full_md_url") or result.get("md_url") or result.get("markdown_url")
    if not md_url:
        raise RuntimeError(f"Agent task completed but no Markdown URL was found: {result}")

    out_path = out_dir / "full.md"
    curl_download(md_url, out_path)
    return out_path
518
+
519
+
520
def poll_agent_task(task_id: str, interval: float, timeout_seconds: int) -> dict[str, Any]:
    """Poll the MinerU agent task until it finishes and return its data object.

    The state is read from ``state`` or ``status`` and compared
    case-insensitively. The deadline is checked after each poll, so at least
    one request is always made.

    Raises:
        RuntimeError: when the task reports ``failed`` or ``error``.
        TimeoutError: when *timeout_seconds* elapse without a final state.
    """
    finished_states = {"done", "completed", "success", "succeeded"}
    failed_states = {"failed", "error"}
    status_url = f"{MINERU_BASE_URL}/api/v1/agent/parse/{task_id}"
    deadline = time.time() + timeout_seconds
    while True:
        data = extract_data(http_json("GET", status_url, None, {}))
        state = str(data.get("state") or data.get("status") or "").lower()
        if state in finished_states:
            return data
        if state in failed_states:
            raise RuntimeError(f"MinerU agent task failed: {data}")
        if time.time() > deadline:
            raise TimeoutError(f"Timed out waiting for MinerU agent task {task_id}")
        eprint(f"MinerU agent task {task_id}: {state or 'processing'}")
        time.sleep(interval)
534
+
535
+
536
def run_mineru_api(input_path: Path, out_dir: Path, args: argparse.Namespace) -> Path:
    """Process a document with the MinerU batch API and unpack the result zip.

    Steps: confirm the upload, request a signed upload URL, upload the file,
    poll the batch, download ``mineru_result.zip`` and extract it into
    ``<out_dir>/mineru_result``.

    Returns:
        The alphabetically first extracted ``*.md``, or the extraction
        directory when the archive contains no Markdown.

    Raises:
        SystemExit: when the upload is refused or no API token is available.
        RuntimeError: when a response lacks the batch id, upload URL or zip URL.
    """
    confirm_cloud_upload(str(input_path), "MinerU official API", args)
    out_dir.mkdir(parents=True, exist_ok=True)
    auth_headers = {"Authorization": f"Bearer {mineru_token()}"}

    # data_id lets the poller pick this file's entry out of the batch result.
    data_id = str(uuid.uuid4())
    file_entry: dict[str, Any] = {
        "name": input_path.name,
        "data_id": data_id,
        "is_ocr": args.is_ocr,
    }
    if args.page_ranges:
        file_entry["page_ranges"] = args.page_ranges
    request_body = {
        "enable_formula": args.enable_formula,
        "enable_table": args.enable_table,
        "language": args.language,
        "model_version": args.model_version,
        "files": [file_entry],
    }

    created = http_json("POST", f"{MINERU_BASE_URL}/api/v4/file-urls/batch", request_body, auth_headers)
    created_data = extract_data(created)
    batch_id = created_data.get("batch_id")
    files = created_data.get("file_urls") or created_data.get("files") or []
    if not (batch_id and files):
        raise RuntimeError(f"Unexpected MinerU signed URL response: {created}")
    upload_url = first_url_value(files, ("upload_url", "file_url", "url"))
    if not upload_url:
        raise RuntimeError(f"Signed URL response did not include upload_url: {created}")

    eprint(f"Uploading to MinerU batch {batch_id} ...")
    curl_upload_file(input_path, upload_url)
    result = poll_mineru_batch(str(batch_id), data_id, auth_headers, args.poll_interval, args.timeout_seconds)
    zip_url = first_url_value(result, ("full_zip_url", "zip_url"), require_zip=True)
    if not zip_url:
        raise RuntimeError(f"MinerU task completed but no zip URL was found: {result}")

    zip_path = out_dir / "mineru_result.zip"
    curl_download(str(zip_url), zip_path)
    extract_dir = out_dir / "mineru_result"
    extract_dir.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall(extract_dir)

    markdowns = sorted(extract_dir.rglob("*.md"))
    if not markdowns:
        return extract_dir
    return markdowns[0]
583
+
584
+
585
def poll_mineru_batch(
    batch_id: str,
    data_id: str,
    headers: dict[str, str],
    interval: float,
    timeout_seconds: int,
) -> dict[str, Any]:
    """Poll a MinerU batch until the tracked file finishes; return its entry.

    Result entries are read from ``extract_result``, ``results`` or ``files``.
    The entry whose ``data_id`` matches is preferred, with the first dict
    entry as a fallback; when there are no entries the state is read from the
    batch data itself.

    Raises:
        RuntimeError: when the state is ``failed`` or ``error``.
        TimeoutError: when *timeout_seconds* elapse without a final state.
    """
    finished_states = {"done", "completed", "success", "succeeded"}
    failed_states = {"failed", "error"}
    status_url = f"{MINERU_BASE_URL}/api/v4/extract-results/batch/{batch_id}"
    deadline = time.time() + timeout_seconds
    while True:
        data = extract_data(http_json("GET", status_url, None, headers))
        results = data.get("extract_result") or data.get("results") or data.get("files") or []
        if isinstance(results, dict):
            results = [results]
        elif not isinstance(results, list):
            results = []

        target = None
        for item in results:
            if not isinstance(item, dict):
                continue
            # Keep an existing candidate unless this entry is the exact match.
            if target and item.get("data_id") != data_id:
                continue
            target = item

        subject = target or data
        state = str(subject.get("state") or subject.get("status") or "").lower()
        if state in finished_states:
            return subject
        if state in failed_states:
            raise RuntimeError(f"MinerU batch task failed: {subject}")
        if time.time() > deadline:
            raise TimeoutError(f"Timed out waiting for MinerU batch {batch_id}")
        eprint(f"MinerU batch {batch_id}: {state or 'processing'}")
        time.sleep(interval)
616
+
617
+
618
def choose_engine(input_path: Path, args: argparse.Namespace) -> tuple[str, PdfProfile | None]:
    """Pick the extraction engine for *input_path* and return it with a profile.

    An explicit ``--engine`` is honoured as-is (a PDF is profiled only when
    ``--show-profile`` is set). In auto mode images go to ``apple-vision`` and
    other non-PDF files to ``mineru-api``. PDFs are profiled and routed in
    this order: structured-output requests to MinerU (local if ``uvx`` is on
    PATH, else API); a good text layer to ``native-text``; then
    ``mineru-local`` if ``uvx`` exists; then ``mineru-agent`` for files within
    10 MiB and 20 pages; otherwise ``mineru-api``.
    """
    suffix = input_path.suffix.lower()

    if args.engine != "auto":
        wants_profile = suffix == ".pdf" and args.show_profile
        return args.engine, (inspect_pdf(input_path) if wants_profile else None)
    if suffix in IMAGE_SUFFIXES:
        return "apple-vision", None
    if suffix not in PDF_SUFFIXES:
        return "mineru-api", None

    profile = inspect_pdf(input_path)
    wants_structure = args.require_structure or args.need_formulas or args.need_tables
    if wants_structure:
        return ("mineru-local" if shutil.which("uvx") else "mineru-api"), profile
    if profile.has_good_text_layer:
        return "native-text", profile
    if shutil.which("uvx"):
        return "mineru-local", profile
    # An unknown page count is treated as too many pages for the agent API.
    fits_agent = profile.size_bytes <= 10 * 1024 * 1024 and (profile.page_count or 999999) <= 20
    return ("mineru-agent" if fits_agent else "mineru-api"), profile
640
+
641
+
642
def print_file_profile(input_path: Path, engine: str, profile: PdfProfile | None) -> None:
    """Print the recommended engine and file facts to stdout as indented JSON.

    With a PDF profile the full set of sampled statistics is included;
    without one only the path, size and suffix are reported.
    """
    if profile is None:
        report = {
            "recommended_engine": engine,
            "path": str(input_path),
            "size": human_size(input_path.stat().st_size),
            "suffix": input_path.suffix.lower(),
        }
    else:
        report = {
            "recommended_engine": engine,
            "path": str(profile.path),
            "size": human_size(profile.size_bytes),
            "page_count": profile.page_count,
            "sample_pages": profile.sample_pages,
            "text_page_ratio": round(profile.text_page_ratio, 3),
            "avg_text_chars_per_sample_page": round(profile.avg_text_chars, 1),
            "sample_math_hits": profile.sample_math_hits,
            "image_count": profile.image_count,
            "has_good_text_layer": profile.has_good_text_layer,
        }
    rendered = json.dumps(report, ensure_ascii=False, indent=2)
    print(rendered)
664
+
665
+
666
def parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse ``sys.argv``."""
    parser = argparse.ArgumentParser(description="Route OCR/document extraction to the best local or MinerU workflow.")
    add = parser.add_argument
    toggle = argparse.BooleanOptionalAction
    engines = ["auto", "native-text", "apple-vision", "gemini-vlm", "mineru-local", "mineru-api", "mineru-agent"]

    # Input, routing and output location.
    add("input", type=Path, help="Local PDF/image/document path")
    add("--engine", choices=engines, default="auto")
    add("--out-dir", type=Path)
    add("--show-profile", action="store_true")
    add("--profile-only", action="store_true", help="Inspect the file and recommended engine without extracting")

    # Cloud upload policy.
    add("--allow-cloud", action="store_true", help="Allow upload without an interactive confirmation prompt")
    add("--no-cloud", action="store_true", help="Forbid cloud OCR/API upload")

    # Content hints and extraction switches.
    add("--language", default="en")
    add("--need-formulas", action="store_true")
    add("--need-tables", action="store_true")
    add("--require-structure", action="store_true", help="Prefer MinerU structured output over plain text layer extraction")
    add("--enable-formula", action=toggle, default=True)
    add("--enable-table", action=toggle, default=True)
    add("--image-analysis", action=toggle, default=False)

    # MinerU-specific options.
    add("--model-version", default="vlm", help="MinerU official API model_version, e.g. vlm, pipeline, MinerU-HTML")
    add("--is-ocr", action=toggle, default=True, help="MinerU official API OCR flag")
    add("--page-ranges", help="MinerU API page ranges string, e.g. 1-5,9")
    add("--mineru-backend", default="pipeline")
    add("--mineru-method", default="txt")
    add("--start-page", type=int, help="0-based local MinerU start page")
    add("--end-page", type=int, help="0-based local MinerU inclusive end page")

    # Gemini VLM options and polling behaviour.
    add("--vlm-model", default="gemini-2.5-flash-lite")
    add("--concurrent", type=int, default=2)
    add("--poll-interval", type=float, default=5.0)
    add("--timeout-seconds", type=int, default=86400)
    return parser.parse_args()
694
+
695
+
696
def main() -> int:
    """Command-line entry point: choose an engine, run it, print the output path.

    Returns:
        0 on success.

    Raises:
        SystemExit: when the input is missing, when a MinerU cloud engine
            (requested explicitly or selected by auto-routing) would receive a
            suffix outside ``CLOUD_DOC_SUFFIXES``, or when an engine fails.
    """
    args = parse_args()
    input_path = args.input.expanduser().resolve()
    if not input_path.exists():
        raise SystemExit(f"Input not found: {input_path}")

    cloud_engines = {"mineru-api", "mineru-agent"}
    suffix_supported = input_path.suffix.lower() in CLOUD_DOC_SUFFIXES
    if not suffix_supported and args.engine in cloud_engines:
        raise SystemExit(f"MinerU cloud engine does not support this suffix: {input_path.suffix}")

    engine, profile = choose_engine(input_path, args)
    if args.profile_only:
        print_file_profile(input_path, engine, profile)
        return 0
    # Auto-routing sends every non-image, non-PDF file to mineru-api, so the
    # suffix must be re-checked against the engine that was actually chosen.
    if not suffix_supported and engine in cloud_engines:
        raise SystemExit(f"MinerU cloud engine does not support this suffix: {input_path.suffix}")

    if args.show_profile and profile is not None:
        print_profile(profile)
    elif profile is not None:
        eprint(
            "PDF profile:",
            f"pages={profile.page_count}",
            f"text_layer={profile.has_good_text_layer}",
            f"avg_chars={profile.avg_text_chars:.1f}",
            f"images={profile.image_count}",
        )
    eprint(f"Selected engine: {engine}")

    out_dir = args.out_dir.expanduser().resolve() if args.out_dir else default_out_dir(input_path, engine)
    if engine == "native-text":
        out_path = run_native_text(input_path, out_dir)
    elif engine == "apple-vision":
        out_path = run_apple_vision(input_path, out_dir, args.language)
    elif engine == "gemini-vlm":
        out_path = run_gemini_vlm(input_path, out_dir, args)
    elif engine == "mineru-local":
        out_path = run_local_mineru(input_path, out_dir, args)
    elif engine == "mineru-api":
        out_path = run_mineru_api(input_path, out_dir, args)
    elif engine == "mineru-agent":
        out_path = run_mineru_agent(input_path, out_dir, args)
    else:
        raise SystemExit(f"Unknown engine: {engine}")

    print(str(out_path))
    return 0
738
+
739
+
740
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())