deepextract-skill 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,22 @@
1
+ # deepextract-skill
2
+
3
+ Install the `deepextract-doc-converter` skill into OpenCode via `npx`.
4
+
5
+ ## Usage
6
+
7
+ ```bash
8
+ npx deepextract-skill
9
+ ```
10
+
11
+ This installs the skill to the path below (an existing copy at that path is removed and replaced):
12
+
13
+ `~/.config/opencode/skills/deepextract-doc-converter`
14
+
15
+ ## After install
16
+
17
+ Set environment variables (recommended):
18
+
19
+ ```bash
20
+ export DEEPEXTRACT_ROOT="/absolute/path/to/deepextract"
21
+ export MINERU_API_KEY="your_mineru_key"
22
+ ```
package/bin/install.js ADDED
@@ -0,0 +1,70 @@
1
+ #!/usr/bin/env node
2
+ const fs = require("fs");
3
+ const path = require("path");
4
+ const os = require("os");
5
+
6
/**
 * Write one line to standard output.
 *
 * @param {*} msg Value to print; it is converted to a string and a trailing
 *   newline is appended.
 */
function log(msg) {
  const line = "".concat(msg, "\n");
  process.stdout.write(line);
}
9
+
10
/**
 * Recursively copy the directory tree rooted at `src` into `dst`.
 *
 * `dst` (and any missing parents) is created first. Entries that report as
 * directories are descended into; every other entry is copied as a file.
 *
 * @param {string} src Directory to copy from.
 * @param {string} dst Directory to copy into.
 */
function copyDir(src, dst) {
  fs.mkdirSync(dst, { recursive: true });
  const children = fs.readdirSync(src, { withFileTypes: true });
  children.forEach((child) => {
    const from = path.join(src, child.name);
    const to = path.join(dst, child.name);
    if (!child.isDirectory()) {
      fs.copyFileSync(from, to);
      return;
    }
    copyDir(from, to);
  });
}
22
+
23
/**
 * Add the execute bits (user, group, other) to a file's mode.
 *
 * Does nothing on Windows.
 *
 * @param {string} filePath File whose mode should be updated.
 */
function ensureExecutable(filePath) {
  const onWindows = process.platform === "win32";
  if (!onWindows) {
    const { mode } = fs.statSync(filePath);
    fs.chmodSync(filePath, mode | 0o111);
  }
}
28
+
29
/**
 * Install the bundled skill template into the user's OpenCode skills folder.
 *
 * Steps: check that the packaged template exists, make sure the skills
 * directory exists, remove any previous copy of the skill, copy the template
 * in, mark the converter script executable when present, and print
 * follow-up instructions.
 *
 * @throws {Error} When the packaged template directory is missing.
 */
function main() {
  const skillName = "deepextract-doc-converter";
  const skillBase = path.join(os.homedir(), ".config", "opencode", "skills");
  const target = path.join(skillBase, skillName);
  const templateRoot = path.join(__dirname, "..", "templates", skillName);

  if (!fs.existsSync(templateRoot)) {
    throw new Error(`Template not found: ${templateRoot}`);
  }

  fs.mkdirSync(skillBase, { recursive: true });

  // Start from a clean slate so files from an older version do not linger.
  if (fs.existsSync(target)) {
    fs.rmSync(target, { recursive: true, force: true });
  }
  copyDir(templateRoot, target);

  const scriptPath = path.join(target, "scripts", "convert_with_deepextract.py");
  if (fs.existsSync(scriptPath)) {
    ensureExecutable(scriptPath);
  }

  const report = [
    "[OK] DeepExtract skill installed.",
    `Path: ${target}`,
    "",
    "Next steps:",
    "1) Set DEEPEXTRACT_ROOT to your project path",
    "2) Set MINERU_API_KEY globally if needed",
    "",
    "Example:",
    'export DEEPEXTRACT_ROOT="/path/to/deepextract"',
    'export MINERU_API_KEY="your_key"',
  ];
  for (const line of report) {
    log(line);
  }
}
64
+
65
// Script entry point: run the installer and turn any failure into a short
// message on stderr plus a non-zero exit status.
try {
  main();
} catch (err) {
  const { message } = err;
  process.stderr.write("[ERROR] ".concat(message, "\n"));
  process.exit(1);
}
package/package.json ADDED
@@ -0,0 +1,15 @@
1
+ {
2
+ "name": "deepextract-skill",
3
+ "version": "0.1.0",
4
+ "description": "Install DeepExtract document conversion skill into OpenCode",
5
+ "bin": {
6
+ "deepextract-skill": "bin/install.js"
7
+ },
8
+ "type": "commonjs",
9
+ "files": [
10
+ "bin",
11
+ "templates",
12
+ "README.md"
13
+ ],
14
+ "license": "MIT"
15
+ }
@@ -0,0 +1,36 @@
1
+ ---
2
+ name: deepextract-doc-converter
3
+ description: Convert documents with local DeepExtract when users ask things like "帮我把这个文档转为xx格式", "转成 Word", "转成 Markdown", or "convert this file to docx/markdown". Use for PDF, images, Word, PPT, HTML, and Markdown inputs, with outputs in Markdown or DOCX.
4
+ ---
5
+
6
+ # DeepExtract Document Converter
7
+
8
+ Use this skill to execute real local file conversion through the DeepExtract codebase.
9
+
10
+ ## Workflow
11
+
12
+ 1. Parse user intent:
13
+ - Input file path
14
+ - Target format (`markdown` or `docx`)
15
+ 2. Run the bundled script:
16
+
17
+ ```bash
18
+ python "$HOME/.config/opencode/skills/deepextract-doc-converter/scripts/convert_with_deepextract.py" --input "<input_path>" --target "<target>"
19
+ ```
20
+
21
+ 3. If the user gave an explicit output file path, add `--output "<output_path>"`.
22
+ 4. Return the generated result path to the user.
23
+
24
+ ## Target format mapping
25
+
26
+ - `markdown`, `md` -> Markdown output (when the input is not already Markdown, the result is delivered as a `.zip` archive)
27
+ - `word`, `docx`, `doc` -> DOCX output
28
+
29
+ ## Notes
30
+
31
+ - The script auto-detects the project root from the current directory or its parent directories.
32
+ - For global usage in any directory, set `DEEPEXTRACT_ROOT` to your DeepExtract project path.
33
+ - For MinerU-based conversions, an API key is required, supplied through either:
34
+ - `MINERU_API_KEY` environment variable, or
35
+ - `apikey.md` with `MINERU_API_KEY=...`
36
+ - If both input path and target are clear, run directly without asking extra questions.
@@ -0,0 +1,131 @@
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import os
4
+ import shutil
5
+ import sys
6
+ from pathlib import Path
7
+
8
+
9
def find_project_root(start: Path) -> Path:
    """Locate the DeepExtract project directory.

    A directory qualifies when it contains both ``md2word_final.py`` and
    ``mineru_extract.py``. If the ``DEEPEXTRACT_ROOT`` environment variable
    is set to a non-blank value, only that directory is considered.
    Otherwise ``start`` and each of its parents are checked, nearest first.

    Args:
        start: Directory from which the upward search begins.

    Returns:
        The resolved path of the first qualifying directory.

    Raises:
        FileNotFoundError: If ``DEEPEXTRACT_ROOT`` points at a directory
            lacking the marker files, or if no qualifying directory is
            found while walking upward from ``start``.
    """
    marker_files = ("md2word_final.py", "mineru_extract.py")

    def looks_like_root(directory: Path) -> bool:
        return all((directory / marker).exists() for marker in marker_files)

    configured = os.getenv("DEEPEXTRACT_ROOT", "").strip()
    if configured:
        explicit_root = Path(configured).expanduser().resolve()
        if not looks_like_root(explicit_root):
            raise FileNotFoundError(
                f"DEEPEXTRACT_ROOT is set but invalid: {explicit_root}. Missing md2word_final.py or mineru_extract.py"
            )
        return explicit_root

    origin = start.resolve()
    for directory in (origin, *origin.parents):
        if looks_like_root(directory):
            return directory
    raise FileNotFoundError("Cannot locate DeepExtract project root.")
25
+
26
+
27
def normalize_target(target: str) -> str:
    """Map a user-supplied format name onto the canonical ``"md"`` or ``"docx"``.

    Matching ignores surrounding whitespace and letter case.

    Args:
        target: One of ``md``, ``markdown``, ``doc``, ``docx`` or ``word``.

    Returns:
        ``"md"`` for the Markdown aliases, ``"docx"`` for the Word aliases.

    Raises:
        ValueError: If ``target`` is not a recognised alias.
    """
    aliases = {
        "md": "md",
        "markdown": "md",
        "doc": "docx",
        "docx": "docx",
        "word": "docx",
    }
    canonical = aliases.get(target.strip().lower())
    if canonical is None:
        raise ValueError(f"Unsupported target format: {target}")
    return canonical
34
+
35
+
36
def ensure_key_notice(project_root: Path) -> None:
    """Warn on stderr when no MinerU API key appears to be configured.

    A key counts as configured when the ``MINERU_API_KEY`` environment
    variable is non-blank, or when ``<project_root>/apikey.md`` contains a
    line of the form ``MINERU_API_KEY=<non-blank value>``.

    This check is advisory only and never raises because of the key file:
    an ``apikey.md`` that is not a regular file, cannot be read, or is not
    valid UTF-8 is treated as "no key found".

    Args:
        project_root: DeepExtract project directory that may hold ``apikey.md``.
    """
    if os.getenv("MINERU_API_KEY", "").strip():
        return

    key_file = project_root / "apikey.md"
    lines = []
    # is_file() rather than exists(): a directory of that name is not a key file.
    if key_file.is_file():
        try:
            lines = key_file.read_text(encoding="utf-8").splitlines()
        except (OSError, UnicodeDecodeError):
            # A broken key file must not abort the conversion; just warn below.
            lines = []

    has_file_key = any(
        entry.startswith("MINERU_API_KEY=") and entry.split("=", 1)[1].strip()
        for entry in (raw.strip() for raw in lines)
    )
    if not has_file_key:
        print(
            "[WARN] MINERU_API_KEY not found. Conversions requiring MinerU may fail.",
            file=sys.stderr,
        )
51
+
52
+
53
def convert_markdown_to_docx(project_root: Path, input_file: Path, output_file: Path) -> Path:
    """Convert a Markdown file to DOCX using the project's ``md2word_final`` module.

    ``project_root`` is put at the front of ``sys.path`` so the module can be
    imported from the DeepExtract checkout. The output file's parent
    directory is created if missing.

    Args:
        project_root: DeepExtract project directory containing ``md2word_final.py``.
        input_file: Markdown file to convert.
        output_file: Destination path for the generated document.

    Returns:
        ``output_file``, unchanged.
    """
    sys.path.insert(0, str(project_root))
    from md2word_final import convert_with_python_docx

    destination_dir = output_file.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    convert_with_python_docx(str(input_file), str(output_file))
    return output_file
60
+
61
+
62
def convert_with_mineru(project_root: Path, input_file: Path, target: str, output_file: Path) -> Path:
    """Convert a document through the project's ``mineru_extract`` module.

    ``project_root`` is put at the front of ``sys.path`` so the project
    modules can be imported. The input is handed to
    ``mineru_extract.upload_and_extract``; its result is indexed by
    ``"zip_path"`` (Markdown target) or ``"md_path"`` (DOCX target).

    * ``target == "md"``: the archive at ``result["zip_path"]`` is copied to
      ``output_file``, whose suffix is forced to ``.zip``.
    * any other target: ``result["md_path"]`` is converted to
      ``output_file`` with ``md2word_final.convert_with_python_docx``.

    ``md2word_final`` is imported only for the DOCX target, so a Markdown
    extraction does not depend on that module being importable. The import
    still happens before the upload so a broken DOCX setup fails fast.

    Args:
        project_root: DeepExtract project directory holding the modules.
        input_file: Document to convert.
        target: Canonical target format, ``"md"`` or ``"docx"``.
        output_file: Requested destination path.

    Returns:
        The path actually written (may differ from ``output_file`` by suffix
        for the Markdown target).
    """
    sys.path.insert(0, str(project_root))
    import mineru_extract

    wants_archive = target == "md"
    docx_converter = None
    if not wants_archive:
        import md2word_final as docx_converter

    result = mineru_extract.upload_and_extract(str(input_file))
    output_file.parent.mkdir(parents=True, exist_ok=True)

    if wants_archive:
        src_zip = Path(result["zip_path"])
        if output_file.suffix.lower() != ".zip":
            output_file = output_file.with_suffix(".zip")
        shutil.copy2(src_zip, output_file)
        return output_file

    docx_converter.convert_with_python_docx(result["md_path"], str(output_file))
    return output_file
79
+
80
+
81
def build_default_output(input_file: Path, target: str) -> Path:
    """Derive the default output path next to the input file.

    Args:
        input_file: Source document path.
        target: Canonical target format.

    Returns:
        ``input_file`` with its suffix replaced by ``.zip`` when ``target``
        is ``"md"``, otherwise by ``.docx``.
    """
    suffix = ".zip" if target == "md" else ".docx"
    return input_file.with_suffix(suffix)
85
+
86
+
87
def main() -> int:
    """Command-line entry point: convert ``--input`` to the ``--target`` format.

    Routing:

    * Markdown input, DOCX target: local conversion via ``md2word_final``.
    * Markdown input, Markdown target: plain file copy (suffix forced to
      ``.md`` unless the output already has a Markdown suffix).
    * Everything else: conversion through MinerU, after warning if no API
      key appears to be configured.

    The resulting path is printed on stdout.

    Returns:
        ``0`` on success.

    Raises:
        FileNotFoundError: If the input file does not exist or the
            DeepExtract project root cannot be located.
        ValueError: If ``--target`` is not a supported format name.
    """
    parser = argparse.ArgumentParser(description="Convert files with local DeepExtract")
    parser.add_argument("--input", required=True, help="Input file path")
    parser.add_argument("--target", required=True, help="Target format: markdown/docx")
    parser.add_argument("--output", default="", help="Optional output path")
    args = parser.parse_args()

    input_file = Path(args.input).expanduser().resolve()
    if not input_file.exists() or not input_file.is_file():
        raise FileNotFoundError(f"Input file not found: {input_file}")

    target = normalize_target(args.target)
    output_file = (
        Path(args.output).expanduser().resolve()
        if args.output.strip()
        else build_default_output(input_file, target)
    )

    project_root = find_project_root(Path.cwd())

    ext = input_file.suffix.lower()
    markdown_input = {".md", ".markdown"}

    if ext in markdown_input and target == "docx":
        final_path = convert_markdown_to_docx(project_root, input_file, output_file)
    elif ext in markdown_input and target == "md":
        if output_file.suffix.lower() not in {".md", ".markdown"}:
            output_file = output_file.with_suffix(".md")
        output_file.parent.mkdir(parents=True, exist_ok=True)
        try:
            shutil.copy2(input_file, output_file)
        except shutil.SameFileError:
            # With no --output, "x.md" -> md resolves back to "x.md" itself;
            # the file is already where it should be, so there is nothing to copy.
            pass
        final_path = output_file
    else:
        ensure_key_notice(project_root)
        final_path = convert_with_mineru(project_root, input_file, target, output_file)

    print(str(final_path))
    return 0
124
+
125
+
126
if __name__ == "__main__":
    # Run the CLI. On failure, print a one-line summary on stderr and then
    # re-raise so the interpreter still reports the full traceback.
    try:
        exit_code = main()
    except Exception as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        raise
    raise SystemExit(exit_code)