@noedgeai-org/doc2x-mcp 0.1.3-dev.2.2 → 0.1.3-dev.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config/index.d.ts +15 -0
- package/dist/doc2x/client.d.ts +15 -0
- package/dist/doc2x/client.js +64 -25
- package/dist/doc2x/constants.d.ts +3 -0
- package/dist/doc2x/convert.d.ts +32 -0
- package/dist/doc2x/convert.js +2 -2
- package/dist/doc2x/download.d.ts +7 -0
- package/dist/doc2x/download.js +31 -10
- package/dist/doc2x/http.d.ts +4 -0
- package/dist/doc2x/image.d.ts +23 -0
- package/dist/doc2x/image.js +4 -3
- package/dist/doc2x/materialize.d.ts +8 -0
- package/dist/doc2x/paths.d.ts +2 -0
- package/dist/doc2x/pdf.d.ts +29 -0
- package/dist/doc2x/pdf.js +9 -5
- package/dist/errors/error.d.ts +34 -0
- package/dist/errors/error.js +61 -0
- package/dist/errors/errorCodes.d.ts +13 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +6 -1
- package/dist/mcp/registerConvertTools.d.ts +3 -0
- package/dist/mcp/registerConvertTools.js +65 -0
- package/dist/mcp/registerImageTools.d.ts +3 -0
- package/dist/mcp/registerImageTools.js +86 -0
- package/dist/mcp/registerMiscTools.d.ts +2 -0
- package/dist/mcp/registerMiscTools.js +36 -0
- package/dist/mcp/registerPdfTools.d.ts +3 -0
- package/dist/mcp/registerPdfTools.js +123 -0
- package/dist/mcp/registerTools.d.ts +2 -0
- package/dist/mcp/registerTools.js +10 -420
- package/dist/mcp/registerToolsShared.d.ts +106 -0
- package/dist/mcp/registerToolsShared.js +194 -0
- package/dist/mcp/results.d.ts +19 -0
- package/dist/mcp/results.js +6 -10
- package/dist/shared/utils.d.ts +2 -0
- package/dist/shared/utils.js +13 -0
- package/package.json +30 -8
- package/dist/errors.js +0 -17
- package/dist/utils.js +0 -25
- /package/dist/{config.js → config/index.js} +0 -0
- /package/dist/{errorCodes.js → errors/errorCodes.js} +0 -0
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import { isRetryableError } from '#errors';
|
|
2
|
+
import { parseImageLayoutStatus, parseImageLayoutSubmit, parseImageLayoutSync, parseImageLayoutWaitTextByUid, } from '#doc2x/image';
|
|
3
|
+
import { asJsonResult, asTextResult } from '#mcp/results';
|
|
4
|
+
import { deleteUidCache, fileSig, getSubmittedUidFromCache, imagePathForWaitSchema, imagePathSchema, missingEitherFieldError, parseImageUidSchema, positiveIntMsSchema, setFailedUidCache, setSubmittedUidCache, withToolErrorHandling, } from '#mcp/registerToolsShared';
|
|
5
|
+
/**
 * Register the image-layout MCP tools on the given server.
 *
 * Tools registered:
 *  - doc2x_parse_image_layout_sync:   blocking parse, raw JSON result.
 *  - doc2x_parse_image_layout_submit: async submit, returns {uid} and caches it per file signature.
 *  - doc2x_parse_image_layout_status: poll an existing task by uid.
 *  - doc2x_parse_image_layout_wait_text: wait until success and return first-page markdown;
 *    accepts either uid (preferred, no re-submit) or image_path (cache lookup, then submit+wait,
 *    with one retry on a retryable failure of a cache-reused uid).
 *
 * @param server MCP server instance exposing registerTool.
 * @param ctx    Per-process context holding the submitted/failed uid caches.
 */
export function registerImageTools(server, ctx) {
    server.registerTool('doc2x_parse_image_layout_sync', {
        description: 'Parse an image layout synchronously and return the raw Doc2x result JSON (including convert_zip when present).',
        inputSchema: {
            image_path: imagePathSchema,
        },
    }, withToolErrorHandling(async ({ image_path }) => {
        const raw = await parseImageLayoutSync(image_path);
        return asJsonResult(raw);
    }));
    server.registerTool('doc2x_parse_image_layout_submit', {
        description: 'Create an async image-layout parse task and return {uid}. After this, call doc2x_parse_image_layout_wait_text (with uid) or doc2x_parse_image_layout_status.',
        inputSchema: {
            image_path: imagePathSchema,
        },
    }, withToolErrorHandling(async ({ image_path }) => {
        // Cache the new uid under the file's content signature so a later
        // wait_text call with only image_path can reuse it.
        const signature = await fileSig(image_path);
        const submitted = await parseImageLayoutSubmit(image_path);
        setSubmittedUidCache(ctx, { kind: 'image', key: signature.absPath, sig: signature, uid: submitted.uid });
        return asJsonResult(submitted);
    }));
    server.registerTool('doc2x_parse_image_layout_status', {
        description: 'Get status/result for an existing async image-layout parse task by uid.',
        inputSchema: {
            uid: parseImageUidSchema,
        },
    }, withToolErrorHandling(async ({ uid }) => {
        const status = await parseImageLayoutStatus(uid);
        return asJsonResult(status);
    }));
    server.registerTool('doc2x_parse_image_layout_wait_text', {
        description: 'Wait for an image-layout parse task until success, returning first page markdown. Prefer passing uid (no re-submit). If only image_path is provided, it will (a) reuse an in-process cached uid if available, otherwise (b) submit a new async task then wait.',
        inputSchema: {
            uid: parseImageUidSchema.optional(),
            image_path: imagePathForWaitSchema.optional(),
            poll_interval_ms: positiveIntMsSchema.optional(),
            max_wait_ms: positiveIntMsSchema.optional(),
        },
    }, withToolErrorHandling(async (args) => {
        const waitFor = (uid) => parseImageLayoutWaitTextByUid({
            uid,
            poll_interval_ms: args.poll_interval_ms,
            max_wait_ms: args.max_wait_ms,
        });
        // Fast path: an explicit uid means we never submit anything ourselves.
        const explicitUid = String(args.uid || '').trim();
        if (explicitUid) {
            const done = await waitFor(explicitUid);
            return asTextResult(done.text);
        }
        const imagePath = String(args.image_path || '').trim();
        if (!imagePath) {
            throw missingEitherFieldError('uid', 'image_path');
        }
        const signature = await fileSig(imagePath);
        const cacheRef = { kind: 'image', key: signature.absPath, sig: signature };
        const cachedUid = getSubmittedUidFromCache(ctx, cacheRef);
        // Reuse the cached uid when present; otherwise submit a fresh task.
        const activeUid = cachedUid || (await parseImageLayoutSubmit(imagePath)).uid;
        setSubmittedUidCache(ctx, { ...cacheRef, uid: activeUid });
        const recordFailure = (uid) => setFailedUidCache(ctx, { ...cacheRef, uid });
        try {
            const done = await waitFor(activeUid);
            return asTextResult(done.text);
        }
        catch (firstErr) {
            // A uid we just submitted gets no retry: mark failed and surface the error.
            if (!cachedUid) {
                recordFailure(activeUid);
                throw firstErr;
            }
            // The cached uid is stale — drop it; retry only for retryable errors.
            deleteUidCache(ctx, { kind: 'image', key: signature.absPath });
            if (!isRetryableError(firstErr)) {
                recordFailure(activeUid);
                throw firstErr;
            }
            const freshUid = (await parseImageLayoutSubmit(imagePath)).uid;
            setSubmittedUidCache(ctx, { ...cacheRef, uid: freshUid });
            try {
                const done = await waitFor(freshUid);
                return asTextResult(done.text);
            }
            catch (secondErr) {
                recordFailure(freshUid);
                throw secondErr;
            }
        }
    }));
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { CONFIG, RESOLVED_KEY, parseDownloadUrlAllowlist } from '#config';
|
|
2
|
+
import { downloadUrlToFile } from '#doc2x/download';
|
|
3
|
+
import { materializeConvertZip } from '#doc2x/materialize';
|
|
4
|
+
import { asJsonResult } from '#mcp/results';
|
|
5
|
+
import { convertZipBase64Schema, doc2xDownloadUrlSchema, outputDirSchema, outputPathSchema, withToolErrorHandling, } from '#mcp/registerToolsShared';
|
|
6
|
+
/**
 * Register miscellaneous MCP tools: URL download, convert_zip materialization,
 * and a config debug dump.
 *
 * @param server MCP server instance exposing registerTool.
 */
export function registerMiscTools(server) {
    server.registerTool('doc2x_download_url_to_file', {
        description: 'Download a Doc2x-provided URL (e.g. from doc2x_convert_export_result) to a local file path.',
        inputSchema: {
            url: doc2xDownloadUrlSchema,
            output_path: outputPathSchema,
        },
    }, withToolErrorHandling(async (args) => asJsonResult(await downloadUrlToFile(args))));
    server.registerTool('doc2x_materialize_convert_zip', {
        description: 'Materialize convert_zip (base64) into output_dir. Best-effort: tries system unzip first; otherwise writes the zip file.',
        inputSchema: { convert_zip_base64: convertZipBase64Schema, output_dir: outputDirSchema },
    }, withToolErrorHandling(async (args) => asJsonResult(await materializeConvertZip({
        // Pass through only the fields materializeConvertZip consumes.
        convert_zip_base64: args.convert_zip_base64,
        output_dir: args.output_dir,
    }))));
    server.registerTool('doc2x_debug_config', {
        description: 'Debug helper: return resolved config and API key source for troubleshooting.',
        inputSchema: {},
    }, withToolErrorHandling(async () => asJsonResult({
        baseUrl: CONFIG.baseUrl,
        apiKeySource: RESOLVED_KEY.source,
        // Guard against a null/undefined apiKey: a debug tool must not throw.
        // (Previously `CONFIG.apiKey.length` could TypeError while the prefix
        // line below already guarded the same case.)
        apiKeyLen: CONFIG.apiKey?.length ?? 0,
        apiKeyPrefix: CONFIG.apiKey ? CONFIG.apiKey.slice(0, 6) : '',
        pollIntervalMs: CONFIG.pollIntervalMs,
        httpTimeoutMs: CONFIG.httpTimeoutMs,
        maxWaitMs: CONFIG.maxWaitMs,
        parsePdfMaxOutputChars: CONFIG.parsePdfMaxOutputChars,
        parsePdfMaxOutputPages: CONFIG.parsePdfMaxOutputPages,
        downloadUrlAllowlist: parseDownloadUrlAllowlist(),
    })));
}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import { CONFIG } from '#config';
|
|
2
|
+
import { isRetryableError } from '#errors';
|
|
3
|
+
import { parsePdfStatus, parsePdfSubmit, parsePdfWaitTextByUid, } from '#doc2x/pdf';
|
|
4
|
+
import { asJsonResult, asTextResult } from '#mcp/results';
|
|
5
|
+
import { deleteUidCache, fileSig, getSubmittedUidFromCache, joinWithSchema, makePdfUidCacheKey, missingEitherFieldError, nonNegativeIntSchema, parsePdfModelSchema, parsePdfUidSchema, pdfPathForWaitSchema, pdfPathSchema, positiveIntMsSchema, setFailedUidCache, setSubmittedUidCache, withToolErrorHandling, } from '#mcp/registerToolsShared';
|
|
6
|
+
/**
 * Register the PDF MCP tools on the given server.
 *
 * Tools registered:
 *  - doc2x_parse_pdf_submit:    create a parse task for a local PDF, return {uid},
 *    and cache the uid per (file signature, model).
 *  - doc2x_parse_pdf_status:    poll an existing task by uid.
 *  - doc2x_parse_pdf_wait_text: wait until success and return merged text, with
 *    optional char/page output limits and a truncation notice; accepts either uid
 *    (preferred) or pdf_path (cache lookup, then submit+wait, with one retry on a
 *    retryable failure of a cache-reused uid).
 *
 * @param server MCP server instance exposing registerTool.
 * @param ctx    Per-process context holding the submitted/failed uid caches.
 */
export function registerPdfTools(server, ctx) {
    server.registerTool('doc2x_parse_pdf_submit', {
        description: 'Create a Doc2x PDF parse task for a local file and return {uid}. Prefer calling doc2x_parse_pdf_status to monitor progress/result; only call doc2x_parse_pdf_wait_text if the user explicitly asks to wait/return merged text.',
        inputSchema: {
            pdf_path: pdfPathSchema,
            model: parsePdfModelSchema.describe("Optional parse model. Use 'v3-2026' to try the latest model. Omit this field to use default v2."),
        },
    }, withToolErrorHandling(async ({ pdf_path, model }) => {
        // Cache the new uid keyed by (file signature, model) so a later
        // wait_text call with only pdf_path can reuse it.
        const signature = await fileSig(pdf_path);
        const submitted = await parsePdfSubmit(pdf_path, { model });
        setSubmittedUidCache(ctx, {
            kind: 'pdf',
            key: makePdfUidCacheKey(signature.absPath, model),
            sig: signature,
            uid: submitted.uid,
        });
        return asJsonResult(submitted);
    }));
    server.registerTool('doc2x_parse_pdf_status', {
        description: 'Query parse task status by uid. Returns {status, progress, detail}. status is one of processing/failed/success; progress is an integer 0..100; detail is populated only when status=failed. Fetch parsed content via doc2x_convert_export_*.',
        inputSchema: {
            uid: parsePdfUidSchema,
        },
    }, withToolErrorHandling(async (args) => {
        const { status, progress, detail } = await parsePdfStatus(args.uid);
        return asJsonResult({ status, progress, detail });
    }));
    server.registerTool('doc2x_parse_pdf_wait_text', {
        description: 'Wait for a PDF parse task until success and return merged text. Prefer passing uid (no re-submit). If only pdf_path is provided, it will (a) reuse an in-process cached uid if available, otherwise (b) submit a new task then wait.',
        inputSchema: {
            uid: parsePdfUidSchema.optional(),
            pdf_path: pdfPathForWaitSchema.optional(),
            poll_interval_ms: positiveIntMsSchema.optional(),
            max_wait_ms: positiveIntMsSchema.optional(),
            join_with: joinWithSchema,
            max_output_chars: nonNegativeIntSchema
                .optional()
                .describe('Max characters of returned text (0 = unlimited). Useful to avoid LLM context overflow. Default can be set via env DOC2X_PARSE_PDF_MAX_OUTPUT_CHARS.'),
            max_output_pages: nonNegativeIntSchema
                .optional()
                .describe('Max pages to merge into returned text (0 = unlimited). Default can be set via env DOC2X_PARSE_PDF_MAX_OUTPUT_PAGES.'),
            model: parsePdfModelSchema
                .describe("Optional parse model used only when submitting from pdf_path. Use 'v3-2026' to try latest model. Omit this field to use default v2."),
        },
    }, withToolErrorHandling(async (args) => {
        // Output limits: explicit args win, otherwise fall back to config.
        const charLimit = args.max_output_chars ?? CONFIG.parsePdfMaxOutputChars;
        const pageLimit = args.max_output_pages ?? CONFIG.parsePdfMaxOutputPages;
        const truncationNotice = (info) => `\n\n---\n[doc2x-mcp] Output truncated (pages ${info.returnedPages}/${info.totalPages}, uid=${info.uid}). Fetch full markdown via doc2x_convert_export_* (to=md).\n`;
        // Append the notice while keeping total length within charLimit
        // (a non-positive limit means unlimited).
        const clampWithNotice = (text, notice) => {
            if (charLimit <= 0 || text.length + notice.length <= charLimit) {
                return text + notice;
            }
            const room = Math.min(Math.max(charLimit - notice.length, 0), charLimit);
            return room > 0 ? text.slice(0, room) + notice : notice.slice(0, charLimit);
        };
        // Shared shaping of a wait result into the tool's text response.
        const toTextResult = (out) => {
            if (!out.truncated) {
                return asTextResult(out.text);
            }
            return asTextResult(clampWithNotice(out.text, truncationNotice(out)));
        };
        const waitFor = (uid) => parsePdfWaitTextByUid({
            uid,
            poll_interval_ms: args.poll_interval_ms,
            max_wait_ms: args.max_wait_ms,
            join_with: args.join_with,
            max_output_chars: charLimit,
            max_output_pages: pageLimit,
        });
        // Fast path: an explicit uid means we never submit anything ourselves.
        const explicitUid = String(args.uid || '').trim();
        if (explicitUid) {
            return toTextResult(await waitFor(explicitUid));
        }
        const pdfPath = String(args.pdf_path || '').trim();
        if (!pdfPath) {
            throw missingEitherFieldError('uid', 'pdf_path');
        }
        const signature = await fileSig(pdfPath);
        const cacheKey = makePdfUidCacheKey(signature.absPath, args.model);
        const cachedUid = getSubmittedUidFromCache(ctx, { kind: 'pdf', key: cacheKey, sig: signature });
        // Reuse the cached uid when present; otherwise submit a fresh task.
        const activeUid = cachedUid || (await parsePdfSubmit(pdfPath, { model: args.model })).uid;
        setSubmittedUidCache(ctx, { kind: 'pdf', key: cacheKey, sig: signature, uid: activeUid });
        const recordFailure = (uid) => setFailedUidCache(ctx, { kind: 'pdf', key: cacheKey, sig: signature, uid });
        try {
            return toTextResult(await waitFor(activeUid));
        }
        catch (firstErr) {
            // A uid we just submitted gets no retry: mark failed and surface the error.
            if (!cachedUid) {
                recordFailure(activeUid);
                throw firstErr;
            }
            // The cached uid is stale — drop it; retry only for retryable errors.
            deleteUidCache(ctx, { kind: 'pdf', key: cacheKey });
            if (!isRetryableError(firstErr)) {
                recordFailure(activeUid);
                throw firstErr;
            }
            const freshUid = (await parsePdfSubmit(pdfPath, { model: args.model })).uid;
            setSubmittedUidCache(ctx, { kind: 'pdf', key: cacheKey, sig: signature, uid: freshUid });
            try {
                return toTextResult(await waitFor(freshUid));
            }
            catch (secondErr) {
                recordFailure(freshUid);
                throw secondErr;
            }
        }
    }));
}
|