@noedgeai-org/doc2x-mcp 0.1.3-dev.2.2 → 0.1.3-dev.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config/index.d.ts +15 -0
- package/dist/doc2x/client.d.ts +15 -0
- package/dist/doc2x/client.js +64 -25
- package/dist/doc2x/constants.d.ts +3 -0
- package/dist/doc2x/convert.d.ts +32 -0
- package/dist/doc2x/convert.js +2 -2
- package/dist/doc2x/download.d.ts +7 -0
- package/dist/doc2x/download.js +31 -10
- package/dist/doc2x/http.d.ts +4 -0
- package/dist/doc2x/image.d.ts +23 -0
- package/dist/doc2x/image.js +4 -3
- package/dist/doc2x/materialize.d.ts +8 -0
- package/dist/doc2x/paths.d.ts +2 -0
- package/dist/doc2x/pdf.d.ts +29 -0
- package/dist/doc2x/pdf.js +9 -5
- package/dist/errors/error.d.ts +34 -0
- package/dist/errors/error.js +61 -0
- package/dist/errors/errorCodes.d.ts +13 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +6 -1
- package/dist/mcp/registerConvertTools.d.ts +3 -0
- package/dist/mcp/registerConvertTools.js +65 -0
- package/dist/mcp/registerImageTools.d.ts +3 -0
- package/dist/mcp/registerImageTools.js +86 -0
- package/dist/mcp/registerMiscTools.d.ts +2 -0
- package/dist/mcp/registerMiscTools.js +36 -0
- package/dist/mcp/registerPdfTools.d.ts +3 -0
- package/dist/mcp/registerPdfTools.js +123 -0
- package/dist/mcp/registerTools.d.ts +2 -0
- package/dist/mcp/registerTools.js +10 -420
- package/dist/mcp/registerToolsShared.d.ts +106 -0
- package/dist/mcp/registerToolsShared.js +194 -0
- package/dist/mcp/results.d.ts +19 -0
- package/dist/mcp/results.js +6 -10
- package/dist/shared/utils.d.ts +2 -0
- package/dist/shared/utils.js +13 -0
- package/package.json +30 -8
- package/dist/errors.js +0 -17
- package/dist/utils.js +0 -25
- /package/dist/{config.js → config/index.js} +0 -0
- /package/dist/{errorCodes.js → errors/errorCodes.js} +0 -0
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
export declare const RESOLVED_KEY: {
|
|
2
|
+
apiKey: string;
|
|
3
|
+
source: "inline" | "env" | "missing";
|
|
4
|
+
};
|
|
5
|
+
export declare const CONFIG: Readonly<{
|
|
6
|
+
baseUrl: string;
|
|
7
|
+
apiKey: string;
|
|
8
|
+
httpTimeoutMs: number;
|
|
9
|
+
pollIntervalMs: number;
|
|
10
|
+
maxWaitMs: number;
|
|
11
|
+
parsePdfMaxOutputChars: number;
|
|
12
|
+
parsePdfMaxOutputPages: number;
|
|
13
|
+
}>;
|
|
14
|
+
export declare function parseDownloadUrlAllowlist(): string[];
|
|
15
|
+
export declare function isHostAllowedByAllowlist(hostname: string, allowlist: string[]): boolean;
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { type HttpMethod } from '#doc2x/http';
|
|
2
|
+
export declare function normalizeUrl(u: string): string;
|
|
3
|
+
export declare function isRetryableDoc2xBusinessCode(code: string): boolean;
|
|
4
|
+
export declare function doc2xHeaders(extra?: Record<string, string>): {
|
|
5
|
+
Authorization: string;
|
|
6
|
+
};
|
|
7
|
+
type Doc2xRequestOpts = {
|
|
8
|
+
query?: Record<string, string>;
|
|
9
|
+
body?: unknown;
|
|
10
|
+
raw_body?: BodyInit;
|
|
11
|
+
headers?: Record<string, string>;
|
|
12
|
+
};
|
|
13
|
+
export declare function doc2xRequestJson<TData = Record<string, unknown>>(method: HttpMethod, pathname: string, opts?: Doc2xRequestOpts): Promise<TData>;
|
|
14
|
+
export declare function putToSignedUrl(signedUrl: string, filePath: string): Promise<void>;
|
|
15
|
+
export {};
|
package/dist/doc2x/client.js
CHANGED
|
@@ -1,25 +1,39 @@
|
|
|
1
1
|
import fs from 'node:fs';
|
|
2
2
|
import fsp from 'node:fs/promises';
|
|
3
3
|
import { CONFIG } from '#config';
|
|
4
|
-
import { ToolError } from '#errors';
|
|
5
|
-
import { TOOL_ERROR_CODE_INVALID_JSON, TOOL_ERROR_CODE_MISSING_API_KEY, httpErrorCode, putFailedCode, } from '#errorCodes';
|
|
4
|
+
import { ToolError, coerceToolError, isRetryableError } from '#errors';
|
|
5
|
+
import { TOOL_ERROR_CODE_INTERNAL_ERROR, TOOL_ERROR_CODE_INVALID_JSON, TOOL_ERROR_CODE_MISSING_API_KEY, httpErrorCode, putFailedCode, } from '#errorCodes';
|
|
6
6
|
import { jitteredBackoffMs, sleep } from '#utils';
|
|
7
7
|
import { DOC2X_API_CODE_SUCCESS } from '#doc2x/constants';
|
|
8
8
|
import { HTTP_METHOD_PUT } from '#doc2x/http';
|
|
9
|
+
function asJsonObject(v) {
|
|
10
|
+
if (!v || typeof v !== 'object' || Array.isArray(v))
|
|
11
|
+
return null;
|
|
12
|
+
return v;
|
|
13
|
+
}
|
|
9
14
|
async function fetchJson(url, init, timeoutMs) {
|
|
10
15
|
const ctrl = new AbortController();
|
|
11
16
|
const t = setTimeout(() => ctrl.abort(), timeoutMs);
|
|
12
17
|
try {
|
|
13
|
-
const res = await fetch(url, { ...init, signal: ctrl.signal });
|
|
14
|
-
const text = await res.text();
|
|
15
|
-
let json;
|
|
16
18
|
try {
|
|
17
|
-
|
|
19
|
+
const res = await fetch(url, { ...init, signal: ctrl.signal });
|
|
20
|
+
const text = await res.text();
|
|
21
|
+
let json;
|
|
22
|
+
try {
|
|
23
|
+
json = text ? asJsonObject(JSON.parse(text)) : null;
|
|
24
|
+
}
|
|
25
|
+
catch {
|
|
26
|
+
json = null;
|
|
27
|
+
}
|
|
28
|
+
return { res, text, json };
|
|
18
29
|
}
|
|
19
|
-
catch {
|
|
20
|
-
|
|
30
|
+
catch (e) {
|
|
31
|
+
throw coerceToolError(e, {
|
|
32
|
+
defaultCode: TOOL_ERROR_CODE_INTERNAL_ERROR,
|
|
33
|
+
defaultRetryable: true,
|
|
34
|
+
defaultMessage: 'Doc2x request failed',
|
|
35
|
+
});
|
|
21
36
|
}
|
|
22
|
-
return { res, text, json };
|
|
23
37
|
}
|
|
24
38
|
finally {
|
|
25
39
|
clearTimeout(t);
|
|
@@ -44,7 +58,7 @@ export function doc2xHeaders(extra) {
|
|
|
44
58
|
if (!CONFIG.apiKey) {
|
|
45
59
|
throw new ToolError({
|
|
46
60
|
code: TOOL_ERROR_CODE_MISSING_API_KEY,
|
|
47
|
-
message: 'Doc2x API key is not configured (set INLINE_DOC2X_API_KEY in src/config.ts or provide DOC2X_API_KEY env).',
|
|
61
|
+
message: 'Doc2x API key is not configured (set INLINE_DOC2X_API_KEY in src/config/index.ts or provide DOC2X_API_KEY env).',
|
|
48
62
|
retryable: false,
|
|
49
63
|
});
|
|
50
64
|
}
|
|
@@ -69,7 +83,19 @@ export async function doc2xRequestJson(method, pathname, opts) {
|
|
|
69
83
|
}
|
|
70
84
|
let attempt = 0;
|
|
71
85
|
while (true) {
|
|
72
|
-
|
|
86
|
+
let res;
|
|
87
|
+
let json;
|
|
88
|
+
let text;
|
|
89
|
+
try {
|
|
90
|
+
({ res, json, text } = await fetchJson(url.toString(), init, CONFIG.httpTimeoutMs));
|
|
91
|
+
}
|
|
92
|
+
catch (e) {
|
|
93
|
+
if (isRetryableError(e)) {
|
|
94
|
+
await sleep(jitteredBackoffMs(attempt++));
|
|
95
|
+
continue;
|
|
96
|
+
}
|
|
97
|
+
throw e;
|
|
98
|
+
}
|
|
73
99
|
if (res.status === 429) {
|
|
74
100
|
await sleep(jitteredBackoffMs(attempt++));
|
|
75
101
|
continue;
|
|
@@ -89,16 +115,18 @@ export async function doc2xRequestJson(method, pathname, opts) {
|
|
|
89
115
|
retryable: false,
|
|
90
116
|
});
|
|
91
117
|
}
|
|
92
|
-
|
|
93
|
-
|
|
118
|
+
const envelope = json;
|
|
119
|
+
const apiCode = String(envelope.code || '');
|
|
120
|
+
if (apiCode !== DOC2X_API_CODE_SUCCESS) {
|
|
121
|
+
const code = String(envelope.code || 'doc2x_error');
|
|
94
122
|
const retryable = isRetryableDoc2xBusinessCode(code);
|
|
95
123
|
if (retryable) {
|
|
96
124
|
await sleep(jitteredBackoffMs(attempt++));
|
|
97
125
|
continue;
|
|
98
126
|
}
|
|
99
|
-
throw new ToolError({ code, message: String(
|
|
127
|
+
throw new ToolError({ code, message: String(envelope.msg || 'Doc2x error'), retryable });
|
|
100
128
|
}
|
|
101
|
-
return
|
|
129
|
+
return envelope.data;
|
|
102
130
|
}
|
|
103
131
|
}
|
|
104
132
|
export async function putToSignedUrl(signedUrl, filePath) {
|
|
@@ -107,16 +135,27 @@ export async function putToSignedUrl(signedUrl, filePath) {
|
|
|
107
135
|
const ctrl = new AbortController();
|
|
108
136
|
const t = setTimeout(() => ctrl.abort(), CONFIG.httpTimeoutMs);
|
|
109
137
|
try {
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
138
|
+
let res;
|
|
139
|
+
try {
|
|
140
|
+
const putInit = {
|
|
141
|
+
method: HTTP_METHOD_PUT,
|
|
142
|
+
body: body,
|
|
143
|
+
duplex: 'half',
|
|
144
|
+
headers: {
|
|
145
|
+
'Content-Type': 'application/pdf',
|
|
146
|
+
'Content-Length': String(stat.size),
|
|
147
|
+
},
|
|
148
|
+
signal: ctrl.signal,
|
|
149
|
+
};
|
|
150
|
+
res = await fetch(signedUrl, putInit);
|
|
151
|
+
}
|
|
152
|
+
catch (e) {
|
|
153
|
+
throw coerceToolError(e, {
|
|
154
|
+
defaultCode: TOOL_ERROR_CODE_INTERNAL_ERROR,
|
|
155
|
+
defaultRetryable: true,
|
|
156
|
+
defaultMessage: 'PUT to signed url failed',
|
|
157
|
+
});
|
|
158
|
+
}
|
|
120
159
|
if (!res.ok) {
|
|
121
160
|
const txt = await res.text().catch(() => '');
|
|
122
161
|
throw new ToolError({
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
type ExportFilenameMode = 'auto' | 'raw';
|
|
2
|
+
export declare const CONVERT_FORMULA_LEVELS: readonly [0, 1, 2];
|
|
3
|
+
export type ConvertFormulaLevel = (typeof CONVERT_FORMULA_LEVELS)[number];
|
|
4
|
+
export declare function convertExportSubmit(args: {
|
|
5
|
+
uid: string;
|
|
6
|
+
to: 'md' | 'tex' | 'docx';
|
|
7
|
+
formula_mode: 'normal' | 'dollar';
|
|
8
|
+
formula_level?: ConvertFormulaLevel;
|
|
9
|
+
filename?: string;
|
|
10
|
+
merge_cross_page_forms?: boolean;
|
|
11
|
+
filename_mode?: ExportFilenameMode;
|
|
12
|
+
}): Promise<{
|
|
13
|
+
uid: string;
|
|
14
|
+
status: string;
|
|
15
|
+
url: string;
|
|
16
|
+
}>;
|
|
17
|
+
export declare function convertExportResult(uid: string): Promise<{
|
|
18
|
+
uid: string;
|
|
19
|
+
status: string;
|
|
20
|
+
url: string;
|
|
21
|
+
}>;
|
|
22
|
+
export declare function convertExportWaitByUid(args: {
|
|
23
|
+
uid: string;
|
|
24
|
+
to: 'md' | 'tex' | 'docx';
|
|
25
|
+
poll_interval_ms?: number;
|
|
26
|
+
max_wait_ms?: number;
|
|
27
|
+
}): Promise<{
|
|
28
|
+
uid: string;
|
|
29
|
+
status: string;
|
|
30
|
+
url: string;
|
|
31
|
+
}>;
|
|
32
|
+
export {};
|
package/dist/doc2x/convert.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
2
|
import { CONFIG } from '#config';
|
|
3
|
-
import { ToolError } from '#errors';
|
|
3
|
+
import { ToolError, isRetryableError } from '#errors';
|
|
4
4
|
import { TOOL_ERROR_CODE_CONVERT_FAILED, TOOL_ERROR_CODE_TIMEOUT } from '#errorCodes';
|
|
5
5
|
import { jitteredBackoffMs, sleep } from '#utils';
|
|
6
6
|
import { doc2xRequestJson, normalizeUrl } from '#doc2x/client';
|
|
@@ -72,7 +72,7 @@ export async function convertExportWaitByUid(args) {
|
|
|
72
72
|
attempt = 0;
|
|
73
73
|
}
|
|
74
74
|
catch (e) {
|
|
75
|
-
if (e
|
|
75
|
+
if (isRetryableError(e)) {
|
|
76
76
|
await sleep(jitteredBackoffMs(attempt++));
|
|
77
77
|
continue;
|
|
78
78
|
}
|
package/dist/doc2x/download.js
CHANGED
|
@@ -3,8 +3,8 @@ import fsp from 'node:fs/promises';
|
|
|
3
3
|
import path from 'node:path';
|
|
4
4
|
import { Readable } from 'node:stream';
|
|
5
5
|
import { CONFIG, isHostAllowedByAllowlist, parseDownloadUrlAllowlist } from '#config';
|
|
6
|
-
import { ToolError } from '#errors';
|
|
7
|
-
import { TOOL_ERROR_CODE_EMPTY_BODY, TOOL_ERROR_CODE_INVALID_URL, TOOL_ERROR_CODE_UNSAFE_URL, httpErrorCode, } from '#errorCodes';
|
|
6
|
+
import { ToolError, coerceToolError } from '#errors';
|
|
7
|
+
import { TOOL_ERROR_CODE_EMPTY_BODY, TOOL_ERROR_CODE_INTERNAL_ERROR, TOOL_ERROR_CODE_INVALID_URL, TOOL_ERROR_CODE_UNSAFE_URL, httpErrorCode, } from '#errorCodes';
|
|
8
8
|
import { HTTP_METHOD_GET } from '#doc2x/http';
|
|
9
9
|
import { normalizeUrl } from '#doc2x/client';
|
|
10
10
|
export async function downloadUrlToFile(args) {
|
|
@@ -40,7 +40,18 @@ export async function downloadUrlToFile(args) {
|
|
|
40
40
|
const ctrl = new AbortController();
|
|
41
41
|
const t = setTimeout(() => ctrl.abort(), CONFIG.httpTimeoutMs);
|
|
42
42
|
try {
|
|
43
|
-
|
|
43
|
+
let res;
|
|
44
|
+
try {
|
|
45
|
+
res = await fetch(normalizedUrl, { method: HTTP_METHOD_GET, signal: ctrl.signal });
|
|
46
|
+
}
|
|
47
|
+
catch (e) {
|
|
48
|
+
throw coerceToolError(e, {
|
|
49
|
+
defaultCode: TOOL_ERROR_CODE_INTERNAL_ERROR,
|
|
50
|
+
defaultRetryable: true,
|
|
51
|
+
defaultMessage: 'download failed',
|
|
52
|
+
details: { url: parsed.hostname },
|
|
53
|
+
});
|
|
54
|
+
}
|
|
44
55
|
if (!res.ok) {
|
|
45
56
|
throw new ToolError({
|
|
46
57
|
code: httpErrorCode(res.status),
|
|
@@ -55,13 +66,23 @@ export async function downloadUrlToFile(args) {
|
|
|
55
66
|
retryable: true,
|
|
56
67
|
});
|
|
57
68
|
const file = fs.createWriteStream(outPath);
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
.
|
|
63
|
-
|
|
64
|
-
|
|
69
|
+
try {
|
|
70
|
+
await new Promise((resolve, reject) => {
|
|
71
|
+
file.on('error', reject);
|
|
72
|
+
file.on('finish', resolve);
|
|
73
|
+
Readable.fromWeb(res.body)
|
|
74
|
+
.on('error', reject)
|
|
75
|
+
.pipe(file);
|
|
76
|
+
});
|
|
77
|
+
}
|
|
78
|
+
catch (e) {
|
|
79
|
+
throw coerceToolError(e, {
|
|
80
|
+
defaultCode: TOOL_ERROR_CODE_INTERNAL_ERROR,
|
|
81
|
+
defaultRetryable: false,
|
|
82
|
+
defaultMessage: 'download failed while writing file',
|
|
83
|
+
details: { output_path: outPath },
|
|
84
|
+
});
|
|
85
|
+
}
|
|
65
86
|
const stat = await fsp.stat(outPath);
|
|
66
87
|
return { output_path: outPath, bytes_written: stat.size };
|
|
67
88
|
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
export declare function parseImageLayoutSync(imagePath: string): Promise<{
|
|
2
|
+
uid: string;
|
|
3
|
+
result: unknown;
|
|
4
|
+
convert_zip: {} | null;
|
|
5
|
+
}>;
|
|
6
|
+
export declare function parseImageLayoutSubmit(imagePath: string): Promise<{
|
|
7
|
+
uid: string;
|
|
8
|
+
}>;
|
|
9
|
+
export declare function parseImageLayoutStatus(uid: string): Promise<{
|
|
10
|
+
uid: string;
|
|
11
|
+
status: string;
|
|
12
|
+
result: {} | null;
|
|
13
|
+
convert_zip: {} | null;
|
|
14
|
+
}>;
|
|
15
|
+
export declare function parseImageLayoutWaitTextByUid(args: {
|
|
16
|
+
uid: string;
|
|
17
|
+
poll_interval_ms?: number;
|
|
18
|
+
max_wait_ms?: number;
|
|
19
|
+
}): Promise<{
|
|
20
|
+
uid: string;
|
|
21
|
+
status: "success";
|
|
22
|
+
text: string;
|
|
23
|
+
}>;
|
package/dist/doc2x/image.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import fsp from 'node:fs/promises';
|
|
2
2
|
import path from 'node:path';
|
|
3
3
|
import { CONFIG } from '#config';
|
|
4
|
-
import { ToolError } from '#errors';
|
|
4
|
+
import { ToolError, isRetryableError } from '#errors';
|
|
5
5
|
import { TOOL_ERROR_CODE_FILE_TOO_LARGE, TOOL_ERROR_CODE_INVALID_ARGUMENT, TOOL_ERROR_CODE_PARSE_FAILED, TOOL_ERROR_CODE_TIMEOUT, } from '#errorCodes';
|
|
6
6
|
import { jitteredBackoffMs, sleep } from '#utils';
|
|
7
7
|
import { doc2xRequestJson } from '#doc2x/client';
|
|
@@ -70,14 +70,15 @@ export async function parseImageLayoutWaitTextByUid(args) {
|
|
|
70
70
|
attempt = 0;
|
|
71
71
|
}
|
|
72
72
|
catch (e) {
|
|
73
|
-
if (e
|
|
73
|
+
if (isRetryableError(e)) {
|
|
74
74
|
await sleep(jitteredBackoffMs(attempt++));
|
|
75
75
|
continue;
|
|
76
76
|
}
|
|
77
77
|
throw e;
|
|
78
78
|
}
|
|
79
79
|
if (st.status === DOC2X_TASK_STATUS_SUCCESS) {
|
|
80
|
-
const
|
|
80
|
+
const result = st.result ?? null;
|
|
81
|
+
const md = String(result?.pages?.[0]?.md || '');
|
|
81
82
|
return { uid, status: DOC2X_TASK_STATUS_SUCCESS, text: md };
|
|
82
83
|
}
|
|
83
84
|
if (st.status === DOC2X_TASK_STATUS_FAILED)
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
export declare const PARSE_PDF_MODELS: readonly ["v3-2026"];
|
|
2
|
+
export type ParsePdfModel = (typeof PARSE_PDF_MODELS)[number];
|
|
3
|
+
export declare function parsePdfSubmit(pdfPath: string, opts?: {
|
|
4
|
+
model?: ParsePdfModel;
|
|
5
|
+
}): Promise<{
|
|
6
|
+
uid: string;
|
|
7
|
+
}>;
|
|
8
|
+
export declare function parsePdfStatus(uid: string): Promise<{
|
|
9
|
+
uid: string;
|
|
10
|
+
status: string;
|
|
11
|
+
progress: number;
|
|
12
|
+
detail: string;
|
|
13
|
+
result: {} | null;
|
|
14
|
+
}>;
|
|
15
|
+
export declare function parsePdfWaitTextByUid(args: {
|
|
16
|
+
uid: string;
|
|
17
|
+
poll_interval_ms?: number;
|
|
18
|
+
max_wait_ms?: number;
|
|
19
|
+
join_with?: string;
|
|
20
|
+
max_output_chars?: number;
|
|
21
|
+
max_output_pages?: number;
|
|
22
|
+
}): Promise<{
|
|
23
|
+
text: string;
|
|
24
|
+
truncated: boolean;
|
|
25
|
+
returnedPages: number;
|
|
26
|
+
totalPages: number;
|
|
27
|
+
uid: string;
|
|
28
|
+
status: "success";
|
|
29
|
+
}>;
|
package/dist/doc2x/pdf.js
CHANGED
|
@@ -2,7 +2,7 @@ import fsp from 'node:fs/promises';
|
|
|
2
2
|
import path from 'node:path';
|
|
3
3
|
import _ from 'lodash';
|
|
4
4
|
import { CONFIG } from '#config';
|
|
5
|
-
import { ToolError } from '#errors';
|
|
5
|
+
import { ToolError, isRetryableError } from '#errors';
|
|
6
6
|
import { TOOL_ERROR_CODE_INVALID_ARGUMENT, TOOL_ERROR_CODE_PARSE_FAILED, TOOL_ERROR_CODE_TIMEOUT, } from '#errorCodes';
|
|
7
7
|
import { jitteredBackoffMs, sleep } from '#utils';
|
|
8
8
|
import { doc2xRequestJson, putToSignedUrl } from '#doc2x/client';
|
|
@@ -11,7 +11,9 @@ import { HTTP_METHOD_GET, HTTP_METHOD_POST } from '#doc2x/http';
|
|
|
11
11
|
import { v2 } from '#doc2x/paths';
|
|
12
12
|
export const PARSE_PDF_MODELS = ['v3-2026'];
|
|
13
13
|
function mergePagesToTextWithLimit(result, joinWith, limits) {
|
|
14
|
-
const
|
|
14
|
+
const parsed = result ?? null;
|
|
15
|
+
const sourcePages = _.isArray(parsed?.pages) ? parsed.pages : [];
|
|
16
|
+
const pages = _.sortBy(sourcePages, (p) => Number(p?.page_idx ?? 0));
|
|
15
17
|
const maxPages = (limits?.maxOutputPages ?? 0) > 0 ? Number(limits?.maxOutputPages) : Number.POSITIVE_INFINITY;
|
|
16
18
|
const maxChars = (limits?.maxOutputChars ?? 0) > 0 ? Number(limits?.maxOutputChars) : Number.POSITIVE_INFINITY;
|
|
17
19
|
const parts = [];
|
|
@@ -66,7 +68,7 @@ async function preuploadPdfWithRetry(model) {
|
|
|
66
68
|
return { uid: String(data.uid), url: String(data.url) };
|
|
67
69
|
}
|
|
68
70
|
catch (e) {
|
|
69
|
-
if (e
|
|
71
|
+
if (isRetryableError(e)) {
|
|
70
72
|
await sleep(jitteredBackoffMs(attempt++));
|
|
71
73
|
continue;
|
|
72
74
|
}
|
|
@@ -88,7 +90,9 @@ export async function parsePdfSubmit(pdfPath, opts) {
|
|
|
88
90
|
try {
|
|
89
91
|
await putToSignedUrl(String(data.url), p);
|
|
90
92
|
}
|
|
91
|
-
catch {
|
|
93
|
+
catch (e) {
|
|
94
|
+
if (!isRetryableError(e))
|
|
95
|
+
throw e;
|
|
92
96
|
data = await preuploadPdfWithRetry(model);
|
|
93
97
|
await putToSignedUrl(String(data.url), p);
|
|
94
98
|
}
|
|
@@ -135,7 +139,7 @@ export async function parsePdfWaitTextByUid(args) {
|
|
|
135
139
|
attempt = 0;
|
|
136
140
|
}
|
|
137
141
|
catch (e) {
|
|
138
|
-
if (e
|
|
142
|
+
if (isRetryableError(e)) {
|
|
139
143
|
await sleep(jitteredBackoffMs(attempt++));
|
|
140
144
|
continue;
|
|
141
145
|
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
export type Retryable = boolean;
|
|
2
|
+
type ToolErrorDetails = Record<string, unknown>;
|
|
3
|
+
export declare class ToolError extends Error {
|
|
4
|
+
readonly code: string;
|
|
5
|
+
readonly retryable: Retryable;
|
|
6
|
+
readonly uid?: string;
|
|
7
|
+
readonly details?: ToolErrorDetails;
|
|
8
|
+
constructor(args: {
|
|
9
|
+
code: string;
|
|
10
|
+
message: string;
|
|
11
|
+
retryable: Retryable;
|
|
12
|
+
uid?: string;
|
|
13
|
+
details?: ToolErrorDetails;
|
|
14
|
+
cause?: unknown;
|
|
15
|
+
});
|
|
16
|
+
toPayload(): {
|
|
17
|
+
error: {
|
|
18
|
+
code: string;
|
|
19
|
+
message: string;
|
|
20
|
+
retryable: Retryable;
|
|
21
|
+
uid?: string;
|
|
22
|
+
details?: ToolErrorDetails;
|
|
23
|
+
};
|
|
24
|
+
};
|
|
25
|
+
}
|
|
26
|
+
export declare function isRetryableError(e: unknown): boolean;
|
|
27
|
+
export declare function coerceToolError(e: unknown, opts?: {
|
|
28
|
+
defaultCode: string;
|
|
29
|
+
defaultRetryable: Retryable;
|
|
30
|
+
defaultMessage: string;
|
|
31
|
+
uid?: string;
|
|
32
|
+
details?: ToolErrorDetails;
|
|
33
|
+
}): ToolError;
|
|
34
|
+
export {};
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { TOOL_ERROR_CODE_INTERNAL_ERROR, TOOL_ERROR_CODE_TIMEOUT } from '#errorCodes';
|
|
2
|
+
export class ToolError extends Error {
|
|
3
|
+
code;
|
|
4
|
+
retryable;
|
|
5
|
+
uid;
|
|
6
|
+
details;
|
|
7
|
+
constructor(args) {
|
|
8
|
+
super(args.message, args.cause === undefined ? undefined : { cause: args.cause });
|
|
9
|
+
this.name = 'ToolError';
|
|
10
|
+
this.code = args.code;
|
|
11
|
+
this.retryable = args.retryable;
|
|
12
|
+
this.uid = args.uid;
|
|
13
|
+
this.details = args.details;
|
|
14
|
+
}
|
|
15
|
+
toPayload() {
|
|
16
|
+
const error = { code: this.code, message: this.message, retryable: this.retryable };
|
|
17
|
+
if (this.uid)
|
|
18
|
+
error.uid = this.uid;
|
|
19
|
+
if (this.details && Object.keys(this.details).length > 0)
|
|
20
|
+
error.details = this.details;
|
|
21
|
+
return {
|
|
22
|
+
error,
|
|
23
|
+
};
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
function stringMessageOf(e) {
|
|
27
|
+
if (e instanceof Error)
|
|
28
|
+
return e.message;
|
|
29
|
+
return String(e);
|
|
30
|
+
}
|
|
31
|
+
function isAbortLikeError(e) {
|
|
32
|
+
const name = e instanceof Error ? e.name : '';
|
|
33
|
+
return name === 'AbortError' || name === 'TimeoutError';
|
|
34
|
+
}
|
|
35
|
+
export function isRetryableError(e) {
|
|
36
|
+
return e instanceof ToolError && e.retryable;
|
|
37
|
+
}
|
|
38
|
+
export function coerceToolError(e, opts) {
|
|
39
|
+
if (e instanceof ToolError)
|
|
40
|
+
return e;
|
|
41
|
+
if (isAbortLikeError(e)) {
|
|
42
|
+
return new ToolError({
|
|
43
|
+
code: TOOL_ERROR_CODE_TIMEOUT,
|
|
44
|
+
message: opts?.defaultMessage ? `${opts.defaultMessage}: request timeout` : 'request timeout',
|
|
45
|
+
retryable: true,
|
|
46
|
+
uid: opts?.uid,
|
|
47
|
+
details: opts?.details,
|
|
48
|
+
cause: e,
|
|
49
|
+
});
|
|
50
|
+
}
|
|
51
|
+
return new ToolError({
|
|
52
|
+
code: opts?.defaultCode ?? TOOL_ERROR_CODE_INTERNAL_ERROR,
|
|
53
|
+
message: opts?.defaultMessage
|
|
54
|
+
? `${opts.defaultMessage}: ${stringMessageOf(e)}`
|
|
55
|
+
: stringMessageOf(e),
|
|
56
|
+
retryable: opts?.defaultRetryable ?? false,
|
|
57
|
+
uid: opts?.uid,
|
|
58
|
+
details: opts?.details,
|
|
59
|
+
cause: e,
|
|
60
|
+
});
|
|
61
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
export declare const TOOL_ERROR_CODE_CONVERT_FAILED: "convert_failed";
|
|
2
|
+
export declare const TOOL_ERROR_CODE_EMPTY_BODY: "empty_body";
|
|
3
|
+
export declare const TOOL_ERROR_CODE_FILE_TOO_LARGE: "file_too_large";
|
|
4
|
+
export declare const TOOL_ERROR_CODE_INTERNAL_ERROR: "internal_error";
|
|
5
|
+
export declare const TOOL_ERROR_CODE_INVALID_ARGUMENT: "invalid_argument";
|
|
6
|
+
export declare const TOOL_ERROR_CODE_INVALID_JSON: "invalid_json";
|
|
7
|
+
export declare const TOOL_ERROR_CODE_INVALID_URL: "invalid_url";
|
|
8
|
+
export declare const TOOL_ERROR_CODE_MISSING_API_KEY: "missing_api_key";
|
|
9
|
+
export declare const TOOL_ERROR_CODE_PARSE_FAILED: "parse_failed";
|
|
10
|
+
export declare const TOOL_ERROR_CODE_TIMEOUT: "timeout";
|
|
11
|
+
export declare const TOOL_ERROR_CODE_UNSAFE_URL: "unsafe_url";
|
|
12
|
+
export declare function httpErrorCode(status: number): string;
|
|
13
|
+
export declare function putFailedCode(status: number): string;
|
package/dist/index.d.ts
ADDED
package/dist/index.js
CHANGED
|
@@ -23,7 +23,12 @@ async function main() {
|
|
|
23
23
|
const transport = new StdioServerTransport();
|
|
24
24
|
await server.connect(transport);
|
|
25
25
|
}
|
|
26
|
+
function formatFatalError(e) {
|
|
27
|
+
if (e instanceof Error)
|
|
28
|
+
return e.stack || `${e.name}: ${e.message}`;
|
|
29
|
+
return String(e);
|
|
30
|
+
}
|
|
26
31
|
main().catch((e) => {
|
|
27
|
-
process.stderr.write(JSON.stringify({ ts: new Date().toISOString(), err:
|
|
32
|
+
process.stderr.write(JSON.stringify({ ts: new Date().toISOString(), err: formatFatalError(e) }) + os.EOL);
|
|
28
33
|
process.exitCode = 1;
|
|
29
34
|
});
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { convertExportResult, convertExportSubmit, convertExportWaitByUid, } from '#doc2x/convert';
|
|
2
|
+
import { asJsonResult } from '#mcp/results';
|
|
3
|
+
import { convertFilenameModeSchema, convertFilenameSchema, convertFormulaLevelValueSchema, convertFormulaModeSchema, convertToSchema, makeConvertSubmitKey, optionalBooleanSchema, parsePdfUidSchema, positiveIntMsSchema, runConvertSubmitAtomically, withToolErrorHandling, } from '#mcp/registerToolsShared';
|
|
4
|
+
export function registerConvertTools(server, ctx) {
|
|
5
|
+
const submitConvertWithDedup = async (args, opts) => {
|
|
6
|
+
const key = makeConvertSubmitKey(args);
|
|
7
|
+
return await runConvertSubmitAtomically(ctx, {
|
|
8
|
+
key,
|
|
9
|
+
skipIfSubmitted: opts?.skipIfSubmitted,
|
|
10
|
+
submit: () => convertExportSubmit(args),
|
|
11
|
+
});
|
|
12
|
+
};
|
|
13
|
+
server.registerTool('doc2x_convert_export_submit', {
|
|
14
|
+
description: 'Start an export (convert) job for a parsed PDF uid. After this, poll with doc2x_convert_export_wait or doc2x_convert_export_result. Do NOT call doc2x_convert_export_submit twice for the same uid+format in parallel.',
|
|
15
|
+
inputSchema: {
|
|
16
|
+
uid: parsePdfUidSchema,
|
|
17
|
+
to: convertToSchema,
|
|
18
|
+
formula_mode: convertFormulaModeSchema,
|
|
19
|
+
formula_level: convertFormulaLevelValueSchema
|
|
20
|
+
.optional()
|
|
21
|
+
.describe('Optional formula degradation level. Effective only when source parse uses model=v3-2026 (ignored by v2). 0: keep formulas, 1: degrade inline formulas, 2: degrade inline and block formulas.'),
|
|
22
|
+
filename: convertFilenameSchema
|
|
23
|
+
.describe("Optional output filename (for md/tex only). Tip: pass a basename WITHOUT extension to avoid getting 'name.md.md' / 'name.tex.tex'."),
|
|
24
|
+
filename_mode: convertFilenameModeSchema
|
|
25
|
+
.describe("How to treat filename. 'auto' strips common extensions for the target format; 'raw' passes basename as-is.")
|
|
26
|
+
.optional(),
|
|
27
|
+
merge_cross_page_forms: optionalBooleanSchema,
|
|
28
|
+
},
|
|
29
|
+
}, withToolErrorHandling(async (args) => asJsonResult(await submitConvertWithDedup(args))));
|
|
30
|
+
server.registerTool('doc2x_convert_export_result', {
|
|
31
|
+
description: 'Get the latest export (convert) result for a parsed PDF uid (may contain an escaped URL).',
|
|
32
|
+
inputSchema: {
|
|
33
|
+
uid: parsePdfUidSchema,
|
|
34
|
+
},
|
|
35
|
+
}, withToolErrorHandling(async ({ uid }) => asJsonResult(await convertExportResult(uid))));
|
|
36
|
+
server.registerTool('doc2x_convert_export_wait', {
|
|
37
|
+
description: 'Wait for an export job to finish. Prefer calling doc2x_convert_export_submit first, then wait with uid+to. For backward compatibility, if formula_mode is provided and this job was not submitted in-process, this tool will submit once then wait.',
|
|
38
|
+
inputSchema: {
|
|
39
|
+
uid: parsePdfUidSchema,
|
|
40
|
+
to: convertToSchema.describe('Expected target format. Used to verify the result URL.'),
|
|
41
|
+
formula_mode: convertFormulaModeSchema.optional(),
|
|
42
|
+
formula_level: convertFormulaLevelValueSchema
|
|
43
|
+
.optional()
|
|
44
|
+
.describe('Optional formula degradation level used when this tool auto-submits export (formula_mode must be provided). Effective only when source parse uses model=v3-2026 (ignored by v2).'),
|
|
45
|
+
filename: convertFilenameSchema,
|
|
46
|
+
filename_mode: convertFilenameModeSchema.optional(),
|
|
47
|
+
merge_cross_page_forms: optionalBooleanSchema,
|
|
48
|
+
poll_interval_ms: positiveIntMsSchema.optional(),
|
|
49
|
+
max_wait_ms: positiveIntMsSchema.optional(),
|
|
50
|
+
},
|
|
51
|
+
}, withToolErrorHandling(async (args) => {
|
|
52
|
+
if (args.formula_mode) {
|
|
53
|
+
await submitConvertWithDedup({
|
|
54
|
+
uid: args.uid,
|
|
55
|
+
to: args.to,
|
|
56
|
+
formula_mode: args.formula_mode,
|
|
57
|
+
formula_level: args.formula_level,
|
|
58
|
+
filename: args.filename,
|
|
59
|
+
filename_mode: args.filename_mode,
|
|
60
|
+
merge_cross_page_forms: args.merge_cross_page_forms,
|
|
61
|
+
}, { skipIfSubmitted: true });
|
|
62
|
+
}
|
|
63
|
+
return asJsonResult(await convertExportWaitByUid(args));
|
|
64
|
+
}));
|
|
65
|
+
}
|