@helloxiaohu/plugin-mineru 0.0.19 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +213 -56
- package/dist/lib/integration.strategy.js +1 -1
- package/dist/lib/mineru-toolset.strategy.d.ts +83 -16
- package/dist/lib/mineru-toolset.strategy.d.ts.map +1 -1
- package/dist/lib/mineru-toolset.strategy.js +131 -41
- package/dist/lib/mineru.client.d.ts +5 -1
- package/dist/lib/mineru.client.d.ts.map +1 -1
- package/dist/lib/mineru.client.js +46 -9
- package/dist/lib/mineru.tool.d.ts +8 -43
- package/dist/lib/mineru.tool.d.ts.map +1 -1
- package/dist/lib/mineru.tool.js +63 -51
- package/dist/lib/mineru.toolset.d.ts +4 -5
- package/dist/lib/mineru.toolset.d.ts.map +1 -1
- package/dist/lib/mineru.toolset.js +52 -9
- package/dist/lib/result-parser.service.d.ts +2 -2
- package/dist/lib/result-parser.service.d.ts.map +1 -1
- package/dist/lib/result-parser.service.js +72 -43
- package/dist/lib/types.d.ts +27 -20
- package/dist/lib/types.d.ts.map +1 -1
- package/package.json +62 -62
|
@@ -5,6 +5,7 @@ import { ToolsetStrategy, } from '@xpert-ai/plugin-sdk';
|
|
|
5
5
|
import { MinerUResultParserService } from './result-parser.service.js';
|
|
6
6
|
import { MinerUToolset } from './mineru.toolset.js';
|
|
7
7
|
import { MinerU, icon } from './types.js';
|
|
8
|
+
import { buildMinerUTool } from './mineru.tool.js';
|
|
8
9
|
/**
|
|
9
10
|
* ToolsetStrategy for MinerU PDF parser tool
|
|
10
11
|
* Registers MinerU as a toolset that can be used in agent workflows
|
|
@@ -29,6 +30,11 @@ let MinerUToolsetStrategy = class MinerUToolsetStrategy {
|
|
|
29
30
|
zh_Hans: '使用 MinerU 将 PDF 文件转换为 Markdown 格式。支持 OCR、公式识别和表格提取。',
|
|
30
31
|
},
|
|
31
32
|
icon: {
|
|
33
|
+
// Provide both shapes to maximize compatibility with different platform icon resolvers
|
|
34
|
+
// - builtin-provider icon endpoints may look for `type/value`
|
|
35
|
+
// - toolset registries may look for `svg`
|
|
36
|
+
type: 'svg',
|
|
37
|
+
value: icon,
|
|
32
38
|
svg: icon,
|
|
33
39
|
color: '#14b8a6',
|
|
34
40
|
},
|
|
@@ -51,10 +57,11 @@ let MinerUToolsetStrategy = class MinerUToolsetStrategy {
|
|
|
51
57
|
zh_Hans: 'Base URL',
|
|
52
58
|
},
|
|
53
59
|
description: {
|
|
54
|
-
en_US: 'MinerU API base url. Official: https://
|
|
55
|
-
zh_Hans: 'MinerU 服务地址。官方: https://
|
|
60
|
+
en_US: 'MinerU API base url. Official: https://mineru.net/api/v4',
|
|
61
|
+
zh_Hans: 'MinerU 服务地址。官方: https://mineru.net/api/v4',
|
|
56
62
|
},
|
|
57
|
-
default: 'https://
|
|
63
|
+
default: 'https://mineru.net/api/v4',
|
|
64
|
+
// Note: apiUrl is not in required array because it's optional with a default value
|
|
58
65
|
},
|
|
59
66
|
apiKey: {
|
|
60
67
|
type: 'string',
|
|
@@ -63,8 +70,8 @@ let MinerUToolsetStrategy = class MinerUToolsetStrategy {
|
|
|
63
70
|
zh_Hans: 'API Key',
|
|
64
71
|
},
|
|
65
72
|
description: {
|
|
66
|
-
en_US: 'The API Key of the MinerU server (required
|
|
67
|
-
zh_Hans: 'MinerU
|
|
73
|
+
en_US: 'The API Key of the MinerU server (required)',
|
|
74
|
+
zh_Hans: 'MinerU 服务令牌(必填)',
|
|
68
75
|
},
|
|
69
76
|
'x-ui': {
|
|
70
77
|
component: 'secretInput',
|
|
@@ -75,55 +82,84 @@ let MinerUToolsetStrategy = class MinerUToolsetStrategy {
|
|
|
75
82
|
persist: true,
|
|
76
83
|
},
|
|
77
84
|
},
|
|
78
|
-
serverType: {
|
|
79
|
-
type: 'string',
|
|
80
|
-
title: {
|
|
81
|
-
en_US: 'Server Type',
|
|
82
|
-
zh_Hans: '服务类型',
|
|
83
|
-
},
|
|
84
|
-
description: {
|
|
85
|
-
en_US: 'Select MinerU service type: official API or self-hosted',
|
|
86
|
-
zh_Hans: '选择 MinerU 服务类型:官方 API 或自部署',
|
|
87
|
-
},
|
|
88
|
-
enum: ['official', 'self-hosted'],
|
|
89
|
-
default: 'official',
|
|
90
|
-
},
|
|
91
85
|
// Default parsing settings (optional, can be overridden when calling the tool)
|
|
86
|
+
// Changed isOcr from boolean to string enum
|
|
92
87
|
isOcr: {
|
|
93
|
-
type: '
|
|
88
|
+
type: 'string',
|
|
94
89
|
title: {
|
|
95
90
|
en_US: 'Enable OCR',
|
|
96
91
|
zh_Hans: '启用 OCR',
|
|
97
92
|
},
|
|
98
93
|
description: {
|
|
99
|
-
en_US: 'Enable OCR for image-based PDFs
|
|
100
|
-
zh_Hans: '为基于图像的 PDF 启用 OCR
|
|
94
|
+
en_US: 'Enable OCR for image-based PDFs',
|
|
95
|
+
zh_Hans: '为基于图像的 PDF 启用 OCR',
|
|
96
|
+
},
|
|
97
|
+
enum: ['true', 'false'],
|
|
98
|
+
default: 'true',
|
|
99
|
+
'x-ui': {
|
|
100
|
+
enumLabels: {
|
|
101
|
+
'true': {
|
|
102
|
+
en_US: 'Enabled',
|
|
103
|
+
zh_Hans: '启用',
|
|
104
|
+
},
|
|
105
|
+
'false': {
|
|
106
|
+
en_US: 'Disabled',
|
|
107
|
+
zh_Hans: '禁用',
|
|
108
|
+
},
|
|
109
|
+
},
|
|
101
110
|
},
|
|
102
|
-
default: true,
|
|
103
111
|
},
|
|
112
|
+
// Changed enableFormula from boolean to string enum
|
|
104
113
|
enableFormula: {
|
|
105
|
-
type: '
|
|
114
|
+
type: 'string',
|
|
106
115
|
title: {
|
|
107
116
|
en_US: 'Enable Formula Recognition',
|
|
108
117
|
zh_Hans: '启用公式识别',
|
|
109
118
|
},
|
|
110
119
|
description: {
|
|
111
|
-
en_US: 'Enable formula recognition
|
|
112
|
-
zh_Hans: '
|
|
120
|
+
en_US: 'Enable formula recognition',
|
|
121
|
+
zh_Hans: '启用公式识别',
|
|
122
|
+
},
|
|
123
|
+
enum: ['true', 'false'],
|
|
124
|
+
default: 'true',
|
|
125
|
+
'x-ui': {
|
|
126
|
+
enumLabels: {
|
|
127
|
+
'true': {
|
|
128
|
+
en_US: 'Enabled',
|
|
129
|
+
zh_Hans: '启用',
|
|
130
|
+
},
|
|
131
|
+
'false': {
|
|
132
|
+
en_US: 'Disabled',
|
|
133
|
+
zh_Hans: '禁用',
|
|
134
|
+
},
|
|
135
|
+
},
|
|
113
136
|
},
|
|
114
|
-
default: true,
|
|
115
137
|
},
|
|
138
|
+
// Changed enableTable from boolean to string enum
|
|
116
139
|
enableTable: {
|
|
117
|
-
type: '
|
|
140
|
+
type: 'string',
|
|
118
141
|
title: {
|
|
119
142
|
en_US: 'Enable Table Recognition',
|
|
120
143
|
zh_Hans: '启用表格识别',
|
|
121
144
|
},
|
|
122
145
|
description: {
|
|
123
|
-
en_US: 'Enable table recognition
|
|
124
|
-
zh_Hans: '
|
|
146
|
+
en_US: 'Enable table recognition',
|
|
147
|
+
zh_Hans: '启用表格识别',
|
|
148
|
+
},
|
|
149
|
+
enum: ['true', 'false'],
|
|
150
|
+
default: 'true',
|
|
151
|
+
'x-ui': {
|
|
152
|
+
enumLabels: {
|
|
153
|
+
'true': {
|
|
154
|
+
en_US: 'Enabled',
|
|
155
|
+
zh_Hans: '启用',
|
|
156
|
+
},
|
|
157
|
+
'false': {
|
|
158
|
+
en_US: 'Disabled',
|
|
159
|
+
zh_Hans: '禁用',
|
|
160
|
+
},
|
|
161
|
+
},
|
|
125
162
|
},
|
|
126
|
-
default: true,
|
|
127
163
|
},
|
|
128
164
|
language: {
|
|
129
165
|
type: 'string',
|
|
@@ -137,6 +173,18 @@ let MinerUToolsetStrategy = class MinerUToolsetStrategy {
|
|
|
137
173
|
},
|
|
138
174
|
enum: ['en', 'ch'],
|
|
139
175
|
default: 'ch',
|
|
176
|
+
'x-ui': {
|
|
177
|
+
enumLabels: {
|
|
178
|
+
'en': {
|
|
179
|
+
en_US: 'en',
|
|
180
|
+
zh_Hans: '英文',
|
|
181
|
+
},
|
|
182
|
+
'ch': {
|
|
183
|
+
en_US: 'ch',
|
|
184
|
+
zh_Hans: '中文',
|
|
185
|
+
},
|
|
186
|
+
},
|
|
187
|
+
},
|
|
140
188
|
},
|
|
141
189
|
modelVersion: {
|
|
142
190
|
type: 'string',
|
|
@@ -150,9 +198,21 @@ let MinerUToolsetStrategy = class MinerUToolsetStrategy {
|
|
|
150
198
|
},
|
|
151
199
|
enum: ['pipeline', 'vlm'],
|
|
152
200
|
default: 'pipeline',
|
|
201
|
+
'x-ui': {
|
|
202
|
+
enumLabels: {
|
|
203
|
+
'pipeline': {
|
|
204
|
+
en_US: 'pipeline',
|
|
205
|
+
zh_Hans: 'pipeline',
|
|
206
|
+
},
|
|
207
|
+
'vlm': {
|
|
208
|
+
en_US: 'vlm',
|
|
209
|
+
zh_Hans: 'vlm',
|
|
210
|
+
},
|
|
211
|
+
},
|
|
212
|
+
},
|
|
153
213
|
},
|
|
154
214
|
},
|
|
155
|
-
required: ['
|
|
215
|
+
required: ['apiKey'],
|
|
156
216
|
},
|
|
157
217
|
};
|
|
158
218
|
/**
|
|
@@ -173,22 +233,33 @@ let MinerUToolsetStrategy = class MinerUToolsetStrategy {
|
|
|
173
233
|
if (!config) {
|
|
174
234
|
return Promise.resolve();
|
|
175
235
|
}
|
|
176
|
-
|
|
177
|
-
if (
|
|
178
|
-
throw new Error('MinerU apiKey is required
|
|
236
|
+
// apiKey is now a required field, validated by schema.required
|
|
237
|
+
if (!config.apiKey) {
|
|
238
|
+
throw new Error('MinerU apiKey is required');
|
|
179
239
|
}
|
|
180
240
|
return Promise.resolve();
|
|
181
241
|
}
|
|
182
242
|
/**
|
|
183
243
|
* Create MinerU toolset instance
|
|
184
244
|
* Note: config may be null/undefined during authorization phase
|
|
245
|
+
* Modified to read from toolset.credentials (like @searchapi/@email)
|
|
185
246
|
*/
|
|
186
247
|
async create(config) {
|
|
187
|
-
//
|
|
188
|
-
|
|
189
|
-
|
|
248
|
+
// Check if config is an IXpertToolset object with credentials property
|
|
249
|
+
const toolset = (config && typeof config === 'object' && 'credentials' in config)
|
|
250
|
+
? config
|
|
251
|
+
: null;
|
|
252
|
+
// Priority: toolset.credentials > config (flat structure) > empty object
|
|
253
|
+
const creds = toolset?.credentials ?? config ?? {};
|
|
254
|
+
// Build config with dependencies
|
|
190
255
|
const configWithDependencies = {
|
|
191
|
-
|
|
256
|
+
apiUrl: creds.apiUrl,
|
|
257
|
+
apiKey: creds.apiKey,
|
|
258
|
+
isOcr: creds.isOcr,
|
|
259
|
+
enableFormula: creds.enableFormula,
|
|
260
|
+
enableTable: creds.enableTable,
|
|
261
|
+
language: creds.language,
|
|
262
|
+
modelVersion: creds.modelVersion,
|
|
192
263
|
configService: this.configService,
|
|
193
264
|
resultParser: this.resultParser,
|
|
194
265
|
};
|
|
@@ -200,9 +271,28 @@ let MinerUToolsetStrategy = class MinerUToolsetStrategy {
|
|
|
200
271
|
* based on the toolset credentials/configuration
|
|
201
272
|
*/
|
|
202
273
|
createTools() {
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
274
|
+
/**
|
|
275
|
+
* IMPORTANT:
|
|
276
|
+
* The console UI requires builtin providers to expose at least one tool so users can
|
|
277
|
+
* enable it (otherwise it fails the "Enable at least one tool" validation).
|
|
278
|
+
*
|
|
279
|
+
* The returned tools here are used for listing/preview & toggling in UI. Actual execution
|
|
280
|
+
* will use the toolset instance created by `create()` -> `MinerUToolset.initTools()`,
|
|
281
|
+
* which wires credentials (apiUrl/apiKey/serverType) correctly.
|
|
282
|
+
*/
|
|
283
|
+
return [
|
|
284
|
+
buildMinerUTool(this.configService, this.resultParser,
|
|
285
|
+
// No credentials at listing time
|
|
286
|
+
undefined, undefined,
|
|
287
|
+
// Defaults used if user doesn't pass tool-call parameters
|
|
288
|
+
{
|
|
289
|
+
isOcr: true,
|
|
290
|
+
enableFormula: true,
|
|
291
|
+
enableTable: true,
|
|
292
|
+
language: 'ch',
|
|
293
|
+
modelVersion: 'pipeline',
|
|
294
|
+
}),
|
|
295
|
+
];
|
|
206
296
|
}
|
|
207
297
|
};
|
|
208
298
|
MinerUToolsetStrategy = __decorate([
|
|
@@ -93,7 +93,11 @@ export declare class MinerUClient {
|
|
|
93
93
|
*/
|
|
94
94
|
waitForTask(taskId: string, timeoutMs?: number, intervalMs?: number): Promise<any>;
|
|
95
95
|
private ensureOfficial;
|
|
96
|
-
|
|
96
|
+
/**
|
|
97
|
+
* Automatically determine serverType from URL
|
|
98
|
+
* Returns 'official' if URL is the official address (https://mineru.net/api/v4), otherwise 'self-hosted'
|
|
99
|
+
*/
|
|
100
|
+
private resolveServerTypeFromUrl;
|
|
97
101
|
private resolveCredentials;
|
|
98
102
|
private readIntegrationOptions;
|
|
99
103
|
private normalizeBaseUrl;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mineru.client.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAEhD,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAC/C,OAAO,EAAmB,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACrE,OAAc,EAAE,aAAa,EAAE,MAAM,OAAO,CAAC;AAK7C,OAAO,EAIL,wBAAwB,EAExB,0BAA0B,EAC1B,gBAAgB,EACjB,MAAM,YAAY,CAAC;AAIpB,UAAU,iBAAiB;IACzB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,mEAAmE;IACnE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yEAAyE;IACzE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,2EAA2E;IAC3E,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,4EAA4E;IAC5E,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED,UAAU,mBAAmB;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,sBAAsB;IAC9B,KAAK,EAAE,mBAAmB,EAAE,CAAC;IAC7B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,UAAU,iBAAiB;IACzB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AASD,qBAAa,YAAY;IAWrB,OAAO,CAAC,QAAQ,CAAC,aAAa;IAC9B,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC;IAX/B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAiC;IACxD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAS;IAChC,SAAgB,UAAU,EAAE,gBAAgB,CAAC;IAC7C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAiD;IAE5E,IAAI,UAAU,IAAI,YAAY,GAAG,SAAS,CAEzC;gBAEkB,aAAa,EAAE,aAAa,EAC5B,WAAW,CAAC,EAAE;QACvB,UAAU,CAAC,EAAE,YAAY,CAAC;QAC1B,WAAW,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,wBAAwB,CAAC,CAAC,CAAC;KACjE;
|
|
1
|
+
{"version":3,"file":"mineru.client.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAEhD,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAC/C,OAAO,EAAmB,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACrE,OAAc,EAAE,aAAa,EAAE,MAAM,OAAO,CAAC;AAK7C,OAAO,EAIL,wBAAwB,EAExB,0BAA0B,EAC1B,gBAAgB,EACjB,MAAM,YAAY,CAAC;AAIpB,UAAU,iBAAiB;IACzB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,mEAAmE;IACnE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yEAAyE;IACzE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,2EAA2E;IAC3E,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,4EAA4E;IAC5E,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED,UAAU,mBAAmB;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,sBAAsB;IAC9B,KAAK,EAAE,mBAAmB,EAAE,CAAC;IAC7B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,UAAU,iBAAiB;IACzB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AASD,qBAAa,YAAY;IAWrB,OAAO,CAAC,QAAQ,CAAC,aAAa;IAC9B,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC;IAX/B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAiC;IACxD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAS;IAChC,SAAgB,UAAU,EAAE,gBAAgB,CAAC;IAC7C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAiD;IAE5E,IAAI,UAAU,IAAI,YAAY,GAAG,SAAS,CAEzC;gBAEkB,aAAa,EAAE,aAAa,EAC5B,WAAW,CAAC,EAAE;QACvB,UAAU,CAAC,EAAE,YAAY,CAAC;QAC1B,WAAW,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,wBAAwB,CAAC,CAAC,CAAC;KACjE;IAqCP;;;OAGG;IACG,UAAU,CAAC,OAAO,EAAE,iB
AAiB,GAAG,OAAO,CAAC;QAAE,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IAYzE;;OAEG;IACG,eAAe,CAAC,OAAO,EAAE,sBAAsB,GAAG,OAAO,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;IAmCzG,iBAAiB,CAAC,MAAM,EAAE,MAAM,GAAG,0BAA0B,GAAG,SAAS;IAOzE;;OAEG;IACG,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC;QACxE,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,CAAC;IAoBF;;OAEG;IACG,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC;IAiBnD;;OAEG;IACG,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,SAAgB,EAAE,UAAU,SAAO,GAAG,OAAO,CAAC,GAAG,CAAC;IA4B7F,OAAO,CAAC,cAAc;IAMtB;;;OAGG;IACH,OAAO,CAAC,wBAAwB;IAwBhC,OAAO,CAAC,kBAAkB;IAwB1B,OAAO,CAAC,sBAAsB;IAI9B,OAAO,CAAC,gBAAgB;IAIxB,OAAO,CAAC,WAAW;IAQnB,OAAO,CAAC,kBAAkB;IAO1B,OAAO,CAAC,oBAAoB;YAYd,kBAAkB;YAiClB,oBAAoB;YASpB,qBAAqB;YA0DrB,uBAAuB;IA+CrC,OAAO,CAAC,iBAAiB;IAgBzB,OAAO,CAAC,2BAA2B;IAenC,OAAO,CAAC,6BAA6B;IAcrC,OAAO,CAAC,iBAAiB;IAQzB,OAAO,CAAC,aAAa;IAcrB,OAAO,CAAC,iBAAiB;IAQzB,OAAO,CAAC,eAAe;YAIT,YAAY;IAkB1B,OAAO,CAAC,eAAe;IA0BvB,wBAAwB,IAAI,OAAO,CAAC,aAAa,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;IAKtD,wBAAwB;CAU/B"}
|
|
@@ -17,13 +17,26 @@ export class MinerUClient {
|
|
|
17
17
|
this.logger = new Logger(MinerUClient.name);
|
|
18
18
|
this.localTasks = new Map();
|
|
19
19
|
const integration = this.permissions?.integration;
|
|
20
|
-
|
|
20
|
+
// First, resolve credentials without depending on serverType
|
|
21
21
|
const { baseUrl, token } = this.resolveCredentials(integration);
|
|
22
|
+
const maskedToken = token && token.length > 8
|
|
23
|
+
? `${token.slice(0, 4)}***${token.slice(-4)}`
|
|
24
|
+
: token
|
|
25
|
+
? 'provided'
|
|
26
|
+
: 'missing';
|
|
27
|
+
this.logger.debug('[MinerU] MinerUClient credentials resolved', {
|
|
28
|
+
hasIntegration: Boolean(integration),
|
|
29
|
+
apiUrl: baseUrl,
|
|
30
|
+
token: maskedToken,
|
|
31
|
+
serverTypeFromUrl: this.resolveServerTypeFromUrl(baseUrl || '', integration),
|
|
32
|
+
});
|
|
22
33
|
if (!baseUrl) {
|
|
23
34
|
throw new Error('MinerU base URL is required');
|
|
24
35
|
}
|
|
25
36
|
this.baseUrl = this.normalizeBaseUrl(baseUrl);
|
|
26
37
|
this.token = token;
|
|
38
|
+
// Automatically determine serverType from URL: official if it's the official URL, otherwise self-hosted
|
|
39
|
+
this.serverType = this.resolveServerTypeFromUrl(this.baseUrl, integration);
|
|
27
40
|
if (this.serverType === 'official' && !this.token) {
|
|
28
41
|
throw new Error('MinerU official API requires an access token');
|
|
29
42
|
}
|
|
@@ -146,7 +159,13 @@ export class MinerUClient {
|
|
|
146
159
|
const start = Date.now();
|
|
147
160
|
while (true) {
|
|
148
161
|
const result = await this.getTaskResult(taskId);
|
|
149
|
-
this.logger.debug(
|
|
162
|
+
this.logger.debug('[MinerU] waiting task result', {
|
|
163
|
+
taskId,
|
|
164
|
+
hasZip: Boolean(result?.full_zip_url),
|
|
165
|
+
hasUrl: Boolean(result?.full_url),
|
|
166
|
+
hasContent: Boolean(result?.content),
|
|
167
|
+
status: result?.status,
|
|
168
|
+
});
|
|
150
169
|
if (result?.full_zip_url || result?.full_url || result?.content || result?.status === 'done') {
|
|
151
170
|
return result;
|
|
152
171
|
}
|
|
@@ -161,28 +180,41 @@ export class MinerUClient {
|
|
|
161
180
|
throw new Error(`${feature} is only supported for official MinerU deployments`);
|
|
162
181
|
}
|
|
163
182
|
}
|
|
164
|
-
|
|
183
|
+
/**
|
|
184
|
+
* Automatically determine serverType from URL
|
|
185
|
+
* Returns 'official' if URL is the official address (https://mineru.net/api/v4), otherwise 'self-hosted'
|
|
186
|
+
*/
|
|
187
|
+
resolveServerTypeFromUrl(baseUrl, integration) {
|
|
188
|
+
// Prefer explicitly specified serverType (backward compatibility)
|
|
165
189
|
const integrationType = this.readIntegrationOptions(integration)?.serverType;
|
|
166
190
|
if (integrationType === 'self-hosted' || integrationType === 'official') {
|
|
167
191
|
return integrationType;
|
|
168
192
|
}
|
|
193
|
+
// Check environment variable (backward compatibility)
|
|
169
194
|
const envValue = this.configService.get(ENV_MINERU_SERVER_TYPE)?.toLowerCase();
|
|
170
195
|
if (envValue === 'self-hosted') {
|
|
171
196
|
return 'self-hosted';
|
|
172
197
|
}
|
|
173
|
-
return 'official'
|
|
198
|
+
// Automatically determine from URL: if normalized URL matches official address, return 'official'
|
|
199
|
+
const normalizedOfficialUrl = this.normalizeBaseUrl(DEFAULT_OFFICIAL_BASE_URL);
|
|
200
|
+
const normalizedBaseUrl = this.normalizeBaseUrl(baseUrl);
|
|
201
|
+
if (normalizedBaseUrl === normalizedOfficialUrl) {
|
|
202
|
+
return 'official';
|
|
203
|
+
}
|
|
204
|
+
return 'self-hosted';
|
|
174
205
|
}
|
|
175
206
|
resolveCredentials(integration) {
|
|
176
207
|
const options = this.readIntegrationOptions(integration);
|
|
177
208
|
const baseUrlFromIntegration = options?.apiUrl;
|
|
178
209
|
const tokenFromIntegration = options?.apiKey;
|
|
179
|
-
|
|
180
|
-
const
|
|
181
|
-
const
|
|
182
|
-
|
|
210
|
+
// Read from environment variables (same keys for both official and self-hosted)
|
|
211
|
+
const baseUrlFromEnv = this.configService.get(ENV_MINERU_API_BASE_URL);
|
|
212
|
+
const tokenFromEnv = this.configService.get(ENV_MINERU_API_TOKEN);
|
|
213
|
+
// Determine baseUrl: prefer integration config, then env, then default to official URL
|
|
183
214
|
const baseUrl = baseUrlFromIntegration ||
|
|
184
215
|
baseUrlFromEnv ||
|
|
185
|
-
|
|
216
|
+
DEFAULT_OFFICIAL_BASE_URL;
|
|
217
|
+
// Determine token: prefer integration config, then env
|
|
186
218
|
const token = tokenFromIntegration || tokenFromEnv;
|
|
187
219
|
return { baseUrl, token };
|
|
188
220
|
}
|
|
@@ -238,6 +270,11 @@ export class MinerUClient {
|
|
|
238
270
|
if (options.seed)
|
|
239
271
|
body.seed = options.seed;
|
|
240
272
|
try {
|
|
273
|
+
this.logger.debug('[MinerU] createOfficialTask request', {
|
|
274
|
+
url,
|
|
275
|
+
body,
|
|
276
|
+
hasAuthHeader: Boolean(this.getOfficialHeaders().Authorization),
|
|
277
|
+
});
|
|
241
278
|
const resp = await axios.post(url, body, { headers: this.getOfficialHeaders() });
|
|
242
279
|
const data = resp.data;
|
|
243
280
|
if (data.code !== 0) {
|
|
@@ -7,9 +7,9 @@ import { MinerUIntegrationOptions } from './types.js';
|
|
|
7
7
|
* Default parsing settings for MinerU tool
|
|
8
8
|
*/
|
|
9
9
|
export interface MinerUToolDefaults {
|
|
10
|
-
isOcr?: boolean;
|
|
11
|
-
enableFormula?: boolean;
|
|
12
|
-
enableTable?: boolean;
|
|
10
|
+
isOcr?: boolean | string;
|
|
11
|
+
enableFormula?: boolean | string;
|
|
12
|
+
enableTable?: boolean | string;
|
|
13
13
|
language?: 'en' | 'ch';
|
|
14
14
|
modelVersion?: 'pipeline' | 'vlm';
|
|
15
15
|
}
|
|
@@ -18,50 +18,15 @@ export interface MinerUToolDefaults {
|
|
|
18
18
|
* This tool converts PDF files to markdown format using MinerU service
|
|
19
19
|
*/
|
|
20
20
|
export declare function buildMinerUTool(configService: ConfigService, resultParser: MinerUResultParserService, options?: MinerUIntegrationOptions, fileSystem?: XpFileSystem, defaults?: MinerUToolDefaults): import("@langchain/core/tools").DynamicStructuredTool<z.ZodObject<{
|
|
21
|
-
|
|
22
|
-
filePath: z.ZodNullable<z.ZodOptional<z.ZodString>>;
|
|
23
|
-
fileName: z.ZodNullable<z.ZodOptional<z.ZodString>>;
|
|
24
|
-
isOcr: z.ZodNullable<z.ZodOptional<z.ZodBoolean>>;
|
|
25
|
-
enableFormula: z.ZodNullable<z.ZodOptional<z.ZodBoolean>>;
|
|
26
|
-
enableTable: z.ZodNullable<z.ZodOptional<z.ZodBoolean>>;
|
|
27
|
-
language: z.ZodNullable<z.ZodOptional<z.ZodEnum<["en", "ch"]>>>;
|
|
28
|
-
modelVersion: z.ZodNullable<z.ZodOptional<z.ZodEnum<["pipeline", "vlm"]>>>;
|
|
21
|
+
doc_url: z.ZodString;
|
|
29
22
|
}, "strip", z.ZodTypeAny, {
|
|
30
|
-
|
|
31
|
-
fileUrl?: string;
|
|
32
|
-
isOcr?: boolean;
|
|
33
|
-
enableFormula?: boolean;
|
|
34
|
-
enableTable?: boolean;
|
|
35
|
-
language?: "en" | "ch";
|
|
36
|
-
modelVersion?: "vlm" | "pipeline";
|
|
37
|
-
fileName?: string;
|
|
23
|
+
doc_url?: string;
|
|
38
24
|
}, {
|
|
39
|
-
|
|
40
|
-
fileUrl?: string;
|
|
41
|
-
isOcr?: boolean;
|
|
42
|
-
enableFormula?: boolean;
|
|
43
|
-
enableTable?: boolean;
|
|
44
|
-
language?: "en" | "ch";
|
|
45
|
-
modelVersion?: "vlm" | "pipeline";
|
|
46
|
-
fileName?: string;
|
|
25
|
+
doc_url?: string;
|
|
47
26
|
}>, {
|
|
48
|
-
|
|
49
|
-
fileUrl?: string;
|
|
50
|
-
isOcr?: boolean;
|
|
51
|
-
enableFormula?: boolean;
|
|
52
|
-
enableTable?: boolean;
|
|
53
|
-
language?: "en" | "ch";
|
|
54
|
-
modelVersion?: "vlm" | "pipeline";
|
|
55
|
-
fileName?: string;
|
|
27
|
+
doc_url?: string;
|
|
56
28
|
}, {
|
|
57
|
-
|
|
58
|
-
fileUrl?: string;
|
|
59
|
-
isOcr?: boolean;
|
|
60
|
-
enableFormula?: boolean;
|
|
61
|
-
enableTable?: boolean;
|
|
62
|
-
language?: "en" | "ch";
|
|
63
|
-
modelVersion?: "vlm" | "pipeline";
|
|
64
|
-
fileName?: string;
|
|
29
|
+
doc_url?: string;
|
|
65
30
|
}, (string | {
|
|
66
31
|
files: any[];
|
|
67
32
|
taskId: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mineru.tool.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.tool.ts"],"names":[],"mappings":"AAEA,OAAO,EAAmB,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACrE,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAE/C,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AAEvE,OAAO,EAAqB,wBAAwB,EAAE,MAAM,YAAY,CAAC;AAEzE;;GAEG;AACH,MAAM,WAAW,kBAAkB;
|
|
1
|
+
{"version":3,"file":"mineru.tool.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.tool.ts"],"names":[],"mappings":"AAEA,OAAO,EAAmB,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACrE,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAE/C,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AAEvE,OAAO,EAAqB,wBAAwB,EAAE,MAAM,YAAY,CAAC;AAEzE;;GAEG;AACH,MAAM,WAAW,kBAAkB;IAEjC,KAAK,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IACzB,aAAa,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IACjC,WAAW,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IAC/B,QAAQ,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC;IACvB,YAAY,CAAC,EAAE,UAAU,GAAG,KAAK,CAAC;CACnC;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAC7B,aAAa,EAAE,aAAa,EAC5B,YAAY,EAAE,yBAAyB,EACvC,OAAO,CAAC,EAAE,wBAAwB,EAClC,UAAU,CAAC,EAAE,YAAY,EACzB,QAAQ,CAAC,EAAE,kBAAkB;;;;;;;;;;;;;;MAmL9B"}
|
package/dist/lib/mineru.tool.js
CHANGED
|
@@ -11,55 +11,75 @@ import { MinerUIntegration } from './types.js';
|
|
|
11
11
|
export function buildMinerUTool(configService, resultParser, options, fileSystem, defaults) {
|
|
12
12
|
return tool(async (input) => {
|
|
13
13
|
try {
|
|
14
|
-
const {
|
|
15
|
-
//
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
const finalLanguage = language ?? defaults?.language ?? 'ch';
|
|
20
|
-
const finalModelVersion = modelVersion ?? defaults?.modelVersion ?? 'pipeline';
|
|
21
|
-
if (!fileUrl && !filePath) {
|
|
22
|
-
throw new Error('Either fileUrl or filePath must be provided');
|
|
14
|
+
const { doc_url } = input;
|
|
15
|
+
// Log raw input (mask nothing sensitive here because doc_url is public; avoid logging other fields)
|
|
16
|
+
console.debug('[MinerU] tool invoked with input', { doc_url, extraKeys: Object.keys(input || {}).filter((k) => k !== 'doc_url') });
|
|
17
|
+
if (!doc_url) {
|
|
18
|
+
throw new Error('doc_url is required');
|
|
23
19
|
}
|
|
24
20
|
// Get workspace context from current task
|
|
25
21
|
const currentState = getCurrentTaskInput();
|
|
26
22
|
const workspacePath = currentState?.['sys']?.['volume'] ?? '/tmp/xpert';
|
|
27
|
-
|
|
28
|
-
//
|
|
29
|
-
const
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
}
|
|
34
|
-
:
|
|
23
|
+
// Use configuration from authorization page (passed via options and defaults parameters)
|
|
24
|
+
// These values come from the authorization page configuration and are set when the tool is created
|
|
25
|
+
const finalApiUrl = options?.apiUrl || 'https://mineru.net/api/v4';
|
|
26
|
+
const finalApiKey = options?.apiKey; // apiKey is required and validated in authorization page
|
|
27
|
+
// Log effective config (mask key) to confirm whether the credentials from the authorization page were received
|
|
28
|
+
const maskedKey = finalApiKey && finalApiKey.length > 8
|
|
29
|
+
? `${finalApiKey.slice(0, 4)}***${finalApiKey.slice(-4)}`
|
|
30
|
+
: finalApiKey
|
|
31
|
+
? 'provided'
|
|
32
|
+
: 'missing';
|
|
33
|
+
console.debug('[MinerU] buildMinerUTool config', {
|
|
34
|
+
fromOptions: Boolean(options),
|
|
35
|
+
apiUrl: finalApiUrl,
|
|
36
|
+
apiKey: maskedKey,
|
|
37
|
+
defaults: {
|
|
38
|
+
isOcr: defaults?.isOcr,
|
|
39
|
+
enableFormula: defaults?.enableFormula,
|
|
40
|
+
enableTable: defaults?.enableTable,
|
|
41
|
+
language: defaults?.language,
|
|
42
|
+
modelVersion: defaults?.modelVersion,
|
|
43
|
+
},
|
|
44
|
+
});
|
|
45
|
+
// Use configuration values from authorization page (passed via defaults parameter)
|
|
46
|
+
// Convert string enum values ('true'/'false') to boolean, or use boolean values directly
|
|
47
|
+
// If undefined, default to true
|
|
48
|
+
const finalIsOcr = defaults?.isOcr === undefined
|
|
49
|
+
? true
|
|
50
|
+
: (typeof defaults.isOcr === 'string' ? defaults.isOcr === 'true' : defaults.isOcr === true);
|
|
51
|
+
const finalEnableFormula = defaults?.enableFormula === undefined
|
|
52
|
+
? true
|
|
53
|
+
: (typeof defaults.enableFormula === 'string' ? defaults.enableFormula === 'true' : defaults.enableFormula === true);
|
|
54
|
+
const finalEnableTable = defaults?.enableTable === undefined
|
|
55
|
+
? true
|
|
56
|
+
: (typeof defaults.enableTable === 'string' ? defaults.enableTable === 'true' : defaults.enableTable === true);
|
|
57
|
+
const finalLanguage = defaults?.language || 'ch';
|
|
58
|
+
const finalModelVersion = defaults?.modelVersion || 'pipeline';
|
|
59
|
+
const effectiveOptions = {
|
|
60
|
+
apiUrl: finalApiUrl,
|
|
61
|
+
apiKey: finalApiKey,
|
|
62
|
+
};
|
|
63
|
+
const integration = {
|
|
64
|
+
provider: MinerUIntegration,
|
|
65
|
+
options: effectiveOptions,
|
|
66
|
+
};
|
|
35
67
|
const mineruClient = new MinerUClient(configService, {
|
|
36
68
|
fileSystem,
|
|
37
69
|
integration,
|
|
38
70
|
});
|
|
39
|
-
// Determine file name
|
|
40
|
-
let finalFileName =
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
catch {
|
|
48
|
-
finalFileName = 'document.pdf';
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
else if (filePath) {
|
|
52
|
-
const pathParts = filePath.split(/[/\\]/);
|
|
53
|
-
finalFileName = pathParts[pathParts.length - 1] || 'document.pdf';
|
|
54
|
-
}
|
|
55
|
-
else {
|
|
56
|
-
finalFileName = 'document.pdf';
|
|
57
|
-
}
|
|
71
|
+
// Determine file name from URL
|
|
72
|
+
let finalFileName = 'document.pdf';
|
|
73
|
+
try {
|
|
74
|
+
const parsed = new URL(doc_url);
|
|
75
|
+
finalFileName = parsed.pathname.split('/').pop() || 'document.pdf';
|
|
76
|
+
}
|
|
77
|
+
catch {
|
|
78
|
+
// ignore
|
|
58
79
|
}
|
|
59
80
|
// Create MinerU task
|
|
60
81
|
const { taskId } = await mineruClient.createTask({
|
|
61
|
-
url:
|
|
62
|
-
filePath: filePath,
|
|
82
|
+
url: doc_url,
|
|
63
83
|
fileName: finalFileName,
|
|
64
84
|
isOcr: finalIsOcr,
|
|
65
85
|
enableFormula: finalEnableFormula,
|
|
@@ -75,8 +95,7 @@ export function buildMinerUTool(configService, resultParser, options, fileSystem
|
|
|
75
95
|
throw new Error('Failed to get MinerU task result');
|
|
76
96
|
}
|
|
77
97
|
parsedResult = await resultParser.parseLocalTask(taskResult, taskId, {
|
|
78
|
-
fileUrl,
|
|
79
|
-
filePath,
|
|
98
|
+
fileUrl: doc_url,
|
|
80
99
|
name: finalFileName,
|
|
81
100
|
folder: workspacePath,
|
|
82
101
|
}, fileSystem);
|
|
@@ -85,8 +104,7 @@ export function buildMinerUTool(configService, resultParser, options, fileSystem
|
|
|
85
104
|
// Official API: wait for completion
|
|
86
105
|
const result = await mineruClient.waitForTask(taskId, 5 * 60 * 1000, 5000);
|
|
87
106
|
parsedResult = await resultParser.parseFromUrl(result.full_zip_url, taskId, {
|
|
88
|
-
fileUrl,
|
|
89
|
-
filePath,
|
|
107
|
+
fileUrl: doc_url,
|
|
90
108
|
name: finalFileName,
|
|
91
109
|
folder: workspacePath,
|
|
92
110
|
}, fileSystem);
|
|
@@ -115,8 +133,9 @@ export function buildMinerUTool(configService, resultParser, options, fileSystem
|
|
|
115
133
|
const markdownContent = parsedResult.chunks
|
|
116
134
|
?.map((chunk) => chunk.pageContent)
|
|
117
135
|
.join('\n\n') || '';
|
|
136
|
+
// Return full markdown (do NOT truncate). If the platform/UI needs a preview, it can truncate client-side.
|
|
118
137
|
return [
|
|
119
|
-
|
|
138
|
+
markdownContent,
|
|
120
139
|
{
|
|
121
140
|
files: fileArtifacts,
|
|
122
141
|
taskId,
|
|
@@ -131,14 +150,7 @@ export function buildMinerUTool(configService, resultParser, options, fileSystem
|
|
|
131
150
|
name: 'mineru_pdf_parser',
|
|
132
151
|
description: 'Convert PDF files to markdown format using MinerU. Supports OCR, formula recognition, and table extraction. Returns markdown content and extracted files (images, JSON, etc.).',
|
|
133
152
|
schema: z.object({
|
|
134
|
-
|
|
135
|
-
filePath: z.string().optional().nullable().describe('Local file path of the PDF file'),
|
|
136
|
-
fileName: z.string().optional().nullable().describe('Name of the PDF file'),
|
|
137
|
-
isOcr: z.boolean().optional().nullable().describe('Enable OCR for image-based PDFs (default: true)'),
|
|
138
|
-
enableFormula: z.boolean().optional().nullable().describe('Enable formula recognition (default: true)'),
|
|
139
|
-
enableTable: z.boolean().optional().nullable().describe('Enable table recognition (default: true)'),
|
|
140
|
-
language: z.enum(['en', 'ch']).optional().nullable().describe('Document language: "en" for English, "ch" for Chinese (default: "ch")'),
|
|
141
|
-
modelVersion: z.enum(['pipeline', 'vlm']).optional().nullable().describe('Model version: "pipeline" or "vlm" (default: "pipeline")'),
|
|
153
|
+
doc_url: z.string().min(1).describe('PDF URL (required)'),
|
|
142
154
|
}),
|
|
143
155
|
responseFormat: 'content_and_artifact',
|
|
144
156
|
});
|