@helloxiaohu/plugin-mineru 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,218 @@
1
+ import { ConfigService } from '@nestjs/config';
2
+ import { BuiltinToolset, IToolsetStrategy, FileSystemPermission, ISchemaSecretField } from '@xpert-ai/plugin-sdk';
3
+ import { MinerUResultParserService } from './result-parser.service.js';
4
+ import { MinerUToolsetConfig } from './mineru.toolset.js';
5
+ export declare class MinerUToolsetStrategy implements IToolsetStrategy<MinerUToolsetConfig> {
6
+ private readonly configService;
7
+ private readonly resultParser;
8
+ meta: {
9
+ author: string;
10
+ tags: string[];
11
+ name: string;
12
+ label: {
13
+ en_US: string;
14
+ zh_Hans: string;
15
+ };
16
+ description: {
17
+ en_US: string;
18
+ zh_Hans: string;
19
+ };
20
+ icon: {
21
+ type: string;
22
+ value: string;
23
+ svg: string;
24
+ color: string;
25
+ };
26
+ configSchema: {
27
+ type: string;
28
+ properties: {
29
+ apiUrl: {
30
+ type: string;
31
+ title: {
32
+ en_US: string;
33
+ zh_Hans: string;
34
+ };
35
+ description: {
36
+ en_US: string;
37
+ zh_Hans: string;
38
+ };
39
+ default: string;
40
+ };
41
+ apiKey: {
42
+ type: string;
43
+ title: {
44
+ en_US: string;
45
+ zh_Hans: string;
46
+ };
47
+ description: {
48
+ en_US: string;
49
+ zh_Hans: string;
50
+ };
51
+ 'x-ui': ISchemaSecretField;
52
+ };
53
+ isOcr: {
54
+ type: string;
55
+ title: {
56
+ en_US: string;
57
+ zh_Hans: string;
58
+ };
59
+ description: {
60
+ en_US: string;
61
+ zh_Hans: string;
62
+ };
63
+ enum: string[];
64
+ default: string;
65
+ 'x-ui': {
66
+ enumLabels: {
67
+ true: {
68
+ en_US: string;
69
+ zh_Hans: string;
70
+ };
71
+ false: {
72
+ en_US: string;
73
+ zh_Hans: string;
74
+ };
75
+ };
76
+ };
77
+ };
78
+ enableFormula: {
79
+ type: string;
80
+ title: {
81
+ en_US: string;
82
+ zh_Hans: string;
83
+ };
84
+ description: {
85
+ en_US: string;
86
+ zh_Hans: string;
87
+ };
88
+ enum: string[];
89
+ default: string;
90
+ 'x-ui': {
91
+ enumLabels: {
92
+ true: {
93
+ en_US: string;
94
+ zh_Hans: string;
95
+ };
96
+ false: {
97
+ en_US: string;
98
+ zh_Hans: string;
99
+ };
100
+ };
101
+ };
102
+ };
103
+ enableTable: {
104
+ type: string;
105
+ title: {
106
+ en_US: string;
107
+ zh_Hans: string;
108
+ };
109
+ description: {
110
+ en_US: string;
111
+ zh_Hans: string;
112
+ };
113
+ enum: string[];
114
+ default: string;
115
+ 'x-ui': {
116
+ enumLabels: {
117
+ true: {
118
+ en_US: string;
119
+ zh_Hans: string;
120
+ };
121
+ false: {
122
+ en_US: string;
123
+ zh_Hans: string;
124
+ };
125
+ };
126
+ };
127
+ };
128
+ language: {
129
+ type: string;
130
+ title: {
131
+ en_US: string;
132
+ zh_Hans: string;
133
+ };
134
+ description: {
135
+ en_US: string;
136
+ zh_Hans: string;
137
+ };
138
+ enum: string[];
139
+ default: string;
140
+ 'x-ui': {
141
+ enumLabels: {
142
+ en: {
143
+ en_US: string;
144
+ zh_Hans: string;
145
+ };
146
+ ch: {
147
+ en_US: string;
148
+ zh_Hans: string;
149
+ };
150
+ };
151
+ };
152
+ };
153
+ modelVersion: {
154
+ type: string;
155
+ title: {
156
+ en_US: string;
157
+ zh_Hans: string;
158
+ };
159
+ description: {
160
+ en_US: string;
161
+ zh_Hans: string;
162
+ };
163
+ enum: string[];
164
+ default: string;
165
+ 'x-ui': {
166
+ enumLabels: {
167
+ pipeline: {
168
+ en_US: string;
169
+ zh_Hans: string;
170
+ };
171
+ vlm: {
172
+ en_US: string;
173
+ zh_Hans: string;
174
+ };
175
+ };
176
+ };
177
+ };
178
+ extraFormats: {
179
+ type: string;
180
+ title: {
181
+ en_US: string;
182
+ zh_Hans: string;
183
+ };
184
+ description: {
185
+ en_US: string;
186
+ zh_Hans: string;
187
+ };
188
+ };
189
+ };
190
+ required: string[];
191
+ };
192
+ };
193
+ readonly permissions: FileSystemPermission[];
194
+ constructor(configService: ConfigService, resultParser: MinerUResultParserService);
195
+ validateConfig(config: MinerUToolsetConfig | null | undefined): Promise<void>;
196
+ create(config: any): Promise<BuiltinToolset>;
197
+ createTools(): import("@langchain/core/tools").DynamicStructuredTool<import("zod").ZodObject<{
198
+ doc_url: import("zod").ZodString;
199
+ page_ranges: import("zod").ZodNullable<import("zod").ZodOptional<import("zod").ZodString>>;
200
+ }, "strip", import("zod").ZodTypeAny, {
201
+ doc_url?: string;
202
+ page_ranges?: string;
203
+ }, {
204
+ doc_url?: string;
205
+ page_ranges?: string;
206
+ }>, {
207
+ doc_url?: string;
208
+ page_ranges?: string;
209
+ }, {
210
+ doc_url?: string;
211
+ page_ranges?: string;
212
+ }, (string | {
213
+ files: any[];
214
+ taskId: string;
215
+ metadata: any;
216
+ })[]>[];
217
+ }
218
+ //# sourceMappingURL=mineru-toolset.strategy.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"mineru-toolset.strategy.d.ts","sourceRoot":"","sources":["../../src/lib/mineru-toolset.strategy.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAC/C,OAAO,EACL,cAAc,EACd,gBAAgB,EAEhB,oBAAoB,EACpB,kBAAkB,EACnB,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AACvE,OAAO,EAAiB,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AASzE,qBAEa,qBAAsB,YAAW,gBAAgB,CAAC,mBAAmB,CAAC;IA6M/E,OAAO,CAAC,QAAQ,CAAC,aAAa;IAE9B,OAAO,CAAC,QAAQ,CAAC,YAAY;IA9M/B,IAAI;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;4BA4Ca,kBAAkB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAoJjC;IAEF,QAAQ,CAAC,WAAW,yBAMlB;gBAIiB,aAAa,EAAE,aAAa,EAE5B,YAAY,EAAE,yBAAyB;IAG1D,cAAc,CAAC,MAAM,EAAE,mBAAmB,GAAG,IAAI,GAAG,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC;IAWvE,MAAM,CAAC,MAAM,EAAE,GAAG,GAAG,OAAO,CAAC,cAAc,CAAC;IAwBlD,WAAW;;;;;;;;;;;;;;;;;;;;CAiBZ"}
@@ -0,0 +1,261 @@
1
+ import { __decorate, __metadata, __param } from "tslib";
2
+ import { Injectable, forwardRef, Inject } from '@nestjs/common';
3
+ import { ConfigService } from '@nestjs/config';
4
+ import { ToolsetStrategy, } from '@xpert-ai/plugin-sdk';
5
+ import { MinerUResultParserService } from './result-parser.service.js';
6
+ import { MinerUToolset } from './mineru.toolset.js';
7
+ import { MinerU, icon } from './types.js';
8
+ import { buildMinerUTool } from './mineru.tool.js';
9
/**
 * Build the config-schema fragment for an on/off option that is stored as the
 * strings 'true' / 'false' and defaults to 'true'.
 *
 * A fresh object (including fresh label objects) is returned on every call so
 * the individual schema properties never share nested references.
 *
 * @param title Localized title, `{ en_US, zh_Hans }`.
 * @param description Localized description, `{ en_US, zh_Hans }`.
 * @returns The schema property object.
 */
function booleanToggleSchema(title, description) {
    return {
        type: 'string',
        title,
        description,
        enum: ['true', 'false'],
        default: 'true',
        'x-ui': {
            enumLabels: {
                'true': {
                    en_US: 'Enabled',
                    zh_Hans: '启用',
                },
                'false': {
                    en_US: 'Disabled',
                    zh_Hans: '禁用',
                },
            },
        },
    };
}
/**
 * Toolset strategy registered under the `MinerU` provider name.
 *
 * Holds the static metadata (labels, icon, config schema) shown for the
 * toolset, validates user configuration, and creates `MinerUToolset`
 * instances wired with the injected `ConfigService` and result parser.
 */
let MinerUToolsetStrategy = class MinerUToolsetStrategy {
    /**
     * @param configService Injected NestJS config service, forwarded to the toolset/tool.
     * @param resultParser Injected MinerU result parser, forwarded to the toolset/tool.
     */
    constructor(configService, resultParser) {
        this.configService = configService;
        this.resultParser = resultParser;
        this.meta = {
            author: 'Xpert AI',
            tags: ['pdf', 'markdown', 'parser', 'ocr', 'mineru', 'document', 'extraction'],
            name: MinerU,
            label: {
                en_US: 'MinerU PDF Parser',
                zh_Hans: 'MinerU PDF 解析器',
            },
            description: {
                en_US: 'Convert documents to markdown format using MinerU. Supports OCR, formula recognition, and table extraction.',
                zh_Hans: '使用 MinerU 将文档转换为 Markdown 格式。支持 OCR、公式识别和表格提取。',
            },
            icon: {
                type: 'svg',
                value: icon,
                svg: icon,
                color: '#14b8a6',
            },
            configSchema: {
                type: 'object',
                properties: {
                    apiUrl: {
                        type: 'string',
                        title: {
                            en_US: 'Base URL',
                            zh_Hans: 'Base URL',
                        },
                        description: {
                            en_US: 'MinerU API base url. Official: https://mineru.net/api/v4',
                            zh_Hans: 'MinerU 服务地址。官方: https://mineru.net/api/v4',
                        },
                        default: 'https://mineru.net/api/v4',
                    },
                    apiKey: {
                        type: 'string',
                        title: {
                            en_US: 'API Key',
                            zh_Hans: 'API Key',
                        },
                        description: {
                            en_US: 'The API Key of the MinerU server (required)',
                            zh_Hans: 'MinerU 服务令牌(必填)',
                        },
                        'x-ui': {
                            component: 'secretInput',
                            label: 'API Key',
                            placeholder: 'MinerU API Key',
                            revealable: true,
                            maskSymbol: '*',
                            persist: true,
                        },
                    },
                    isOcr: booleanToggleSchema({
                        en_US: 'Enable OCR',
                        zh_Hans: '启用 OCR',
                    }, {
                        en_US: 'Enable OCR for image-based documents',
                        zh_Hans: '为基于图像的文档启用 OCR',
                    }),
                    enableFormula: booleanToggleSchema({
                        en_US: 'Enable Formula Recognition',
                        zh_Hans: '启用公式识别',
                    }, {
                        en_US: 'Enable formula recognition',
                        zh_Hans: '启用公式识别',
                    }),
                    enableTable: booleanToggleSchema({
                        en_US: 'Enable Table Recognition',
                        zh_Hans: '启用表格识别',
                    }, {
                        en_US: 'Enable table recognition',
                        zh_Hans: '启用表格识别',
                    }),
                    language: {
                        type: 'string',
                        title: {
                            en_US: 'Document Language',
                            zh_Hans: '文档语言',
                        },
                        description: {
                            en_US: 'Document language: "en" for English, "ch" for Chinese (default: "ch")',
                            zh_Hans: '文档语言:"en" 表示英语,"ch" 表示中文(默认:"ch")',
                        },
                        enum: ['en', 'ch'],
                        default: 'ch',
                        'x-ui': {
                            enumLabels: {
                                'en': {
                                    en_US: 'en',
                                    zh_Hans: '英文',
                                },
                                'ch': {
                                    en_US: 'ch',
                                    zh_Hans: '中文',
                                },
                            },
                        },
                    },
                    modelVersion: {
                        type: 'string',
                        title: {
                            en_US: 'Model Version',
                            zh_Hans: '模型版本',
                        },
                        description: {
                            en_US: 'Model version: "pipeline" or "vlm" (default: "pipeline")',
                            zh_Hans: '模型版本:"pipeline" 或 "vlm"(默认:"pipeline")',
                        },
                        enum: ['pipeline', 'vlm'],
                        default: 'pipeline',
                        'x-ui': {
                            enumLabels: {
                                'pipeline': {
                                    en_US: 'pipeline',
                                    zh_Hans: 'pipeline',
                                },
                                'vlm': {
                                    en_US: 'vlm',
                                    zh_Hans: 'vlm',
                                },
                            },
                        },
                    },
                    extraFormats: {
                        type: 'string',
                        title: {
                            en_US: 'Extra Formats',
                            zh_Hans: '额外输出格式',
                        },
                        description: {
                            en_US: 'Optional extra formats, comma-separated (docx, html, latex).',
                            zh_Hans: '可选额外输出格式,逗号分隔(docx、html、latex)。',
                        },
                    },
                },
                required: ['apiKey'],
            },
        };
        this.permissions = [
            {
                type: 'filesystem',
                operations: ['read', 'write', 'list'],
                scope: [],
            },
        ];
    }
    /**
     * Validate a toolset configuration.
     *
     * A missing config (null/undefined) is accepted as-is. Otherwise an API key
     * must be present; a key that is empty or only whitespace counts as missing.
     *
     * Declared `async` so that a validation failure is always delivered as a
     * rejected promise, matching the `Promise<void>` return type, instead of a
     * synchronous throw that `.catch()` callers would never observe.
     *
     * @param config Configuration to validate, or null/undefined.
     * @returns Promise that resolves when the configuration is acceptable.
     * @throws Error (as a rejection) 'MinerU apiKey is required' when the key is missing.
     */
    async validateConfig(config) {
        if (!config) {
            return;
        }
        const apiKey = typeof config.apiKey === 'string' ? config.apiKey.trim() : config.apiKey;
        if (!apiKey) {
            throw new Error('MinerU apiKey is required');
        }
    }
    /**
     * Create a `MinerUToolset` from either a toolset record carrying a
     * `credentials` object or a plain configuration object.
     *
     * @param config Toolset record (with `credentials`) or raw config; may be nullish.
     * @returns The constructed toolset.
     */
    async create(config) {
        // A record that has a `credentials` key is treated as a toolset entity;
        // anything else is used directly as the settings object.
        const toolset = config && typeof config === 'object' && 'credentials' in config
            ? config
            : null;
        const creds = toolset?.credentials ?? config ?? {};
        const configWithDependencies = {
            apiUrl: creds.apiUrl,
            apiKey: creds.apiKey,
            extraFormats: creds.extraFormats,
            isOcr: creds.isOcr,
            enableFormula: creds.enableFormula,
            enableTable: creds.enableTable,
            language: creds.language,
            modelVersion: creds.modelVersion,
            configService: this.configService,
            resultParser: this.resultParser,
        };
        return new MinerUToolset(configWithDependencies);
    }
    /**
     * Build the tool list with the stock defaults and no integration options
     * or file system attached (both are passed as `undefined`).
     *
     * @returns Array containing the single MinerU tool.
     */
    createTools() {
        return [
            buildMinerUTool(this.configService, this.resultParser, undefined, undefined, {
                isOcr: true,
                enableFormula: true,
                enableTable: true,
                language: 'ch',
                modelVersion: 'pipeline',
            }),
        ];
    }
};
MinerUToolsetStrategy = __decorate([
    Injectable(),
    ToolsetStrategy(MinerU),
    __param(0, Inject(forwardRef(() => ConfigService))),
    __param(1, Inject(MinerUResultParserService)),
    __metadata("design:paramtypes", [ConfigService,
        MinerUResultParserService])
], MinerUToolsetStrategy);
export { MinerUToolsetStrategy };
@@ -1 +1 @@
1
- {"version":3,"file":"mineru.plugin.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.plugin.ts"],"names":[],"mappings":"AACA,OAAO,EAAqB,kBAAkB,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AAO/F,qBAiBa,YAAa,YAAW,kBAAkB,EAAE,gBAAgB;IAExE,OAAO,CAAC,UAAU,CAAQ;IAE1B;;OAEG;IACH,iBAAiB,IAAI,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAMzC;;OAEG;IACH,eAAe,IAAI,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;CAKvC"}
1
+ {"version":3,"file":"mineru.plugin.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.plugin.ts"],"names":[],"mappings":"AACA,OAAO,EAAqB,kBAAkB,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AAQ/F,qBAkBa,YAAa,YAAW,kBAAkB,EAAE,gBAAgB;IAExE,OAAO,CAAC,UAAU,CAAQ;IAE1B;;OAEG;IACH,iBAAiB,IAAI,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAMzC;;OAEG;IACH,eAAe,IAAI,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;CAKvC"}
@@ -7,6 +7,7 @@ import { MinerUTransformerStrategy } from './transformer-mineru.strategy.js';
7
7
  import { MinerUResultParserService } from './result-parser.service.js';
8
8
  import { MinerUIntegrationStrategy } from './integration.strategy.js';
9
9
  import { MinerUController } from './mineru.controller.js';
10
+ import { MinerUToolsetStrategy } from './mineru-toolset.strategy.js';
10
11
  let MinerUPlugin = MinerUPlugin_1 = class MinerUPlugin {
11
12
  constructor() {
12
13
  // We disable by default additional logging for each event to avoid cluttering the logs
@@ -41,6 +42,7 @@ MinerUPlugin = MinerUPlugin_1 = __decorate([
41
42
  MinerUIntegrationStrategy,
42
43
  MinerUTransformerStrategy,
43
44
  MinerUResultParserService,
45
+ MinerUToolsetStrategy,
44
46
  ],
45
47
  controllers: [
46
48
  MinerUController
@@ -0,0 +1,33 @@
1
+ import { XpFileSystem } from '@xpert-ai/plugin-sdk';
2
+ import { z } from 'zod';
3
+ import { ConfigService } from '@nestjs/config';
4
+ import { MinerUResultParserService } from './result-parser.service.js';
5
+ import { MinerUIntegrationOptions } from './types.js';
6
+ export interface MinerUToolDefaults {
7
+ isOcr?: boolean | string;
8
+ enableFormula?: boolean | string;
9
+ enableTable?: boolean | string;
10
+ language?: 'en' | 'ch';
11
+ modelVersion?: 'pipeline' | 'vlm';
12
+ }
13
+ export declare function buildMinerUTool(configService: ConfigService, resultParser: MinerUResultParserService, options?: MinerUIntegrationOptions, fileSystem?: XpFileSystem, defaults?: MinerUToolDefaults): import("@langchain/core/tools").DynamicStructuredTool<z.ZodObject<{
14
+ doc_url: z.ZodString;
15
+ page_ranges: z.ZodNullable<z.ZodOptional<z.ZodString>>;
16
+ }, "strip", z.ZodTypeAny, {
17
+ doc_url?: string;
18
+ page_ranges?: string;
19
+ }, {
20
+ doc_url?: string;
21
+ page_ranges?: string;
22
+ }>, {
23
+ doc_url?: string;
24
+ page_ranges?: string;
25
+ }, {
26
+ doc_url?: string;
27
+ page_ranges?: string;
28
+ }, (string | {
29
+ files: any[];
30
+ taskId: string;
31
+ metadata: any;
32
+ })[]>;
33
+ //# sourceMappingURL=mineru.tool.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"mineru.tool.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.tool.ts"],"names":[],"mappings":"AAEA,OAAO,EAAmB,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACrE,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAE/C,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AAEvE,OAAO,EAAU,wBAAwB,EAAE,MAAM,YAAY,CAAC;AAE9D,MAAM,WAAW,kBAAkB;IACjC,KAAK,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IACzB,aAAa,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IACjC,WAAW,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IAC/B,QAAQ,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC;IACvB,YAAY,CAAC,EAAE,UAAU,GAAG,KAAK,CAAC;CACnC;AAiBD,wBAAgB,eAAe,CAC7B,aAAa,EAAE,aAAa,EAC5B,YAAY,EAAE,yBAAyB,EACvC,OAAO,CAAC,EAAE,wBAAwB,EAClC,UAAU,CAAC,EAAE,YAAY,EACzB,QAAQ,CAAC,EAAE,kBAAkB;;;;;;;;;;;;;;;;;;;MAsM9B"}
@@ -0,0 +1,180 @@
1
+ import { tool } from '@langchain/core/tools';
2
+ import { getCurrentTaskInput } from '@langchain/langgraph';
3
+ import { getErrorMessage } from '@xpert-ai/plugin-sdk';
4
+ import { z } from 'zod';
5
+ import { MinerUClient } from './mineru.client.js';
6
+ import { MinerU } from './types.js';
7
/**
 * Normalize the `extraFormats` option into a clean list of format names.
 *
 * Accepts a comma-separated string (e.g. "docx, html") or an array of values.
 * Array items are stringified; every entry is trimmed and empty entries are
 * dropped.
 *
 * @param value Raw option value: undefined/empty, a string, or an array.
 * @returns The cleaned list, or `undefined` when nothing usable remains or
 *   the value is neither a string nor an array.
 */
function normalizeExtraFormats(value) {
    if (!value) {
        return undefined;
    }
    let rawItems;
    if (Array.isArray(value)) {
        rawItems = value.map((item) => String(item));
    }
    else if (typeof value === 'string') {
        rawItems = value.split(',');
    }
    else {
        // Stored configuration is not guaranteed to be a string; ignore other
        // truthy shapes (numbers, objects) rather than crashing on `.split`.
        return undefined;
    }
    const formats = rawItems.map((item) => item.trim()).filter(Boolean);
    return formats.length ? formats : undefined;
}
21
/** MIME types for common document and image artifact extensions. */
const MINERU_MIME_TYPES = {
    md: 'text/markdown',
    json: 'application/json',
    html: 'text/html',
    htm: 'text/html',
    tex: 'application/x-tex',
    docx: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    txt: 'text/plain',
    pdf: 'application/pdf',
    zip: 'application/zip',
    png: 'image/png',
    jpg: 'image/jpeg',
    jpeg: 'image/jpeg',
    gif: 'image/gif',
    webp: 'image/webp',
    bmp: 'image/bmp',
    svg: 'image/svg+xml',
};
/**
 * Pick a MIME type for an extracted asset from its type and file extension.
 *
 * Image assets only ever receive an `image/*` type and fall back to
 * 'image/jpeg'; other files fall back to 'application/octet-stream' when the
 * extension is not in the table.
 */
function guessMimeType(assetType, extension) {
    const known = Object.prototype.hasOwnProperty.call(MINERU_MIME_TYPES, extension)
        ? MINERU_MIME_TYPES[extension]
        : undefined;
    if (assetType === 'image') {
        return known && known.startsWith('image/') ? known : 'image/jpeg';
    }
    return known ?? 'application/octet-stream';
}
/**
 * Resolve an on/off default that may be a boolean or the strings
 * 'true' / 'false'. `undefined` means enabled; any other value is enabled
 * only when it is exactly `true` or 'true'.
 */
function resolveDefaultFlag(value) {
    if (value === undefined) {
        return true;
    }
    return typeof value === 'string' ? value === 'true' : value === true;
}
/**
 * Build the `mineru_pdf_parser` tool.
 *
 * When invoked, the tool creates a MinerU task for `doc_url`, obtains the
 * result (directly for a 'self-hosted' client, otherwise by waiting for the
 * task and parsing the result archive URL), and returns the joined chunk
 * text together with an artifact describing the extracted files.
 *
 * @param configService Config service handed to `MinerUClient`.
 * @param resultParser Parser used to turn the MinerU result into chunks/metadata.
 * @param options Optional integration options (`apiUrl`, `apiKey`, `extraFormats`).
 * @param fileSystem Optional file system handed to the client and the parser.
 * @param defaults Optional parsing defaults (OCR, formula, table, language, model version).
 * @returns A tool whose response format is 'content_and_artifact': the
 *   content is the markdown text, the artifact is `{ files, taskId, metadata }`.
 * @throws Error 'MinerU processing failed: …' wrapping any failure during processing.
 */
export function buildMinerUTool(configService, resultParser, options, fileSystem, defaults) {
    return tool(async (input) => {
        try {
            const { doc_url, page_ranges } = input;
            console.debug('[MinerU] tool invoked with input', {
                doc_url,
                extraKeys: Object.keys(input || {}).filter((key) => key !== 'doc_url'),
            });
            if (!doc_url) {
                throw new Error('doc_url is required');
            }
            // Output folder comes from the current task state when available.
            const currentState = getCurrentTaskInput();
            const workspacePath = currentState?.['sys']?.['volume'] ?? '/tmp/xpert';
            const finalApiUrl = options?.apiUrl || 'https://mineru.net/api/v4';
            const finalApiKey = options?.apiKey;
            // Never log the full key: long keys show only their first and last
            // four characters, short keys only whether one was supplied.
            const maskedKey = finalApiKey && finalApiKey.length > 8
                ? `${finalApiKey.slice(0, 4)}***${finalApiKey.slice(-4)}`
                : finalApiKey
                    ? 'provided'
                    : 'missing';
            console.debug('[MinerU] buildMinerUTool config', {
                fromOptions: Boolean(options),
                apiUrl: finalApiUrl,
                apiKey: maskedKey,
                defaults: {
                    isOcr: defaults?.isOcr,
                    enableFormula: defaults?.enableFormula,
                    enableTable: defaults?.enableTable,
                    language: defaults?.language,
                    modelVersion: defaults?.modelVersion,
                },
            });
            const finalIsOcr = resolveDefaultFlag(defaults?.isOcr);
            const finalEnableFormula = resolveDefaultFlag(defaults?.enableFormula);
            const finalEnableTable = resolveDefaultFlag(defaults?.enableTable);
            const finalLanguage = defaults?.language || 'ch';
            const finalModelVersion = defaults?.modelVersion || 'pipeline';
            const finalExtraFormats = normalizeExtraFormats(options?.extraFormats);
            const effectiveOptions = {
                apiUrl: finalApiUrl,
                apiKey: finalApiKey,
                extraFormats: finalExtraFormats,
            };
            const integration = {
                provider: MinerU,
                options: effectiveOptions,
            };
            const mineruClient = new MinerUClient(configService, {
                fileSystem,
                integration,
            });
            // Use the last path segment of the URL as the file name; keep the
            // generic name when the URL cannot be parsed or has no segment.
            let finalFileName = 'document.pdf';
            try {
                const parsed = new URL(doc_url);
                finalFileName = parsed.pathname.split('/').pop() || 'document.pdf';
            }
            catch {
                // Ignore URL parsing errors.
            }
            const { taskId } = await mineruClient.createTask({
                url: doc_url,
                fileName: finalFileName,
                isOcr: finalIsOcr,
                enableFormula: finalEnableFormula,
                enableTable: finalEnableTable,
                language: finalLanguage,
                modelVersion: finalModelVersion,
                pageRanges: page_ranges ?? undefined,
                extraFormats: finalExtraFormats,
            });
            const documentInfo = {
                fileUrl: doc_url,
                name: finalFileName,
                folder: workspacePath,
            };
            let parsedResult;
            if (mineruClient.serverType === 'self-hosted') {
                const taskResult = mineruClient.getSelfHostedTask(taskId);
                if (!taskResult) {
                    throw new Error('Failed to get MinerU task result');
                }
                parsedResult = await resultParser.parseLocalTask(taskResult, taskId, documentInfo, fileSystem);
            }
            else {
                // Poll every 5 seconds, for at most 5 minutes.
                const result = await mineruClient.waitForTask(taskId, 5 * 60 * 1000, 5000);
                const fullZipUrl = result.full_zip_url;
                parsedResult = await resultParser.parseFromUrl(fullZipUrl, taskId, documentInfo, fileSystem);
                if (fullZipUrl) {
                    // Expose the archive URL under both spellings unless the
                    // parser already set them.
                    parsedResult.metadata = parsedResult.metadata ?? {};
                    parsedResult.metadata.fullZipUrl = parsedResult.metadata.fullZipUrl ?? fullZipUrl;
                    parsedResult.metadata.full_zip_url = parsedResult.metadata.full_zip_url ?? fullZipUrl;
                }
            }
            const fileArtifacts = [];
            for (const asset of parsedResult.metadata?.assets ?? []) {
                if (asset.type !== 'file' && asset.type !== 'image') {
                    continue;
                }
                const fileName = asset.filePath?.split(/[/\\]/).pop() ||
                    asset.url?.split('/').pop() ||
                    'file';
                const extension = fileName.split('.').pop()?.toLowerCase() || 'md';
                fileArtifacts.push({
                    fileName: fileName,
                    filePath: asset.filePath,
                    fileUrl: asset.url,
                    mimeType: guessMimeType(asset.type, extension),
                    extension: extension,
                });
            }
            const markdownContent = parsedResult.chunks?.map((chunk) => chunk.pageContent).join('\n\n') || '';
            return [
                markdownContent,
                {
                    files: fileArtifacts,
                    taskId,
                    metadata: parsedResult.metadata,
                },
            ];
        }
        catch (error) {
            throw new Error(`MinerU processing failed: ${getErrorMessage(error)}`);
        }
    }, {
        name: 'mineru_pdf_parser',
        description: 'Convert documents to markdown using MinerU. Supports OCR, formula recognition, and table extraction. Returns markdown content and extracted files.',
        schema: z.object({
            doc_url: z.string().min(1).describe('Document URL (required)'),
            page_ranges: z
                .string()
                .optional()
                .nullable()
                .describe('Page ranges like "2,4-6" or "2--2"'),
        }),
        responseFormat: 'content_and_artifact',
    });
}
@@ -0,0 +1,25 @@
1
+ import { StructuredToolInterface, ToolSchemaBase } from '@langchain/core/tools';
2
+ import { BuiltinToolset, XpFileSystem } from '@xpert-ai/plugin-sdk';
3
+ import { ConfigService } from '@nestjs/config';
4
+ import { MinerUResultParserService } from './result-parser.service.js';
5
+ export interface MinerUToolsetConfig {
6
+ apiUrl?: string;
7
+ apiKey?: string;
8
+ extraFormats?: string | string[];
9
+ fileSystem?: XpFileSystem;
10
+ configService?: ConfigService;
11
+ resultParser?: MinerUResultParserService;
12
+ isOcr?: boolean | string;
13
+ enableFormula?: boolean | string;
14
+ enableTable?: boolean | string;
15
+ language?: 'en' | 'ch';
16
+ modelVersion?: 'pipeline' | 'vlm';
17
+ }
18
+ export declare class MinerUToolset extends BuiltinToolset<StructuredToolInterface, MinerUToolsetConfig> {
19
+ private readonly config;
20
+ tools: any[];
21
+ constructor(config: MinerUToolsetConfig);
22
+ _validateCredentials(_credentials: MinerUToolsetConfig): Promise<void>;
23
+ initTools(): Promise<StructuredToolInterface<ToolSchemaBase, any, any>[]>;
24
+ }
25
+ //# sourceMappingURL=mineru.toolset.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"mineru.toolset.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.toolset.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,uBAAuB,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAChF,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpE,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAC/C,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AAIvE,MAAM,WAAW,mBAAmB;IAClC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IACjC,UAAU,CAAC,EAAE,YAAY,CAAC;IAC1B,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,YAAY,CAAC,EAAE,yBAAyB,CAAC;IACzC,KAAK,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IACzB,aAAa,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IACjC,WAAW,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IAC/B,QAAQ,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC;IACvB,YAAY,CAAC,EAAE,UAAU,GAAG,KAAK,CAAC;CACnC;AAED,qBAAa,aAAc,SAAQ,cAAc,CAAC,uBAAuB,EAAE,mBAAmB,CAAC;IAC7F,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAsB;IACpC,KAAK,EAAE,GAAG,EAAE,CAAM;gBAEf,MAAM,EAAE,mBAAmB;IAiBxB,oBAAoB,CAAC,YAAY,EAAE,mBAAmB,GAAG,OAAO,CAAC,IAAI,CAAC;IAItE,SAAS,IAAI,OAAO,CAAC,uBAAuB,CAAC,cAAc,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC;CA+CzF"}
@@ -0,0 +1,48 @@
1
+ import { BuiltinToolset } from '@xpert-ai/plugin-sdk';
2
+ import { buildMinerUTool } from './mineru.tool.js';
3
/**
 * Resolve an on/off setting that may be a boolean, the strings
 * 'true' / 'false', or absent. An absent (undefined) setting is enabled,
 * in line with the 'true' default these options are configured with; any
 * other value is enabled only when it is exactly `true` or 'true'.
 */
function resolveToggle(value) {
    if (value === undefined) {
        return true;
    }
    return value === 'true' || value === true;
}
/**
 * Toolset that exposes the MinerU document parser as a single tool.
 *
 * The config carries both the user-facing settings (API URL/key, parsing
 * options) and the injected services the tool needs (`configService`,
 * `resultParser`, optional `fileSystem`).
 */
export class MinerUToolset extends BuiltinToolset {
    /**
     * @param config Settings plus injected dependencies for the MinerU tool.
     */
    constructor(config) {
        super('mineru', undefined, config);
        this.tools = [];
        this.config = config;
        if ('logger' in this && this.logger) {
            // Log only the plain settings. The injected services are left out:
            // serializing them could dump unrelated application configuration
            // or fail on circular references.
            const rawKey = config?.apiKey;
            let maskedKey;
            if (rawKey) {
                maskedKey =
                    rawKey.length > 8
                        ? `${rawKey.substring(0, 4)}...${rawKey.substring(rawKey.length - 4)}`
                        : '***';
            }
            const configForLog = {
                apiUrl: config?.apiUrl,
                apiKey: maskedKey,
                extraFormats: config?.extraFormats,
                isOcr: config?.isOcr,
                enableFormula: config?.enableFormula,
                enableTable: config?.enableTable,
                language: config?.language,
                modelVersion: config?.modelVersion,
            };
            this.logger.log(`[MinerU] MinerUToolset constructor received config: ${JSON.stringify(configForLog, null, 2)}`);
        }
    }
    /**
     * Credential validation hook; intentionally a no-op.
     *
     * @param _credentials Unused.
     */
    async _validateCredentials(_credentials) {
        // No validation during authorization phase.
    }
    /**
     * Build the MinerU tool from the stored config and cache it in `this.tools`.
     *
     * @returns The tool list (a single MinerU tool).
     * @throws Error when `configService` or `resultParser` is missing from the config.
     */
    async initTools() {
        const { configService, resultParser, apiUrl, apiKey, extraFormats, fileSystem, isOcr, enableFormula, enableTable, language, modelVersion, } = this.config;
        if (!configService || !resultParser) {
            throw new Error('ConfigService and MinerUResultParserService are required');
        }
        const integrationOptions = {
            apiUrl: apiUrl || 'https://mineru.net/api/v4',
            apiKey,
            extraFormats,
        };
        const toolDefaults = {
            isOcr: resolveToggle(isOcr),
            enableFormula: resolveToggle(enableFormula),
            enableTable: resolveToggle(enableTable),
            language: language || 'ch',
            modelVersion: modelVersion || 'pipeline',
        };
        this.tools = [
            buildMinerUTool(configService, resultParser, integrationOptions, fileSystem, toolDefaults),
        ];
        return this.tools;
    }
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@helloxiaohu/plugin-mineru",
3
- "version": "0.1.2",
3
+ "version": "0.1.3",
4
4
  "description": "MinerU document converter plugin for Xpert AI platform",
5
5
  "license": "AGPL-3.0",
6
6
  "repository": {