@helloxiaohu/plugin-mineru 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,306 +0,0 @@
1
- import { __decorate, __metadata, __param } from "tslib";
2
- import { Injectable, forwardRef, Inject } from '@nestjs/common';
3
- import { ConfigService } from '@nestjs/config';
4
- import { ToolsetStrategy, } from '@xpert-ai/plugin-sdk';
5
- import { MinerUResultParserService } from './result-parser.service.js';
6
- import { MinerUToolset } from './mineru.toolset.js';
7
- import { MinerU, icon } from './types.js';
8
- import { buildMinerUTool } from './mineru.tool.js';
9
/**
 * Build a bilingual text entry in the `{ en_US, zh_Hans }` shape used by the toolset metadata.
 *
 * @param {string} en_US - English text.
 * @param {string} zh_Hans - Simplified Chinese text.
 * @returns {{ en_US: string, zh_Hans: string }} The localized pair.
 */
function mineruI18n(en_US, zh_Hans) {
    return { en_US, zh_Hans };
}
/**
 * Build the config-schema entry for an on/off parsing setting.
 * These settings are modelled as a string enum ('true' / 'false') that defaults to 'true',
 * with localized "Enabled" / "Disabled" labels for the UI.
 *
 * @param {{ en_US: string, zh_Hans: string }} title - Localized field title.
 * @param {{ en_US: string, zh_Hans: string }} description - Localized field description.
 * @returns {object} A fresh schema object (nothing is shared between calls).
 */
function mineruToggleProperty(title, description) {
    return {
        type: 'string',
        title,
        description,
        enum: ['true', 'false'],
        default: 'true',
        'x-ui': {
            enumLabels: {
                'true': mineruI18n('Enabled', '启用'),
                'false': mineruI18n('Disabled', '禁用'),
            },
        },
    };
}
/**
 * ToolsetStrategy for the MinerU PDF parser tool.
 * Registers MinerU as a toolset that can be used in agent workflows: it exposes the toolset
 * metadata (labels, icon, config schema), validates configuration, and creates
 * `MinerUToolset` instances wired with the injected services.
 */
let MinerUToolsetStrategy = class MinerUToolsetStrategy {
    /**
     * @param configService - Injected config service, handed on to the toolset and the tool.
     * @param resultParser - Injected MinerU result parser, handed on to the toolset and the tool.
     */
    constructor(configService, resultParser) {
        this.configService = configService;
        this.resultParser = resultParser;
        /**
         * Metadata for MinerU toolset
         */
        this.meta = {
            author: 'Xpert AI',
            tags: ['pdf', 'markdown', 'parser', 'ocr', 'mineru', 'document', 'extraction'],
            name: MinerU,
            label: mineruI18n('MinerU PDF Parser', 'MinerU PDF 解析器'),
            description: mineruI18n('Convert PDF files to markdown format using MinerU. Supports OCR, formula recognition, and table extraction.', '使用 MinerU 将 PDF 文件转换为 Markdown 格式。支持 OCR、公式识别和表格提取。'),
            icon: {
                // Provide both shapes to maximize compatibility with different platform icon resolvers
                // - builtin-provider icon endpoints may look for `type/value`
                // - toolset registries may look for `svg`
                type: 'svg',
                value: icon,
                svg: icon,
                color: '#14b8a6',
            },
            configSchema: {
                type: 'object',
                properties: {
                    /**
                     * NOTE:
                     * MinerU is intentionally kept as a "self-contained" toolset that stores its own API
                     * credentials, instead of relying on the platform IntegrationPermission flow.
                     *
                     * Reason: during the built-in toolset authorization step, the platform may send
                     * `credentials = null`, and backend may access `credentials.integration`, causing a 500
                     * (`Cannot read properties of null (reading 'integration')`).
                     * Defining API fields directly ensures the authorization UI renders fields and always
                     * submits an object.
                     */
                    apiUrl: {
                        type: 'string',
                        title: mineruI18n('Base URL', 'Base URL'),
                        description: mineruI18n('MinerU API base url. Official: https://mineru.net/api/v4', 'MinerU 服务地址。官方: https://mineru.net/api/v4'),
                        // Not listed in `required`: it is optional and has a default value.
                        default: 'https://mineru.net/api/v4',
                    },
                    apiKey: {
                        type: 'string',
                        title: mineruI18n('API Key', 'API Key'),
                        description: mineruI18n('The API Key of the MinerU server (required)', 'MinerU 服务令牌(必填)'),
                        'x-ui': {
                            component: 'secretInput',
                            label: 'API Key',
                            placeholder: 'MinerU API Key',
                            revealable: true,
                            maskSymbol: '*',
                            persist: true,
                        },
                    },
                    // Default parsing settings (optional). The three toggles are string enums, not booleans.
                    isOcr: mineruToggleProperty(mineruI18n('Enable OCR', '启用 OCR'), mineruI18n('Enable OCR for image-based PDFs', '为基于图像的 PDF 启用 OCR')),
                    enableFormula: mineruToggleProperty(mineruI18n('Enable Formula Recognition', '启用公式识别'), mineruI18n('Enable formula recognition', '启用公式识别')),
                    enableTable: mineruToggleProperty(mineruI18n('Enable Table Recognition', '启用表格识别'), mineruI18n('Enable table recognition', '启用表格识别')),
                    language: {
                        type: 'string',
                        title: mineruI18n('Document Language', '文档语言'),
                        description: mineruI18n('Document language: "en" for English, "ch" for Chinese (default: "ch")', '文档语言:"en" 表示英语,"ch" 表示中文(默认:"ch")'),
                        enum: ['en', 'ch'],
                        default: 'ch',
                        'x-ui': {
                            enumLabels: {
                                'en': mineruI18n('en', '英文'),
                                'ch': mineruI18n('ch', '中文'),
                            },
                        },
                    },
                    modelVersion: {
                        type: 'string',
                        title: mineruI18n('Model Version', '模型版本'),
                        description: mineruI18n('Model version: "pipeline" or "vlm" (default: "pipeline")', '模型版本:"pipeline" 或 "vlm"(默认:"pipeline")'),
                        enum: ['pipeline', 'vlm'],
                        default: 'pipeline',
                        'x-ui': {
                            enumLabels: {
                                'pipeline': mineruI18n('pipeline', 'pipeline'),
                                'vlm': mineruI18n('vlm', 'vlm'),
                            },
                        },
                    },
                },
                required: ['apiKey'],
            },
        };
        /**
         * Permissions required by MinerU toolset
         */
        this.permissions = [
            {
                type: 'filesystem',
                operations: ['read', 'write', 'list'],
                scope: [],
            },
        ];
    }
    /**
     * Validate toolset configuration.
     *
     * A missing config is accepted (nothing to validate yet). When a config object is
     * present it must carry an `apiKey`.
     *
     * Declared `async` so a missing key surfaces as a rejected promise; previously the error
     * was thrown synchronously from a function that otherwise returned a Promise, which
     * bypassed `.catch()` handlers on the returned value.
     *
     * @param config - Toolset configuration, possibly null/undefined.
     * @returns {Promise<void>} Resolves when the config is acceptable.
     * @throws {Error} (as a rejection) when `config` is present but has no `apiKey`.
     */
    async validateConfig(config) {
        if (!config) {
            return;
        }
        if (!config.apiKey) {
            throw new Error('MinerU apiKey is required');
        }
    }
    /**
     * Create a MinerU toolset instance.
     *
     * `config` may be null/undefined during the authorization phase. It may be either a
     * toolset object carrying a `credentials` property or a flat credentials object.
     * Lookup priority: `config.credentials` > `config` itself > empty object.
     *
     * @param config - Toolset object or flat credentials.
     * @returns {Promise<MinerUToolset>} Toolset wired with credentials and injected services.
     */
    async create(config) {
        const hasCredentials = Boolean(config) && typeof config === 'object' && 'credentials' in config;
        // A toolset whose `credentials` is null/undefined falls back to the object itself.
        const creds = (hasCredentials ? config.credentials : undefined) ?? config ?? {};
        const { apiUrl, apiKey, isOcr, enableFormula, enableTable, language, modelVersion } = creds;
        return new MinerUToolset({
            apiUrl,
            apiKey,
            isOcr,
            enableFormula,
            enableTable,
            language,
            modelVersion,
            configService: this.configService,
            resultParser: this.resultParser,
        });
    }
    /**
     * Create the tool list exposed by the strategy itself.
     *
     * IMPORTANT:
     * The console UI requires builtin providers to expose at least one tool so users can
     * enable it (otherwise it fails the "Enable at least one tool" validation).
     *
     * The tool returned here is built without credentials and is meant for listing/preview
     * and toggling in the UI. Actual execution uses the toolset instance created by
     * `create()` -> `MinerUToolset.initTools()`, which passes the stored apiUrl/apiKey.
     *
     * @returns {Array} A single MinerU tool built with default parsing settings.
     */
    createTools() {
        // Defaults used when no authorization-page configuration is available.
        const listingDefaults = {
            isOcr: true,
            enableFormula: true,
            enableTable: true,
            language: 'ch',
            modelVersion: 'pipeline',
        };
        // No credentials and no file system at listing time.
        return [
            buildMinerUTool(this.configService, this.resultParser, undefined, undefined, listingDefaults),
        ];
    }
};
MinerUToolsetStrategy = __decorate([
    Injectable(),
    ToolsetStrategy(MinerU),
    __param(0, Inject(forwardRef(() => ConfigService))),
    __param(1, Inject(MinerUResultParserService)),
    __metadata("design:paramtypes", [ConfigService,
        MinerUResultParserService])
], MinerUToolsetStrategy);
306
- export { MinerUToolsetStrategy };
@@ -1,35 +0,0 @@
1
- import { XpFileSystem } from '@xpert-ai/plugin-sdk';
2
- import { z } from 'zod';
3
- import { ConfigService } from '@nestjs/config';
4
- import { MinerUResultParserService } from './result-parser.service.js';
5
- import { MinerUIntegrationOptions } from './types.js';
6
- /**
7
- * Default parsing settings for MinerU tool
- *
- * Every field is optional. As implemented by buildMinerUTool, an undefined isOcr /
- * enableFormula / enableTable is treated as enabled, a string value counts as enabled
- * only when it is exactly 'true', and language / modelVersion fall back to 'ch' / 'pipeline'.
8
- */
9
- export interface MinerUToolDefaults {
10
- isOcr?: boolean | string; // OCR toggle: boolean, or the string 'true' / 'false'
11
- enableFormula?: boolean | string; // formula-recognition toggle: boolean, or 'true' / 'false'
12
- enableTable?: boolean | string; // table-recognition toggle: boolean, or 'true' / 'false'
13
- language?: 'en' | 'ch'; // document language code sent with the parsing task
14
- modelVersion?: 'pipeline' | 'vlm'; // model version sent with the parsing task
15
- }
16
- /**
17
- * Build MinerU PDF parser tool
18
- * This tool converts PDF files to markdown format using MinerU service
- *
- * @param configService - config service handed to the MinerU client
- * @param resultParser - parser that turns the MinerU task result into chunks and metadata
- * @param options - optional MinerU connection options (API URL / API key)
- * @param fileSystem - optional file system handed to the client and the result parser
- * @param defaults - optional parsing settings applied to every invocation
- * @returns a structured tool whose only input field is `doc_url`; its result is a pair of
- * the markdown text and an artifact object with `files`, `taskId` and `metadata`
19
- */
20
- export declare function buildMinerUTool(configService: ConfigService, resultParser: MinerUResultParserService, options?: MinerUIntegrationOptions, fileSystem?: XpFileSystem, defaults?: MinerUToolDefaults): import("@langchain/core/tools").DynamicStructuredTool<z.ZodObject<{
21
- doc_url: z.ZodString; // URL of the PDF to convert
22
- }, "strip", z.ZodTypeAny, {
23
- doc_url?: string;
24
- }, {
25
- doc_url?: string;
26
- }>, {
27
- doc_url?: string;
28
- }, {
29
- doc_url?: string;
30
- }, (string | {
31
- files: any[]; // file artifacts built from the parsed assets
32
- taskId: string; // MinerU task identifier
33
- metadata: any; // metadata returned by the result parser
34
- })[]>;
35
- //# sourceMappingURL=mineru.tool.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"mineru.tool.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.tool.ts"],"names":[],"mappings":"AAEA,OAAO,EAAmB,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACrE,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAE/C,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AAEvE,OAAO,EAAqB,wBAAwB,EAAE,MAAM,YAAY,CAAC;AAEzE;;GAEG;AACH,MAAM,WAAW,kBAAkB;IAEjC,KAAK,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IACzB,aAAa,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IACjC,WAAW,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IAC/B,QAAQ,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC;IACvB,YAAY,CAAC,EAAE,UAAU,GAAG,KAAK,CAAC;CACnC;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAC7B,aAAa,EAAE,aAAa,EAC5B,YAAY,EAAE,yBAAyB,EACvC,OAAO,CAAC,EAAE,wBAAwB,EAClC,UAAU,CAAC,EAAE,YAAY,EACzB,QAAQ,CAAC,EAAE,kBAAkB;;;;;;;;;;;;;;MAmL9B"}
@@ -1,157 +0,0 @@
1
- import { tool } from '@langchain/core/tools';
2
- import { getCurrentTaskInput } from '@langchain/langgraph';
3
- import { getErrorMessage } from '@xpert-ai/plugin-sdk';
4
- import { z } from 'zod';
5
- import { MinerUClient } from './mineru.client.js';
6
- import { MinerUIntegration } from './types.js';
7
/** Base URL of the official MinerU API, used when no `apiUrl` is supplied. */
const MINERU_DEFAULT_API_URL = 'https://mineru.net/api/v4';
/** Known image extensions -> MIME type. Unknown image extensions fall back to `image/jpeg`. */
const MINERU_IMAGE_MIME_TYPES = new Map([
    ['png', 'image/png'],
    ['jpg', 'image/jpeg'],
    ['jpeg', 'image/jpeg'],
    ['gif', 'image/gif'],
    ['webp', 'image/webp'],
    ['bmp', 'image/bmp'],
    ['svg', 'image/svg+xml'],
]);
/** Known file extensions -> MIME type. Unknown file extensions fall back to `application/json`. */
const MINERU_FILE_MIME_TYPES = new Map([
    ['md', 'text/markdown'],
    ['json', 'application/json'],
    ['txt', 'text/plain'],
    ['html', 'text/html'],
    ['csv', 'text/csv'],
]);
/**
 * Resolve an on/off setting that may arrive as a boolean or as the string 'true' / 'false'.
 * `undefined` means "not configured" and resolves to `true`.
 */
function resolveMineruToggle(value) {
    if (value === undefined) {
        return true;
    }
    return typeof value === 'string' ? value === 'true' : value === true;
}
/** Produce a log-safe representation of the API key (never the full key). */
function maskMineruKeyForLog(apiKey) {
    if (!apiKey) {
        return 'missing';
    }
    return apiKey.length > 8 ? `${apiKey.slice(0, 4)}***${apiKey.slice(-4)}` : 'provided';
}
/** Derive a file name from the last path segment of the document URL, or 'document.pdf'. */
function mineruFileNameFromUrl(docUrl) {
    try {
        return new URL(docUrl).pathname.split('/').pop() || 'document.pdf';
    }
    catch {
        // Not a parseable URL: keep the generic name and let the MinerU call report the problem.
        return 'document.pdf';
    }
}
/** Convert one parsed asset (type 'file' or 'image') into the file-artifact record returned to the caller. */
function toMineruFileArtifact(asset) {
    const fileName = asset.filePath?.split(/[/\\]/).pop() || asset.url?.split('/').pop() || 'file';
    // Only text after the last dot is an extension; a name without a dot gets the 'md' default
    // (splitting on '.' would otherwise yield the whole file name as its "extension").
    const dotIndex = fileName.lastIndexOf('.');
    const extension = (dotIndex >= 0 ? fileName.slice(dotIndex + 1).toLowerCase() : '') || 'md';
    const mimeType = asset.type === 'image'
        ? (MINERU_IMAGE_MIME_TYPES.get(extension) ?? 'image/jpeg')
        : (MINERU_FILE_MIME_TYPES.get(extension) ?? 'application/json');
    return {
        fileName,
        filePath: asset.filePath,
        fileUrl: asset.url,
        mimeType,
        extension,
    };
}
/**
 * Build MinerU PDF parser tool.
 * The tool converts a PDF (given by URL) to markdown using the MinerU service.
 *
 * @param configService - Config service handed to the MinerU client.
 * @param resultParser - Parser exposing `parseLocalTask` / `parseFromUrl`.
 * @param options - Optional `{ apiUrl, apiKey }`; `apiUrl` defaults to the official endpoint.
 * @param fileSystem - Optional file system handed to the client and the parser.
 * @param defaults - Optional parsing settings (`isOcr`, `enableFormula`, `enableTable`,
 *   `language`, `modelVersion`); toggles accept booleans or 'true' / 'false' strings.
 * @returns A `content_and_artifact` tool named `mineru_pdf_parser`. Invoking it yields
 *   `[markdown, { files, taskId, metadata }]`.
 * @throws {Error} On any failure the tool throws `MinerU processing failed: …` with the
 *   original error attached as `cause`.
 */
export function buildMinerUTool(configService, resultParser, options, fileSystem, defaults) {
    return tool(async (input) => {
        try {
            const { doc_url } = input;
            // Only the URL and the names of unexpected extra keys are logged, never their values.
            console.debug('[MinerU] tool invoked with input', { doc_url, extraKeys: Object.keys(input || {}).filter((k) => k !== 'doc_url') });
            if (!doc_url) {
                throw new Error('doc_url is required');
            }
            // Workspace folder comes from the current task state, with a fixed fallback.
            const currentState = getCurrentTaskInput();
            const workspacePath = currentState?.['sys']?.['volume'] ?? '/tmp/xpert';
            // Connection settings are fixed when the tool is built (options), not per call.
            const finalApiUrl = options?.apiUrl || MINERU_DEFAULT_API_URL;
            const finalApiKey = options?.apiKey;
            console.debug('[MinerU] buildMinerUTool config', {
                fromOptions: Boolean(options),
                apiUrl: finalApiUrl,
                apiKey: maskMineruKeyForLog(finalApiKey),
                defaults: {
                    isOcr: defaults?.isOcr,
                    enableFormula: defaults?.enableFormula,
                    enableTable: defaults?.enableTable,
                    language: defaults?.language,
                    modelVersion: defaults?.modelVersion,
                },
            });
            const mineruClient = new MinerUClient(configService, {
                fileSystem,
                integration: {
                    provider: MinerUIntegration,
                    options: {
                        apiUrl: finalApiUrl,
                        apiKey: finalApiKey,
                    },
                },
            });
            const finalFileName = mineruFileNameFromUrl(doc_url);
            const { taskId } = await mineruClient.createTask({
                url: doc_url,
                fileName: finalFileName,
                isOcr: resolveMineruToggle(defaults?.isOcr),
                enableFormula: resolveMineruToggle(defaults?.enableFormula),
                enableTable: resolveMineruToggle(defaults?.enableTable),
                language: defaults?.language || 'ch',
                modelVersion: defaults?.modelVersion || 'pipeline',
            });
            const documentInfo = {
                fileUrl: doc_url,
                name: finalFileName,
                folder: workspacePath,
            };
            let parsedResult;
            if (mineruClient.serverType === 'self-hosted') {
                // Self-hosted: the result is read back from the client right after task creation.
                const taskResult = mineruClient.getSelfHostedTask(taskId);
                if (!taskResult) {
                    throw new Error('Failed to get MinerU task result');
                }
                parsedResult = await resultParser.parseLocalTask(taskResult, taskId, documentInfo, fileSystem);
            }
            else {
                // Official API: wait for completion (5 * 60 * 1000 and 5000 are passed as the
                // client's wait arguments), then parse the zip it points to.
                const result = await mineruClient.waitForTask(taskId, 5 * 60 * 1000, 5000);
                parsedResult = await resultParser.parseFromUrl(result.full_zip_url, taskId, documentInfo, fileSystem);
            }
            // Only 'file' and 'image' assets are surfaced as artifacts.
            const fileArtifacts = (parsedResult.metadata?.assets ? [...parsedResult.metadata.assets] : [])
                .filter((asset) => asset.type === 'file' || asset.type === 'image')
                .map(toMineruFileArtifact);
            // Full markdown is returned untruncated; any preview truncation is left to the consumer.
            const markdownContent = parsedResult.chunks
                ?.map((chunk) => chunk.pageContent)
                .join('\n\n') || '';
            return [
                markdownContent,
                {
                    files: fileArtifacts,
                    taskId,
                    metadata: parsedResult.metadata,
                },
            ];
        }
        catch (error) {
            // Keep the original error reachable for debugging instead of flattening it to a string.
            throw new Error(`MinerU processing failed: ${getErrorMessage(error)}`, { cause: error });
        }
    }, {
        name: 'mineru_pdf_parser',
        description: 'Convert PDF files to markdown format using MinerU. Supports OCR, formula recognition, and table extraction. Returns markdown content and extracted files (images, JSON, etc.).',
        schema: z.object({
            doc_url: z.string().min(1).describe('PDF URL (required)'),
        }),
        responseFormat: 'content_and_artifact',
    });
}
@@ -1,50 +0,0 @@
1
- import { StructuredToolInterface, ToolSchemaBase } from '@langchain/core/tools';
2
- import { BuiltinToolset, XpFileSystem } from '@xpert-ai/plugin-sdk';
3
- import { ConfigService } from '@nestjs/config';
4
- import { MinerUResultParserService } from './result-parser.service.js';
5
- /**
6
- * Configuration for MinerU Toolset
- *
- * Combines the stored credentials / parsing settings with the runtime dependencies
- * (configService, resultParser, fileSystem) that MinerUToolset.initTools() hands to the tool.
7
- */
8
- export interface MinerUToolsetConfig {
9
- /**
10
- * MinerU API options stored in toolset credentials
11
- */
12
- apiUrl?: string; // MinerU base URL; initTools() falls back to https://mineru.net/api/v4
13
- apiKey?: string; // MinerU API key; masked whenever it is logged
14
- fileSystem?: XpFileSystem; // optional file system passed through to the tool
15
- configService?: ConfigService; // initTools() throws when this is missing
16
- resultParser?: MinerUResultParserService; // initTools() throws when this is missing
17
- isOcr?: boolean | string; // OCR toggle: boolean, or the string 'true' / 'false'
18
- enableFormula?: boolean | string; // formula-recognition toggle: boolean, or 'true' / 'false'
19
- enableTable?: boolean | string; // table-recognition toggle: boolean, or 'true' / 'false'
20
- language?: 'en' | 'ch'; // initTools() falls back to 'ch'
21
- modelVersion?: 'pipeline' | 'vlm'; // initTools() falls back to 'pipeline'
22
- }
23
- /**
24
- * MinerU Toolset implementation
25
- * Provides PDF to markdown conversion tool using MinerU service
26
- */
27
- export declare class MinerUToolset extends BuiltinToolset<StructuredToolInterface, MinerUToolsetConfig> {
28
- private readonly config; // the config object received by the constructor
29
- tools: any[]; // starts empty; replaced by initTools()
30
- /**
31
- * Constructor for MinerU Toolset
32
- * Accepts config which contains credentials and dependencies
33
- * Note: Using 'as any' for params because TBuiltinToolsetParams requires system-provided
34
- * properties (tenantId, env) that are added at runtime
35
- */
36
- constructor(config: MinerUToolsetConfig);
37
- /**
38
- * Validate credentials for MinerU toolset
39
- * Note: During authorization phase, credentials may be incomplete.
40
- * configService and resultParser are runtime dependencies injected by the strategy.
41
- * We don't validate anything here to allow authorization to proceed.
- * The implementation body is empty, so the returned promise always resolves.
42
- */
43
- _validateCredentials(credentials: MinerUToolsetConfig): Promise<void>;
44
- /**
45
- * Initialize tools for MinerU toolset
46
- * Creates the PDF parser tool with necessary dependencies
- * Stores the created tools on `tools` and resolves with the same array.
- * Throws when configService or resultParser is missing from the config.
47
- */
48
- initTools(): Promise<StructuredToolInterface<ToolSchemaBase, any, any>[]>;
49
- }
50
- //# sourceMappingURL=mineru.toolset.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"mineru.toolset.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.toolset.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,uBAAuB,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAChF,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpE,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAC/C,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AAIvE;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC;;OAEG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,YAAY,CAAC;IAC1B,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,YAAY,CAAC,EAAE,yBAAyB,CAAC;IAGzC,KAAK,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IACzB,aAAa,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IACjC,WAAW,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IAC/B,QAAQ,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC;IACvB,YAAY,CAAC,EAAE,UAAU,GAAG,KAAK,CAAC;CACnC;AAED;;;GAGG;AACH,qBAAa,aAAc,SAAQ,cAAc,CAAC,uBAAuB,EAAE,mBAAmB,CAAC;IAC7F,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAsB;IAEpC,KAAK,EAAE,GAAG,EAAE,CAAM;IAE3B;;;;;OAKG;gBACS,MAAM,EAAE,mBAAmB;IAgBvC;;;;;OAKG;IACY,oBAAoB,CAAC,WAAW,EAAE,mBAAmB,GAAG,OAAO,CAAC,IAAI,CAAC;IAKpF;;;OAGG;IACY,SAAS,IAAI,OAAO,CAAC,uBAAuB,CAAC,cAAc,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC;CA+DzF"}
@@ -1,95 +0,0 @@
1
- import { BuiltinToolset } from '@xpert-ai/plugin-sdk';
2
- import { buildMinerUTool } from './mineru.tool.js';
3
/** Produce a log-safe representation of the API key (never the full key). */
function redactMineruApiKey(apiKey) {
    if (!apiKey) {
        return 'missing';
    }
    return apiKey.length > 8
        ? `${apiKey.substring(0, 4)}...${apiKey.substring(apiKey.length - 4)}`
        : '***';
}
/**
 * Convert an on/off setting ('true' / 'false' string or boolean) to a boolean.
 * An unset value (undefined/null) yields `fallback` instead of silently becoming `false`.
 */
function toMineruBoolean(value, fallback = true) {
    if (value === undefined || value === null) {
        return fallback;
    }
    return value === 'true' || value === true;
}
/**
 * Build a plain, serializable summary of the toolset config for logging.
 * Injected services are reported only as present/absent: serializing them could throw on
 * circular references or write their internals to the log.
 */
function summarizeMineruConfig(config) {
    const { apiUrl, apiKey, isOcr, enableFormula, enableTable, language, modelVersion, configService, resultParser, fileSystem, } = config ?? {};
    return {
        apiUrl,
        apiKey: redactMineruApiKey(apiKey),
        isOcr,
        enableFormula,
        enableTable,
        language,
        modelVersion,
        hasConfigService: Boolean(configService),
        hasResultParser: Boolean(resultParser),
        hasFileSystem: Boolean(fileSystem),
    };
}
/** Log through the base-class logger when the toolset has one; otherwise do nothing. */
function logMineruToolset(toolset, message) {
    if ('logger' in toolset && toolset.logger) {
        toolset.logger.log(message);
    }
}
/**
 * MinerU Toolset implementation.
 * Provides the PDF-to-markdown conversion tool backed by the MinerU service.
 */
export class MinerUToolset extends BuiltinToolset {
    /**
     * @param config - Credentials / parsing settings plus the runtime dependencies
     *   (`configService`, `resultParser`, optional `fileSystem`).
     *   The base class is constructed with the provider name 'mineru', no params, and this config.
     */
    constructor(config) {
        super('mineru', undefined, config);
        // Ensure `tools` exists even if upstream BuiltinToolset typings differ across versions.
        this.tools = [];
        this.config = config;
        logMineruToolset(this, `[MinerU] MinerUToolset constructor received config: ${JSON.stringify(summarizeMineruConfig(config), null, 2)}`);
    }
    /**
     * Validate credentials for MinerU toolset.
     * Intentionally a no-op: during the authorization phase credentials may be incomplete,
     * and nothing is checked here so that authorization can proceed.
     *
     * @param credentials - Ignored.
     * @returns {Promise<void>} Always resolves.
     */
    async _validateCredentials(credentials) {
        // No validation during the authorization phase.
    }
    /**
     * Initialize tools for MinerU toolset.
     * Builds the PDF parser tool from the stored config and stores it on `this.tools`.
     *
     * Defaults applied for unset values: official API URL, all three toggles enabled,
     * language 'ch', model version 'pipeline'.
     *
     * @returns {Promise<Array>} The created tools (same array as `this.tools`).
     * @throws {Error} When `configService` or `resultParser` is missing from the config.
     */
    async initTools() {
        const { configService, resultParser, apiUrl, apiKey, fileSystem, isOcr, enableFormula, enableTable, language, modelVersion } = this.config;
        const maskedApiKey = redactMineruApiKey(apiKey);
        logMineruToolset(this, `[MinerU] MinerUToolset.initTools() - config keys: ${Object.keys(this.config).join(', ')}, hasApiKey: ${'apiKey' in this.config}, apiKey: ${maskedApiKey}`);
        if (!configService || !resultParser) {
            throw new Error('ConfigService and MinerUResultParserService are required');
        }
        const finalApiUrl = apiUrl || 'https://mineru.net/api/v4';
        logMineruToolset(this, `[MinerU] MinerUToolset.initTools() - passing to buildMinerUTool: apiUrl=${finalApiUrl}, apiKey=${maskedApiKey}`);
        this.tools = [
            buildMinerUTool(configService, resultParser, {
                apiUrl: finalApiUrl,
                apiKey,
            }, fileSystem, {
                // Unset toggles stay enabled, matching the schema default ('true').
                isOcr: toMineruBoolean(isOcr),
                enableFormula: toMineruBoolean(enableFormula),
                enableTable: toMineruBoolean(enableTable),
                language: language || 'ch',
                modelVersion: modelVersion || 'pipeline',
            }),
        ];
        return this.tools;
    }
}