@helloxiaohu/plugin-mineru6 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/README.md +101 -0
  2. package/dist/index.d.ts +6 -0
  3. package/dist/index.d.ts.map +1 -0
  4. package/dist/index.js +40 -0
  5. package/dist/lib/integration.strategy.d.ts +10 -0
  6. package/dist/lib/integration.strategy.d.ts.map +1 -0
  7. package/dist/lib/integration.strategy.js +118 -0
  8. package/dist/lib/mineru-toolset.strategy.d.ts +221 -0
  9. package/dist/lib/mineru-toolset.strategy.d.ts.map +1 -0
  10. package/dist/lib/mineru-toolset.strategy.js +236 -0
  11. package/dist/lib/mineru.client.d.ts +120 -0
  12. package/dist/lib/mineru.client.d.ts.map +1 -0
  13. package/dist/lib/mineru.client.js +456 -0
  14. package/dist/lib/mineru.controller.d.ts +9 -0
  15. package/dist/lib/mineru.controller.d.ts.map +1 -0
  16. package/dist/lib/mineru.controller.js +41 -0
  17. package/dist/lib/mineru.plugin.d.ts +13 -0
  18. package/dist/lib/mineru.plugin.d.ts.map +1 -0
  19. package/dist/lib/mineru.plugin.js +52 -0
  20. package/dist/lib/mineru.tool.d.ts +75 -0
  21. package/dist/lib/mineru.tool.d.ts.map +1 -0
  22. package/dist/lib/mineru.tool.js +141 -0
  23. package/dist/lib/mineru.toolset.d.ts +51 -0
  24. package/dist/lib/mineru.toolset.d.ts.map +1 -0
  25. package/dist/lib/mineru.toolset.js +52 -0
  26. package/dist/lib/path-meta.d.ts +5 -0
  27. package/dist/lib/path-meta.d.ts.map +1 -0
  28. package/dist/lib/path-meta.js +8 -0
  29. package/dist/lib/result-parser.service.d.ts +18 -0
  30. package/dist/lib/result-parser.service.d.ts.map +1 -0
  31. package/dist/lib/result-parser.service.js +171 -0
  32. package/dist/lib/transformer-mineru.strategy.d.ts +95 -0
  33. package/dist/lib/transformer-mineru.strategy.d.ts.map +1 -0
  34. package/dist/lib/transformer-mineru.strategy.js +163 -0
  35. package/dist/lib/types.d.ts +53 -0
  36. package/dist/lib/types.d.ts.map +1 -0
  37. package/dist/lib/types.js +40 -0
  38. package/package.json +62 -0
@@ -0,0 +1,236 @@
1
+ import { __decorate, __metadata, __param } from "tslib";
2
+ import { Injectable, forwardRef, Inject } from '@nestjs/common';
3
+ import { ConfigService } from '@nestjs/config';
4
+ import { ToolsetStrategy, } from '@xpert-ai/plugin-sdk';
5
+ import { MinerUResultParserService } from './result-parser.service.js';
6
+ import { MinerUToolset } from './mineru.toolset.js';
7
+ import { MinerU, icon } from './types.js';
8
+ import { buildMinerUTool } from './mineru.tool.js';
9
/**
 * ToolsetStrategy for the MinerU PDF parser tool.
 * Registers MinerU as a toolset that can be used in agent workflows.
 *
 * The binding is declared with `let` because the decorator application that
 * follows this class expression assigns the decorated class back to it.
 */
let MinerUToolsetStrategy = class MinerUToolsetStrategy {
    /**
     * @param configService Config service instance; forwarded to the toolset and to the preview tool.
     * @param resultParser MinerU result parser instance; forwarded to the toolset and to the preview tool.
     */
    constructor(configService, resultParser) {
        this.configService = configService;
        this.resultParser = resultParser;
        /**
         * Metadata for MinerU toolset
         */
        this.meta = {
            author: 'Xpert AI',
            tags: ['pdf', 'markdown', 'parser', 'ocr', 'mineru', 'document', 'extraction'],
            name: MinerU,
            label: {
                en_US: 'MinerU PDF Parser',
                zh_Hans: 'MinerU PDF 解析器',
            },
            description: {
                en_US: 'Convert PDF files to markdown format using MinerU. Supports OCR, formula recognition, and table extraction.',
                zh_Hans: '使用 MinerU 将 PDF 文件转换为 Markdown 格式。支持 OCR、公式识别和表格提取。',
            },
            icon: {
                svg: icon,
                color: '#14b8a6',
            },
            configSchema: {
                type: 'object',
                properties: {
                    /**
                     * NOTE:
                     * We intentionally keep MinerU as a "self-contained" toolset that stores its own API credentials,
                     * instead of relying on the platform IntegrationPermission flow.
                     *
                     * Reason: during the built-in toolset authorization step, the platform may send `credentials = null`,
                     * and backend may access `credentials.integration`, causing a 500 (`Cannot read properties of null (reading 'integration')`).
                     * Defining API fields directly ensures the authorization UI renders fields and always submits an object.
                     */
                    apiUrl: {
                        type: 'string',
                        title: {
                            en_US: 'Base URL',
                            zh_Hans: 'Base URL',
                        },
                        description: {
                            en_US: 'MinerU API base url. Official: https://mineru.net/api/v4',
                            zh_Hans: 'MinerU 服务地址。官方: https://mineru.net/api/v4',
                        },
                        default: 'https://mineru.net/api/v4',
                    },
                    apiKey: {
                        type: 'string',
                        title: {
                            en_US: 'API Key',
                            zh_Hans: 'API Key',
                        },
                        description: {
                            en_US: 'The API Key of the MinerU server (required for official API)',
                            zh_Hans: 'MinerU 服务令牌(官方 API 必填)',
                        },
                        'x-ui': {
                            component: 'secretInput',
                            label: 'API Key',
                            placeholder: 'MinerU API Key',
                            revealable: true,
                            maskSymbol: '*',
                            persist: true,
                        },
                    },
                    serverType: {
                        type: 'string',
                        title: {
                            en_US: 'Server Type',
                            zh_Hans: '服务类型',
                        },
                        description: {
                            en_US: 'Select MinerU service type: official API or self-hosted',
                            zh_Hans: '选择 MinerU 服务类型:官方 API 或自部署',
                        },
                        enum: ['official', 'self-hosted'],
                        default: 'official',
                    },
                    // Default parsing settings (optional, can be overridden when calling the tool)
                    isOcr: {
                        type: 'boolean',
                        title: {
                            en_US: 'Enable OCR',
                            zh_Hans: '启用 OCR',
                        },
                        description: {
                            en_US: 'Enable OCR for image-based PDFs (default: true)',
                            zh_Hans: '为基于图像的 PDF 启用 OCR(默认:true)',
                        },
                        default: true,
                    },
                    enableFormula: {
                        type: 'boolean',
                        title: {
                            en_US: 'Enable Formula Recognition',
                            zh_Hans: '启用公式识别',
                        },
                        description: {
                            en_US: 'Enable formula recognition (default: true)',
                            zh_Hans: '启用公式识别(默认:true)',
                        },
                        default: true,
                    },
                    enableTable: {
                        type: 'boolean',
                        title: {
                            en_US: 'Enable Table Recognition',
                            zh_Hans: '启用表格识别',
                        },
                        description: {
                            en_US: 'Enable table recognition (default: true)',
                            zh_Hans: '启用表格识别(默认:true)',
                        },
                        default: true,
                    },
                    language: {
                        type: 'string',
                        title: {
                            en_US: 'Document Language',
                            zh_Hans: '文档语言',
                        },
                        description: {
                            en_US: 'Document language: "en" for English, "ch" for Chinese (default: "ch")',
                            zh_Hans: '文档语言:"en" 表示英语,"ch" 表示中文(默认:"ch")',
                        },
                        enum: ['en', 'ch'],
                        default: 'ch',
                    },
                    modelVersion: {
                        type: 'string',
                        title: {
                            en_US: 'Model Version',
                            zh_Hans: '模型版本',
                        },
                        description: {
                            en_US: 'Model version: "pipeline" or "vlm" (default: "pipeline")',
                            zh_Hans: '模型版本:"pipeline" 或 "vlm"(默认:"pipeline")',
                        },
                        enum: ['pipeline', 'vlm'],
                        default: 'pipeline',
                    },
                },
                required: ['serverType'], // apiKey required only for official, validated in validateConfig
            },
        };
        /**
         * Permissions required by MinerU toolset
         */
        this.permissions = [
            {
                type: 'filesystem',
                operations: ['read', 'write', 'list'],
                scope: [],
            },
        ];
    }
    /**
     * Validate toolset configuration.
     *
     * A missing config is accepted without checks (config may be null/undefined
     * during the authorization phase, see `create`). Otherwise `serverType`
     * defaults to 'official', and the official server type requires a non-blank
     * `apiKey`.
     *
     * Declared `async` so that a validation failure is always delivered as a
     * rejected Promise; the method never throws synchronously.
     *
     * @param config Toolset configuration object, or null/undefined.
     * @returns {Promise<void>} Resolves when the configuration is acceptable.
     *   Rejects with an Error when the server type is 'official' and `apiKey`
     *   is missing, empty, or only whitespace.
     */
    async validateConfig(config) {
        if (!config) {
            return;
        }
        const serverType = config.serverType ?? 'official';
        if (serverType !== 'official') {
            return;
        }
        // A whitespace-only key can never authenticate, so treat it like a missing key.
        const apiKey = typeof config.apiKey === 'string' ? config.apiKey.trim() : config.apiKey;
        if (!apiKey) {
            throw new Error('MinerU apiKey is required for official serverType');
        }
    }
    /**
     * Create MinerU toolset instance.
     * Note: config may be null/undefined during authorization phase; an empty
     * config is used in that case.
     *
     * @param config Toolset configuration, or null/undefined.
     * @returns {Promise<MinerUToolset>} Toolset built from `config` plus the
     *   `configService` and `resultParser` dependencies of this strategy.
     */
    async create(config) {
        // Dependencies are written last so they always win over same-named keys in config.
        const configWithDependencies = {
            ...(config || {}),
            configService: this.configService,
            resultParser: this.resultParser,
        };
        return new MinerUToolset(configWithDependencies);
    }
    /**
     * Create tools for MinerU toolset.
     * Tools are created dynamically in MinerUToolset.initTools()
     * based on the toolset credentials/configuration.
     *
     * @returns {Array} A single-element array holding the MinerU tool built without credentials.
     */
    createTools() {
        /**
         * IMPORTANT:
         * The console UI requires builtin providers to expose at least one tool so users can
         * enable it (otherwise it fails the "Enable at least one tool" validation).
         *
         * The returned tools here are used for listing/preview & toggling in UI. Actual execution
         * will use the toolset instance created by `create()` -> `MinerUToolset.initTools()`,
         * which wires credentials (apiUrl/apiKey/serverType) correctly.
         */
        // Defaults used if user doesn't pass tool-call parameters
        const previewDefaults = {
            isOcr: true,
            enableFormula: true,
            enableTable: true,
            language: 'ch',
            modelVersion: 'pipeline',
        };
        return [
            buildMinerUTool(this.configService, this.resultParser,
            // No credentials at listing time
            undefined, undefined, previewDefaults),
        ];
    }
};
228
// Decorators applied to the MinerUToolsetStrategy class, listed in the same
// order the compiler emitted them: class-level decorators first, then the
// constructor-parameter injections (index 0 and 1), then the constructor
// parameter type metadata.
const minerUToolsetStrategyDecorators = [
    Injectable(),
    ToolsetStrategy(MinerU),
    __param(0, Inject(forwardRef(() => ConfigService))),
    __param(1, Inject(MinerUResultParserService)),
    __metadata("design:paramtypes", [ConfigService, MinerUResultParserService]),
];
// Replace the class binding with whatever __decorate returns for it.
MinerUToolsetStrategy = __decorate(minerUToolsetStrategyDecorators, MinerUToolsetStrategy);
236
+ export { MinerUToolsetStrategy };
@@ -0,0 +1,120 @@
1
+ import { IIntegration } from '@metad/contracts';
2
+ import { ConfigService } from '@nestjs/config';
3
+ import { XpFileSystem } from '@xpert-ai/plugin-sdk';
4
+ import { AxiosResponse } from 'axios';
5
+ import { MinerUIntegrationOptions, MineruSelfHostedTaskResult, MinerUServerType } from './types.js';
6
+ /** Options accepted by `MinerUClient.createTask`. Every field is optional. */ interface CreateTaskOptions {
7
+ /** NOTE(review): presumably the remote URL of the document to parse — confirm in mineru.client.ts. */ url?: string;
8
+ /** NOTE(review): presumably a local path of the document to upload — confirm in mineru.client.ts. */ filePath?: string;
9
+ fileName?: string;
10
+ /** Shares its name with the toolset config field `isOcr` ("Enable OCR"); presumably the same switch — confirm. */ isOcr?: boolean;
11
+ /** Shares its name with the toolset config field `enableFormula`; presumably the same switch — confirm. */ enableFormula?: boolean;
12
+ /** Shares its name with the toolset config field `enableTable`; presumably the same switch — confirm. */ enableTable?: boolean;
13
+ /** Typed as a free-form string here; the toolset config restricts its own `language` field to "en" | "ch". */ language?: string;
14
+ /** Typed as a free-form string here; the toolset config restricts its own `modelVersion` field to "pipeline" | "vlm". */ modelVersion?: string;
15
+ dataId?: string;
16
+ pageRanges?: string;
17
+ extraFormats?: string[];
18
+ callbackUrl?: string;
19
+ seed?: string;
20
+ /** Optional parse method used by self-hosted MinerU deployments */
21
+ parseMethod?: string;
22
+ /** Optional backend identifier used by self-hosted MinerU deployments */
23
+ backend?: string;
24
+ /** Optional MinerU backend server URL (used when backend is VLM client) */
25
+ serverUrl?: string;
26
+ /** Whether to request intermediate JSON payloads from self-hosted MinerU */
27
+ returnMiddleJson?: boolean;
28
+ }
29
+ /** One entry of `CreateBatchTaskOptions.files`; `url` is the only required field. */ interface CreateBatchTaskFile {
30
+ /** NOTE(review): presumably the remote URL of the document to parse — confirm in mineru.client.ts. */ url: string;
31
+ /** Per-file flag; same name as the single-task `CreateTaskOptions.isOcr`. */ isOcr?: boolean;
32
+ dataId?: string;
33
+ pageRanges?: string;
34
+ }
35
+ /** Options accepted by `MinerUClient.createBatchTask`; `files` is the only required field. */ interface CreateBatchTaskOptions {
36
+ /** Files of the batch; each entry carries its own `url` and optional per-file settings. */ files: CreateBatchTaskFile[];
37
+ enableFormula?: boolean;
38
+ enableTable?: boolean;
39
+ language?: string;
40
+ modelVersion?: string;
41
+ extraFormats?: string[];
42
+ callbackUrl?: string;
43
+ seed?: string;
44
+ }
45
+ /** Optional second argument of `MinerUClient.getTaskResult`. Every field is optional. */ interface TaskResultOptions {
46
+ enableFormula?: boolean;
47
+ enableTable?: boolean;
48
+ language?: string;
49
+ }
50
+ /** Client for MinerU extraction tasks; per the method docs below it distinguishes official and self-hosted deployments. */ export declare class MinerUClient {
51
+ private readonly configService;
52
+ private readonly permissions?;
53
+ private readonly logger;
54
+ private readonly baseUrl;
55
+ private readonly token?;
56
+ /** Server type of this client; the possible values are declared as `MinerUServerType` in ./types.js. */ readonly serverType: MinerUServerType;
57
+ private readonly localTasks;
58
+ /** File system handle, or undefined. NOTE(review): presumably the `fileSystem` passed via the constructor's `permissions` — confirm. */ get fileSystem(): XpFileSystem | undefined;
59
+ /** `permissions` is optional and may carry a `fileSystem` and/or a partial `integration` record. */ constructor(configService: ConfigService, permissions?: {
60
+ fileSystem?: XpFileSystem;
61
+ integration?: Partial<IIntegration<MinerUIntegrationOptions>>;
62
+ });
63
+ /**
64
+ * Create a MinerU extraction task. For self-hosted deployments the file will be uploaded immediately
65
+ * and the parsed result cached locally, while official deployments follow the async task lifecycle. Resolves with the new task's `taskId`.
66
+ */
67
+ createTask(options: CreateTaskOptions): Promise<{
68
+ taskId: string;
69
+ }>;
70
+ /**
71
+ * Create a batch MinerU extraction task. Only supported for official MinerU deployments. Resolves with `batchId` and, optionally, `fileUrls`.
72
+ */
73
+ createBatchTask(options: CreateBatchTaskOptions): Promise<{
74
+ batchId: string;
75
+ fileUrls?: string[];
76
+ }>;
77
+ /** Synchronous lookup by task id. NOTE(review): presumably reads the locally cached self-hosted result and yields undefined when none exists — confirm. */ getSelfHostedTask(taskId: string): MineruSelfHostedTaskResult | undefined;
78
+ /**
79
+ * Query official task status or results. Every field of the resolved object (`full_zip_url`, `full_url`, `content`, `status`) is optional.
80
+ */
81
+ getTaskResult(taskId: string, options?: TaskResultOptions): Promise<{
82
+ full_zip_url?: string;
83
+ full_url?: string;
84
+ content?: string;
85
+ status?: string;
86
+ }>;
87
+ /**
88
+ * Query batch task results. Only supported for official MinerU deployments. The resolved value is untyped (`any`).
89
+ */
90
+ getBatchResult(batchId: string): Promise<any>;
91
+ /**
92
+ * Wait for a task to complete and return the result when available. `timeoutMs` and `intervalMs` are optional; their default values are not visible in this declaration.
93
+ */
94
+ waitForTask(taskId: string, timeoutMs?: number, intervalMs?: number): Promise<any>;
95
+ private ensureOfficial;
96
+ private resolveServerType;
97
+ private resolveCredentials;
98
+ private readIntegrationOptions;
99
+ private normalizeBaseUrl;
100
+ private buildApiUrl;
101
+ private getOfficialHeaders;
102
+ private getSelfHostedHeaders;
103
+ private createOfficialTask;
104
+ private createSelfHostedTask;
105
+ private invokeSelfHostedParse;
106
+ private invokeSelfHostedParseV1;
107
+ private isSelfHostedApiV1;
108
+ private normalizeSelfHostedResponse;
109
+ private normalizeSelfHostedFileResult;
110
+ private normalizeImageMap;
111
+ private parseJsonSafe;
112
+ private buildLanguageList;
113
+ private booleanToString;
114
+ private downloadFile;
115
+ private extractFileName;
116
+ /** Resolves with the raw Axios response. NOTE(review): presumably fetches the self-hosted server's OpenAPI document — confirm. */ getSelfHostedOpenApiSpec(): Promise<AxiosResponse<any, any>>;
117
+ /** Resolves with no value. NOTE(review): presumably rejects when the official API token is not accepted — confirm in mineru.client.ts. */ validateOfficialApiToken(): Promise<void>;
118
+ }
119
+ export {};
120
+ //# sourceMappingURL=mineru.client.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"mineru.client.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAEhD,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAC/C,OAAO,EAAmB,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACrE,OAAc,EAAE,aAAa,EAAE,MAAM,OAAO,CAAC;AAK7C,OAAO,EAIL,wBAAwB,EAExB,0BAA0B,EAC1B,gBAAgB,EACjB,MAAM,YAAY,CAAC;AAIpB,UAAU,iBAAiB;IACzB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,mEAAmE;IACnE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yEAAyE;IACzE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,2EAA2E;IAC3E,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,4EAA4E;IAC5E,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED,UAAU,mBAAmB;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,sBAAsB;IAC9B,KAAK,EAAE,mBAAmB,EAAE,CAAC;IAC7B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,UAAU,iBAAiB;IACzB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AASD,qBAAa,YAAY;IAWrB,OAAO,CAAC,QAAQ,CAAC,aAAa;IAC9B,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC;IAX/B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAiC;IACxD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAS;IAChC,SAAgB,UAAU,EAAE,gBAAgB,CAAC;IAC7C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAiD;IAE5E,IAAI,UAAU,IAAI,YAAY,GAAG,SAAS,CAEzC;gBAEkB,aAAa,EAAE,aAAa,EAC5B,WAAW,CAAC,EAAE;QACvB,UAAU,CAAC,EAAE,YAAY,CAAC;QAC1B,WAAW,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,wBAAwB,CAAC,CAAC,CAAC;KACjE;IAkBP;;;OAGG;IACG,UAAU,CAAC,OAAO,EAAE,
iBAAiB,GAAG,OAAO,CAAC;QAAE,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IAYzE;;OAEG;IACG,eAAe,CAAC,OAAO,EAAE,sBAAsB,GAAG,OAAO,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;IAmCzG,iBAAiB,CAAC,MAAM,EAAE,MAAM,GAAG,0BAA0B,GAAG,SAAS;IAOzE;;OAEG;IACG,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC;QACxE,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,CAAC;IAoBF;;OAEG;IACG,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC;IAiBnD;;OAEG;IACG,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,SAAgB,EAAE,UAAU,SAAO,GAAG,OAAO,CAAC,GAAG,CAAC;IAsB7F,OAAO,CAAC,cAAc;IAMtB,OAAO,CAAC,iBAAiB;IAczB,OAAO,CAAC,kBAAkB;IAyB1B,OAAO,CAAC,sBAAsB;IAI9B,OAAO,CAAC,gBAAgB;IAIxB,OAAO,CAAC,WAAW;IAQnB,OAAO,CAAC,kBAAkB;IAO1B,OAAO,CAAC,oBAAoB;YAYd,kBAAkB;YA4BlB,oBAAoB;YASpB,qBAAqB;YA0DrB,uBAAuB;IA+CrC,OAAO,CAAC,iBAAiB;IAgBzB,OAAO,CAAC,2BAA2B;IAenC,OAAO,CAAC,6BAA6B;IAcrC,OAAO,CAAC,iBAAiB;IAQzB,OAAO,CAAC,aAAa;IAcrB,OAAO,CAAC,iBAAiB;IAQzB,OAAO,CAAC,eAAe;YAIT,YAAY;IAkB1B,OAAO,CAAC,eAAe;IA0BvB,wBAAwB,IAAI,OAAO,CAAC,aAAa,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;IAKtD,wBAAwB;CAU/B"}