@helloxiaohu/plugin-mineru 0.0.13 → 0.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -4
- package/dist/lib/mineru-toolset.strategy.d.ts +68 -3
- package/dist/lib/mineru-toolset.strategy.d.ts.map +1 -1
- package/dist/lib/mineru-toolset.strategy.js +75 -5
- package/dist/lib/mineru.tool.d.ts +23 -13
- package/dist/lib/mineru.tool.d.ts.map +1 -1
- package/dist/lib/mineru.tool.js +12 -6
- package/dist/lib/mineru.toolset.d.ts +5 -0
- package/dist/lib/mineru.toolset.d.ts.map +1 -1
- package/dist/lib/mineru.toolset.js +8 -2
- package/package.json +2 -1
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAexD,QAAA,MAAM,YAAY,gDAChB,CAAC;AAEH,QAAA,MAAM,MAAM,EAAE,WAAW,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,YAAY,CAAC,CA4BrD,CAAC;AAEF,eAAe,MAAM,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
2
|
import { readFileSync } from 'fs';
|
|
3
|
-
import {
|
|
3
|
+
import { fileURLToPath } from 'url';
|
|
4
|
+
import { dirname, join } from 'path';
|
|
4
5
|
import { MinerUPlugin } from './lib/mineru.plugin.js';
|
|
5
6
|
import { icon } from './lib/types.js';
|
|
6
|
-
|
|
7
|
-
const
|
|
8
|
-
const packageJson = JSON.parse(readFileSync(join(
|
|
7
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
8
|
+
const dir_name = dirname(__filename);
|
|
9
|
+
const packageJson = JSON.parse(readFileSync(join(dir_name, '../package.json'), 'utf8'));
|
|
9
10
|
const ConfigSchema = z.object({});
|
|
10
11
|
const plugin = {
|
|
11
12
|
meta: {
|
|
package/dist/lib/mineru-toolset.strategy.d.ts
CHANGED
|
@@ -42,8 +42,70 @@ export declare class MinerUToolsetStrategy implements IToolsetStrategy<MinerUToo
|
|
|
42
42
|
zh_Hans: string;
|
|
43
43
|
};
|
|
44
44
|
};
|
|
45
|
+
isOcr: {
|
|
46
|
+
type: string;
|
|
47
|
+
title: {
|
|
48
|
+
en_US: string;
|
|
49
|
+
zh_Hans: string;
|
|
50
|
+
};
|
|
51
|
+
description: {
|
|
52
|
+
en_US: string;
|
|
53
|
+
zh_Hans: string;
|
|
54
|
+
};
|
|
55
|
+
default: boolean;
|
|
56
|
+
};
|
|
57
|
+
enableFormula: {
|
|
58
|
+
type: string;
|
|
59
|
+
title: {
|
|
60
|
+
en_US: string;
|
|
61
|
+
zh_Hans: string;
|
|
62
|
+
};
|
|
63
|
+
description: {
|
|
64
|
+
en_US: string;
|
|
65
|
+
zh_Hans: string;
|
|
66
|
+
};
|
|
67
|
+
default: boolean;
|
|
68
|
+
};
|
|
69
|
+
enableTable: {
|
|
70
|
+
type: string;
|
|
71
|
+
title: {
|
|
72
|
+
en_US: string;
|
|
73
|
+
zh_Hans: string;
|
|
74
|
+
};
|
|
75
|
+
description: {
|
|
76
|
+
en_US: string;
|
|
77
|
+
zh_Hans: string;
|
|
78
|
+
};
|
|
79
|
+
default: boolean;
|
|
80
|
+
};
|
|
81
|
+
language: {
|
|
82
|
+
type: string;
|
|
83
|
+
title: {
|
|
84
|
+
en_US: string;
|
|
85
|
+
zh_Hans: string;
|
|
86
|
+
};
|
|
87
|
+
description: {
|
|
88
|
+
en_US: string;
|
|
89
|
+
zh_Hans: string;
|
|
90
|
+
};
|
|
91
|
+
enum: string[];
|
|
92
|
+
default: string;
|
|
93
|
+
};
|
|
94
|
+
modelVersion: {
|
|
95
|
+
type: string;
|
|
96
|
+
title: {
|
|
97
|
+
en_US: string;
|
|
98
|
+
zh_Hans: string;
|
|
99
|
+
};
|
|
100
|
+
description: {
|
|
101
|
+
en_US: string;
|
|
102
|
+
zh_Hans: string;
|
|
103
|
+
};
|
|
104
|
+
enum: string[];
|
|
105
|
+
default: string;
|
|
106
|
+
};
|
|
45
107
|
};
|
|
46
|
-
required:
|
|
108
|
+
required: any[];
|
|
47
109
|
};
|
|
48
110
|
};
|
|
49
111
|
/**
|
|
@@ -53,12 +115,15 @@ export declare class MinerUToolsetStrategy implements IToolsetStrategy<MinerUToo
|
|
|
53
115
|
constructor(configService: ConfigService, resultParser: MinerUResultParserService);
|
|
54
116
|
/**
|
|
55
117
|
* Validate toolset configuration
|
|
118
|
+
* Note: During authorization phase, config may be null/undefined or missing integration.
|
|
119
|
+
* Integration is injected by the permission system, so we only validate if config is provided.
|
|
56
120
|
*/
|
|
57
|
-
validateConfig(config: MinerUToolsetConfig): Promise<void>;
|
|
121
|
+
validateConfig(config: MinerUToolsetConfig | null | undefined): Promise<void>;
|
|
58
122
|
/**
|
|
59
123
|
* Create MinerU toolset instance
|
|
124
|
+
* Note: config may be null/undefined during authorization phase
|
|
60
125
|
*/
|
|
61
|
-
create(config: MinerUToolsetConfig): Promise<BuiltinToolset>;
|
|
126
|
+
create(config: MinerUToolsetConfig | null | undefined): Promise<BuiltinToolset>;
|
|
62
127
|
/**
|
|
63
128
|
* Create tools for MinerU toolset
|
|
64
129
|
* Tools are created dynamically in MinerUToolset.initTools()
|
|
package/dist/lib/mineru-toolset.strategy.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mineru-toolset.strategy.d.ts","sourceRoot":"","sources":["../../src/lib/mineru-toolset.strategy.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAC/C,OAAO,EACL,cAAc,EACd,gBAAgB,EAEhB,qBAAqB,EACrB,oBAAoB,EACrB,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AACvE,OAAO,EAAiB,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAGzE;;;GAGG;AACH,qBAEa,qBAAsB,YAAW,gBAAgB,CAAC,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"mineru-toolset.strategy.d.ts","sourceRoot":"","sources":["../../src/lib/mineru-toolset.strategy.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAC/C,OAAO,EACL,cAAc,EACd,gBAAgB,EAEhB,qBAAqB,EACrB,oBAAoB,EACrB,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AACvE,OAAO,EAAiB,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAGzE;;;GAGG;AACH,qBAEa,qBAAsB,YAAW,gBAAgB,CAAC,mBAAmB,CAAC;IAyH/E,OAAO,CAAC,QAAQ,CAAC,aAAa;IAE9B,OAAO,CAAC,QAAQ,CAAC,YAAY;IA1H/B;;OAEG;IACH,IAAI;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAiGF;IAEF;;OAEG;IACH,QAAQ,CAAC,WAAW,mDAWlB;gBAIiB,aAAa,EAAE,aAAa,EAE5B,YAAY,EAAE,yBAAyB;IAG1D;;;;OAIG;IACH,cAAc,CAAC,MAAM,EAAE,mBAAmB,GAAG,IAAI,GAAG,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC;IAS7E;;;OAGG;IACG,MAAM,CAAC,MAAM,EAAE,mBAAmB,GAAG,IAAI,GAAG,SAAS,GAAG,OAAO,CAAC,cAAc,CAAC;IAYrF;;;;OAIG;IACH,WAAW;CAKZ"}
|
|
package/dist/lib/mineru-toolset.strategy.js
CHANGED
|
@@ -42,12 +42,75 @@ let MinerUToolsetStrategy = class MinerUToolsetStrategy {
|
|
|
42
42
|
zh_Hans: 'MinerU 集成',
|
|
43
43
|
},
|
|
44
44
|
description: {
|
|
45
|
-
en_US: 'MinerU integration configuration',
|
|
46
|
-
zh_Hans: 'MinerU
|
|
45
|
+
en_US: 'MinerU integration configuration (API URL, API Key, Server Type). Configure this in the integration settings.',
|
|
46
|
+
zh_Hans: 'MinerU 集成配置(API URL、API Key、服务类型)。请在集成设置中配置。',
|
|
47
47
|
},
|
|
48
48
|
},
|
|
49
|
+
// Default parsing settings (optional, can be overridden when calling the tool)
|
|
50
|
+
isOcr: {
|
|
51
|
+
type: 'boolean',
|
|
52
|
+
title: {
|
|
53
|
+
en_US: 'Enable OCR',
|
|
54
|
+
zh_Hans: '启用 OCR',
|
|
55
|
+
},
|
|
56
|
+
description: {
|
|
57
|
+
en_US: 'Enable OCR for image-based PDFs (default: true)',
|
|
58
|
+
zh_Hans: '为基于图像的 PDF 启用 OCR(默认:true)',
|
|
59
|
+
},
|
|
60
|
+
default: true,
|
|
61
|
+
},
|
|
62
|
+
enableFormula: {
|
|
63
|
+
type: 'boolean',
|
|
64
|
+
title: {
|
|
65
|
+
en_US: 'Enable Formula Recognition',
|
|
66
|
+
zh_Hans: '启用公式识别',
|
|
67
|
+
},
|
|
68
|
+
description: {
|
|
69
|
+
en_US: 'Enable formula recognition (default: true)',
|
|
70
|
+
zh_Hans: '启用公式识别(默认:true)',
|
|
71
|
+
},
|
|
72
|
+
default: true,
|
|
73
|
+
},
|
|
74
|
+
enableTable: {
|
|
75
|
+
type: 'boolean',
|
|
76
|
+
title: {
|
|
77
|
+
en_US: 'Enable Table Recognition',
|
|
78
|
+
zh_Hans: '启用表格识别',
|
|
79
|
+
},
|
|
80
|
+
description: {
|
|
81
|
+
en_US: 'Enable table recognition (default: true)',
|
|
82
|
+
zh_Hans: '启用表格识别(默认:true)',
|
|
83
|
+
},
|
|
84
|
+
default: true,
|
|
85
|
+
},
|
|
86
|
+
language: {
|
|
87
|
+
type: 'string',
|
|
88
|
+
title: {
|
|
89
|
+
en_US: 'Document Language',
|
|
90
|
+
zh_Hans: '文档语言',
|
|
91
|
+
},
|
|
92
|
+
description: {
|
|
93
|
+
en_US: 'Document language: "en" for English, "ch" for Chinese (default: "ch")',
|
|
94
|
+
zh_Hans: '文档语言:"en" 表示英语,"ch" 表示中文(默认:"ch")',
|
|
95
|
+
},
|
|
96
|
+
enum: ['en', 'ch'],
|
|
97
|
+
default: 'ch',
|
|
98
|
+
},
|
|
99
|
+
modelVersion: {
|
|
100
|
+
type: 'string',
|
|
101
|
+
title: {
|
|
102
|
+
en_US: 'Model Version',
|
|
103
|
+
zh_Hans: '模型版本',
|
|
104
|
+
},
|
|
105
|
+
description: {
|
|
106
|
+
en_US: 'Model version: "pipeline" or "vlm" (default: "pipeline")',
|
|
107
|
+
zh_Hans: '模型版本:"pipeline" 或 "vlm"(默认:"pipeline")',
|
|
108
|
+
},
|
|
109
|
+
enum: ['pipeline', 'vlm'],
|
|
110
|
+
default: 'pipeline',
|
|
111
|
+
},
|
|
49
112
|
},
|
|
50
|
-
required: [
|
|
113
|
+
required: [], // Integration is injected by permission system, parsing settings are optional
|
|
51
114
|
},
|
|
52
115
|
};
|
|
53
116
|
/**
|
|
@@ -68,20 +131,27 @@ let MinerUToolsetStrategy = class MinerUToolsetStrategy {
|
|
|
68
131
|
}
|
|
69
132
|
/**
|
|
70
133
|
* Validate toolset configuration
|
|
134
|
+
* Note: During authorization phase, config may be null/undefined or missing integration.
|
|
135
|
+
* Integration is injected by the permission system, so we only validate if config is provided.
|
|
71
136
|
*/
|
|
72
137
|
validateConfig(config) {
|
|
73
|
-
|
|
138
|
+
// During authorization, config may be null/undefined
|
|
139
|
+
// Integration will be injected by the permission system later
|
|
140
|
+
if (config && !config.integration) {
|
|
74
141
|
throw new Error('MinerU integration is required');
|
|
75
142
|
}
|
|
76
143
|
return Promise.resolve();
|
|
77
144
|
}
|
|
78
145
|
/**
|
|
79
146
|
* Create MinerU toolset instance
|
|
147
|
+
* Note: config may be null/undefined during authorization phase
|
|
80
148
|
*/
|
|
81
149
|
async create(config) {
|
|
82
150
|
// Inject dependencies into config
|
|
151
|
+
// If config is null/undefined, create empty config (integration will be injected later)
|
|
152
|
+
const baseConfig = config || {};
|
|
83
153
|
const configWithDependencies = {
|
|
84
|
-
...
|
|
154
|
+
...baseConfig,
|
|
85
155
|
configService: this.configService,
|
|
86
156
|
resultParser: this.resultParser,
|
|
87
157
|
};
|
|
package/dist/lib/mineru.tool.d.ts
CHANGED
|
@@ -4,11 +4,21 @@ import { ConfigService } from '@nestjs/config';
|
|
|
4
4
|
import { MinerUResultParserService } from './result-parser.service.js';
|
|
5
5
|
import { IIntegration } from '@metad/contracts';
|
|
6
6
|
import { MinerUIntegrationOptions } from './types.js';
|
|
7
|
+
/**
|
|
8
|
+
* Default parsing settings for MinerU tool
|
|
9
|
+
*/
|
|
10
|
+
export interface MinerUToolDefaults {
|
|
11
|
+
isOcr?: boolean;
|
|
12
|
+
enableFormula?: boolean;
|
|
13
|
+
enableTable?: boolean;
|
|
14
|
+
language?: 'en' | 'ch';
|
|
15
|
+
modelVersion?: 'pipeline' | 'vlm';
|
|
16
|
+
}
|
|
7
17
|
/**
|
|
8
18
|
* Build MinerU PDF parser tool
|
|
9
19
|
* This tool converts PDF files to markdown format using MinerU service
|
|
10
20
|
*/
|
|
11
|
-
export declare function buildMinerUTool(configService: ConfigService, resultParser: MinerUResultParserService, integration?: Partial<IIntegration<MinerUIntegrationOptions>>, fileSystem?: XpFileSystem): import("@langchain/core/tools").DynamicStructuredTool<z.ZodObject<{
|
|
21
|
+
export declare function buildMinerUTool(configService: ConfigService, resultParser: MinerUResultParserService, integration?: Partial<IIntegration<MinerUIntegrationOptions>>, fileSystem?: XpFileSystem, defaults?: MinerUToolDefaults): import("@langchain/core/tools").DynamicStructuredTool<z.ZodObject<{
|
|
12
22
|
fileUrl: z.ZodNullable<z.ZodOptional<z.ZodString>>;
|
|
13
23
|
filePath: z.ZodNullable<z.ZodOptional<z.ZodString>>;
|
|
14
24
|
fileName: z.ZodNullable<z.ZodOptional<z.ZodString>>;
|
|
@@ -18,41 +28,41 @@ export declare function buildMinerUTool(configService: ConfigService, resultPars
|
|
|
18
28
|
language: z.ZodNullable<z.ZodOptional<z.ZodEnum<["en", "ch"]>>>;
|
|
19
29
|
modelVersion: z.ZodNullable<z.ZodOptional<z.ZodEnum<["pipeline", "vlm"]>>>;
|
|
20
30
|
}, "strip", z.ZodTypeAny, {
|
|
21
|
-
filePath?: string;
|
|
22
31
|
fileUrl?: string;
|
|
32
|
+
filePath?: string;
|
|
33
|
+
fileName?: string;
|
|
23
34
|
isOcr?: boolean;
|
|
24
35
|
enableFormula?: boolean;
|
|
25
36
|
enableTable?: boolean;
|
|
26
|
-
language?: "
|
|
37
|
+
language?: "en" | "ch";
|
|
27
38
|
modelVersion?: "pipeline" | "vlm";
|
|
28
|
-
fileName?: string;
|
|
29
39
|
}, {
|
|
30
|
-
filePath?: string;
|
|
31
40
|
fileUrl?: string;
|
|
41
|
+
filePath?: string;
|
|
42
|
+
fileName?: string;
|
|
32
43
|
isOcr?: boolean;
|
|
33
44
|
enableFormula?: boolean;
|
|
34
45
|
enableTable?: boolean;
|
|
35
|
-
language?: "
|
|
46
|
+
language?: "en" | "ch";
|
|
36
47
|
modelVersion?: "pipeline" | "vlm";
|
|
37
|
-
fileName?: string;
|
|
38
48
|
}>, {
|
|
39
|
-
filePath?: string;
|
|
40
49
|
fileUrl?: string;
|
|
50
|
+
filePath?: string;
|
|
51
|
+
fileName?: string;
|
|
41
52
|
isOcr?: boolean;
|
|
42
53
|
enableFormula?: boolean;
|
|
43
54
|
enableTable?: boolean;
|
|
44
|
-
language?: "
|
|
55
|
+
language?: "en" | "ch";
|
|
45
56
|
modelVersion?: "pipeline" | "vlm";
|
|
46
|
-
fileName?: string;
|
|
47
57
|
}, {
|
|
48
|
-
filePath?: string;
|
|
49
58
|
fileUrl?: string;
|
|
59
|
+
filePath?: string;
|
|
60
|
+
fileName?: string;
|
|
50
61
|
isOcr?: boolean;
|
|
51
62
|
enableFormula?: boolean;
|
|
52
63
|
enableTable?: boolean;
|
|
53
|
-
language?: "
|
|
64
|
+
language?: "en" | "ch";
|
|
54
65
|
modelVersion?: "pipeline" | "vlm";
|
|
55
|
-
fileName?: string;
|
|
56
66
|
}, (string | {
|
|
57
67
|
files: any[];
|
|
58
68
|
taskId: string;
|
|
package/dist/lib/mineru.tool.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mineru.tool.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.tool.ts"],"names":[],"mappings":"AAEA,OAAO,EAAmB,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACrE,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAE/C,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AACvE,OAAO,EAAE,YAAY,EAAsB,MAAM,kBAAkB,CAAC;AACpE,OAAO,EAAE,wBAAwB,EAAE,MAAM,YAAY,CAAC;AAEtD;;;GAGG;AACH,wBAAgB,eAAe,CAC7B,aAAa,EAAE,aAAa,EAC5B,YAAY,EAAE,yBAAyB,EACvC,WAAW,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,wBAAwB,CAAC,CAAC,EAC7D,UAAU,CAAC,EAAE,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
1
|
+
{"version":3,"file":"mineru.tool.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.tool.ts"],"names":[],"mappings":"AAEA,OAAO,EAAmB,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACrE,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAE/C,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AACvE,OAAO,EAAE,YAAY,EAAsB,MAAM,kBAAkB,CAAC;AACpE,OAAO,EAAE,wBAAwB,EAAE,MAAM,YAAY,CAAC;AAEtD;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC;IACvB,YAAY,CAAC,EAAE,UAAU,GAAG,KAAK,CAAC;CACnC;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAC7B,aAAa,EAAE,aAAa,EAC5B,YAAY,EAAE,yBAAyB,EACvC,WAAW,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,wBAAwB,CAAC,CAAC,EAC7D,UAAU,CAAC,EAAE,YAAY,EACzB,QAAQ,CAAC,EAAE,kBAAkB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MA2J9B"}
|
package/dist/lib/mineru.tool.js
CHANGED
|
@@ -7,10 +7,16 @@ import { MinerUClient } from './mineru.client.js';
|
|
|
7
7
|
* Build MinerU PDF parser tool
|
|
8
8
|
* This tool converts PDF files to markdown format using MinerU service
|
|
9
9
|
*/
|
|
10
|
-
export function buildMinerUTool(configService, resultParser, integration, fileSystem) {
|
|
10
|
+
export function buildMinerUTool(configService, resultParser, integration, fileSystem, defaults) {
|
|
11
11
|
return tool(async (input) => {
|
|
12
12
|
try {
|
|
13
13
|
const { fileUrl, filePath, fileName, isOcr, enableFormula, enableTable, language, modelVersion } = input;
|
|
14
|
+
// Use defaults from toolset config if not provided in input
|
|
15
|
+
const finalIsOcr = isOcr ?? defaults?.isOcr ?? true;
|
|
16
|
+
const finalEnableFormula = enableFormula ?? defaults?.enableFormula ?? true;
|
|
17
|
+
const finalEnableTable = enableTable ?? defaults?.enableTable ?? true;
|
|
18
|
+
const finalLanguage = language ?? defaults?.language ?? 'ch';
|
|
19
|
+
const finalModelVersion = modelVersion ?? defaults?.modelVersion ?? 'pipeline';
|
|
14
20
|
if (!fileUrl && !filePath) {
|
|
15
21
|
throw new Error('Either fileUrl or filePath must be provided');
|
|
16
22
|
}
|
|
@@ -48,11 +54,11 @@ export function buildMinerUTool(configService, resultParser, integration, fileSy
|
|
|
48
54
|
url: fileUrl,
|
|
49
55
|
filePath: filePath,
|
|
50
56
|
fileName: finalFileName,
|
|
51
|
-
isOcr:
|
|
52
|
-
enableFormula:
|
|
53
|
-
enableTable:
|
|
54
|
-
language:
|
|
55
|
-
modelVersion:
|
|
57
|
+
isOcr: finalIsOcr,
|
|
58
|
+
enableFormula: finalEnableFormula,
|
|
59
|
+
enableTable: finalEnableTable,
|
|
60
|
+
language: finalLanguage,
|
|
61
|
+
modelVersion: finalModelVersion,
|
|
56
62
|
});
|
|
57
63
|
let parsedResult;
|
|
58
64
|
if (mineruClient.serverType === 'self-hosted') {
|
|
package/dist/lib/mineru.toolset.d.ts
CHANGED
|
@@ -12,6 +12,11 @@ export interface MinerUToolsetConfig {
|
|
|
12
12
|
fileSystem?: XpFileSystem;
|
|
13
13
|
configService?: ConfigService;
|
|
14
14
|
resultParser?: MinerUResultParserService;
|
|
15
|
+
isOcr?: boolean;
|
|
16
|
+
enableFormula?: boolean;
|
|
17
|
+
enableTable?: boolean;
|
|
18
|
+
language?: 'en' | 'ch';
|
|
19
|
+
modelVersion?: 'pipeline' | 'vlm';
|
|
15
20
|
}
|
|
16
21
|
/**
|
|
17
22
|
* MinerU Toolset implementation
|
|
package/dist/lib/mineru.toolset.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mineru.toolset.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.toolset.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,uBAAuB,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAChF,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpE,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAC/C,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AAEvE,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAChD,OAAO,EAAE,wBAAwB,EAAE,MAAM,YAAY,CAAC;AAEtD;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,WAAW,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,wBAAwB,CAAC,CAAC,CAAC;IAC9D,UAAU,CAAC,EAAE,YAAY,CAAC;IAC1B,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,YAAY,CAAC,EAAE,yBAAyB,CAAC;
|
|
1
|
+
{"version":3,"file":"mineru.toolset.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.toolset.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,uBAAuB,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAChF,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpE,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAC/C,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AAEvE,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAChD,OAAO,EAAE,wBAAwB,EAAE,MAAM,YAAY,CAAC;AAEtD;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,WAAW,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,wBAAwB,CAAC,CAAC,CAAC;IAC9D,UAAU,CAAC,EAAE,YAAY,CAAC;IAC1B,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,YAAY,CAAC,EAAE,yBAAyB,CAAC;IAEzC,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC;IACvB,YAAY,CAAC,EAAE,UAAU,GAAG,KAAK,CAAC;CACnC;AAED;;;GAGG;AACH,qBAAa,aAAc,SAAQ,cAAc,CAAC,uBAAuB,EAAE,mBAAmB,CAAC;IAC7F,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAsB;IAE7C;;;;;OAKG;gBACS,MAAM,EAAE,mBAAmB;IAKvC;;;;;OAKG;IACY,oBAAoB,CAAC,WAAW,EAAE,mBAAmB,GAAG,OAAO,CAAC,IAAI,CAAC;IAMpF;;;OAGG;IACY,SAAS,IAAI,OAAO,CAAC,uBAAuB,CAAC,cAAc,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC;CAsBzF"}
|
|
package/dist/lib/mineru.toolset.js
CHANGED
|
@@ -31,12 +31,18 @@ export class MinerUToolset extends BuiltinToolset {
|
|
|
31
31
|
* Creates the PDF parser tool with necessary dependencies
|
|
32
32
|
*/
|
|
33
33
|
async initTools() {
|
|
34
|
-
const { configService, resultParser, integration, fileSystem } = this.config;
|
|
34
|
+
const { configService, resultParser, integration, fileSystem, isOcr, enableFormula, enableTable, language, modelVersion } = this.config;
|
|
35
35
|
if (!configService || !resultParser) {
|
|
36
36
|
throw new Error('ConfigService and MinerUResultParserService are required');
|
|
37
37
|
}
|
|
38
38
|
this.tools = [
|
|
39
|
-
buildMinerUTool(configService, resultParser, integration, fileSystem
|
|
39
|
+
buildMinerUTool(configService, resultParser, integration, fileSystem, {
|
|
40
|
+
isOcr,
|
|
41
|
+
enableFormula,
|
|
42
|
+
enableTable,
|
|
43
|
+
language,
|
|
44
|
+
modelVersion,
|
|
45
|
+
}),
|
|
40
46
|
];
|
|
41
47
|
return this.tools;
|
|
42
48
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@helloxiaohu/plugin-mineru",
|
|
3
|
-
"version": "0.0.13",
|
|
3
|
+
"version": "0.0.15",
|
|
4
4
|
"license": "AGPL-3.0",
|
|
5
5
|
"description": "MinerU PDF to Markdown converter plugin for XpertAI platform with toolset support",
|
|
6
6
|
"keywords": [
|
|
@@ -34,6 +34,7 @@
|
|
|
34
34
|
},
|
|
35
35
|
"files": [
|
|
36
36
|
"dist",
|
|
37
|
+
"package.json",
|
|
37
38
|
"!**/*.tsbuildinfo"
|
|
38
39
|
],
|
|
39
40
|
"dependencies": {
|