@helloxiaohu/plugin-mineru 0.0.19 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +213 -56
- package/dist/lib/integration.strategy.js +1 -1
- package/dist/lib/mineru-toolset.strategy.d.ts +83 -16
- package/dist/lib/mineru-toolset.strategy.d.ts.map +1 -1
- package/dist/lib/mineru-toolset.strategy.js +131 -41
- package/dist/lib/mineru.client.d.ts +5 -1
- package/dist/lib/mineru.client.d.ts.map +1 -1
- package/dist/lib/mineru.client.js +46 -9
- package/dist/lib/mineru.tool.d.ts +8 -43
- package/dist/lib/mineru.tool.d.ts.map +1 -1
- package/dist/lib/mineru.tool.js +63 -51
- package/dist/lib/mineru.toolset.d.ts +4 -5
- package/dist/lib/mineru.toolset.d.ts.map +1 -1
- package/dist/lib/mineru.toolset.js +52 -9
- package/dist/lib/result-parser.service.d.ts +2 -2
- package/dist/lib/result-parser.service.d.ts.map +1 -1
- package/dist/lib/result-parser.service.js +72 -43
- package/dist/lib/types.d.ts +27 -20
- package/dist/lib/types.d.ts.map +1 -1
- package/package.json +62 -62
|
@@ -2,7 +2,6 @@ import { StructuredToolInterface, ToolSchemaBase } from '@langchain/core/tools';
|
|
|
2
2
|
import { BuiltinToolset, XpFileSystem } from '@xpert-ai/plugin-sdk';
|
|
3
3
|
import { ConfigService } from '@nestjs/config';
|
|
4
4
|
import { MinerUResultParserService } from './result-parser.service.js';
|
|
5
|
-
import { MinerUIntegrationOptions } from './types.js';
|
|
6
5
|
/**
|
|
7
6
|
* Configuration for MinerU Toolset
|
|
8
7
|
*/
|
|
@@ -12,13 +11,12 @@ export interface MinerUToolsetConfig {
|
|
|
12
11
|
*/
|
|
13
12
|
apiUrl?: string;
|
|
14
13
|
apiKey?: string;
|
|
15
|
-
serverType?: MinerUIntegrationOptions['serverType'];
|
|
16
14
|
fileSystem?: XpFileSystem;
|
|
17
15
|
configService?: ConfigService;
|
|
18
16
|
resultParser?: MinerUResultParserService;
|
|
19
|
-
isOcr?: boolean;
|
|
20
|
-
enableFormula?: boolean;
|
|
21
|
-
enableTable?: boolean;
|
|
17
|
+
isOcr?: boolean | string;
|
|
18
|
+
enableFormula?: boolean | string;
|
|
19
|
+
enableTable?: boolean | string;
|
|
22
20
|
language?: 'en' | 'ch';
|
|
23
21
|
modelVersion?: 'pipeline' | 'vlm';
|
|
24
22
|
}
|
|
@@ -28,6 +26,7 @@ export interface MinerUToolsetConfig {
|
|
|
28
26
|
*/
|
|
29
27
|
export declare class MinerUToolset extends BuiltinToolset<StructuredToolInterface, MinerUToolsetConfig> {
|
|
30
28
|
private readonly config;
|
|
29
|
+
tools: any[];
|
|
31
30
|
/**
|
|
32
31
|
* Constructor for MinerU Toolset
|
|
33
32
|
* Accepts config which contains credentials and dependencies
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mineru.toolset.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.toolset.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,uBAAuB,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAChF,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpE,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAC/C,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;
|
|
1
|
+
{"version":3,"file":"mineru.toolset.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.toolset.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,uBAAuB,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAChF,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpE,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAC/C,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AAIvE;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC;;OAEG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,YAAY,CAAC;IAC1B,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,YAAY,CAAC,EAAE,yBAAyB,CAAC;IAGzC,KAAK,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IACzB,aAAa,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IACjC,WAAW,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IAC/B,QAAQ,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC;IACvB,YAAY,CAAC,EAAE,UAAU,GAAG,KAAK,CAAC;CACnC;AAED;;;GAGG;AACH,qBAAa,aAAc,SAAQ,cAAc,CAAC,uBAAuB,EAAE,mBAAmB,CAAC;IAC7F,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAsB;IAEpC,KAAK,EAAE,GAAG,EAAE,CAAM;IAE3B;;;;;OAKG;gBACS,MAAM,EAAE,mBAAmB;IAgBvC;;;;;OAKG;IACY,oBAAoB,CAAC,WAAW,EAAE,mBAAmB,GAAG,OAAO,CAAC,IAAI,CAAC;IAKpF;;;OAGG;IACY,SAAS,IAAI,OAAO,CAAC,uBAAuB,CAAC,cAAc,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC;CA+DzF"}
|
|
@@ -13,7 +13,20 @@ export class MinerUToolset extends BuiltinToolset {
|
|
|
13
13
|
*/
|
|
14
14
|
constructor(config) {
|
|
15
15
|
super('mineru', undefined, config);
|
|
16
|
+
// Ensure `tools` exists even if upstream BuiltinToolset typings differ across versions.
|
|
17
|
+
this.tools = [];
|
|
16
18
|
this.config = config;
|
|
19
|
+
// Log config received in constructor (mask apiKey for security)
|
|
20
|
+
const configForLog = { ...config };
|
|
21
|
+
if (configForLog.apiKey) {
|
|
22
|
+
configForLog.apiKey = configForLog.apiKey.length > 8
|
|
23
|
+
? `${configForLog.apiKey.substring(0, 4)}...${configForLog.apiKey.substring(configForLog.apiKey.length - 4)}`
|
|
24
|
+
: '***';
|
|
25
|
+
}
|
|
26
|
+
// Use base class logger (protected access)
|
|
27
|
+
if ('logger' in this && this.logger) {
|
|
28
|
+
this.logger.log(`[MinerU] MinerUToolset constructor received config: ${JSON.stringify(configForLog, null, 2)}`);
|
|
29
|
+
}
|
|
17
30
|
}
|
|
18
31
|
/**
|
|
19
32
|
* Validate credentials for MinerU toolset
|
|
@@ -30,21 +43,51 @@ export class MinerUToolset extends BuiltinToolset {
|
|
|
30
43
|
* Creates the PDF parser tool with necessary dependencies
|
|
31
44
|
*/
|
|
32
45
|
async initTools() {
|
|
33
|
-
const { configService, resultParser, apiUrl, apiKey,
|
|
46
|
+
const { configService, resultParser, apiUrl, apiKey, fileSystem, isOcr, enableFormula, enableTable, language, modelVersion } = this.config;
|
|
47
|
+
// Log the raw config keys and the apiKey (masked) next to the already-destructured apiKey
|
|
48
|
+
const configKeys = Object.keys(this.config);
|
|
49
|
+
const hasApiKey = 'apiKey' in this.config;
|
|
50
|
+
const apiKeyValue = this.config.apiKey;
|
|
51
|
+
const maskedApiKey = apiKeyValue
|
|
52
|
+
? (apiKeyValue.length > 8 ? `${apiKeyValue.substring(0, 4)}...${apiKeyValue.substring(apiKeyValue.length - 4)}` : '***')
|
|
53
|
+
: 'missing';
|
|
54
|
+
// Use base class logger (protected access)
|
|
55
|
+
if ('logger' in this && this.logger) {
|
|
56
|
+
this.logger.log(`[MinerU] MinerUToolset.initTools() - config keys: ${configKeys.join(', ')}, hasApiKey: ${hasApiKey}, apiKey: ${maskedApiKey}`);
|
|
57
|
+
this.logger.log(`[MinerU] MinerUToolset.initTools() - destructured apiKey: ${apiKey ? (apiKey.length > 8 ? `${apiKey.substring(0, 4)}...${apiKey.substring(apiKey.length - 4)}` : '***') : 'missing'}`);
|
|
58
|
+
}
|
|
34
59
|
if (!configService || !resultParser) {
|
|
35
60
|
throw new Error('ConfigService and MinerUResultParserService are required');
|
|
36
61
|
}
|
|
62
|
+
// Use configuration from authorization page
|
|
63
|
+
// apiUrl: use provided value or default to official server URL
|
|
64
|
+
const finalApiUrl = apiUrl || 'https://mineru.net/api/v4';
|
|
65
|
+
// Convert string enum values to boolean (compatible with 'true'/'false' strings and boolean values)
|
|
66
|
+
// Use provided values from authorization page; any value other than true / 'true' (including a missing value) is treated as false
|
|
67
|
+
const finalIsOcr = isOcr === 'true' || isOcr === true;
|
|
68
|
+
const finalEnableFormula = enableFormula === 'true' || enableFormula === true;
|
|
69
|
+
const finalEnableTable = enableTable === 'true' || enableTable === true;
|
|
70
|
+
// Use provided values from authorization page, or use defaults
|
|
71
|
+
const finalLanguage = language || 'ch';
|
|
72
|
+
const finalModelVersion = modelVersion || 'pipeline';
|
|
73
|
+
// Log what we're passing to buildMinerUTool
|
|
74
|
+
const maskedFinalApiKey = apiKey
|
|
75
|
+
? (apiKey.length > 8 ? `${apiKey.substring(0, 4)}...${apiKey.substring(apiKey.length - 4)}` : '***')
|
|
76
|
+
: 'missing';
|
|
77
|
+
// Use base class logger (protected access)
|
|
78
|
+
if ('logger' in this && this.logger) {
|
|
79
|
+
this.logger.log(`[MinerU] MinerUToolset.initTools() - passing to buildMinerUTool: apiUrl=${finalApiUrl}, apiKey=${maskedFinalApiKey}`);
|
|
80
|
+
}
|
|
37
81
|
this.tools = [
|
|
38
82
|
buildMinerUTool(configService, resultParser, {
|
|
39
|
-
apiUrl,
|
|
40
|
-
apiKey,
|
|
41
|
-
serverType: serverType ?? 'official',
|
|
83
|
+
apiUrl: finalApiUrl,
|
|
84
|
+
apiKey, // apiKey is required and validated in authorization page
|
|
42
85
|
}, fileSystem, {
|
|
43
|
-
isOcr,
|
|
44
|
-
enableFormula,
|
|
45
|
-
enableTable,
|
|
46
|
-
language,
|
|
47
|
-
modelVersion,
|
|
86
|
+
isOcr: finalIsOcr,
|
|
87
|
+
enableFormula: finalEnableFormula,
|
|
88
|
+
enableTable: finalEnableTable,
|
|
89
|
+
language: finalLanguage,
|
|
90
|
+
modelVersion: finalModelVersion,
|
|
48
91
|
}),
|
|
49
92
|
];
|
|
50
93
|
return this.tools;
|
|
@@ -4,12 +4,12 @@ import { ChunkMetadata, XpFileSystem } from '@xpert-ai/plugin-sdk';
|
|
|
4
4
|
import { MinerUDocumentMetadata, MineruSelfHostedTaskResult } from './types.js';
|
|
5
5
|
export declare class MinerUResultParserService {
|
|
6
6
|
private readonly logger;
|
|
7
|
-
parseFromUrl(fullZipUrl: string, taskId: string, document: Partial<IKnowledgeDocument>, fileSystem
|
|
7
|
+
parseFromUrl(fullZipUrl: string, taskId: string, document: Partial<IKnowledgeDocument>, fileSystem?: XpFileSystem): Promise<{
|
|
8
8
|
id?: string;
|
|
9
9
|
chunks: Document<ChunkMetadata>[];
|
|
10
10
|
metadata: MinerUDocumentMetadata;
|
|
11
11
|
}>;
|
|
12
|
-
parseLocalTask(result: MineruSelfHostedTaskResult, taskId: string, document: Partial<IKnowledgeDocument>, fileSystem
|
|
12
|
+
parseLocalTask(result: MineruSelfHostedTaskResult, taskId: string, document: Partial<IKnowledgeDocument>, fileSystem?: XpFileSystem): Promise<{
|
|
13
13
|
id?: string;
|
|
14
14
|
chunks: Document<ChunkMetadata>[];
|
|
15
15
|
metadata: MinerUDocumentMetadata;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"result-parser.service.d.ts","sourceRoot":"","sources":["../../src/lib/result-parser.service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AACrD,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AAEtD,OAAO,EACL,aAAa,EAEb,YAAY,EACb,MAAM,sBAAsB,CAAC;AAK9B,OAAO,EAEL,sBAAsB,EACtB,0BAA0B,EAC3B,MAAM,YAAY,CAAC;AAEpB,qBACa,yBAAyB;IACpC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAA8C;IAE/D,YAAY,CAChB,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,OAAO,CAAC,kBAAkB,CAAC,EACrC,UAAU,EAAE,YAAY,
|
|
1
|
+
{"version":3,"file":"result-parser.service.d.ts","sourceRoot":"","sources":["../../src/lib/result-parser.service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AACrD,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AAEtD,OAAO,EACL,aAAa,EAEb,YAAY,EACb,MAAM,sBAAsB,CAAC;AAK9B,OAAO,EAEL,sBAAsB,EACtB,0BAA0B,EAC3B,MAAM,YAAY,CAAC;AAEpB,qBACa,yBAAyB;IACpC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAA8C;IAE/D,YAAY,CAChB,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,OAAO,CAAC,kBAAkB,CAAC,EACrC,UAAU,CAAC,EAAE,YAAY,GACxB,OAAO,CAAC;QACT,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,MAAM,EAAE,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;QAClC,QAAQ,EAAE,sBAAsB,CAAC;KAClC,CAAC;IAqGI,cAAc,CAClB,MAAM,EAAE,0BAA0B,EAClC,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,OAAO,CAAC,kBAAkB,CAAC,EACrC,UAAU,CAAC,EAAE,YAAY,GACxB,OAAO,CAAC;QACT,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,MAAM,EAAE,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;QAClC,QAAQ,EAAE,sBAAsB,CAAC;KAClC,CAAC;CA4DH"}
|
|
@@ -36,43 +36,61 @@ let MinerUResultParserService = MinerUResultParserService_1 = class MinerUResult
|
|
|
36
36
|
zipEntries.push({ entryName: entry.path, data });
|
|
37
37
|
const fileName = entry.path;
|
|
38
38
|
const filePath = join(document.folder || '', entry.path);
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
39
|
+
// If platform didn't provide filesystem permission, still parse markdown but skip persisting files.
|
|
40
|
+
// This avoids runtime crashes like: "Cannot read properties of undefined (reading 'writeFile')".
|
|
41
|
+
if (fileSystem) {
|
|
42
|
+
const url = await fileSystem.writeFile(filePath, data);
|
|
43
|
+
pathMap.set(fileName, url);
|
|
44
|
+
// Write images to local file system
|
|
45
|
+
if (fileName.startsWith('images/')) {
|
|
46
|
+
assets.push({
|
|
47
|
+
type: 'image',
|
|
48
|
+
url: url,
|
|
49
|
+
filePath: filePath,
|
|
50
|
+
});
|
|
51
|
+
}
|
|
52
|
+
else if (fileName.endsWith('layout.json')) {
|
|
53
|
+
layoutJson = JSON.parse(data.toString('utf-8'));
|
|
54
|
+
metadata.mineruBackend = layoutJson?._backend;
|
|
55
|
+
metadata.mineruVersion = layoutJson?._version_name;
|
|
56
|
+
assets.push({
|
|
57
|
+
type: 'file',
|
|
58
|
+
url,
|
|
59
|
+
filePath: filePath,
|
|
60
|
+
});
|
|
61
|
+
}
|
|
62
|
+
else if (fileName.endsWith('content_list.json')) {
|
|
63
|
+
assets.push({
|
|
64
|
+
type: 'file',
|
|
65
|
+
url,
|
|
66
|
+
filePath: filePath,
|
|
67
|
+
});
|
|
68
|
+
}
|
|
69
|
+
else if (fileName.endsWith('full.md')) {
|
|
70
|
+
fullMd = data.toString('utf-8');
|
|
71
|
+
assets.push({
|
|
72
|
+
type: 'file',
|
|
73
|
+
url,
|
|
74
|
+
filePath: filePath,
|
|
75
|
+
});
|
|
76
|
+
}
|
|
77
|
+
else if (fileName.endsWith('origin.pdf')) {
|
|
78
|
+
metadata.originPdfUrl = fileName;
|
|
79
|
+
}
|
|
73
80
|
}
|
|
74
|
-
else
|
|
75
|
-
metadata
|
|
81
|
+
else {
|
|
82
|
+
// Still extract key metadata & markdown without writing to filesystem
|
|
83
|
+
if (fileName.endsWith('layout.json')) {
|
|
84
|
+
layoutJson = JSON.parse(data.toString('utf-8'));
|
|
85
|
+
metadata.mineruBackend = layoutJson?._backend;
|
|
86
|
+
metadata.mineruVersion = layoutJson?._version_name;
|
|
87
|
+
}
|
|
88
|
+
else if (fileName.endsWith('full.md')) {
|
|
89
|
+
fullMd = data.toString('utf-8');
|
|
90
|
+
}
|
|
91
|
+
else if (fileName.endsWith('origin.pdf')) {
|
|
92
|
+
metadata.originPdfUrl = fileName;
|
|
93
|
+
}
|
|
76
94
|
}
|
|
77
95
|
}
|
|
78
96
|
metadata.assets = assets;
|
|
@@ -101,13 +119,24 @@ let MinerUResultParserService = MinerUResultParserService_1 = class MinerUResult
|
|
|
101
119
|
const pathMap = new Map();
|
|
102
120
|
for (const image of result.images) {
|
|
103
121
|
const filePath = join(document.folder || '', 'images', image.name);
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
122
|
+
if (fileSystem) {
|
|
123
|
+
const url = await fileSystem.writeFile(filePath, Buffer.from(image.dataUrl.split(',')[1], 'base64'));
|
|
124
|
+
pathMap.set(`images/${image.name}`, url);
|
|
125
|
+
assets.push({
|
|
126
|
+
type: 'image',
|
|
127
|
+
url: url,
|
|
128
|
+
filePath: filePath,
|
|
129
|
+
});
|
|
130
|
+
}
|
|
131
|
+
else {
|
|
132
|
+
// Fallback: keep images as data URLs so markdown can still render without filesystem permission
|
|
133
|
+
pathMap.set(`images/${image.name}`, image.dataUrl);
|
|
134
|
+
assets.push({
|
|
135
|
+
type: 'image',
|
|
136
|
+
url: image.dataUrl,
|
|
137
|
+
filePath: filePath,
|
|
138
|
+
});
|
|
139
|
+
}
|
|
111
140
|
}
|
|
112
141
|
if (result.sourceUrl) {
|
|
113
142
|
assets.push({
|
package/dist/lib/types.d.ts
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { TDocumentAsset, TDocumentTransformerConfig } from "@xpert-ai/plugin-sdk";
|
|
2
1
|
export declare const MinerU = "mineru";
|
|
3
2
|
/**
|
|
4
3
|
* Integration provider key for MinerU API credentials.
|
|
@@ -17,37 +16,45 @@ export declare const ENV_MINERU_API_BASE_URL = "MINERU_API_BASE_URL";
|
|
|
17
16
|
export declare const ENV_MINERU_API_TOKEN = "MINERU_API_TOKEN";
|
|
18
17
|
export declare const ENV_MINERU_SERVER_TYPE = "MINERU_SERVER_TYPE";
|
|
19
18
|
export declare const icon = "<svg width=\"24\" height=\"24\" viewBox=\"0 0 24 24\" fill=\"none\" xmlns=\"http://www.w3.org/2000/svg\">\n<path d=\"M19.7238 3.86898C19.7238 4.57597 19.1502 5.1491 18.4427 5.1491C17.7352 5.1491 17.1616 4.57597 17.1616 3.86898C17.1616 3.16199 17.7352 2.58887 18.4427 2.58887C19.1502 2.58887 19.7238 3.16199 19.7238 3.86898Z\" fill=\"url(#paint0_linear_8609_1645)\"/>\n<path d=\"M19.7238 3.86898C19.7238 4.57597 19.1502 5.1491 18.4427 5.1491C17.7352 5.1491 17.1616 4.57597 17.1616 3.86898C17.1616 3.16199 17.7352 2.58887 18.4427 2.58887C19.1502 2.58887 19.7238 3.16199 19.7238 3.86898Z\" fill=\"#010101\"/>\n<path d=\"M15.3681 5.1491C15.3681 5.85609 14.7945 6.42921 14.087 6.42921C13.3794 6.42921 12.8059 5.85609 12.8059 5.1491C12.8059 4.44211 13.3794 3.86898 14.087 3.86898C14.7945 3.86898 15.3681 4.44211 15.3681 5.1491Z\" fill=\"url(#paint1_linear_8609_1645)\"/>\n<path d=\"M15.3681 5.1491C15.3681 5.85609 14.7945 6.42921 14.087 6.42921C13.3794 6.42921 12.8059 5.85609 12.8059 5.1491C12.8059 4.44211 13.3794 3.86898 14.087 3.86898C14.7945 3.86898 15.3681 4.44211 15.3681 5.1491Z\" fill=\"#010101\"/>\n<path fill-rule=\"evenodd\" clip-rule=\"evenodd\" d=\"M8.05175 11.2368C8.05175 13.4605 9.14375 15.4293 10.8211 16.6371C11.8241 15.7389 12.4551 14.4345 12.4551 12.9828V9.39673C12.4551 8.85661 12.8197 8.38448 13.3426 8.24757L19.8924 6.53265C20.6459 6.33534 21.3826 6.90341 21.3826 7.6818L21.3826 12.0452C21.3826 17.2179 17.1861 21.4111 12.0095 21.4111L11.9942 21.4111C6.81758 21.4111 2.62109 17.2179 2.62109 12.0452V9.03388C2.62109 8.49175 2.9884 8.01839 3.51385 7.88336L6.56677 7.09882C7.31904 6.9055 8.05175 7.47318 8.05175 8.24934V11.2368ZM3.9798 12.0452C3.9798 13.8476 4.57565 15.5108 5.58124 16.849C6.04996 17.4728 6.7655 17.8884 7.54573 17.8884V17.8884C8.28848 17.8884 8.9927 17.7236 9.62376 17.4286C7.83439 15.9596 6.69304 13.7314 6.69304 11.2368V8.46821L3.9798 9.16546V12.0452Z\" fill=\"url(#paint2_linear_8609_1645)\"/>\n<path fill-rule=\"evenodd\" 
clip-rule=\"evenodd\" d=\"M8.05175 11.2368C8.05175 13.4605 9.14375 15.4293 10.8211 16.6371C11.8241 15.7389 12.4551 14.4345 12.4551 12.9828V9.39673C12.4551 8.85661 12.8197 8.38448 13.3426 8.24757L19.8924 6.53265C20.6459 6.33534 21.3826 6.90341 21.3826 7.6818L21.3826 12.0452C21.3826 17.2179 17.1861 21.4111 12.0095 21.4111L11.9942 21.4111C6.81758 21.4111 2.62109 17.2179 2.62109 12.0452V9.03388C2.62109 8.49175 2.9884 8.01839 3.51385 7.88336L6.56677 7.09882C7.31904 6.9055 8.05175 7.47318 8.05175 8.24934V11.2368ZM3.9798 12.0452C3.9798 13.8476 4.57565 15.5108 5.58124 16.849C6.04996 17.4728 6.7655 17.8884 7.54573 17.8884V17.8884C8.28848 17.8884 8.9927 17.7236 9.62376 17.4286C7.83439 15.9596 6.69304 13.7314 6.69304 11.2368V8.46821L3.9798 9.16546V12.0452Z\" fill=\"#010101\"/>\n<defs>\n<linearGradient id=\"paint0_linear_8609_1645\" x1=\"14.3898\" y1=\"8.36821\" x2=\"13.1876\" y2=\"19.4461\" gradientUnits=\"userSpaceOnUse\">\n<stop stop-color=\"white\"/>\n<stop offset=\"1\" stop-color=\"#2E2E2E\"/>\n</linearGradient>\n<linearGradient id=\"paint1_linear_8609_1645\" x1=\"14.3898\" y1=\"8.36821\" x2=\"13.1876\" y2=\"19.4461\" gradientUnits=\"userSpaceOnUse\">\n<stop stop-color=\"white\"/>\n<stop offset=\"1\" stop-color=\"#2E2E2E\"/>\n</linearGradient>\n<linearGradient id=\"paint2_linear_8609_1645\" x1=\"14.3898\" y1=\"8.36821\" x2=\"13.1876\" y2=\"19.4461\" gradientUnits=\"userSpaceOnUse\">\n<stop stop-color=\"white\"/>\n<stop offset=\"1\" stop-color=\"#2E2E2E\"/>\n</linearGradient>\n</defs>\n</svg>\n";
|
|
20
|
-
export
|
|
21
|
-
isOcr?: boolean;
|
|
22
|
-
enableFormula?: boolean;
|
|
23
|
-
enableTable?: boolean;
|
|
24
|
-
language?: 'en' | 'ch';
|
|
25
|
-
modelVersion?: 'vlm' | 'pipeline';
|
|
26
|
-
};
|
|
27
|
-
export type MinerUDocumentMetadata = {
|
|
28
|
-
parser: 'mineru';
|
|
29
|
-
assets?: TDocumentAsset[];
|
|
30
|
-
taskId: string;
|
|
31
|
-
originPdfUrl?: string;
|
|
32
|
-
mineruBackend?: string;
|
|
33
|
-
mineruVersion?: string;
|
|
34
|
-
};
|
|
35
|
-
export type MinerUServerType = 'official' | 'self-hosted';
|
|
36
|
-
export type MinerUIntegrationOptions = {
|
|
19
|
+
export interface MinerUIntegrationOptions {
|
|
37
20
|
apiUrl?: string;
|
|
38
21
|
apiKey?: string;
|
|
39
22
|
serverType?: MinerUServerType;
|
|
40
|
-
}
|
|
23
|
+
}
|
|
24
|
+
export type MinerUServerType = 'official' | 'self-hosted';
|
|
41
25
|
export interface MineruSelfHostedImage {
|
|
42
26
|
name: string;
|
|
43
27
|
dataUrl: string;
|
|
44
28
|
}
|
|
45
29
|
export interface MineruSelfHostedTaskResult {
|
|
46
30
|
mdContent: string;
|
|
47
|
-
contentList
|
|
31
|
+
contentList: any;
|
|
32
|
+
images: MineruSelfHostedImage[];
|
|
33
|
+
raw: any;
|
|
34
|
+
fileName?: string;
|
|
35
|
+
}
|
|
36
|
+
export interface MinerUDocumentMetadata {
|
|
37
|
+
parser: string;
|
|
38
|
+
taskId: string;
|
|
39
|
+
mineruBackend?: string;
|
|
40
|
+
mineruVersion?: string;
|
|
41
|
+
originPdfUrl?: string;
|
|
42
|
+
assets?: any[];
|
|
43
|
+
}
|
|
44
|
+
export interface MineruSelfHostedTaskResult {
|
|
45
|
+
mdContent: string;
|
|
46
|
+
contentList: any;
|
|
48
47
|
images: MineruSelfHostedImage[];
|
|
49
48
|
raw: any;
|
|
50
49
|
fileName?: string;
|
|
51
50
|
sourceUrl?: string;
|
|
52
51
|
}
|
|
52
|
+
import type { XpFileSystem, TDocumentTransformerConfig } from '@xpert-ai/plugin-sdk';
|
|
53
|
+
import type { IIntegration } from '@metad/contracts';
|
|
54
|
+
export interface TMinerUTransformerConfig extends TDocumentTransformerConfig {
|
|
55
|
+
permissions?: {
|
|
56
|
+
fileSystem?: XpFileSystem;
|
|
57
|
+
integration?: IIntegration<MinerUIntegrationOptions>;
|
|
58
|
+
};
|
|
59
|
+
}
|
|
53
60
|
//# sourceMappingURL=types.d.ts.map
|
package/dist/lib/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/lib/types.ts"],"names":[],"mappings":"AAAA,
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/lib/types.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,MAAM,WAAW,CAAC;AAC/B;;;;;;;GAOG;AACH,eAAO,MAAM,iBAAiB,eAAe,CAAC;AAC9C;;GAEG;AACH,eAAO,MAAM,iBAAiB,uBAAuB,CAAC;AACtD,eAAO,MAAM,uBAAuB,wBAAwB,CAAC;AAC7D,eAAO,MAAM,oBAAoB,qBAAqB,CAAC;AACvD,eAAO,MAAM,sBAAsB,uBAAuB,CAAC;AAC3D,eAAO,MAAM,IAAI,m5GAsBhB,CAAC;AAEF,MAAM,WAAW,wBAAwB;IACvC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,gBAAgB,CAAC;CAC/B;AAED,MAAM,MAAM,gBAAgB,GAAG,UAAU,GAAG,aAAa,CAAC;AAE1D,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,0BAA0B;IACzC,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,GAAG,CAAC;IACjB,MAAM,EAAE,qBAAqB,EAAE,CAAC;IAChC,GAAG,EAAE,GAAG,CAAC;IACT,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,sBAAsB;IACrC,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,MAAM,CAAC,EAAE,GAAG,EAAE,CAAC;CAChB;AAED,MAAM,WAAW,0BAA0B;IACzC,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,GAAG,CAAC;IACjB,MAAM,EAAE,qBAAqB,EAAE,CAAC;IAChC,GAAG,EAAE,GAAG,CAAC;IACT,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,OAAO,KAAK,EAAE,YAAY,EAAE,0BAA0B,EAAE,MAAM,sBAAsB,CAAC;AACrF,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAErD,MAAM,WAAW,wBAAyB,SAAQ,0BAA0B;IAC1E,WAAW,CAAC,EAAE;QACZ,UAAU,CAAC,EAAE,YAAY,CAAC;QAC1B,WAAW,CAAC,EAAE,YAAY,CAAC,wBAAwB,CAAC,CAAC;KACtD,CAAC;CACH"}
|
package/package.json
CHANGED
|
@@ -1,62 +1,62 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "@helloxiaohu/plugin-mineru",
|
|
3
|
-
"version": "0.0
|
|
4
|
-
"license": "AGPL-3.0",
|
|
5
|
-
"description": "MinerU PDF to Markdown converter plugin for XpertAI platform with toolset support",
|
|
6
|
-
"keywords": [
|
|
7
|
-
"xpert-ai",
|
|
8
|
-
"plugin",
|
|
9
|
-
"mineru",
|
|
10
|
-
"pdf",
|
|
11
|
-
"markdown",
|
|
12
|
-
"ocr",
|
|
13
|
-
"document-conversion"
|
|
14
|
-
],
|
|
15
|
-
"repository": {
|
|
16
|
-
"type": "git",
|
|
17
|
-
"url": "git+https://github.com/xpert-ai/xpert-plugins.git"
|
|
18
|
-
},
|
|
19
|
-
"bugs": {
|
|
20
|
-
"url": "https://github.com/xpert-ai/xpert-plugins/issues"
|
|
21
|
-
},
|
|
22
|
-
"type": "module",
|
|
23
|
-
"main": "./dist/index.js",
|
|
24
|
-
"module": "./dist/index.js",
|
|
25
|
-
"types": "./dist/index.d.ts",
|
|
26
|
-
"exports": {
|
|
27
|
-
"./package.json": "./package.json",
|
|
28
|
-
".": {
|
|
29
|
-
"@xpert-plugins-starter/source": "./src/index.ts",
|
|
30
|
-
"types": "./dist/index.d.ts",
|
|
31
|
-
"import": "./dist/index.js",
|
|
32
|
-
"default": "./dist/index.js"
|
|
33
|
-
}
|
|
34
|
-
},
|
|
35
|
-
"files": [
|
|
36
|
-
"dist",
|
|
37
|
-
"package.json",
|
|
38
|
-
"!**/*.tsbuildinfo"
|
|
39
|
-
],
|
|
40
|
-
"dependencies": {
|
|
41
|
-
"form-data": "^4.0.0",
|
|
42
|
-
"tslib": "^2.3.0",
|
|
43
|
-
"unzipper": "0.12.3"
|
|
44
|
-
},
|
|
45
|
-
"peerDependencies": {
|
|
46
|
-
"@langchain/core": "0.3.72",
|
|
47
|
-
"@langchain/langgraph": "0.4.7",
|
|
48
|
-
"@nestjs/common": "^11.1.6",
|
|
49
|
-
"@nestjs/config": "^4.0.2",
|
|
50
|
-
"@xpert-ai/plugin-sdk": "^3.6.2",
|
|
51
|
-
"@metad/contracts": "^3.6.2",
|
|
52
|
-
"axios": "1.12.2",
|
|
53
|
-
"nestjs-i18n": "10.5.1",
|
|
54
|
-
"chalk": "4.1.2",
|
|
55
|
-
"lodash-es": "4.17.21",
|
|
56
|
-
"uuid": "8.3.2",
|
|
57
|
-
"zod": "3.25.67"
|
|
58
|
-
},
|
|
59
|
-
"devDependencies": {
|
|
60
|
-
"@types/unzipper": "^0.10.11"
|
|
61
|
-
}
|
|
62
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"name": "@helloxiaohu/plugin-mineru",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"license": "AGPL-3.0",
|
|
5
|
+
"description": "MinerU PDF to Markdown converter plugin for XpertAI platform with toolset support",
|
|
6
|
+
"keywords": [
|
|
7
|
+
"xpert-ai",
|
|
8
|
+
"plugin",
|
|
9
|
+
"mineru",
|
|
10
|
+
"pdf",
|
|
11
|
+
"markdown",
|
|
12
|
+
"ocr",
|
|
13
|
+
"document-conversion"
|
|
14
|
+
],
|
|
15
|
+
"repository": {
|
|
16
|
+
"type": "git",
|
|
17
|
+
"url": "git+https://github.com/xpert-ai/xpert-plugins.git"
|
|
18
|
+
},
|
|
19
|
+
"bugs": {
|
|
20
|
+
"url": "https://github.com/xpert-ai/xpert-plugins/issues"
|
|
21
|
+
},
|
|
22
|
+
"type": "module",
|
|
23
|
+
"main": "./dist/index.js",
|
|
24
|
+
"module": "./dist/index.js",
|
|
25
|
+
"types": "./dist/index.d.ts",
|
|
26
|
+
"exports": {
|
|
27
|
+
"./package.json": "./package.json",
|
|
28
|
+
".": {
|
|
29
|
+
"@xpert-plugins-starter/source": "./src/index.ts",
|
|
30
|
+
"types": "./dist/index.d.ts",
|
|
31
|
+
"import": "./dist/index.js",
|
|
32
|
+
"default": "./dist/index.js"
|
|
33
|
+
}
|
|
34
|
+
},
|
|
35
|
+
"files": [
|
|
36
|
+
"dist",
|
|
37
|
+
"package.json",
|
|
38
|
+
"!**/*.tsbuildinfo"
|
|
39
|
+
],
|
|
40
|
+
"dependencies": {
|
|
41
|
+
"form-data": "^4.0.0",
|
|
42
|
+
"tslib": "^2.3.0",
|
|
43
|
+
"unzipper": "0.12.3"
|
|
44
|
+
},
|
|
45
|
+
"peerDependencies": {
|
|
46
|
+
"@langchain/core": "0.3.72",
|
|
47
|
+
"@langchain/langgraph": "0.4.7",
|
|
48
|
+
"@nestjs/common": "^11.1.6",
|
|
49
|
+
"@nestjs/config": "^4.0.2",
|
|
50
|
+
"@xpert-ai/plugin-sdk": "^3.6.2",
|
|
51
|
+
"@metad/contracts": "^3.6.2",
|
|
52
|
+
"axios": "1.12.2",
|
|
53
|
+
"nestjs-i18n": "10.5.1",
|
|
54
|
+
"chalk": "4.1.2",
|
|
55
|
+
"lodash-es": "4.17.21",
|
|
56
|
+
"uuid": "8.3.2",
|
|
57
|
+
"zod": "3.25.67"
|
|
58
|
+
},
|
|
59
|
+
"devDependencies": {
|
|
60
|
+
"@types/unzipper": "^0.10.11"
|
|
61
|
+
}
|
|
62
|
+
}
|