@helloxiaohu/plugin-mineru 0.0.20 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +101 -101
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7 -8
- package/dist/lib/integration.strategy.d.ts.map +1 -1
- package/dist/lib/integration.strategy.js +19 -4
- package/dist/lib/mineru.client.d.ts.map +1 -1
- package/dist/lib/mineru.client.js +157 -11
- package/dist/lib/mineru.plugin.d.ts.map +1 -1
- package/dist/lib/mineru.plugin.js +0 -2
- package/dist/lib/result-parser.service.d.ts.map +1 -1
- package/dist/lib/result-parser.service.js +1 -0
- package/dist/lib/transformer-mineru.strategy.d.ts +11 -0
- package/dist/lib/transformer-mineru.strategy.d.ts.map +1 -1
- package/dist/lib/transformer-mineru.strategy.js +31 -9
- package/dist/lib/types.d.ts +3 -13
- package/dist/lib/types.d.ts.map +1 -1
- package/dist/lib/types.js +22 -35
- package/package.json +54 -62
- package/dist/lib/mineru-toolset.strategy.d.ts +0 -167
- package/dist/lib/mineru-toolset.strategy.d.ts.map +0 -1
- package/dist/lib/mineru-toolset.strategy.js +0 -216
- package/dist/lib/mineru.tool.d.ts +0 -70
- package/dist/lib/mineru.tool.d.ts.map +0 -1
- package/dist/lib/mineru.tool.js +0 -145
- package/dist/lib/mineru.toolset.d.ts +0 -51
- package/dist/lib/mineru.toolset.d.ts.map +0 -1
- package/dist/lib/mineru.toolset.js +0 -52
|
@@ -5,13 +5,13 @@ import { DocumentTransformerStrategy, } from '@xpert-ai/plugin-sdk';
|
|
|
5
5
|
import { isNil, omitBy, pick } from 'lodash-es';
|
|
6
6
|
import { MinerUClient } from './mineru.client.js';
|
|
7
7
|
import { MinerUResultParserService } from './result-parser.service.js';
|
|
8
|
-
import { icon,
|
|
8
|
+
import { icon, MinerU } from './types.js';
|
|
9
9
|
let MinerUTransformerStrategy = class MinerUTransformerStrategy {
|
|
10
10
|
constructor() {
|
|
11
11
|
this.permissions = [
|
|
12
12
|
{
|
|
13
13
|
type: 'integration',
|
|
14
|
-
service:
|
|
14
|
+
service: MinerU,
|
|
15
15
|
description: 'Access to MinerU system integrations'
|
|
16
16
|
},
|
|
17
17
|
{
|
|
@@ -21,7 +21,7 @@ let MinerUTransformerStrategy = class MinerUTransformerStrategy {
|
|
|
21
21
|
}
|
|
22
22
|
];
|
|
23
23
|
this.meta = {
|
|
24
|
-
name:
|
|
24
|
+
name: MinerU,
|
|
25
25
|
label: {
|
|
26
26
|
en_US: 'MinerU',
|
|
27
27
|
zh_Hans: 'MinerU'
|
|
@@ -99,6 +99,17 @@ let MinerUTransformerStrategy = class MinerUTransformerStrategy {
|
|
|
99
99
|
},
|
|
100
100
|
enum: ['pipeline', 'vlm'],
|
|
101
101
|
default: 'pipeline'
|
|
102
|
+
},
|
|
103
|
+
pageRanges: {
|
|
104
|
+
type: 'string',
|
|
105
|
+
title: {
|
|
106
|
+
en_US: 'Page Ranges',
|
|
107
|
+
zh_Hans: '页码范围'
|
|
108
|
+
},
|
|
109
|
+
description: {
|
|
110
|
+
en_US: 'Page ranges like "2,4-6" or "2--2" (official API only).',
|
|
111
|
+
zh_Hans: '页码范围,例如 "2,4-6" 或 "2--2"(仅官方 API)。'
|
|
112
|
+
}
|
|
102
113
|
}
|
|
103
114
|
},
|
|
104
115
|
required: []
|
|
@@ -111,6 +122,7 @@ let MinerUTransformerStrategy = class MinerUTransformerStrategy {
|
|
|
111
122
|
async transformDocuments(documents, config) {
|
|
112
123
|
const mineru = new MinerUClient(this.configService, config.permissions);
|
|
113
124
|
const parsedResults = [];
|
|
125
|
+
const integrationOptions = config.permissions?.integration?.options;
|
|
114
126
|
for await (const document of documents) {
|
|
115
127
|
if (mineru.serverType === 'self-hosted') {
|
|
116
128
|
const { taskId } = await mineru.createTask({
|
|
@@ -125,8 +137,12 @@ let MinerUTransformerStrategy = class MinerUTransformerStrategy {
|
|
|
125
137
|
});
|
|
126
138
|
const result = mineru.getSelfHostedTask(taskId);
|
|
127
139
|
const parsedResult = await this.resultParser.parseLocalTask(result, taskId, document, config.permissions.fileSystem);
|
|
128
|
-
parsedResult
|
|
129
|
-
parsedResults.push(
|
|
140
|
+
// Convert parsedResult to IKnowledgeDocument format
|
|
141
|
+
parsedResults.push({
|
|
142
|
+
id: document.id,
|
|
143
|
+
chunks: parsedResult.chunks,
|
|
144
|
+
metadata: parsedResult.metadata
|
|
145
|
+
});
|
|
130
146
|
}
|
|
131
147
|
else {
|
|
132
148
|
const { taskId } = await mineru.createTask({
|
|
@@ -136,13 +152,19 @@ let MinerUTransformerStrategy = class MinerUTransformerStrategy {
|
|
|
136
152
|
enableTable: true,
|
|
137
153
|
language: 'ch',
|
|
138
154
|
modelVersion: 'vlm',
|
|
139
|
-
|
|
155
|
+
pageRanges: config.pageRanges,
|
|
156
|
+
extraFormats: integrationOptions?.extraFormats,
|
|
157
|
+
...omitBy(pick(config, ['isOcr', 'enableFormula', 'enableTable', 'language', 'modelVersion', 'pageRanges']), isNil)
|
|
140
158
|
});
|
|
141
159
|
// Waiting for completion
|
|
142
160
|
const result = await mineru.waitForTask(taskId, 5 * 60 * 1000, 5000);
|
|
143
161
|
const parsedResult = await this.resultParser.parseFromUrl(result.full_zip_url, taskId, document, config.permissions.fileSystem);
|
|
144
|
-
parsedResult
|
|
145
|
-
parsedResults.push(
|
|
162
|
+
// Convert parsedResult to IKnowledgeDocument format
|
|
163
|
+
parsedResults.push({
|
|
164
|
+
id: document.id,
|
|
165
|
+
chunks: parsedResult.chunks,
|
|
166
|
+
metadata: parsedResult.metadata
|
|
167
|
+
});
|
|
146
168
|
}
|
|
147
169
|
}
|
|
148
170
|
return parsedResults;
|
|
@@ -158,6 +180,6 @@ __decorate([
|
|
|
158
180
|
], MinerUTransformerStrategy.prototype, "configService", void 0);
|
|
159
181
|
MinerUTransformerStrategy = __decorate([
|
|
160
182
|
Injectable(),
|
|
161
|
-
DocumentTransformerStrategy(
|
|
183
|
+
DocumentTransformerStrategy(MinerU)
|
|
162
184
|
], MinerUTransformerStrategy);
|
|
163
185
|
export { MinerUTransformerStrategy };
|
package/dist/lib/types.d.ts
CHANGED
|
@@ -1,18 +1,5 @@
|
|
|
1
1
|
import { TDocumentAsset, TDocumentTransformerConfig } from "@xpert-ai/plugin-sdk";
|
|
2
2
|
export declare const MinerU = "mineru";
|
|
3
|
-
/**
|
|
4
|
-
* Integration provider key for MinerU API credentials.
|
|
5
|
-
*
|
|
6
|
-
* IMPORTANT:
|
|
7
|
-
* Keep this different from the builtin toolset provider key (`mineru`) to avoid the platform
|
|
8
|
-
* treating builtin toolset authorization as an "integration-backed" flow (which may read
|
|
9
|
-
* `credentials.integration` and crash when credentials is null).
|
|
10
|
-
*/
|
|
11
|
-
export declare const MinerUIntegration = "mineru_api";
|
|
12
|
-
/**
|
|
13
|
-
* Document transformer provider key (distinct from toolset key).
|
|
14
|
-
*/
|
|
15
|
-
export declare const MinerUTransformer = "mineru_transformer";
|
|
16
3
|
export declare const ENV_MINERU_API_BASE_URL = "MINERU_API_BASE_URL";
|
|
17
4
|
export declare const ENV_MINERU_API_TOKEN = "MINERU_API_TOKEN";
|
|
18
5
|
export declare const ENV_MINERU_SERVER_TYPE = "MINERU_SERVER_TYPE";
|
|
@@ -23,6 +10,7 @@ export type TMinerUTransformerConfig = TDocumentTransformerConfig & {
|
|
|
23
10
|
enableTable?: boolean;
|
|
24
11
|
language?: 'en' | 'ch';
|
|
25
12
|
modelVersion?: 'vlm' | 'pipeline';
|
|
13
|
+
pageRanges?: string;
|
|
26
14
|
};
|
|
27
15
|
export type MinerUDocumentMetadata = {
|
|
28
16
|
parser: 'mineru';
|
|
@@ -31,12 +19,14 @@ export type MinerUDocumentMetadata = {
|
|
|
31
19
|
originPdfUrl?: string;
|
|
32
20
|
mineruBackend?: string;
|
|
33
21
|
mineruVersion?: string;
|
|
22
|
+
fullZipUrl?: string;
|
|
34
23
|
};
|
|
35
24
|
export type MinerUServerType = 'official' | 'self-hosted';
|
|
36
25
|
export type MinerUIntegrationOptions = {
|
|
37
26
|
apiUrl?: string;
|
|
38
27
|
apiKey?: string;
|
|
39
28
|
serverType?: MinerUServerType;
|
|
29
|
+
extraFormats?: string[];
|
|
40
30
|
};
|
|
41
31
|
export interface MineruSelfHostedImage {
|
|
42
32
|
name: string;
|
package/dist/lib/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/lib/types.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,0BAA0B,EAAE,MAAM,sBAAsB,CAAA;AAEjF,eAAO,MAAM,MAAM,WAAW,CAAA;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/lib/types.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,0BAA0B,EAAE,MAAM,sBAAsB,CAAA;AAEjF,eAAO,MAAM,MAAM,WAAW,CAAA;AAE9B,eAAO,MAAM,uBAAuB,wBAAwB,CAAA;AAC5D,eAAO,MAAM,oBAAoB,qBAAqB,CAAA;AACtD,eAAO,MAAM,sBAAsB,uBAAuB,CAAA;AAE1D,eAAO,MAAM,IAAI,m5GAsBhB,CAAA;AAED,MAAM,MAAM,wBAAwB,GAAG,0BAA0B,GAAG;IAClE,KAAK,CAAC,EAAE,OAAO,CAAA;IACf,aAAa,CAAC,EAAE,OAAO,CAAA;IACvB,WAAW,CAAC,EAAE,OAAO,CAAA;IACrB,QAAQ,CAAC,EAAE,IAAI,GAAG,IAAI,CAAA;IACtB,YAAY,CAAC,EAAE,KAAK,GAAG,UAAU,CAAA;IACjC,UAAU,CAAC,EAAE,MAAM,CAAA;CACpB,CAAA;AAED,MAAM,MAAM,sBAAsB,GAAG;IACnC,MAAM,EAAE,QAAQ,CAAC;IACjB,MAAM,CAAC,EAAE,cAAc,EAAE,CAAC;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB,CAAA;AAED,MAAM,MAAM,gBAAgB,GAAG,UAAU,GAAG,aAAa,CAAA;AAEzD,MAAM,MAAM,wBAAwB,GAAG;IACrC,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,UAAU,CAAC,EAAE,gBAAgB,CAAA;IAC7B,YAAY,CAAC,EAAE,MAAM,EAAE,CAAA;CACxB,CAAA;AAED,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,0BAA0B;IACzC,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,GAAG,CAAC;IAClB,MAAM,EAAE,qBAAqB,EAAE,CAAC;IAChC,GAAG,EAAE,GAAG,CAAC;IACT,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB"}
|
package/dist/lib/types.js
CHANGED
|
@@ -1,40 +1,27 @@
|
|
|
1
1
|
export const MinerU = 'mineru';
|
|
2
|
-
/**
|
|
3
|
-
* Integration provider key for MinerU API credentials.
|
|
4
|
-
*
|
|
5
|
-
* IMPORTANT:
|
|
6
|
-
* Keep this different from the builtin toolset provider key (`mineru`) to avoid the platform
|
|
7
|
-
* treating builtin toolset authorization as an "integration-backed" flow (which may read
|
|
8
|
-
* `credentials.integration` and crash when credentials is null).
|
|
9
|
-
*/
|
|
10
|
-
export const MinerUIntegration = 'mineru_api';
|
|
11
|
-
/**
|
|
12
|
-
* Document transformer provider key (distinct from toolset key).
|
|
13
|
-
*/
|
|
14
|
-
export const MinerUTransformer = 'mineru_transformer';
|
|
15
2
|
export const ENV_MINERU_API_BASE_URL = 'MINERU_API_BASE_URL';
|
|
16
3
|
export const ENV_MINERU_API_TOKEN = 'MINERU_API_TOKEN';
|
|
17
4
|
export const ENV_MINERU_SERVER_TYPE = 'MINERU_SERVER_TYPE';
|
|
18
|
-
export const icon = `<svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
|
|
19
|
-
<path d="M19.7238 3.86898C19.7238 4.57597 19.1502 5.1491 18.4427 5.1491C17.7352 5.1491 17.1616 4.57597 17.1616 3.86898C17.1616 3.16199 17.7352 2.58887 18.4427 2.58887C19.1502 2.58887 19.7238 3.16199 19.7238 3.86898Z" fill="url(#paint0_linear_8609_1645)"/>
|
|
20
|
-
<path d="M19.7238 3.86898C19.7238 4.57597 19.1502 5.1491 18.4427 5.1491C17.7352 5.1491 17.1616 4.57597 17.1616 3.86898C17.1616 3.16199 17.7352 2.58887 18.4427 2.58887C19.1502 2.58887 19.7238 3.16199 19.7238 3.86898Z" fill="#010101"/>
|
|
21
|
-
<path d="M15.3681 5.1491C15.3681 5.85609 14.7945 6.42921 14.087 6.42921C13.3794 6.42921 12.8059 5.85609 12.8059 5.1491C12.8059 4.44211 13.3794 3.86898 14.087 3.86898C14.7945 3.86898 15.3681 4.44211 15.3681 5.1491Z" fill="url(#paint1_linear_8609_1645)"/>
|
|
22
|
-
<path d="M15.3681 5.1491C15.3681 5.85609 14.7945 6.42921 14.087 6.42921C13.3794 6.42921 12.8059 5.85609 12.8059 5.1491C12.8059 4.44211 13.3794 3.86898 14.087 3.86898C14.7945 3.86898 15.3681 4.44211 15.3681 5.1491Z" fill="#010101"/>
|
|
23
|
-
<path fill-rule="evenodd" clip-rule="evenodd" d="M8.05175 11.2368C8.05175 13.4605 9.14375 15.4293 10.8211 16.6371C11.8241 15.7389 12.4551 14.4345 12.4551 12.9828V9.39673C12.4551 8.85661 12.8197 8.38448 13.3426 8.24757L19.8924 6.53265C20.6459 6.33534 21.3826 6.90341 21.3826 7.6818L21.3826 12.0452C21.3826 17.2179 17.1861 21.4111 12.0095 21.4111L11.9942 21.4111C6.81758 21.4111 2.62109 17.2179 2.62109 12.0452V9.03388C2.62109 8.49175 2.9884 8.01839 3.51385 7.88336L6.56677 7.09882C7.31904 6.9055 8.05175 7.47318 8.05175 8.24934V11.2368ZM3.9798 12.0452C3.9798 13.8476 4.57565 15.5108 5.58124 16.849C6.04996 17.4728 6.7655 17.8884 7.54573 17.8884V17.8884C8.28848 17.8884 8.9927 17.7236 9.62376 17.4286C7.83439 15.9596 6.69304 13.7314 6.69304 11.2368V8.46821L3.9798 9.16546V12.0452Z" fill="url(#paint2_linear_8609_1645)"/>
|
|
24
|
-
<path fill-rule="evenodd" clip-rule="evenodd" d="M8.05175 11.2368C8.05175 13.4605 9.14375 15.4293 10.8211 16.6371C11.8241 15.7389 12.4551 14.4345 12.4551 12.9828V9.39673C12.4551 8.85661 12.8197 8.38448 13.3426 8.24757L19.8924 6.53265C20.6459 6.33534 21.3826 6.90341 21.3826 7.6818L21.3826 12.0452C21.3826 17.2179 17.1861 21.4111 12.0095 21.4111L11.9942 21.4111C6.81758 21.4111 2.62109 17.2179 2.62109 12.0452V9.03388C2.62109 8.49175 2.9884 8.01839 3.51385 7.88336L6.56677 7.09882C7.31904 6.9055 8.05175 7.47318 8.05175 8.24934V11.2368ZM3.9798 12.0452C3.9798 13.8476 4.57565 15.5108 5.58124 16.849C6.04996 17.4728 6.7655 17.8884 7.54573 17.8884V17.8884C8.28848 17.8884 8.9927 17.7236 9.62376 17.4286C7.83439 15.9596 6.69304 13.7314 6.69304 11.2368V8.46821L3.9798 9.16546V12.0452Z" fill="#010101"/>
|
|
25
|
-
<defs>
|
|
26
|
-
<linearGradient id="paint0_linear_8609_1645" x1="14.3898" y1="8.36821" x2="13.1876" y2="19.4461" gradientUnits="userSpaceOnUse">
|
|
27
|
-
<stop stop-color="white"/>
|
|
28
|
-
<stop offset="1" stop-color="#2E2E2E"/>
|
|
29
|
-
</linearGradient>
|
|
30
|
-
<linearGradient id="paint1_linear_8609_1645" x1="14.3898" y1="8.36821" x2="13.1876" y2="19.4461" gradientUnits="userSpaceOnUse">
|
|
31
|
-
<stop stop-color="white"/>
|
|
32
|
-
<stop offset="1" stop-color="#2E2E2E"/>
|
|
33
|
-
</linearGradient>
|
|
34
|
-
<linearGradient id="paint2_linear_8609_1645" x1="14.3898" y1="8.36821" x2="13.1876" y2="19.4461" gradientUnits="userSpaceOnUse">
|
|
35
|
-
<stop stop-color="white"/>
|
|
36
|
-
<stop offset="1" stop-color="#2E2E2E"/>
|
|
37
|
-
</linearGradient>
|
|
38
|
-
</defs>
|
|
39
|
-
</svg>
|
|
5
|
+
export const icon = `<svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
|
|
6
|
+
<path d="M19.7238 3.86898C19.7238 4.57597 19.1502 5.1491 18.4427 5.1491C17.7352 5.1491 17.1616 4.57597 17.1616 3.86898C17.1616 3.16199 17.7352 2.58887 18.4427 2.58887C19.1502 2.58887 19.7238 3.16199 19.7238 3.86898Z" fill="url(#paint0_linear_8609_1645)"/>
|
|
7
|
+
<path d="M19.7238 3.86898C19.7238 4.57597 19.1502 5.1491 18.4427 5.1491C17.7352 5.1491 17.1616 4.57597 17.1616 3.86898C17.1616 3.16199 17.7352 2.58887 18.4427 2.58887C19.1502 2.58887 19.7238 3.16199 19.7238 3.86898Z" fill="#010101"/>
|
|
8
|
+
<path d="M15.3681 5.1491C15.3681 5.85609 14.7945 6.42921 14.087 6.42921C13.3794 6.42921 12.8059 5.85609 12.8059 5.1491C12.8059 4.44211 13.3794 3.86898 14.087 3.86898C14.7945 3.86898 15.3681 4.44211 15.3681 5.1491Z" fill="url(#paint1_linear_8609_1645)"/>
|
|
9
|
+
<path d="M15.3681 5.1491C15.3681 5.85609 14.7945 6.42921 14.087 6.42921C13.3794 6.42921 12.8059 5.85609 12.8059 5.1491C12.8059 4.44211 13.3794 3.86898 14.087 3.86898C14.7945 3.86898 15.3681 4.44211 15.3681 5.1491Z" fill="#010101"/>
|
|
10
|
+
<path fill-rule="evenodd" clip-rule="evenodd" d="M8.05175 11.2368C8.05175 13.4605 9.14375 15.4293 10.8211 16.6371C11.8241 15.7389 12.4551 14.4345 12.4551 12.9828V9.39673C12.4551 8.85661 12.8197 8.38448 13.3426 8.24757L19.8924 6.53265C20.6459 6.33534 21.3826 6.90341 21.3826 7.6818L21.3826 12.0452C21.3826 17.2179 17.1861 21.4111 12.0095 21.4111L11.9942 21.4111C6.81758 21.4111 2.62109 17.2179 2.62109 12.0452V9.03388C2.62109 8.49175 2.9884 8.01839 3.51385 7.88336L6.56677 7.09882C7.31904 6.9055 8.05175 7.47318 8.05175 8.24934V11.2368ZM3.9798 12.0452C3.9798 13.8476 4.57565 15.5108 5.58124 16.849C6.04996 17.4728 6.7655 17.8884 7.54573 17.8884V17.8884C8.28848 17.8884 8.9927 17.7236 9.62376 17.4286C7.83439 15.9596 6.69304 13.7314 6.69304 11.2368V8.46821L3.9798 9.16546V12.0452Z" fill="url(#paint2_linear_8609_1645)"/>
|
|
11
|
+
<path fill-rule="evenodd" clip-rule="evenodd" d="M8.05175 11.2368C8.05175 13.4605 9.14375 15.4293 10.8211 16.6371C11.8241 15.7389 12.4551 14.4345 12.4551 12.9828V9.39673C12.4551 8.85661 12.8197 8.38448 13.3426 8.24757L19.8924 6.53265C20.6459 6.33534 21.3826 6.90341 21.3826 7.6818L21.3826 12.0452C21.3826 17.2179 17.1861 21.4111 12.0095 21.4111L11.9942 21.4111C6.81758 21.4111 2.62109 17.2179 2.62109 12.0452V9.03388C2.62109 8.49175 2.9884 8.01839 3.51385 7.88336L6.56677 7.09882C7.31904 6.9055 8.05175 7.47318 8.05175 8.24934V11.2368ZM3.9798 12.0452C3.9798 13.8476 4.57565 15.5108 5.58124 16.849C6.04996 17.4728 6.7655 17.8884 7.54573 17.8884V17.8884C8.28848 17.8884 8.9927 17.7236 9.62376 17.4286C7.83439 15.9596 6.69304 13.7314 6.69304 11.2368V8.46821L3.9798 9.16546V12.0452Z" fill="#010101"/>
|
|
12
|
+
<defs>
|
|
13
|
+
<linearGradient id="paint0_linear_8609_1645" x1="14.3898" y1="8.36821" x2="13.1876" y2="19.4461" gradientUnits="userSpaceOnUse">
|
|
14
|
+
<stop stop-color="white"/>
|
|
15
|
+
<stop offset="1" stop-color="#2E2E2E"/>
|
|
16
|
+
</linearGradient>
|
|
17
|
+
<linearGradient id="paint1_linear_8609_1645" x1="14.3898" y1="8.36821" x2="13.1876" y2="19.4461" gradientUnits="userSpaceOnUse">
|
|
18
|
+
<stop stop-color="white"/>
|
|
19
|
+
<stop offset="1" stop-color="#2E2E2E"/>
|
|
20
|
+
</linearGradient>
|
|
21
|
+
<linearGradient id="paint2_linear_8609_1645" x1="14.3898" y1="8.36821" x2="13.1876" y2="19.4461" gradientUnits="userSpaceOnUse">
|
|
22
|
+
<stop stop-color="white"/>
|
|
23
|
+
<stop offset="1" stop-color="#2E2E2E"/>
|
|
24
|
+
</linearGradient>
|
|
25
|
+
</defs>
|
|
26
|
+
</svg>
|
|
40
27
|
`;
|
package/package.json
CHANGED
|
@@ -1,62 +1,54 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "@helloxiaohu/plugin-mineru",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"
|
|
5
|
-
"
|
|
6
|
-
"
|
|
7
|
-
"
|
|
8
|
-
"
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
"
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
"
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
"
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
"
|
|
42
|
-
"
|
|
43
|
-
"
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
"
|
|
47
|
-
"@langchain/
|
|
48
|
-
"
|
|
49
|
-
"
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
"
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
"lodash-es": "4.17.21",
|
|
56
|
-
"uuid": "8.3.2",
|
|
57
|
-
"zod": "3.25.67"
|
|
58
|
-
},
|
|
59
|
-
"devDependencies": {
|
|
60
|
-
"@types/unzipper": "^0.10.11"
|
|
61
|
-
}
|
|
62
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"name": "@helloxiaohu/plugin-mineru",
|
|
3
|
+
"version": "0.1.2",
|
|
4
|
+
"description": "MinerU document converter plugin for Xpert AI platform",
|
|
5
|
+
"license": "AGPL-3.0",
|
|
6
|
+
"repository": {
|
|
7
|
+
"type": "git",
|
|
8
|
+
"url": "https://github.com/xpert-ai/xpert-plugins.git"
|
|
9
|
+
},
|
|
10
|
+
"bugs": {
|
|
11
|
+
"url": "https://github.com/xpert-ai/xpert-plugins/issues"
|
|
12
|
+
},
|
|
13
|
+
"publishConfig": {
|
|
14
|
+
"access": "public"
|
|
15
|
+
},
|
|
16
|
+
"type": "module",
|
|
17
|
+
"main": "./dist/index.js",
|
|
18
|
+
"module": "./dist/index.js",
|
|
19
|
+
"types": "./dist/index.d.ts",
|
|
20
|
+
"exports": {
|
|
21
|
+
"./package.json": "./package.json",
|
|
22
|
+
".": {
|
|
23
|
+
"@xpert-plugins-starter/source": "./src/index.ts",
|
|
24
|
+
"types": "./dist/index.d.ts",
|
|
25
|
+
"import": "./dist/index.js",
|
|
26
|
+
"default": "./dist/index.js"
|
|
27
|
+
}
|
|
28
|
+
},
|
|
29
|
+
"files": [
|
|
30
|
+
"dist",
|
|
31
|
+
"!**/*.tsbuildinfo"
|
|
32
|
+
],
|
|
33
|
+
"dependencies": {
|
|
34
|
+
"form-data": "^4.0.0",
|
|
35
|
+
"tslib": "^2.3.0",
|
|
36
|
+
"unzipper": "0.12.3"
|
|
37
|
+
},
|
|
38
|
+
"peerDependencies": {
|
|
39
|
+
"@nestjs/config": "^4.0.2",
|
|
40
|
+
"zod": "3.25.67",
|
|
41
|
+
"@xpert-ai/plugin-sdk": "^3.6.2",
|
|
42
|
+
"@metad/contracts": "^3.6.2",
|
|
43
|
+
"@nestjs/common": "^11.1.6",
|
|
44
|
+
"axios": "1.12.2",
|
|
45
|
+
"nestjs-i18n": "10.5.1",
|
|
46
|
+
"chalk": "4.1.2",
|
|
47
|
+
"@langchain/core": "^0.3.72",
|
|
48
|
+
"lodash-es": "4.17.21",
|
|
49
|
+
"uuid": "8.3.2"
|
|
50
|
+
},
|
|
51
|
+
"devDependencies": {
|
|
52
|
+
"@types/unzipper": "^0.10.11"
|
|
53
|
+
}
|
|
54
|
+
}
|
|
@@ -1,167 +0,0 @@
|
|
|
1
|
-
import { ConfigService } from '@nestjs/config';
|
|
2
|
-
import { BuiltinToolset, IToolsetStrategy, FileSystemPermission, ISchemaSecretField } from '@xpert-ai/plugin-sdk';
|
|
3
|
-
import { MinerUResultParserService } from './result-parser.service.js';
|
|
4
|
-
import { MinerUToolsetConfig } from './mineru.toolset.js';
|
|
5
|
-
/**
|
|
6
|
-
* ToolsetStrategy for MinerU PDF parser tool
|
|
7
|
-
* Registers MinerU as a toolset that can be used in agent workflows
|
|
8
|
-
*/
|
|
9
|
-
export declare class MinerUToolsetStrategy implements IToolsetStrategy<MinerUToolsetConfig> {
|
|
10
|
-
private readonly configService;
|
|
11
|
-
private readonly resultParser;
|
|
12
|
-
/**
|
|
13
|
-
* Metadata for MinerU toolset
|
|
14
|
-
*/
|
|
15
|
-
meta: {
|
|
16
|
-
author: string;
|
|
17
|
-
tags: string[];
|
|
18
|
-
name: string;
|
|
19
|
-
label: {
|
|
20
|
-
en_US: string;
|
|
21
|
-
zh_Hans: string;
|
|
22
|
-
};
|
|
23
|
-
description: {
|
|
24
|
-
en_US: string;
|
|
25
|
-
zh_Hans: string;
|
|
26
|
-
};
|
|
27
|
-
icon: {
|
|
28
|
-
svg: string;
|
|
29
|
-
color: string;
|
|
30
|
-
};
|
|
31
|
-
configSchema: {
|
|
32
|
-
type: string;
|
|
33
|
-
properties: {
|
|
34
|
-
/**
|
|
35
|
-
* NOTE:
|
|
36
|
-
* We intentionally keep MinerU as a "self-contained" toolset that stores its own API credentials,
|
|
37
|
-
* instead of relying on the platform IntegrationPermission flow.
|
|
38
|
-
*
|
|
39
|
-
* Reason: during the built-in toolset authorization step, the platform may send `credentials = null`,
|
|
40
|
-
* and backend may access `credentials.integration`, causing a 500 (`Cannot read properties of null (reading 'integration')`).
|
|
41
|
-
* Defining API fields directly ensures the authorization UI renders fields and always submits an object.
|
|
42
|
-
*/
|
|
43
|
-
apiUrl: {
|
|
44
|
-
type: string;
|
|
45
|
-
title: {
|
|
46
|
-
en_US: string;
|
|
47
|
-
zh_Hans: string;
|
|
48
|
-
};
|
|
49
|
-
description: {
|
|
50
|
-
en_US: string;
|
|
51
|
-
zh_Hans: string;
|
|
52
|
-
};
|
|
53
|
-
default: string;
|
|
54
|
-
};
|
|
55
|
-
apiKey: {
|
|
56
|
-
type: string;
|
|
57
|
-
title: {
|
|
58
|
-
en_US: string;
|
|
59
|
-
zh_Hans: string;
|
|
60
|
-
};
|
|
61
|
-
description: {
|
|
62
|
-
en_US: string;
|
|
63
|
-
zh_Hans: string;
|
|
64
|
-
};
|
|
65
|
-
'x-ui': ISchemaSecretField;
|
|
66
|
-
};
|
|
67
|
-
serverType: {
|
|
68
|
-
type: string;
|
|
69
|
-
title: {
|
|
70
|
-
en_US: string;
|
|
71
|
-
zh_Hans: string;
|
|
72
|
-
};
|
|
73
|
-
description: {
|
|
74
|
-
en_US: string;
|
|
75
|
-
zh_Hans: string;
|
|
76
|
-
};
|
|
77
|
-
enum: string[];
|
|
78
|
-
default: string;
|
|
79
|
-
};
|
|
80
|
-
isOcr: {
|
|
81
|
-
type: string;
|
|
82
|
-
title: {
|
|
83
|
-
en_US: string;
|
|
84
|
-
zh_Hans: string;
|
|
85
|
-
};
|
|
86
|
-
description: {
|
|
87
|
-
en_US: string;
|
|
88
|
-
zh_Hans: string;
|
|
89
|
-
};
|
|
90
|
-
default: boolean;
|
|
91
|
-
};
|
|
92
|
-
enableFormula: {
|
|
93
|
-
type: string;
|
|
94
|
-
title: {
|
|
95
|
-
en_US: string;
|
|
96
|
-
zh_Hans: string;
|
|
97
|
-
};
|
|
98
|
-
description: {
|
|
99
|
-
en_US: string;
|
|
100
|
-
zh_Hans: string;
|
|
101
|
-
};
|
|
102
|
-
default: boolean;
|
|
103
|
-
};
|
|
104
|
-
enableTable: {
|
|
105
|
-
type: string;
|
|
106
|
-
title: {
|
|
107
|
-
en_US: string;
|
|
108
|
-
zh_Hans: string;
|
|
109
|
-
};
|
|
110
|
-
description: {
|
|
111
|
-
en_US: string;
|
|
112
|
-
zh_Hans: string;
|
|
113
|
-
};
|
|
114
|
-
default: boolean;
|
|
115
|
-
};
|
|
116
|
-
language: {
|
|
117
|
-
type: string;
|
|
118
|
-
title: {
|
|
119
|
-
en_US: string;
|
|
120
|
-
zh_Hans: string;
|
|
121
|
-
};
|
|
122
|
-
description: {
|
|
123
|
-
en_US: string;
|
|
124
|
-
zh_Hans: string;
|
|
125
|
-
};
|
|
126
|
-
enum: string[];
|
|
127
|
-
default: string;
|
|
128
|
-
};
|
|
129
|
-
modelVersion: {
|
|
130
|
-
type: string;
|
|
131
|
-
title: {
|
|
132
|
-
en_US: string;
|
|
133
|
-
zh_Hans: string;
|
|
134
|
-
};
|
|
135
|
-
description: {
|
|
136
|
-
en_US: string;
|
|
137
|
-
zh_Hans: string;
|
|
138
|
-
};
|
|
139
|
-
enum: string[];
|
|
140
|
-
default: string;
|
|
141
|
-
};
|
|
142
|
-
};
|
|
143
|
-
required: string[];
|
|
144
|
-
};
|
|
145
|
-
};
|
|
146
|
-
/**
|
|
147
|
-
* Permissions required by MinerU toolset
|
|
148
|
-
*/
|
|
149
|
-
readonly permissions: FileSystemPermission[];
|
|
150
|
-
constructor(configService: ConfigService, resultParser: MinerUResultParserService);
|
|
151
|
-
/**
|
|
152
|
-
* Validate toolset configuration
|
|
153
|
-
*/
|
|
154
|
-
validateConfig(config: MinerUToolsetConfig | null | undefined): Promise<void>;
|
|
155
|
-
/**
|
|
156
|
-
* Create MinerU toolset instance
|
|
157
|
-
* Note: config may be null/undefined during authorization phase
|
|
158
|
-
*/
|
|
159
|
-
create(config: MinerUToolsetConfig | null | undefined): Promise<BuiltinToolset>;
|
|
160
|
-
/**
|
|
161
|
-
* Create tools for MinerU toolset
|
|
162
|
-
* Tools are created dynamically in MinerUToolset.initTools()
|
|
163
|
-
* based on the toolset credentials/configuration
|
|
164
|
-
*/
|
|
165
|
-
createTools(): any[];
|
|
166
|
-
}
|
|
167
|
-
//# sourceMappingURL=mineru-toolset.strategy.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"mineru-toolset.strategy.d.ts","sourceRoot":"","sources":["../../src/lib/mineru-toolset.strategy.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAC/C,OAAO,EACL,cAAc,EACd,gBAAgB,EAEhB,oBAAoB,EACpB,kBAAkB,EACnB,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AACvE,OAAO,EAAiB,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAGzE;;;GAGG;AACH,qBAEa,qBAAsB,YAAW,gBAAgB,CAAC,mBAAmB,CAAC;IA8J/E,OAAO,CAAC,QAAQ,CAAC,aAAa;IAE9B,OAAO,CAAC,QAAQ,CAAC,YAAY;IA/J/B;;OAEG;IACH,IAAI;;;;;;;;;;;;;;;;;;;gBAoBE;;;;;;;;mBAQG;;;;;;;;;;;;;;;;;;;;;;;4BAuBQ,kBAAkB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAwFjC;IAEF;;OAEG;IACH,QAAQ,CAAC,WAAW,yBAMlB;gBAIiB,aAAa,EAAE,aAAa,EAE5B,YAAY,EAAE,yBAAyB;IAG1D;;OAEG;IACH,cAAc,CAAC,MAAM,EAAE,mBAAmB,GAAG,IAAI,GAAG,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC;IAY7E;;;OAGG;IACG,MAAM,CAAC,MAAM,EAAE,mBAAmB,GAAG,IAAI,GAAG,SAAS,GAAG,OAAO,CAAC,cAAc,CAAC;IAYrF;;;;OAIG;IACH,WAAW;CAKZ"}
|