@chenchaolong/plugin-mineru 0.0.11 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -6
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -5
- package/dist/lib/mineru.client.d.ts.map +1 -1
- package/dist/lib/mineru.client.js +22 -4
- package/dist/lib/mineru.plugin.d.ts.map +1 -1
- package/dist/lib/mineru.plugin.js +0 -2
- package/dist/lib/result-parser.service.d.ts.map +1 -1
- package/dist/lib/result-parser.service.js +3 -1
- package/dist/lib/transformer-mineru.strategy.d.ts.map +1 -1
- package/dist/lib/transformer-mineru.strategy.js +2 -14
- package/package.json +5 -2
package/README.md
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
# Xpert Plugin: MinerU
|
|
2
2
|
|
|
3
|
-
`@
|
|
3
|
+
`@chenchaolong/plugin-mineru` is a MinerU document converter plugin for the [Xpert AI](https://github.com/xpert-ai/xpert) platform, providing extraction capabilities from PDF to Markdown and structured JSON. The plugin includes built-in MinerU integration strategies, document conversion strategies, and result parsing services, enabling secure access to the MinerU API in automated workflows, polling task status, and writing parsed content and attachment resources to the platform file system.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
7
7
|
```bash
|
|
8
|
-
pnpm add @xpert-ai/plugin-mineru
|
|
8
|
+
pnpm add @chenchaolong/plugin-mineru
|
|
9
9
|
# or
|
|
10
|
-
npm install @xpert-ai/plugin-mineru
|
|
10
|
+
npm install @chenchaolong/plugin-mineru
|
|
11
11
|
```
|
|
12
12
|
|
|
13
13
|
> **Note**: This plugin depends on `@xpert-ai/plugin-sdk`, `@nestjs/common@^11`, `@nestjs/config@^4`, `@metad/contracts`, `axios@1`, `chalk@4`, `@langchain/core@^0.3.72`, and `uuid@8` as peerDependencies. Please ensure these packages are installed in your host project.
|
|
@@ -39,7 +39,7 @@ npm install @xpert-ai/plugin-mineru
|
|
|
39
39
|
Configure the plugin in your host service's plugin registration process:
|
|
40
40
|
|
|
41
41
|
```sh .env
|
|
42
|
-
PLUGINS=@xpert-ai/plugin-mineru
|
|
42
|
+
PLUGINS=@chenchaolong/plugin-mineru
|
|
43
43
|
```
|
|
44
44
|
|
|
45
45
|
The plugin returns the NestJS module `MinerUPlugin` in the `register` hook and logs messages during the `onStart`/`onStop` lifecycle.
|
|
@@ -84,14 +84,26 @@ The parser generates:
|
|
|
84
84
|
|
|
85
85
|
The returned `Document<ChunkMetadata>` array currently defaults to a single chunk containing the full Markdown; you can split it as needed.
|
|
86
86
|
|
|
87
|
+
## Local Deployment
|
|
88
|
+
|
|
89
|
+
For self-hosted MinerU deployments, see [LOCAL_SETUP.md](./LOCAL_SETUP.md) for detailed instructions on:
|
|
90
|
+
- Starting MinerU server using Docker
|
|
91
|
+
- Installing from source code
|
|
92
|
+
- Configuration and troubleshooting
|
|
93
|
+
|
|
94
|
+
Quick start with Docker:
|
|
95
|
+
```bash
|
|
96
|
+
docker run -d --name mineru -p 9960:9960 opendatalab/mineru:latest
|
|
97
|
+
```
|
|
98
|
+
|
|
87
99
|
## Development & Debugging
|
|
88
100
|
|
|
89
101
|
Run the following commands in the repository root to build and test locally:
|
|
90
102
|
|
|
91
103
|
```bash
|
|
92
104
|
npm install
|
|
93
|
-
npx nx build @xpert-ai/plugin-mineru
|
|
94
|
-
npx nx test @xpert-ai/plugin-mineru
|
|
105
|
+
npx nx build @chenchaolong/plugin-mineru
|
|
106
|
+
npx nx test @chenchaolong/plugin-mineru
|
|
95
107
|
```
|
|
96
108
|
|
|
97
109
|
TypeScript build artifacts are output to `packages/mineru/dist`. Before publishing, ensure `package.json`, type declarations, and runtime files are in sync.
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAcxD,QAAA,MAAM,YAAY,gDAChB,CAAC;AAEH,QAAA,MAAM,MAAM,EAAE,WAAW,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,YAAY,CAAC,CA4BrD,CAAC;AAEF,eAAe,MAAM,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
2
|
import { readFileSync } from 'fs';
|
|
3
|
-
import {
|
|
4
|
-
import { dirname, join } from 'path';
|
|
3
|
+
import { join } from 'path';
|
|
5
4
|
import { MinerUPlugin } from './lib/mineru.plugin.js';
|
|
6
5
|
import { icon } from './lib/types.js';
|
|
7
|
-
|
|
8
|
-
const __dirname =
|
|
6
|
+
import { getModuleMeta } from './lib/path-meta.js';
|
|
7
|
+
const { __filename, __dirname } = getModuleMeta(import.meta);
|
|
9
8
|
const packageJson = JSON.parse(readFileSync(join(__dirname, '../package.json'), 'utf8'));
|
|
10
9
|
const ConfigSchema = z.object({});
|
|
11
10
|
const plugin = {
|
|
@@ -21,7 +20,7 @@ const plugin = {
|
|
|
21
20
|
description: 'Provide PDF to Markdown and JSON transformation functionality',
|
|
22
21
|
keywords: ['integration', 'pdf', 'markdown', 'json', 'transformer'],
|
|
23
22
|
author: 'XpertAI Team',
|
|
24
|
-
homepage: 'https://www.npmjs.com/package/@
|
|
23
|
+
homepage: 'https://www.npmjs.com/package/@chenchaolong/plugin-mineru',
|
|
25
24
|
},
|
|
26
25
|
config: {
|
|
27
26
|
schema: ConfigSchema,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mineru.client.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAEhD,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAC/C,OAAO,EAAmB,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACrE,OAAc,EAAE,aAAa,EAAE,MAAM,OAAO,CAAC;AAK7C,OAAO,EAIL,wBAAwB,EAExB,0BAA0B,EAC1B,gBAAgB,EACjB,MAAM,YAAY,CAAC;AAIpB,UAAU,iBAAiB;IACzB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,mEAAmE;IACnE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yEAAyE;IACzE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,2EAA2E;IAC3E,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,4EAA4E;IAC5E,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED,UAAU,mBAAmB;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,sBAAsB;IAC9B,KAAK,EAAE,mBAAmB,EAAE,CAAC;IAC7B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,UAAU,iBAAiB;IACzB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AASD,qBAAa,YAAY;IAWrB,OAAO,CAAC,QAAQ,CAAC,aAAa;IAC9B,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC;IAX/B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAiC;IACxD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAS;IAChC,SAAgB,UAAU,EAAE,gBAAgB,CAAC;IAC7C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAiD;IAE5E,IAAI,UAAU,IAAI,YAAY,GAAG,SAAS,CAEzC;gBAEkB,aAAa,EAAE,aAAa,EAC5B,WAAW,CAAC,EAAE;QACvB,UAAU,CAAC,EAAE,YAAY,CAAC;QAC1B,WAAW,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,wBAAwB,CAAC,CAAC,CAAC;KACjE;IAkBP;;;OAGG;IACG,UAAU,CAAC,OAAO,EAAE,iB
AAiB,GAAG,OAAO,CAAC;QAAE,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IAYzE;;OAEG;IACG,eAAe,CAAC,OAAO,EAAE,sBAAsB,GAAG,OAAO,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"mineru.client.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAEhD,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAC/C,OAAO,EAAmB,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACrE,OAAc,EAAE,aAAa,EAAE,MAAM,OAAO,CAAC;AAK7C,OAAO,EAIL,wBAAwB,EAExB,0BAA0B,EAC1B,gBAAgB,EACjB,MAAM,YAAY,CAAC;AAIpB,UAAU,iBAAiB;IACzB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,mEAAmE;IACnE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yEAAyE;IACzE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,2EAA2E;IAC3E,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,4EAA4E;IAC5E,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED,UAAU,mBAAmB;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,sBAAsB;IAC9B,KAAK,EAAE,mBAAmB,EAAE,CAAC;IAC7B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,UAAU,iBAAiB;IACzB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AASD,qBAAa,YAAY;IAWrB,OAAO,CAAC,QAAQ,CAAC,aAAa;IAC9B,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC;IAX/B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAiC;IACxD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAS;IAChC,SAAgB,UAAU,EAAE,gBAAgB,CAAC;IAC7C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAiD;IAE5E,IAAI,UAAU,IAAI,YAAY,GAAG,SAAS,CAEzC;gBAEkB,aAAa,EAAE,aAAa,EAC5B,WAAW,CAAC,EAAE;QACvB,UAAU,CAAC,EAAE,YAAY,CAAC;QAC1B,WAAW,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,wBAAwB,CAAC,CAAC,CAAC;KACjE;IAkBP;;;OAGG;IACG,UAAU,CAAC,OAAO,EAAE,iB
AAiB,GAAG,OAAO,CAAC;QAAE,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IAYzE;;OAEG;IACG,eAAe,CAAC,OAAO,EAAE,sBAAsB,GAAG,OAAO,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;IA+CzG,iBAAiB,CAAC,MAAM,EAAE,MAAM,GAAG,0BAA0B,GAAG,SAAS;IAOzE;;OAEG;IACG,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC;QACxE,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,CAAC;IAoBF;;OAEG;IACG,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC;IAiBnD;;OAEG;IACG,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,SAAgB,EAAE,UAAU,SAAO,GAAG,OAAO,CAAC,GAAG,CAAC;IAsB7F,OAAO,CAAC,cAAc;IAMtB,OAAO,CAAC,iBAAiB;IAczB,OAAO,CAAC,kBAAkB;IAyB1B,OAAO,CAAC,sBAAsB;IAI9B,OAAO,CAAC,gBAAgB;IAIxB,OAAO,CAAC,WAAW;IAQnB,OAAO,CAAC,kBAAkB;IAO1B,OAAO,CAAC,oBAAoB;YAYd,kBAAkB;YAmClB,oBAAoB;YASpB,qBAAqB;YA0DrB,uBAAuB;IA+CrC,OAAO,CAAC,iBAAiB;IAgBzB,OAAO,CAAC,2BAA2B;IAenC,OAAO,CAAC,6BAA6B;IAcrC,OAAO,CAAC,iBAAiB;IAQzB,OAAO,CAAC,aAAa;IAcrB,OAAO,CAAC,iBAAiB;IAQzB,OAAO,CAAC,eAAe;YAIT,YAAY;IAkB1B,OAAO,CAAC,eAAe;IA0BvB,wBAAwB,IAAI,OAAO,CAAC,aAAa,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;IAKtD,wBAAwB;CAU/B"}
|
|
@@ -46,6 +46,10 @@ export class MinerUClient {
|
|
|
46
46
|
*/
|
|
47
47
|
async createBatchTask(options) {
|
|
48
48
|
this.ensureOfficial('createBatchTask');
|
|
49
|
+
// Validate files is an array
|
|
50
|
+
if (!Array.isArray(options.files)) {
|
|
51
|
+
throw new Error('MinerU createBatchTask requires files to be an array');
|
|
52
|
+
}
|
|
49
53
|
const url = this.buildApiUrl('extract', 'task', 'batch');
|
|
50
54
|
const body = {
|
|
51
55
|
files: options.files.map((file) => {
|
|
@@ -67,8 +71,15 @@ export class MinerUClient {
|
|
|
67
71
|
body.language = options.language;
|
|
68
72
|
if (options.modelVersion)
|
|
69
73
|
body.model_version = options.modelVersion;
|
|
70
|
-
if
|
|
71
|
-
|
|
74
|
+
// Ensure extraFormats is an array if provided
|
|
75
|
+
if (options.extraFormats) {
|
|
76
|
+
if (Array.isArray(options.extraFormats)) {
|
|
77
|
+
body.extra_formats = options.extraFormats;
|
|
78
|
+
}
|
|
79
|
+
else {
|
|
80
|
+
this.logger.warn('extraFormats is not an array, ignoring');
|
|
81
|
+
}
|
|
82
|
+
}
|
|
72
83
|
if (options.callbackUrl)
|
|
73
84
|
body.callback = options.callbackUrl;
|
|
74
85
|
if (options.seed)
|
|
@@ -231,8 +242,15 @@ export class MinerUClient {
|
|
|
231
242
|
body.data_id = options.dataId;
|
|
232
243
|
if (options.pageRanges)
|
|
233
244
|
body.page_ranges = options.pageRanges;
|
|
234
|
-
if
|
|
235
|
-
|
|
245
|
+
// Ensure extraFormats is an array if provided
|
|
246
|
+
if (options.extraFormats) {
|
|
247
|
+
if (Array.isArray(options.extraFormats)) {
|
|
248
|
+
body.extra_formats = options.extraFormats;
|
|
249
|
+
}
|
|
250
|
+
else {
|
|
251
|
+
this.logger.warn('extraFormats is not an array, ignoring');
|
|
252
|
+
}
|
|
253
|
+
}
|
|
236
254
|
if (options.callbackUrl)
|
|
237
255
|
body.callback = options.callbackUrl;
|
|
238
256
|
if (options.seed)
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mineru.plugin.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.plugin.ts"],"names":[],"mappings":"AACA,OAAO,EAAqB,kBAAkB,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;
|
|
1
|
+
{"version":3,"file":"mineru.plugin.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.plugin.ts"],"names":[],"mappings":"AACA,OAAO,EAAqB,kBAAkB,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AAO/F,qBAiBa,YAAa,YAAW,kBAAkB,EAAE,gBAAgB;IAExE,OAAO,CAAC,UAAU,CAAQ;IAE1B;;OAEG;IACH,iBAAiB,IAAI,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAMzC;;OAEG;IACH,eAAe,IAAI,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;CAKvC"}
|
|
@@ -7,7 +7,6 @@ import { MinerUTransformerStrategy } from './transformer-mineru.strategy.js';
|
|
|
7
7
|
import { MinerUResultParserService } from './result-parser.service.js';
|
|
8
8
|
import { MinerUIntegrationStrategy } from './integration.strategy.js';
|
|
9
9
|
import { MinerUController } from './mineru.controller.js';
|
|
10
|
-
import { MinerUToolsetStrategy } from './mineru-toolset.strategy.js';
|
|
11
10
|
let MinerUPlugin = MinerUPlugin_1 = class MinerUPlugin {
|
|
12
11
|
constructor() {
|
|
13
12
|
// We disable by default additional logging for each event to avoid cluttering the logs
|
|
@@ -41,7 +40,6 @@ MinerUPlugin = MinerUPlugin_1 = __decorate([
|
|
|
41
40
|
providers: [
|
|
42
41
|
MinerUIntegrationStrategy,
|
|
43
42
|
MinerUTransformerStrategy,
|
|
44
|
-
MinerUToolsetStrategy,
|
|
45
43
|
MinerUResultParserService,
|
|
46
44
|
],
|
|
47
45
|
controllers: [
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"result-parser.service.d.ts","sourceRoot":"","sources":["../../src/lib/result-parser.service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AACrD,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AAEtD,OAAO,EACL,aAAa,EAEb,YAAY,EACb,MAAM,sBAAsB,CAAC;AAK9B,OAAO,EAEL,sBAAsB,EACtB,0BAA0B,EAC3B,MAAM,YAAY,CAAC;AAEpB,qBACa,yBAAyB;IACpC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAA8C;IAE/D,YAAY,CAChB,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,OAAO,CAAC,kBAAkB,CAAC,EACrC,UAAU,EAAE,YAAY,GACvB,OAAO,CAAC;QACT,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,MAAM,EAAE,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;QAClC,QAAQ,EAAE,sBAAsB,CAAC;KAClC,CAAC;IAqFI,cAAc,CAClB,MAAM,EAAE,0BAA0B,EAClC,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,OAAO,CAAC,kBAAkB,CAAC,EACrC,UAAU,EAAE,YAAY,GACvB,OAAO,CAAC;QACT,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,MAAM,EAAE,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;QAClC,QAAQ,EAAE,sBAAsB,CAAC;KAClC,CAAC;
|
|
1
|
+
{"version":3,"file":"result-parser.service.d.ts","sourceRoot":"","sources":["../../src/lib/result-parser.service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AACrD,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AAEtD,OAAO,EACL,aAAa,EAEb,YAAY,EACb,MAAM,sBAAsB,CAAC;AAK9B,OAAO,EAEL,sBAAsB,EACtB,0BAA0B,EAC3B,MAAM,YAAY,CAAC;AAEpB,qBACa,yBAAyB;IACpC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAA8C;IAE/D,YAAY,CAChB,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,OAAO,CAAC,kBAAkB,CAAC,EACrC,UAAU,EAAE,YAAY,GACvB,OAAO,CAAC;QACT,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,MAAM,EAAE,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;QAClC,QAAQ,EAAE,sBAAsB,CAAC;KAClC,CAAC;IAqFI,cAAc,CAClB,MAAM,EAAE,0BAA0B,EAClC,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,OAAO,CAAC,kBAAkB,CAAC,EACrC,UAAU,EAAE,YAAY,GACvB,OAAO,CAAC;QACT,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,MAAM,EAAE,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;QAClC,QAAQ,EAAE,sBAAsB,CAAC;KAClC,CAAC;CAoDH"}
|
|
@@ -99,7 +99,9 @@ let MinerUResultParserService = MinerUResultParserService_1 = class MinerUResult
|
|
|
99
99
|
};
|
|
100
100
|
const assets = [];
|
|
101
101
|
const pathMap = new Map();
|
|
102
|
-
|
|
102
|
+
// Ensure images is an array before iterating
|
|
103
|
+
const images = Array.isArray(result.images) ? result.images : [];
|
|
104
|
+
for (const image of images) {
|
|
103
105
|
const filePath = join(document.folder || '', 'images', image.name);
|
|
104
106
|
const url = await fileSystem.writeFile(filePath, Buffer.from(image.dataUrl.split(',')[1], 'base64'));
|
|
105
107
|
pathMap.set(`images/${image.name}`, url);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"transformer-mineru.strategy.d.ts","sourceRoot":"","sources":["../../src/lib/transformer-mineru.strategy.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAA;AAG/D,OAAO,EACL,aAAa,EAEb,oBAAoB,EACpB,4BAA4B,EAC5B,qBAAqB,EACtB,MAAM,sBAAsB,CAAA;AAI7B,OAAO,EAAgB,wBAAwB,EAAE,MAAM,YAAY,CAAA;AAEnE,qBAEa,yBAA0B,YAAW,4BAA4B,CAAC,wBAAwB,CAAC;IAEtG,OAAO,CAAC,QAAQ,CAAC,YAAY,CAA2B;IAGxD,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAe;IAE7C,QAAQ,CAAC,WAAW,mDAWnB;IAED,QAAQ,CAAC,IAAI;;;;;;;;;;;kBAWM,QAAQ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAwE1B;IAED,cAAc,CAAC,MAAM,EAAE,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC;IAIpC,kBAAkB,CACtB,SAAS,EAAE,OAAO,CAAC,kBAAkB,CAAC,EAAE,EACxC,MAAM,EAAE,wBAAwB,GAC/B,OAAO,CAAC,OAAO,CAAC,kBAAkB,CAAC,aAAa,CAAC,CAAC,EAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"transformer-mineru.strategy.d.ts","sourceRoot":"","sources":["../../src/lib/transformer-mineru.strategy.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAA;AAG/D,OAAO,EACL,aAAa,EAEb,oBAAoB,EACpB,4BAA4B,EAC5B,qBAAqB,EACtB,MAAM,sBAAsB,CAAA;AAI7B,OAAO,EAAgB,wBAAwB,EAAE,MAAM,YAAY,CAAA;AAEnE,qBAEa,yBAA0B,YAAW,4BAA4B,CAAC,wBAAwB,CAAC;IAEtG,OAAO,CAAC,QAAQ,CAAC,YAAY,CAA2B;IAGxD,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAe;IAE7C,QAAQ,CAAC,WAAW,mDAWnB;IAED,QAAQ,CAAC,IAAI;;;;;;;;;;;kBAWM,QAAQ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAwE1B;IAED,cAAc,CAAC,MAAM,EAAE,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC;IAIpC,kBAAkB,CACtB,SAAS,EAAE,OAAO,CAAC,kBAAkB,CAAC,EAAE,EACxC,MAAM,EAAE,wBAAwB,GAC/B,OAAO,CAAC,OAAO,CAAC,kBAAkB,CAAC,aAAa,CAAC,CAAC,EAAE,CAAC;CAsDzD"}
|
|
@@ -126,13 +126,7 @@ let MinerUTransformerStrategy = class MinerUTransformerStrategy {
|
|
|
126
126
|
const result = mineru.getSelfHostedTask(taskId);
|
|
127
127
|
const parsedResult = await this.resultParser.parseLocalTask(result, taskId, document, config.permissions.fileSystem);
|
|
128
128
|
parsedResult.id = document.id;
|
|
129
|
-
|
|
130
|
-
parsedResults.push({
|
|
131
|
-
id: parsedResult.id,
|
|
132
|
-
chunks: parsedResult.chunks,
|
|
133
|
-
// Use the metadata from chunks, not the document metadata
|
|
134
|
-
metadata: parsedResult.chunks[0]?.metadata || {},
|
|
135
|
-
});
|
|
129
|
+
parsedResults.push(parsedResult);
|
|
136
130
|
}
|
|
137
131
|
else {
|
|
138
132
|
const { taskId } = await mineru.createTask({
|
|
@@ -148,13 +142,7 @@ let MinerUTransformerStrategy = class MinerUTransformerStrategy {
|
|
|
148
142
|
const result = await mineru.waitForTask(taskId, 5 * 60 * 1000, 5000);
|
|
149
143
|
const parsedResult = await this.resultParser.parseFromUrl(result.full_zip_url, taskId, document, config.permissions.fileSystem);
|
|
150
144
|
parsedResult.id = document.id;
|
|
151
|
-
|
|
152
|
-
parsedResults.push({
|
|
153
|
-
id: parsedResult.id,
|
|
154
|
-
chunks: parsedResult.chunks,
|
|
155
|
-
// Use the metadata from chunks, not the document metadata
|
|
156
|
-
metadata: parsedResult.chunks[0]?.metadata || {},
|
|
157
|
-
});
|
|
145
|
+
parsedResults.push(parsedResult);
|
|
158
146
|
}
|
|
159
147
|
}
|
|
160
148
|
return parsedResults;
|
package/package.json
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@chenchaolong/plugin-mineru",
|
|
3
|
-
"version": "0.0.11",
|
|
4
|
-
"license": "AGPL-3.0",
|
|
3
|
+
"version": "0.0.12",
|
|
5
4
|
"repository": {
|
|
6
5
|
"type": "git",
|
|
7
6
|
"url": "https://github.com/xpert-ai/xpert-plugins.git"
|
|
@@ -16,6 +15,7 @@
|
|
|
16
15
|
"exports": {
|
|
17
16
|
"./package.json": "./package.json",
|
|
18
17
|
".": {
|
|
18
|
+
"@xpert-plugins-starter/source": "./src/index.ts",
|
|
19
19
|
"types": "./dist/index.d.ts",
|
|
20
20
|
"import": "./dist/index.js",
|
|
21
21
|
"default": "./dist/index.js"
|
|
@@ -45,5 +45,8 @@
|
|
|
45
45
|
},
|
|
46
46
|
"devDependencies": {
|
|
47
47
|
"@types/unzipper": "^0.10.11"
|
|
48
|
+
},
|
|
49
|
+
"publishConfig": {
|
|
50
|
+
"access": "public"
|
|
48
51
|
}
|
|
49
52
|
}
|