@chenchaolong/plugin-mineru 0.0.11 → 0.0.13

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,13 +1,13 @@
1
1
  # Xpert Plugin: MinerU
2
2
 
3
- `@xpert-ai/plugin-mineru` is a MinerU document converter plugin for the [Xpert AI](https://github.com/xpert-ai/xpert) platform, providing extraction capabilities from PDF to Markdown and structured JSON. The plugin includes built-in MinerU integration strategies, document conversion strategies, and result parsing services, enabling secure access to the MinerU API in automated workflows, polling task status, and writing parsed content and attachment resources to the platform file system.
3
+ `@chenchaolong/plugin-mineru` is a MinerU document converter plugin for the [Xpert AI](https://github.com/xpert-ai/xpert) platform, providing extraction capabilities from PDF to Markdown and structured JSON. The plugin includes built-in MinerU integration strategies, document conversion strategies, and result parsing services, enabling secure access to the MinerU API in automated workflows, polling task status, and writing parsed content and attachment resources to the platform file system.
4
4
 
5
5
  ## Installation
6
6
 
7
7
  ```bash
8
- pnpm add @xpert-ai/plugin-mineru
8
+ pnpm add @chenchaolong/plugin-mineru
9
9
  # or
10
- npm install @xpert-ai/plugin-mineru
10
+ npm install @chenchaolong/plugin-mineru
11
11
  ```
12
12
 
13
13
  > **Note**: This plugin depends on `@xpert-ai/plugin-sdk`, `@nestjs/common@^11`, `@nestjs/config@^4`, `@metad/contracts`, `axios@1`, `chalk@4`, `@langchain/core@^0.3.72`, and `uuid@8` as peerDependencies. Please ensure these packages are installed in your host project.
@@ -39,7 +39,7 @@ npm install @xpert-ai/plugin-mineru
39
39
  Configure the plugin in your host service's plugin registration process:
40
40
 
41
41
  ```sh .env
42
- PLUGINS=@xpert-ai/plugin-mineru
42
+ PLUGINS=@chenchaolong/plugin-mineru
43
43
  ```
44
44
 
45
45
  The plugin returns the NestJS module `MinerUPlugin` in the `register` hook and logs messages during the `onStart`/`onStop` lifecycle.
@@ -84,14 +84,26 @@ The parser generates:
84
84
 
85
85
  The returned `Document<ChunkMetadata>` array currently defaults to a single chunk containing the full Markdown; you can split it as needed.
86
86
 
87
+ ## Local Deployment
88
+
89
+ For self-hosted MinerU deployments, see [LOCAL_SETUP.md](./LOCAL_SETUP.md) for detailed instructions on:
90
+ - Starting MinerU server using Docker
91
+ - Installing from source code
92
+ - Configuration and troubleshooting
93
+
94
+ Quick start with Docker:
95
+ ```bash
96
+ docker run -d --name mineru -p 9960:9960 opendatalab/mineru:latest
97
+ ```
98
+
87
99
  ## Development & Debugging
88
100
 
89
101
  Run the following commands in the repository root to build and test locally:
90
102
 
91
103
  ```bash
92
104
  npm install
93
- npx nx build @xpert-ai/plugin-mineru
94
- npx nx test @xpert-ai/plugin-mineru
105
+ npx nx build @chenchaolong/plugin-mineru
106
+ npx nx test @chenchaolong/plugin-mineru
95
107
  ```
96
108
 
97
109
  TypeScript build artifacts are output to `packages/mineru/dist`. Before publishing, ensure `package.json`, type declarations, and runtime files are in sync.
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAexD,QAAA,MAAM,YAAY,gDAChB,CAAC;AAEH,QAAA,MAAM,MAAM,EAAE,WAAW,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,YAAY,CAAC,CA4BrD,CAAC;AAEF,eAAe,MAAM,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAcxD,QAAA,MAAM,YAAY,gDAChB,CAAC;AAEH,QAAA,MAAM,MAAM,EAAE,WAAW,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,YAAY,CAAC,CA4BrD,CAAC;AAEF,eAAe,MAAM,CAAC"}
package/dist/index.js CHANGED
@@ -1,11 +1,10 @@
1
1
  import { z } from 'zod';
2
2
  import { readFileSync } from 'fs';
3
- import { fileURLToPath } from 'url';
4
- import { dirname, join } from 'path';
3
+ import { join } from 'path';
5
4
  import { MinerUPlugin } from './lib/mineru.plugin.js';
6
5
  import { icon } from './lib/types.js';
7
- const __filename = fileURLToPath(import.meta.url);
8
- const __dirname = dirname(__filename);
6
+ import { getModuleMeta } from './lib/path-meta.js';
7
+ const { __filename, __dirname } = getModuleMeta(import.meta);
9
8
  const packageJson = JSON.parse(readFileSync(join(__dirname, '../package.json'), 'utf8'));
10
9
  const ConfigSchema = z.object({});
11
10
  const plugin = {
@@ -21,7 +20,7 @@ const plugin = {
21
20
  description: 'Provide PDF to Markdown and JSON transformation functionality',
22
21
  keywords: ['integration', 'pdf', 'markdown', 'json', 'transformer'],
23
22
  author: 'XpertAI Team',
24
- homepage: 'https://www.npmjs.com/package/@xpert-ai/plugin-mineru',
23
+ homepage: 'https://www.npmjs.com/package/@chenchaolong/plugin-mineru',
25
24
  },
26
25
  config: {
27
26
  schema: ConfigSchema,
@@ -1 +1 @@
1
- {"version":3,"file":"mineru.client.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAEhD,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAC/C,OAAO,EAAmB,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACrE,OAAc,EAAE,aAAa,EAAE,MAAM,OAAO,CAAC;AAK7C,OAAO,EAIL,wBAAwB,EAExB,0BAA0B,EAC1B,gBAAgB,EACjB,MAAM,YAAY,CAAC;AAIpB,UAAU,iBAAiB;IACzB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,mEAAmE;IACnE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yEAAyE;IACzE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,2EAA2E;IAC3E,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,4EAA4E;IAC5E,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED,UAAU,mBAAmB;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,sBAAsB;IAC9B,KAAK,EAAE,mBAAmB,EAAE,CAAC;IAC7B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,UAAU,iBAAiB;IACzB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AASD,qBAAa,YAAY;IAWrB,OAAO,CAAC,QAAQ,CAAC,aAAa;IAC9B,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC;IAX/B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAiC;IACxD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAS;IAChC,SAAgB,UAAU,EAAE,gBAAgB,CAAC;IAC7C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAiD;IAE5E,IAAI,UAAU,IAAI,YAAY,GAAG,SAAS,CAEzC;gBAEkB,aAAa,EAAE,aAAa,EAC5B,WAAW,CAAC,EAAE;QACvB,UAAU,CAAC,EAAE,YAAY,CAAC;QAC1B,WAAW,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,wBAAwB,CAAC,CAAC,CAAC;KACjE;IAkBP;;;OAGG;IACG,UAAU,CAAC,OAAO,EAAE,
iBAAiB,GAAG,OAAO,CAAC;QAAE,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IAYzE;;OAEG;IACG,eAAe,CAAC,OAAO,EAAE,sBAAsB,GAAG,OAAO,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;IAmCzG,iBAAiB,CAAC,MAAM,EAAE,MAAM,GAAG,0BAA0B,GAAG,SAAS;IAOzE;;OAEG;IACG,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC;QACxE,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,CAAC;IAoBF;;OAEG;IACG,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC;IAiBnD;;OAEG;IACG,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,SAAgB,EAAE,UAAU,SAAO,GAAG,OAAO,CAAC,GAAG,CAAC;IAsB7F,OAAO,CAAC,cAAc;IAMtB,OAAO,CAAC,iBAAiB;IAczB,OAAO,CAAC,kBAAkB;IAyB1B,OAAO,CAAC,sBAAsB;IAI9B,OAAO,CAAC,gBAAgB;IAIxB,OAAO,CAAC,WAAW;IAQnB,OAAO,CAAC,kBAAkB;IAO1B,OAAO,CAAC,oBAAoB;YAYd,kBAAkB;YA4BlB,oBAAoB;YASpB,qBAAqB;YA0DrB,uBAAuB;IA+CrC,OAAO,CAAC,iBAAiB;IAgBzB,OAAO,CAAC,2BAA2B;IAenC,OAAO,CAAC,6BAA6B;IAcrC,OAAO,CAAC,iBAAiB;IAQzB,OAAO,CAAC,aAAa;IAcrB,OAAO,CAAC,iBAAiB;IAQzB,OAAO,CAAC,eAAe;YAIT,YAAY;IAkB1B,OAAO,CAAC,eAAe;IA0BvB,wBAAwB,IAAI,OAAO,CAAC,aAAa,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;IAKtD,wBAAwB;CAU/B"}
1
+ {"version":3,"file":"mineru.client.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAEhD,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAC/C,OAAO,EAAmB,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACrE,OAAc,EAAE,aAAa,EAAE,MAAM,OAAO,CAAC;AAK7C,OAAO,EAIL,wBAAwB,EAExB,0BAA0B,EAC1B,gBAAgB,EACjB,MAAM,YAAY,CAAC;AAIpB,UAAU,iBAAiB;IACzB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,mEAAmE;IACnE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,yEAAyE;IACzE,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,2EAA2E;IAC3E,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,4EAA4E;IAC5E,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED,UAAU,mBAAmB;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,UAAU,sBAAsB;IAC9B,KAAK,EAAE,mBAAmB,EAAE,CAAC;IAC7B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,UAAU,iBAAiB;IACzB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AASD,qBAAa,YAAY;IAWrB,OAAO,CAAC,QAAQ,CAAC,aAAa;IAC9B,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC;IAX/B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAiC;IACxD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAS;IAChC,SAAgB,UAAU,EAAE,gBAAgB,CAAC;IAC7C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAiD;IAE5E,IAAI,UAAU,IAAI,YAAY,GAAG,SAAS,CAEzC;gBAEkB,aAAa,EAAE,aAAa,EAC5B,WAAW,CAAC,EAAE;QACvB,UAAU,CAAC,EAAE,YAAY,CAAC;QAC1B,WAAW,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,wBAAwB,CAAC,CAAC,CAAC;KACjE;IAkBP;;;OAGG;IACG,UAAU,CAAC,OAAO,EAAE,
iBAAiB,GAAG,OAAO,CAAC;QAAE,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IAYzE;;OAEG;IACG,eAAe,CAAC,OAAO,EAAE,sBAAsB,GAAG,OAAO,CAAC;QAAE,OAAO,EAAE,MAAM,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;IA+CzG,iBAAiB,CAAC,MAAM,EAAE,MAAM,GAAG,0BAA0B,GAAG,SAAS;IAOzE;;OAEG;IACG,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC;QACxE,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,CAAC;IAoBF;;OAEG;IACG,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC;IAiBnD;;OAEG;IACG,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,SAAgB,EAAE,UAAU,SAAO,GAAG,OAAO,CAAC,GAAG,CAAC;IAsB7F,OAAO,CAAC,cAAc;IAMtB,OAAO,CAAC,iBAAiB;IAczB,OAAO,CAAC,kBAAkB;IAyB1B,OAAO,CAAC,sBAAsB;IAI9B,OAAO,CAAC,gBAAgB;IAIxB,OAAO,CAAC,WAAW;IAQnB,OAAO,CAAC,kBAAkB;IAO1B,OAAO,CAAC,oBAAoB;YAYd,kBAAkB;YAmClB,oBAAoB;YAcpB,qBAAqB;YA0DrB,uBAAuB;IA+CrC,OAAO,CAAC,iBAAiB;IAgBzB,OAAO,CAAC,2BAA2B;IAenC,OAAO,CAAC,6BAA6B;IAcrC,OAAO,CAAC,iBAAiB;IAQzB,OAAO,CAAC,aAAa;IAcrB,OAAO,CAAC,iBAAiB;IAQzB,OAAO,CAAC,eAAe;YAIT,YAAY;IAkB1B,OAAO,CAAC,eAAe;IA0BvB,wBAAwB,IAAI,OAAO,CAAC,aAAa,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;IAKtD,wBAAwB;CAU/B"}
@@ -3,7 +3,7 @@ import { getErrorMessage } from '@xpert-ai/plugin-sdk';
3
3
  import axios from 'axios';
4
4
  import FormData from 'form-data';
5
5
  import { randomUUID } from 'crypto';
6
- import { basename } from 'path';
6
+ import { basename, normalize, resolve } from 'path';
7
7
  import fs from 'fs';
8
8
  import { ENV_MINERU_API_BASE_URL, ENV_MINERU_API_TOKEN, ENV_MINERU_SERVER_TYPE, } from './types.js';
9
9
  const DEFAULT_OFFICIAL_BASE_URL = 'https://mineru.net/api/v4';
@@ -46,6 +46,10 @@ export class MinerUClient {
46
46
  */
47
47
  async createBatchTask(options) {
48
48
  this.ensureOfficial('createBatchTask');
49
+ // Validate files is an array
50
+ if (!Array.isArray(options.files)) {
51
+ throw new Error('MinerU createBatchTask requires files to be an array');
52
+ }
49
53
  const url = this.buildApiUrl('extract', 'task', 'batch');
50
54
  const body = {
51
55
  files: options.files.map((file) => {
@@ -67,8 +71,15 @@ export class MinerUClient {
67
71
  body.language = options.language;
68
72
  if (options.modelVersion)
69
73
  body.model_version = options.modelVersion;
70
- if (options.extraFormats)
71
- body.extra_formats = options.extraFormats;
74
+ // Ensure extraFormats is an array if provided
75
+ if (options.extraFormats) {
76
+ if (Array.isArray(options.extraFormats)) {
77
+ body.extra_formats = options.extraFormats;
78
+ }
79
+ else {
80
+ this.logger.warn('extraFormats is not an array, ignoring');
81
+ }
82
+ }
72
83
  if (options.callbackUrl)
73
84
  body.callback = options.callbackUrl;
74
85
  if (options.seed)
@@ -231,8 +242,15 @@ export class MinerUClient {
231
242
  body.data_id = options.dataId;
232
243
  if (options.pageRanges)
233
244
  body.page_ranges = options.pageRanges;
234
- if (options.extraFormats)
235
- body.extra_formats = options.extraFormats;
245
+ // Ensure extraFormats is an array if provided
246
+ if (options.extraFormats) {
247
+ if (Array.isArray(options.extraFormats)) {
248
+ body.extra_formats = options.extraFormats;
249
+ }
250
+ else {
251
+ this.logger.warn('extraFormats is not an array, ignoring');
252
+ }
253
+ }
236
254
  if (options.callbackUrl)
237
255
  body.callback = options.callbackUrl;
238
256
  if (options.seed)
@@ -251,7 +269,12 @@ export class MinerUClient {
251
269
  }
252
270
  }
253
271
  async createSelfHostedTask(options) {
254
- const filePath = this.fileSystem.fullPath(options.filePath);
272
+ if (!options.filePath) {
273
+ throw new Error('MinerU createSelfHostedTask requires a filePath');
274
+ }
275
+ // Normalize path for cross-platform compatibility (Windows/Linux)
276
+ const rawPath = this.fileSystem.fullPath(options.filePath);
277
+ const filePath = normalize(resolve(rawPath));
255
278
  const taskId = randomUUID();
256
279
  const result = await this.invokeSelfHostedParse(filePath, options.fileName, options);
257
280
  this.localTasks.set(taskId, { ...result, sourceUrl: options.url });
@@ -1 +1 @@
1
- {"version":3,"file":"mineru.plugin.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.plugin.ts"],"names":[],"mappings":"AACA,OAAO,EAAqB,kBAAkB,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AAQ/F,qBAkBa,YAAa,YAAW,kBAAkB,EAAE,gBAAgB;IAExE,OAAO,CAAC,UAAU,CAAQ;IAE1B;;OAEG;IACH,iBAAiB,IAAI,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAMzC;;OAEG;IACH,eAAe,IAAI,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;CAKvC"}
1
+ {"version":3,"file":"mineru.plugin.d.ts","sourceRoot":"","sources":["../../src/lib/mineru.plugin.ts"],"names":[],"mappings":"AACA,OAAO,EAAqB,kBAAkB,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AAO/F,qBAiBa,YAAa,YAAW,kBAAkB,EAAE,gBAAgB;IAExE,OAAO,CAAC,UAAU,CAAQ;IAE1B;;OAEG;IACH,iBAAiB,IAAI,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAMzC;;OAEG;IACH,eAAe,IAAI,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;CAKvC"}
@@ -7,7 +7,6 @@ import { MinerUTransformerStrategy } from './transformer-mineru.strategy.js';
7
7
  import { MinerUResultParserService } from './result-parser.service.js';
8
8
  import { MinerUIntegrationStrategy } from './integration.strategy.js';
9
9
  import { MinerUController } from './mineru.controller.js';
10
- import { MinerUToolsetStrategy } from './mineru-toolset.strategy.js';
11
10
  let MinerUPlugin = MinerUPlugin_1 = class MinerUPlugin {
12
11
  constructor() {
13
12
  // We disable by default additional logging for each event to avoid cluttering the logs
@@ -41,7 +40,6 @@ MinerUPlugin = MinerUPlugin_1 = __decorate([
41
40
  providers: [
42
41
  MinerUIntegrationStrategy,
43
42
  MinerUTransformerStrategy,
44
- MinerUToolsetStrategy,
45
43
  MinerUResultParserService,
46
44
  ],
47
45
  controllers: [
@@ -1 +1 @@
1
- {"version":3,"file":"result-parser.service.d.ts","sourceRoot":"","sources":["../../src/lib/result-parser.service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AACrD,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AAEtD,OAAO,EACL,aAAa,EAEb,YAAY,EACb,MAAM,sBAAsB,CAAC;AAK9B,OAAO,EAEL,sBAAsB,EACtB,0BAA0B,EAC3B,MAAM,YAAY,CAAC;AAEpB,qBACa,yBAAyB;IACpC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAA8C;IAE/D,YAAY,CAChB,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,OAAO,CAAC,kBAAkB,CAAC,EACrC,UAAU,EAAE,YAAY,GACvB,OAAO,CAAC;QACT,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,MAAM,EAAE,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;QAClC,QAAQ,EAAE,sBAAsB,CAAC;KAClC,CAAC;IAqFI,cAAc,CAClB,MAAM,EAAE,0BAA0B,EAClC,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,OAAO,CAAC,kBAAkB,CAAC,EACrC,UAAU,EAAE,YAAY,GACvB,OAAO,CAAC;QACT,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,MAAM,EAAE,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;QAClC,QAAQ,EAAE,sBAAsB,CAAC;KAClC,CAAC;CAkDH"}
1
+ {"version":3,"file":"result-parser.service.d.ts","sourceRoot":"","sources":["../../src/lib/result-parser.service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AACrD,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AAEtD,OAAO,EACL,aAAa,EAEb,YAAY,EACb,MAAM,sBAAsB,CAAC;AAK9B,OAAO,EAEL,sBAAsB,EACtB,0BAA0B,EAC3B,MAAM,YAAY,CAAC;AAEpB,qBACa,yBAAyB;IACpC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAA8C;IAE/D,YAAY,CAChB,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,OAAO,CAAC,kBAAkB,CAAC,EACrC,UAAU,EAAE,YAAY,GACvB,OAAO,CAAC;QACT,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,MAAM,EAAE,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;QAClC,QAAQ,EAAE,sBAAsB,CAAC;KAClC,CAAC;IAwFI,cAAc,CAClB,MAAM,EAAE,0BAA0B,EAClC,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,OAAO,CAAC,kBAAkB,CAAC,EACrC,UAAU,EAAE,YAAY,GACvB,OAAO,CAAC;QACT,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,MAAM,EAAE,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;QAClC,QAAQ,EAAE,sBAAsB,CAAC;KAClC,CAAC;CAoDH"}
@@ -3,7 +3,7 @@ import { __decorate } from "tslib";
3
3
  import { Document } from '@langchain/core/documents';
4
4
  import { Injectable, Logger } from '@nestjs/common';
5
5
  import axios from 'axios';
6
- import { join } from 'path';
6
+ import { join, normalize } from 'path';
7
7
  import unzipper from 'unzipper';
8
8
  import { v4 as uuidv4 } from 'uuid';
9
9
  import { MinerU, } from './types.js';
@@ -34,8 +34,11 @@ let MinerUResultParserService = MinerUResultParserService_1 = class MinerUResult
34
34
  continue;
35
35
  const data = await entry.buffer();
36
36
  zipEntries.push({ entryName: entry.path, data });
37
- const fileName = entry.path;
38
- const filePath = join(document.folder || '', entry.path);
37
+ // Normalize ZIP entry path (ZIP files use POSIX format with '/' separator)
38
+ // Convert to platform-specific path format for cross-platform compatibility
39
+ const normalizedEntryPath = entry.path.replace(/\\/g, '/'); // Normalize to POSIX format first
40
+ const fileName = normalizedEntryPath;
41
+ const filePath = normalize(join(document.folder || '', normalizedEntryPath));
39
42
  const url = await fileSystem.writeFile(filePath, data);
40
43
  pathMap.set(fileName, url);
41
44
  // Write images to local file system
@@ -99,7 +102,9 @@ let MinerUResultParserService = MinerUResultParserService_1 = class MinerUResult
99
102
  };
100
103
  const assets = [];
101
104
  const pathMap = new Map();
102
- for (const image of result.images) {
105
+ // Ensure images is an array before iterating
106
+ const images = Array.isArray(result.images) ? result.images : [];
107
+ for (const image of images) {
103
108
  const filePath = join(document.folder || '', 'images', image.name);
104
109
  const url = await fileSystem.writeFile(filePath, Buffer.from(image.dataUrl.split(',')[1], 'base64'));
105
110
  pathMap.set(`images/${image.name}`, url);
@@ -1 +1 @@
1
- {"version":3,"file":"transformer-mineru.strategy.d.ts","sourceRoot":"","sources":["../../src/lib/transformer-mineru.strategy.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAA;AAG/D,OAAO,EACL,aAAa,EAEb,oBAAoB,EACpB,4BAA4B,EAC5B,qBAAqB,EACtB,MAAM,sBAAsB,CAAA;AAI7B,OAAO,EAAgB,wBAAwB,EAAE,MAAM,YAAY,CAAA;AAEnE,qBAEa,yBAA0B,YAAW,4BAA4B,CAAC,wBAAwB,CAAC;IAEtG,OAAO,CAAC,QAAQ,CAAC,YAAY,CAA2B;IAGxD,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAe;IAE7C,QAAQ,CAAC,WAAW,mDAWnB;IAED,QAAQ,CAAC,IAAI;;;;;;;;;;;kBAWM,QAAQ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAwE1B;IAED,cAAc,CAAC,MAAM,EAAE,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC;IAIpC,kBAAkB,CACtB,SAAS,EAAE,OAAO,CAAC,kBAAkB,CAAC,EAAE,EACxC,MAAM,EAAE,wBAAwB,GAC/B,OAAO,CAAC,OAAO,CAAC,kBAAkB,CAAC,aAAa,CAAC,CAAC,EAAE,CAAC;CAkEzD"}
1
+ {"version":3,"file":"transformer-mineru.strategy.d.ts","sourceRoot":"","sources":["../../src/lib/transformer-mineru.strategy.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAA;AAG/D,OAAO,EACL,aAAa,EAEb,oBAAoB,EACpB,4BAA4B,EAC5B,qBAAqB,EACtB,MAAM,sBAAsB,CAAA;AAI7B,OAAO,EAAgB,wBAAwB,EAAE,MAAM,YAAY,CAAA;AAEnE,qBAEa,yBAA0B,YAAW,4BAA4B,CAAC,wBAAwB,CAAC;IAEtG,OAAO,CAAC,QAAQ,CAAC,YAAY,CAA2B;IAGxD,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAe;IAE7C,QAAQ,CAAC,WAAW,mDAWnB;IAED,QAAQ,CAAC,IAAI;;;;;;;;;;;kBAWM,QAAQ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAwE1B;IAED,cAAc,CAAC,MAAM,EAAE,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC;IAIpC,kBAAkB,CACtB,SAAS,EAAE,OAAO,CAAC,kBAAkB,CAAC,EAAE,EACxC,MAAM,EAAE,wBAAwB,GAC/B,OAAO,CAAC,OAAO,CAAC,kBAAkB,CAAC,aAAa,CAAC,CAAC,EAAE,CAAC;CAsDzD"}
@@ -126,13 +126,7 @@ let MinerUTransformerStrategy = class MinerUTransformerStrategy {
126
126
  const result = mineru.getSelfHostedTask(taskId);
127
127
  const parsedResult = await this.resultParser.parseLocalTask(result, taskId, document, config.permissions.fileSystem);
128
128
  parsedResult.id = document.id;
129
- // Convert to expected format - chunks already have ChunkMetadata
130
- parsedResults.push({
131
- id: parsedResult.id,
132
- chunks: parsedResult.chunks,
133
- // Use the metadata from chunks, not the document metadata
134
- metadata: parsedResult.chunks[0]?.metadata || {},
135
- });
129
+ parsedResults.push(parsedResult);
136
130
  }
137
131
  else {
138
132
  const { taskId } = await mineru.createTask({
@@ -148,13 +142,7 @@ let MinerUTransformerStrategy = class MinerUTransformerStrategy {
148
142
  const result = await mineru.waitForTask(taskId, 5 * 60 * 1000, 5000);
149
143
  const parsedResult = await this.resultParser.parseFromUrl(result.full_zip_url, taskId, document, config.permissions.fileSystem);
150
144
  parsedResult.id = document.id;
151
- // Convert to expected format - chunks already have ChunkMetadata
152
- parsedResults.push({
153
- id: parsedResult.id,
154
- chunks: parsedResult.chunks,
155
- // Use the metadata from chunks, not the document metadata
156
- metadata: parsedResult.chunks[0]?.metadata || {},
157
- });
145
+ parsedResults.push(parsedResult);
158
146
  }
159
147
  }
160
148
  return parsedResults;
package/package.json CHANGED
@@ -1,7 +1,6 @@
1
1
  {
2
2
  "name": "@chenchaolong/plugin-mineru",
3
- "version": "0.0.11",
4
- "license": "AGPL-3.0",
3
+ "version": "0.0.13",
5
4
  "repository": {
6
5
  "type": "git",
7
6
  "url": "https://github.com/xpert-ai/xpert-plugins.git"
@@ -16,6 +15,7 @@
16
15
  "exports": {
17
16
  "./package.json": "./package.json",
18
17
  ".": {
18
+ "@xpert-plugins-starter/source": "./src/index.ts",
19
19
  "types": "./dist/index.d.ts",
20
20
  "import": "./dist/index.js",
21
21
  "default": "./dist/index.js"
@@ -45,5 +45,8 @@
45
45
  },
46
46
  "devDependencies": {
47
47
  "@types/unzipper": "^0.10.11"
48
+ },
49
+ "publishConfig": {
50
+ "access": "public"
48
51
  }
49
52
  }