@chenchaolong/plugin-mineru 0.0.9 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAexD,QAAA,MAAM,YAAY,gDAChB,CAAC;AAEH,QAAA,MAAM,MAAM,EAAE,WAAW,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,YAAY,CAAC,CA4BrD,CAAC;AAEF,eAAe,MAAM,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
2
|
import { readFileSync } from 'fs';
|
|
3
|
-
import {
|
|
3
|
+
import { fileURLToPath } from 'url';
|
|
4
|
+
import { dirname, join } from 'path';
|
|
4
5
|
import { MinerUPlugin } from './lib/mineru.plugin.js';
|
|
5
6
|
import { icon } from './lib/types.js';
|
|
6
|
-
|
|
7
|
-
const
|
|
7
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
8
|
+
const __dirname = dirname(__filename);
|
|
8
9
|
const packageJson = JSON.parse(readFileSync(join(__dirname, '../package.json'), 'utf8'));
|
|
9
10
|
const ConfigSchema = z.object({});
|
|
10
11
|
const plugin = {
|
|
@@ -10,16 +10,16 @@ export declare function buildPdfToMarkdownTool(configService: ConfigService, res
|
|
|
10
10
|
fileUrl: z.ZodOptional<z.ZodString>;
|
|
11
11
|
}, "strip", z.ZodTypeAny, {
|
|
12
12
|
name?: string;
|
|
13
|
-
filename?: string;
|
|
14
|
-
content?: string | Uint8Array<ArrayBuffer> | Buffer<ArrayBufferLike>;
|
|
15
13
|
filePath?: string;
|
|
16
14
|
fileUrl?: string;
|
|
17
|
-
}, {
|
|
18
|
-
name?: string;
|
|
19
15
|
filename?: string;
|
|
20
16
|
content?: string | Uint8Array<ArrayBuffer> | Buffer<ArrayBufferLike>;
|
|
17
|
+
}, {
|
|
18
|
+
name?: string;
|
|
21
19
|
filePath?: string;
|
|
22
20
|
fileUrl?: string;
|
|
21
|
+
filename?: string;
|
|
22
|
+
content?: string | Uint8Array<ArrayBuffer> | Buffer<ArrayBufferLike>;
|
|
23
23
|
}>;
|
|
24
24
|
isOcr: z.ZodOptional<z.ZodBoolean>;
|
|
25
25
|
enableFormula: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -27,57 +27,57 @@ export declare function buildPdfToMarkdownTool(configService: ConfigService, res
|
|
|
27
27
|
language: z.ZodOptional<z.ZodEnum<["en", "ch"]>>;
|
|
28
28
|
modelVersion: z.ZodOptional<z.ZodEnum<["pipeline", "vlm"]>>;
|
|
29
29
|
}, "strip", z.ZodTypeAny, {
|
|
30
|
+
isOcr?: boolean;
|
|
31
|
+
enableFormula?: boolean;
|
|
32
|
+
enableTable?: boolean;
|
|
33
|
+
language?: "ch" | "en";
|
|
34
|
+
modelVersion?: "pipeline" | "vlm";
|
|
30
35
|
file?: {
|
|
31
36
|
name?: string;
|
|
32
|
-
filename?: string;
|
|
33
|
-
content?: string | Uint8Array<ArrayBuffer> | Buffer<ArrayBufferLike>;
|
|
34
37
|
filePath?: string;
|
|
35
38
|
fileUrl?: string;
|
|
39
|
+
filename?: string;
|
|
40
|
+
content?: string | Uint8Array<ArrayBuffer> | Buffer<ArrayBufferLike>;
|
|
36
41
|
};
|
|
42
|
+
}, {
|
|
37
43
|
isOcr?: boolean;
|
|
38
44
|
enableFormula?: boolean;
|
|
39
45
|
enableTable?: boolean;
|
|
40
|
-
language?: "en" | "ch";
|
|
41
|
-
modelVersion?: "vlm" | "pipeline";
|
|
42
|
-
}, {
|
|
46
|
+
language?: "ch" | "en";
|
|
47
|
+
modelVersion?: "pipeline" | "vlm";
|
|
43
48
|
file?: {
|
|
44
49
|
name?: string;
|
|
45
|
-
filename?: string;
|
|
46
|
-
content?: string | Uint8Array<ArrayBuffer> | Buffer<ArrayBufferLike>;
|
|
47
50
|
filePath?: string;
|
|
48
51
|
fileUrl?: string;
|
|
52
|
+
filename?: string;
|
|
53
|
+
content?: string | Uint8Array<ArrayBuffer> | Buffer<ArrayBufferLike>;
|
|
49
54
|
};
|
|
55
|
+
}>, {
|
|
50
56
|
isOcr?: boolean;
|
|
51
57
|
enableFormula?: boolean;
|
|
52
58
|
enableTable?: boolean;
|
|
53
|
-
language?: "en" | "ch";
|
|
54
|
-
modelVersion?: "vlm" | "pipeline";
|
|
55
|
-
}>, {
|
|
59
|
+
language?: "ch" | "en";
|
|
60
|
+
modelVersion?: "pipeline" | "vlm";
|
|
56
61
|
file?: {
|
|
57
62
|
name?: string;
|
|
58
|
-
filename?: string;
|
|
59
|
-
content?: string | Uint8Array<ArrayBuffer> | Buffer<ArrayBufferLike>;
|
|
60
63
|
filePath?: string;
|
|
61
64
|
fileUrl?: string;
|
|
65
|
+
filename?: string;
|
|
66
|
+
content?: string | Uint8Array<ArrayBuffer> | Buffer<ArrayBufferLike>;
|
|
62
67
|
};
|
|
68
|
+
}, {
|
|
63
69
|
isOcr?: boolean;
|
|
64
70
|
enableFormula?: boolean;
|
|
65
71
|
enableTable?: boolean;
|
|
66
|
-
language?: "en" | "ch";
|
|
67
|
-
modelVersion?: "vlm" | "pipeline";
|
|
68
|
-
}, {
|
|
72
|
+
language?: "ch" | "en";
|
|
73
|
+
modelVersion?: "pipeline" | "vlm";
|
|
69
74
|
file?: {
|
|
70
75
|
name?: string;
|
|
71
|
-
filename?: string;
|
|
72
|
-
content?: string | Uint8Array<ArrayBuffer> | Buffer<ArrayBufferLike>;
|
|
73
76
|
filePath?: string;
|
|
74
77
|
fileUrl?: string;
|
|
78
|
+
filename?: string;
|
|
79
|
+
content?: string | Uint8Array<ArrayBuffer> | Buffer<ArrayBufferLike>;
|
|
75
80
|
};
|
|
76
|
-
isOcr?: boolean;
|
|
77
|
-
enableFormula?: boolean;
|
|
78
|
-
enableTable?: boolean;
|
|
79
|
-
language?: "en" | "ch";
|
|
80
|
-
modelVersion?: "vlm" | "pipeline";
|
|
81
81
|
}, (string | {
|
|
82
82
|
files: {
|
|
83
83
|
mimeType: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"transformer-mineru.strategy.d.ts","sourceRoot":"","sources":["../../src/lib/transformer-mineru.strategy.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAA;AAG/D,OAAO,EACL,aAAa,EAEb,oBAAoB,EACpB,4BAA4B,EAC5B,qBAAqB,EACtB,MAAM,sBAAsB,CAAA;AAI7B,OAAO,EAAgB,wBAAwB,EAAE,MAAM,YAAY,CAAA;AAEnE,qBAEa,yBAA0B,YAAW,4BAA4B,CAAC,wBAAwB,CAAC;IAEtG,OAAO,CAAC,QAAQ,CAAC,YAAY,CAA2B;IAGxD,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAe;IAE7C,QAAQ,CAAC,WAAW,mDAWnB;IAED,QAAQ,CAAC,IAAI;;;;;;;;;;;kBAWM,QAAQ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAwE1B;IAED,cAAc,CAAC,MAAM,EAAE,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC;IAIpC,kBAAkB,CACtB,SAAS,EAAE,OAAO,CAAC,kBAAkB,CAAC,EAAE,EACxC,MAAM,EAAE,wBAAwB,GAC/B,OAAO,CAAC,OAAO,CAAC,kBAAkB,CAAC,aAAa,CAAC,CAAC,EAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"transformer-mineru.strategy.d.ts","sourceRoot":"","sources":["../../src/lib/transformer-mineru.strategy.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAA;AAG/D,OAAO,EACL,aAAa,EAEb,oBAAoB,EACpB,4BAA4B,EAC5B,qBAAqB,EACtB,MAAM,sBAAsB,CAAA;AAI7B,OAAO,EAAgB,wBAAwB,EAAE,MAAM,YAAY,CAAA;AAEnE,qBAEa,yBAA0B,YAAW,4BAA4B,CAAC,wBAAwB,CAAC;IAEtG,OAAO,CAAC,QAAQ,CAAC,YAAY,CAA2B;IAGxD,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAe;IAE7C,QAAQ,CAAC,WAAW,mDAWnB;IAED,QAAQ,CAAC,IAAI;;;;;;;;;;;kBAWM,QAAQ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAwE1B;IAED,cAAc,CAAC,MAAM,EAAE,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC;IAIpC,kBAAkB,CACtB,SAAS,EAAE,OAAO,CAAC,kBAAkB,CAAC,EAAE,EACxC,MAAM,EAAE,wBAAwB,GAC/B,OAAO,CAAC,OAAO,CAAC,kBAAkB,CAAC,aAAa,CAAC,CAAC,EAAE,CAAC;CAkEzD"}
|
|
@@ -126,7 +126,13 @@ let MinerUTransformerStrategy = class MinerUTransformerStrategy {
|
|
|
126
126
|
const result = mineru.getSelfHostedTask(taskId);
|
|
127
127
|
const parsedResult = await this.resultParser.parseLocalTask(result, taskId, document, config.permissions.fileSystem);
|
|
128
128
|
parsedResult.id = document.id;
|
|
129
|
-
|
|
129
|
+
// Convert to expected format - chunks already have ChunkMetadata
|
|
130
|
+
parsedResults.push({
|
|
131
|
+
id: parsedResult.id,
|
|
132
|
+
chunks: parsedResult.chunks,
|
|
133
|
+
// Use the metadata from chunks, not the document metadata
|
|
134
|
+
metadata: parsedResult.chunks[0]?.metadata || {},
|
|
135
|
+
});
|
|
130
136
|
}
|
|
131
137
|
else {
|
|
132
138
|
const { taskId } = await mineru.createTask({
|
|
@@ -142,7 +148,13 @@ let MinerUTransformerStrategy = class MinerUTransformerStrategy {
|
|
|
142
148
|
const result = await mineru.waitForTask(taskId, 5 * 60 * 1000, 5000);
|
|
143
149
|
const parsedResult = await this.resultParser.parseFromUrl(result.full_zip_url, taskId, document, config.permissions.fileSystem);
|
|
144
150
|
parsedResult.id = document.id;
|
|
145
|
-
|
|
151
|
+
// Convert to expected format - chunks already have ChunkMetadata
|
|
152
|
+
parsedResults.push({
|
|
153
|
+
id: parsedResult.id,
|
|
154
|
+
chunks: parsedResult.chunks,
|
|
155
|
+
// Use the metadata from chunks, not the document metadata
|
|
156
|
+
metadata: parsedResult.chunks[0]?.metadata || {},
|
|
157
|
+
});
|
|
146
158
|
}
|
|
147
159
|
}
|
|
148
160
|
return parsedResults;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@chenchaolong/plugin-mineru",
|
|
3
|
-
"version": "0.0.9",
|
|
3
|
+
"version": "0.0.11",
|
|
4
4
|
"license": "AGPL-3.0",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -16,7 +16,6 @@
|
|
|
16
16
|
"exports": {
|
|
17
17
|
"./package.json": "./package.json",
|
|
18
18
|
".": {
|
|
19
|
-
"@xpert-plugins-starter/source": "./src/index.ts",
|
|
20
19
|
"types": "./dist/index.d.ts",
|
|
21
20
|
"import": "./dist/index.js",
|
|
22
21
|
"default": "./dist/index.js"
|