@helloxiaohu/plugin-mineru 0.1.3 → 0.1.4

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -4,12 +4,12 @@ import { ChunkMetadata, XpFileSystem } from '@xpert-ai/plugin-sdk';
4
4
  import { MinerUDocumentMetadata, MineruSelfHostedTaskResult } from './types.js';
5
5
  export declare class MinerUResultParserService {
6
6
  private readonly logger;
7
- parseFromUrl(fullZipUrl: string, taskId: string, document: Partial<IKnowledgeDocument>, fileSystem: XpFileSystem): Promise<{
7
+ parseFromUrl(fullZipUrl: string, taskId: string, document: Partial<IKnowledgeDocument>, fileSystem?: XpFileSystem): Promise<{
8
8
  id?: string;
9
9
  chunks: Document<ChunkMetadata>[];
10
10
  metadata: MinerUDocumentMetadata;
11
11
  }>;
12
- parseLocalTask(result: MineruSelfHostedTaskResult, taskId: string, document: Partial<IKnowledgeDocument>, fileSystem: XpFileSystem): Promise<{
12
+ parseLocalTask(result: MineruSelfHostedTaskResult, taskId: string, document: Partial<IKnowledgeDocument>, fileSystem?: XpFileSystem): Promise<{
13
13
  id?: string;
14
14
  chunks: Document<ChunkMetadata>[];
15
15
  metadata: MinerUDocumentMetadata;
@@ -1 +1 @@
1
- {"version":3,"file":"result-parser.service.d.ts","sourceRoot":"","sources":["../../src/lib/result-parser.service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AACrD,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AAEtD,OAAO,EACL,aAAa,EAEb,YAAY,EACb,MAAM,sBAAsB,CAAC;AAK9B,OAAO,EAEL,sBAAsB,EACtB,0BAA0B,EAC3B,MAAM,YAAY,CAAC;AAEpB,qBACa,yBAAyB;IACpC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAA8C;IAE/D,YAAY,CAChB,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,OAAO,CAAC,kBAAkB,CAAC,EACrC,UAAU,EAAE,YAAY,GACvB,OAAO,CAAC;QACT,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,MAAM,EAAE,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;QAClC,QAAQ,EAAE,sBAAsB,CAAC;KAClC,CAAC;IAsFI,cAAc,CAClB,MAAM,EAAE,0BAA0B,EAClC,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,OAAO,CAAC,kBAAkB,CAAC,EACrC,UAAU,EAAE,YAAY,GACvB,OAAO,CAAC;QACT,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,MAAM,EAAE,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;QAClC,QAAQ,EAAE,sBAAsB,CAAC;KAClC,CAAC;CAkDH"}
1
+ {"version":3,"file":"result-parser.service.d.ts","sourceRoot":"","sources":["../../src/lib/result-parser.service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AACrD,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AAEtD,OAAO,EACL,aAAa,EAEb,YAAY,EACb,MAAM,sBAAsB,CAAC;AAK9B,OAAO,EAEL,sBAAsB,EACtB,0BAA0B,EAC3B,MAAM,YAAY,CAAC;AAEpB,qBACa,yBAAyB;IACpC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAA8C;IAE/D,YAAY,CAChB,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,OAAO,CAAC,kBAAkB,CAAC,EACrC,UAAU,CAAC,EAAE,YAAY,GACxB,OAAO,CAAC;QACT,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,MAAM,EAAE,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;QAClC,QAAQ,EAAE,sBAAsB,CAAC;KAClC,CAAC;IAmGI,cAAc,CAClB,MAAM,EAAE,0BAA0B,EAClC,MAAM,EAAE,MAAM,EACd,QAAQ,EAAE,OAAO,CAAC,kBAAkB,CAAC,EACrC,UAAU,CAAC,EAAE,YAAY,GACxB,OAAO,CAAC;QACT,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,MAAM,EAAE,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;QAClC,QAAQ,EAAE,sBAAsB,CAAC;KAClC,CAAC;CA2DH"}
@@ -37,43 +37,59 @@ let MinerUResultParserService = MinerUResultParserService_1 = class MinerUResult
37
37
  zipEntries.push({ entryName: entry.path, data });
38
38
  const fileName = entry.path;
39
39
  const filePath = join(document.folder || '', entry.path);
40
- const url = await fileSystem.writeFile(filePath, data);
41
- pathMap.set(fileName, url);
42
- // Write images to local file system
43
- if (fileName.startsWith('images/')) {
44
- assets.push({
45
- type: 'image',
46
- url: url,
47
- filePath: filePath,
48
- });
49
- }
50
- else if (fileName.endsWith('layout.json')) {
51
- layoutJson = JSON.parse(data.toString('utf-8'));
52
- metadata.mineruBackend = layoutJson?._backend;
53
- metadata.mineruVersion = layoutJson?._version_name;
54
- assets.push({
55
- type: 'file',
56
- url,
57
- filePath: filePath,
58
- });
59
- }
60
- else if (fileName.endsWith('content_list.json')) {
61
- assets.push({
62
- type: 'file',
63
- url,
64
- filePath: filePath,
65
- });
66
- }
67
- else if (fileName.endsWith('full.md')) {
68
- fullMd = data.toString('utf-8');
69
- assets.push({
70
- type: 'file',
71
- url,
72
- filePath: filePath,
73
- });
40
+ if (fileSystem) {
41
+ const url = await fileSystem.writeFile(filePath, data);
42
+ pathMap.set(fileName, url);
43
+ // Write images to local file system
44
+ if (fileName.startsWith('images/')) {
45
+ assets.push({
46
+ type: 'image',
47
+ url: url,
48
+ filePath: filePath,
49
+ });
50
+ }
51
+ else if (fileName.endsWith('layout.json')) {
52
+ layoutJson = JSON.parse(data.toString('utf-8'));
53
+ metadata.mineruBackend = layoutJson?._backend;
54
+ metadata.mineruVersion = layoutJson?._version_name;
55
+ assets.push({
56
+ type: 'file',
57
+ url,
58
+ filePath: filePath,
59
+ });
60
+ }
61
+ else if (fileName.endsWith('content_list.json')) {
62
+ assets.push({
63
+ type: 'file',
64
+ url,
65
+ filePath: filePath,
66
+ });
67
+ }
68
+ else if (fileName.endsWith('full.md')) {
69
+ fullMd = data.toString('utf-8');
70
+ assets.push({
71
+ type: 'file',
72
+ url,
73
+ filePath: filePath,
74
+ });
75
+ }
76
+ else if (fileName.endsWith('origin.pdf')) {
77
+ metadata.originPdfUrl = fileName;
78
+ }
74
79
  }
75
- else if (fileName.endsWith('origin.pdf')) {
76
- metadata.originPdfUrl = fileName;
80
+ else {
81
+ // Still extract key metadata & markdown without writing to filesystem.
82
+ if (fileName.endsWith('layout.json')) {
83
+ layoutJson = JSON.parse(data.toString('utf-8'));
84
+ metadata.mineruBackend = layoutJson?._backend;
85
+ metadata.mineruVersion = layoutJson?._version_name;
86
+ }
87
+ else if (fileName.endsWith('full.md')) {
88
+ fullMd = data.toString('utf-8');
89
+ }
90
+ else if (fileName.endsWith('origin.pdf')) {
91
+ metadata.originPdfUrl = fileName;
92
+ }
77
93
  }
78
94
  }
79
95
  metadata.assets = assets;
@@ -102,13 +118,23 @@ let MinerUResultParserService = MinerUResultParserService_1 = class MinerUResult
102
118
  const pathMap = new Map();
103
119
  for (const image of result.images) {
104
120
  const filePath = join(document.folder || '', 'images', image.name);
105
- const url = await fileSystem.writeFile(filePath, Buffer.from(image.dataUrl.split(',')[1], 'base64'));
106
- pathMap.set(`images/${image.name}`, url);
107
- assets.push({
108
- type: 'image',
109
- url: url,
110
- filePath: filePath,
111
- });
121
+ if (fileSystem) {
122
+ const url = await fileSystem.writeFile(filePath, Buffer.from(image.dataUrl.split(',')[1], 'base64'));
123
+ pathMap.set(`images/${image.name}`, url);
124
+ assets.push({
125
+ type: 'image',
126
+ url: url,
127
+ filePath: filePath,
128
+ });
129
+ }
130
+ else {
131
+ pathMap.set(`images/${image.name}`, image.dataUrl);
132
+ assets.push({
133
+ type: 'image',
134
+ url: image.dataUrl,
135
+ filePath: filePath,
136
+ });
137
+ }
112
138
  }
113
139
  if (result.sourceUrl) {
114
140
  assets.push({
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@helloxiaohu/plugin-mineru",
3
- "version": "0.1.3",
3
+ "version": "0.1.4",
4
4
  "description": "MinerU document converter plugin for Xpert AI platform",
5
5
  "license": "AGPL-3.0",
6
6
  "repository": {