@lobehub/cli 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +486 -30691
- package/man/man1/lh.1 +4 -1
- package/package.json +16 -19
- package/dist/buffer-crc32-BlUV1nEz.js +0 -342
- package/dist/doc-CR0Zm5t8.js +0 -4144
- package/dist/docx-73xqMWN6.js +0 -33097
- package/dist/excel-3NB56vnT.js +0 -23451
- package/dist/node-BbXtxEF5.js +0 -48
- package/dist/pdf-B5Ukgxfs.js +0 -108386
- package/dist/pptx-DwZ8X2Hk.js +0 -9929
- package/dist/text-LU5pflEU.js +0 -59
package/dist/doc-CR0Zm5t8.js
DELETED
|
@@ -1,4144 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
import { a as __require, n as __commonJSMin, s as __toESM, t as require_src } from "./index.js";
|
|
3
|
-
import { n as require_pend, t as require_buffer_crc32 } from "./buffer-crc32-BlUV1nEz.js";
|
|
4
|
-
//#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-header.js
|
|
5
|
-
var require_ole_header = /* @__PURE__ */ __commonJSMin(((exports, module) => {
|
|
6
|
-
// Eight-byte signature that must open a buffer for Header#load to accept it.
const HEADER_DATA = Buffer.from("D0CF11E0A1B11AE1", "hex");
/**
 * Decodes the fixed-offset fields found at the start of a compound document.
 */
var Header = class {
  constructor() {}
  /**
   * Checks the signature and copies the header fields onto this instance.
   *
   * @param {Buffer} buffer - bytes to decode; once the signature matches, reads
   *   reach up to byte 512 (the end of the last partial-MSAT slot)
   * @returns {boolean} false when the leading bytes differ from HEADER_DATA (no
   *   field is assigned in that case), true once every field has been set
   */
  load(buffer) {
    const signatureMismatch = HEADER_DATA.some((byte, index) => byte !== buffer[index]);
    if (signatureMismatch) return false;
    // Both sector sizes are stored as power-of-two exponents.
    this.secSize = 1 << buffer.readInt16LE(30);
    this.shortSecSize = 1 << buffer.readInt16LE(32);
    this.SATSize = buffer.readInt32LE(44);
    this.dirSecId = buffer.readInt32LE(48);
    this.shortStreamMax = buffer.readInt32LE(56);
    this.SSATSecId = buffer.readInt32LE(60);
    this.SSATSize = buffer.readInt32LE(64);
    this.MSATSecId = buffer.readInt32LE(68);
    this.MSATSize = buffer.readInt32LE(72);
    // 109 consecutive little-endian 32-bit entries starting at byte 76.
    this.partialMSAT = Array.from({ length: 109 }, (_, slot) => buffer.readInt32LE(76 + slot * 4));
    return true;
  }
};
|
|
25
|
-
module.exports = Header;
|
|
26
|
-
}));
|
|
27
|
-
//#endregion
|
|
28
|
-
//#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-allocation-table.js
|
|
29
|
-
var require_ole_allocation_table = /* @__PURE__ */ __commonJSMin(((exports, module) => {
|
|
30
|
-
// A chain is only followed while the current id is greater than this value.
const ALLOCATION_TABLE_SEC_ID_FREE = -1;
/**
 * In-memory table that maps a sector id to the id of the sector following it.
 */
var AllocationTable = class {
  /**
   * @param {Object} doc - owning document; load() reads `doc._header.secSize`
   *   and calls `doc._readSectors(secIds)`
   */
  constructor(doc) {
    this._doc = doc;
  }
  /**
   * Reads the given sectors and decodes them as little-endian 32-bit entries.
   *
   * @param {number[]} secIds - ids of the sectors that hold the table, in order
   * @returns {Promise<void>} settles once `_table` has been populated
   */
  load(secIds) {
    const doc = this._doc;
    const entriesPerSector = doc._header.secSize / 4;
    this._table = new Array(secIds.length * entriesPerSector);
    return doc._readSectors(secIds).then((buffer) => {
      const entryCount = buffer.length / 4;
      for (let i = 0; i < entryCount; i++) this._table[i] = buffer.readInt32LE(i * 4);
    });
  }
  /**
   * Follows the table from `startSecId` and returns every sector id visited.
   *
   * The walk ends at the first id that is not greater than
   * ALLOCATION_TABLE_SEC_ID_FREE, at an id with no table entry, or at the first
   * id that was already visited. The last rule covers the single-sector
   * self-loop the original code handled and also longer cycles, which used to
   * spin forever while the result array grew.
   *
   * @param {number} startSecId - first sector id of the chain
   * @returns {number[]} the ids in chain order, each at most once
   */
  getSecIdChain(startSecId) {
    const secIds = [];
    const visited = new Set();
    let secId = startSecId;
    while (secId > ALLOCATION_TABLE_SEC_ID_FREE && !visited.has(secId)) {
      visited.add(secId);
      secIds.push(secId);
      secId = this._table[secId];
    }
    return secIds;
  }
};
|
|
55
|
-
module.exports = AllocationTable;
|
|
56
|
-
}));
|
|
57
|
-
//#endregion
|
|
58
|
-
//#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-directory-tree.js
|
|
59
|
-
var require_ole_directory_tree = /* @__PURE__ */ __commonJSMin(((exports, module) => {
|
|
60
|
-
// Values of the one-byte type field that this class distinguishes.
const DIRECTORY_TREE_ENTRY_TYPE_STORAGE = 1;
const DIRECTORY_TREE_ENTRY_TYPE_STREAM = 2;
const DIRECTORY_TREE_ENTRY_TYPE_ROOT = 5;
// Link value meaning "no entry on this side".
const DIRECTORY_TREE_LEAF = -1;
/**
 * Decodes the 128-byte directory entries of a compound document and arranges
 * them into nested `storages` / `streams` maps keyed by entry name.
 */
var DirectoryTree = class {
  /**
   * @param {Object} doc - owning document; load() calls `doc._readSectors(secIds)`
   */
  constructor(doc) {
    this._doc = doc;
  }
  /**
   * Reads the directory sectors, decodes every entry and builds the hierarchy
   * starting at the first entry whose type is DIRECTORY_TREE_ENTRY_TYPE_ROOT.
   *
   * @param {number[]} secIds - ids of the sectors that hold the directory
   * @returns {Promise<void>} settles once `root` and `_entries` are populated;
   *   rejects with an Error when no root entry exists
   */
  load(secIds) {
    return this._doc._readSectors(secIds).then((buffer) => {
      // Floor so a buffer that is not a multiple of 128 bytes cannot produce an
      // invalid (fractional) array length.
      const count = Math.floor(buffer.length / 128);
      this._entries = new Array(count);
      for (let i = 0; i < count; i++) {
        const offset = i * 128;
        const nameLength = Math.max(buffer.readInt16LE(64 + offset) - 1, 0);
        this._entries[i] = {
          name: buffer.toString("utf16le", offset, nameLength + offset),
          type: buffer.readInt8(66 + offset),
          nodeColor: buffer.readInt8(67 + offset),
          left: buffer.readInt32LE(68 + offset),
          right: buffer.readInt32LE(72 + offset),
          storageDirId: buffer.readInt32LE(76 + offset),
          secId: buffer.readInt32LE(116 + offset),
          size: buffer.readInt32LE(120 + offset),
        };
      }
      this.root = this._entries.find((entry) => entry.type === DIRECTORY_TREE_ENTRY_TYPE_ROOT);
      // Previously a missing root surfaced as an opaque TypeError further down.
      if (this.root === undefined) throw new Error("Invalid directory tree: no root entry");
      this._buildHierarchy(this.root);
    });
  }
  /**
   * Attaches `storages` and `streams` maps to `storageEntry` and recurses into
   * each child storage.
   *
   * @param {Object} storageEntry - decoded entry to populate
   * @param {Set<Object>} [visited] - storages already populated during this
   *   build; a storage reachable a second time is attached but not expanded
   *   again, so cyclic links cannot recurse without end
   */
  _buildHierarchy(storageEntry, visited = new Set()) {
    visited.add(storageEntry);
    storageEntry.storages = {};
    storageEntry.streams = {};
    for (const childId of this._getChildIds(storageEntry)) {
      const childEntry = this._entries[childId];
      const name = childEntry.name;
      if (childEntry.type === DIRECTORY_TREE_ENTRY_TYPE_STORAGE) storageEntry.storages[name] = childEntry;
      if (childEntry.type === DIRECTORY_TREE_ENTRY_TYPE_STREAM) storageEntry.streams[name] = childEntry;
    }
    for (const childStorage of Object.values(storageEntry.storages)) {
      if (!visited.has(childStorage)) this._buildHierarchy(childStorage, visited);
    }
  }
  /**
   * Collects the ids of all entries linked below `storageEntry`: the entry at
   * `storageDirId` first, then its `left` and `right` links in pre-order.
   *
   * Ids that do not resolve to a decoded entry (including DIRECTORY_TREE_LEAF)
   * and ids already collected are skipped, so out-of-range or cyclic links end
   * the walk instead of throwing or overflowing the stack.
   *
   * @param {Object} storageEntry - decoded storage or root entry
   * @returns {number[]} child entry ids, each at most once
   */
  _getChildIds(storageEntry) {
    const childIds = [];
    const collected = new Set();
    const collect = (entryId) => {
      const entry = this._entries[entryId];
      if (entry === undefined || collected.has(entryId)) return;
      collected.add(entryId);
      childIds.push(entryId);
      collect(entry.left);
      collect(entry.right);
    };
    if (storageEntry.storageDirId > -1) collect(storageEntry.storageDirId);
    return childIds;
  }
};
|
|
122
|
-
module.exports = DirectoryTree;
|
|
123
|
-
}));
|
|
124
|
-
//#endregion
|
|
125
|
-
//#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-storage-stream.js
|
|
126
|
-
var require_ole_storage_stream = /* @__PURE__ */ __commonJSMin(((exports, module) => {
|
|
127
|
-
const { Readable } = __require("stream");
|
|
128
|
-
/**
 * Readable stream that yields the bytes of one directory stream entry, one
 * sector per chunk.
 */
var StorageStream = class extends Readable {
  /**
   * @param {Object} doc - owning document; supplies `_header.shortStreamMax`,
   *   the `_SAT` / `_SSAT` tables and `_readSector` / `_readShortSector`
   * @param {Object} [streamEntry] - directory entry (`size`, `secId`); when it
   *   is missing the stream ends immediately without emitting data
   */
  constructor(doc, streamEntry) {
    super();
    this._doc = doc;
    this._streamEntry = streamEntry;
    this.initialize();
  }
  /**
   * Resets the read position and resolves the sector chain for the entry.
   * Entries smaller than `shortStreamMax` use the `_SSAT` table and short
   * sectors; all others use `_SAT` and regular sectors.
   */
  initialize() {
    this._index = 0;
    this._done = true;
    if (!this._streamEntry) return;
    const doc = this._doc;
    this._bytes = this._streamEntry.size;
    this._shortStream = this._bytes < doc._header.shortStreamMax;
    this._allocationTable = this._shortStream ? doc._SSAT : doc._SAT;
    this._secIds = this._allocationTable.getSecIdChain(this._streamEntry.secId);
    this._done = false;
  }
  /**
   * @param {number} sector - sector id taken from the resolved chain
   * @returns {Promise<Buffer>} the sector contents, read through the document
   */
  _readSector(sector) {
    return this._shortStream ? this._doc._readShortSector(sector) : this._doc._readSector(sector);
  }
  /**
   * Readable implementation: pushes the next sector, trimmed so that no more
   * than the entry's remaining byte count is emitted, and ends the stream once
   * the chain is exhausted.
   *
   * A failed sector read destroys the stream with that error so consumers see
   * an 'error' event; before, the rejection was left unhandled.
   */
  _read() {
    if (this._done) return this.push(null);
    if (this._index >= this._secIds.length) {
      this._done = true;
      return this.push(null);
    }
    return this._readSector(this._secIds[this._index])
      .then((sectorData) => {
        // subarray has the same view semantics as the deprecated Buffer#slice.
        const chunk = this._bytes - sectorData.length < 0 ? sectorData.subarray(0, this._bytes) : sectorData;
        this._bytes -= chunk.length;
        this._index++;
        this.push(chunk);
      })
      .catch((error) => {
        this.destroy(error);
      });
  }
};
|
|
168
|
-
module.exports = StorageStream;
|
|
169
|
-
}));
|
|
170
|
-
//#endregion
|
|
171
|
-
//#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-storage.js
|
|
172
|
-
var require_ole_storage = /* @__PURE__ */ __commonJSMin(((exports, module) => {
|
|
173
|
-
const StorageStream = require_ole_storage_stream();
|
|
174
|
-
module.exports = class Storage {
|
|
175
|
-
constructor(doc, dirEntry) {
|
|
176
|
-
this._doc = doc;
|
|
177
|
-
this._dirEntry = dirEntry;
|
|
178
|
-
}
|
|
179
|
-
storage(storageName) {
|
|
180
|
-
return new Storage(this._doc, this._dirEntry.storages[storageName]);
|
|
181
|
-
}
|
|
182
|
-
stream(streamName) {
|
|
183
|
-
return new StorageStream(this._doc, this._dirEntry.streams[streamName]);
|
|
184
|
-
}
|
|
185
|
-
};
|
|
186
|
-
}));
|
|
187
|
-
//#endregion
|
|
188
|
-
//#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-compound-doc.js
|
|
189
|
-
var require_ole_compound_doc = /* @__PURE__ */ __commonJSMin(((exports, module) => {
|
|
190
|
-
/**
|
|
191
|
-
* @module ole-compound-doc
|
|
192
|
-
*/
|
|
193
|
-
const Header = require_ole_header();
|
|
194
|
-
const AllocationTable = require_ole_allocation_table();
|
|
195
|
-
const DirectoryTree = require_ole_directory_tree();
|
|
196
|
-
const Storage = require_ole_storage();
|
|
197
|
-
/**
 * Implements the main interface used to read from an OLE compound file.
 */
var OleCompoundDoc = class {
  /**
   * @param {Object} reader - object whose `read(buffer, offset, length, position)`
   *   returns a promise; every file access made by this class goes through it
   */
  constructor(reader) {
    this._reader = reader;
    this._skipBytes = 0;
  }
  /**
   * Loads the header, the allocation tables and the directory tree, in that
   * order, then the custom header when `_skipBytes` is non-zero.
   *
   * @returns {Promise<OleCompoundDoc>} resolves with this instance
   */
  read() {
    const steps = [
      () => this._readHeader(),
      () => this._readMSAT(),
      () => this._readSAT(),
      () => this._readSSAT(),
      () => this._readDirectoryTree(),
      () => {
        // Loose comparison kept: `_skipBytes` may be assigned from outside this class.
        if (this._skipBytes != 0) return this._readCustomHeader();
      },
    ];
    return steps.reduce((chain, step) => chain.then(step), Promise.resolve()).then(() => this);
  }
  /**
   * Reads the first `_skipBytes` bytes of the file and hands them to
   * `_customHeaderCallback`; the callback's result is not used.
   */
  _readCustomHeader() {
    const target = Buffer.alloc(this._skipBytes);
    return this._reader.read(target, 0, this._skipBytes, 0).then((customHeader) => {
      this._customHeaderCallback(customHeader);
    });
  }
  /**
   * Reads 512 bytes at `_skipBytes` and decodes them into `_header`.
   *
   * @returns {Promise<void>} rejects with "Not a valid compound document" when
   *   the header loader refuses the bytes
   */
  _readHeader() {
    const target = Buffer.alloc(512);
    return this._reader.read(target, 0, 512, this._skipBytes).then((headerBytes) => {
      const header = new Header();
      this._header = header;
      if (!header.load(headerBytes)) throw new Error("Not a valid compound document");
    });
  }
  /**
   * Builds `_MSAT` from the header's partial table, sized to `SATSize`. When
   * more than 109 entries are needed and extension sectors exist, each one is
   * read in turn: all of its 32-bit slots but the last extend the table, and
   * the last slot is the id of the next extension sector.
   */
  _readMSAT() {
    const header = this._header;
    this._MSAT = header.partialMSAT.slice(0);
    this._MSAT.length = header.SATSize;
    if (header.SATSize <= 109 || header.MSATSize === 0) return Promise.resolve();
    const readExtensionSectors = async () => {
      let cursor = 109;
      let secId = header.MSATSecId;
      for (let sectorIndex = 0; sectorIndex < header.MSATSize; sectorIndex++) {
        const sector = await this._readSector(secId);
        const linkOffset = header.secSize - 4;
        for (let pos = 0; pos < linkOffset && cursor < header.SATSize; pos += 4) {
          this._MSAT[cursor] = sector.readInt32LE(pos);
          cursor++;
        }
        secId = sector.readInt32LE(linkOffset);
      }
    };
    return readExtensionSectors();
  }
  /**
   * Reads the listed units one after another into a single buffer.
   *
   * @param {number[]} secIds - unit ids, in the order they should appear
   * @param {number} unitSize - byte length of one unit
   * @param {function(number): number} locate - maps a unit id to its file offset
   * @returns {Promise<Buffer>} buffer of `secIds.length * unitSize` bytes
   */
  _readSequential(secIds, unitSize, locate) {
    const target = Buffer.alloc(secIds.length * unitSize);
    const step = (index) => {
      if (index >= secIds.length) return Promise.resolve(target);
      const targetOffset = index * unitSize;
      const fileOffset = locate(secIds[index]);
      return this._reader.read(target, targetOffset, unitSize, fileOffset).then(() => step(index + 1));
    };
    return step(0);
  }
  /** Reads a single regular sector. */
  _readSector(secId) {
    return this._readSectors([secId]);
  }
  /** Reads regular sectors (`secSize` bytes each) back to back into one buffer. */
  _readSectors(secIds) {
    return this._readSequential(secIds, this._header.secSize, (secId) => this._getFileOffsetForSec(secId));
  }
  /** Reads a single short sector. */
  _readShortSector(secId) {
    return this._readShortSectors([secId]);
  }
  /** Reads short sectors (`shortSecSize` bytes each) back to back into one buffer. */
  _readShortSectors(secIds) {
    return this._readSequential(secIds, this._header.shortSecSize, (secId) => this._getFileOffsetForShortSec(secId));
  }
  /** Loads `_SAT` from the sectors listed in `_MSAT`. */
  _readSAT() {
    const table = new AllocationTable(this);
    this._SAT = table;
    return table.load(this._MSAT);
  }
  /**
   * Loads `_SSAT` from the chain that starts at the header's `SSATSecId`.
   *
   * @returns {Promise<void>} rejects when the chain length differs from the
   *   header's `SSATSize`
   */
  _readSSAT() {
    const header = this._header;
    const chain = this._SAT.getSecIdChain(header.SSATSecId);
    if (chain.length !== header.SSATSize) {
      return Promise.reject(/* @__PURE__ */ new Error("Invalid Short Sector Allocation Table"));
    }
    const table = new AllocationTable(this);
    this._SSAT = table;
    return table.load(chain);
  }
  /**
   * Loads the directory tree, wraps its root in `_rootStorage` and records the
   * sector chain of the root entry, which short-sector lookups index into.
   */
  _readDirectoryTree() {
    const tree = new DirectoryTree(this);
    this._directoryTree = tree;
    const chain = this._SAT.getSecIdChain(this._header.dirSecId);
    return tree.load(chain).then(() => {
      const rootEntry = tree.root;
      this._rootStorage = new Storage(this, rootEntry);
      this._shortStreamSecIds = this._SAT.getSecIdChain(rootEntry.secId);
    });
  }
  /**
   * @param {number} secId - regular sector id
   * @returns {number} file offset; the `+ 1` steps over one sector-sized slot
   *   ahead of sector 0
   */
  _getFileOffsetForSec(secId) {
    return this._skipBytes + (secId + 1) * this._header.secSize;
  }
  /**
   * @param {number} shortSecId - short sector id
   * @returns {number} file offset, found by locating the regular sector of
   *   `_shortStreamSecIds` that contains the short sector
   */
  _getFileOffsetForShortSec(shortSecId) {
    const { secSize, shortSecSize } = this._header;
    const offsetInShortStream = shortSecId * shortSecSize;
    const containerIndex = Math.floor(offsetInShortStream / secSize);
    const offsetInContainer = offsetInShortStream % secSize;
    return this._getFileOffsetForSec(this._shortStreamSecIds[containerIndex]) + offsetInContainer;
  }
  /** @returns a storage wrapper for the named child of the root storage */
  storage(storageName) {
    return this._rootStorage.storage(storageName);
  }
  /** @returns a stream for the named child of the root storage */
  stream(streamName) {
    return this._rootStorage.stream(streamName);
  }
};
|
|
312
|
-
module.exports = OleCompoundDoc;
|
|
313
|
-
}));
|
|
314
|
-
//#endregion
|
|
315
|
-
//#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/filters.js
|
|
316
|
-
var require_filters = /* @__PURE__ */ __commonJSMin(((exports, module) => {
|
|
317
|
-
/**
|
|
318
|
-
* @module filters
|
|
319
|
-
*
|
|
320
|
-
* @description
|
|
321
|
-
* Exports several functions that implement various methods for translating
|
|
322
|
-
* characters into Unicode, and cleaning up some of the remaining residues from
|
|
323
|
-
* Word's odd internal marker character usage.
|
|
324
|
-
*/
|
|
325
|
-
/**
|
|
326
|
-
* A replacement table, that maps Word control characters to either NULL, for
|
|
327
|
-
* deletion, or to another more acceptable character in a Unicode world, such
|
|
328
|
-
* as a newline.
|
|
329
|
-
*/
|
|
330
|
-
const replaceTable = [];
|
|
331
|
-
replaceTable[2] = "\0";
|
|
332
|
-
replaceTable[5] = "\0";
|
|
333
|
-
replaceTable[7] = " ";
|
|
334
|
-
replaceTable[8] = "\0";
|
|
335
|
-
replaceTable[10] = "\n";
|
|
336
|
-
replaceTable[11] = "\n";
|
|
337
|
-
replaceTable[12] = "\n";
|
|
338
|
-
replaceTable[13] = "\n";
|
|
339
|
-
replaceTable[30] = "‑";
|
|
340
|
-
/**
 * @constant
 * Maps between Windows character codes, especially between 0x80 and 0x9f,
 * into official Unicode code points. This smooths over the differences
 * between UCS-2 and 8-bit code runs in Word, by allowing us to work
 * entirely within Unicode later on.
 *
 * Codes 0x80, 0x81, 0x8d, 0x8f, 0x90 and 0x9d have no entry.
 */
const binaryToUnicodeTable = {
  0x82: "\u201a",
  0x83: "\u0192",
  0x84: "\u201e",
  0x85: "\u2026",
  0x86: "\u2020",
  0x87: "\u2021",
  0x88: "\u02c6",
  0x89: "\u2030",
  0x8a: "\u0160",
  0x8b: "\u2039",
  0x8c: "\u0152",
  0x8e: "\u017d",
  0x91: "\u2018",
  0x92: "\u2019",
  0x93: "\u201c",
  0x94: "\u201d",
  0x95: "\u2022",
  0x96: "\u2013",
  0x97: "\u2014",
  0x98: "\u02dc",
  0x99: "\u2122",
  0x9a: "\u0161",
  0x9b: "\u203a",
  0x9c: "\u0153",
  0x9e: "\u017e",
  0x9f: "\u0178",
};
/**
 * Converts character codes from 0x80 to 0x9f to Unicode equivalents
 * within a string.
 *
 * Codes in that range without a table entry are left unchanged. Returning the
 * missing table value directly made String#replace insert the literal text
 * "undefined" in their place.
 *
 * @param {string} string - the input string
 * @returns a converted string
 */
module.exports.binaryToUnicode = (string) => {
  return string.replace(/[\x80-\x9f]/g, (match) => {
    const mapped = binaryToUnicodeTable[match.charCodeAt(0)];
    return mapped === undefined ? match : mapped;
  });
};
|
|
383
|
-
/**
 * The main function for cleaning OLE-based text. It runs a few standard replacements on characters
 * that are reserved for special purposes, also removes fields, and finally strips out any weird
 * characters that are likely not to be useful for anyone.
 *
 * The replacement pass now targets 0x1e, the code that actually has an entry in
 * `replaceTable`. It used to target 0x1f, which has none, so every 0x1f became
 * the literal text "undefined" while 0x1e was never translated. 0x1f is now
 * left in the text untouched.
 * NOTE(review): 0x1f is presumably Word's optional hyphen; confirm against the
 * format specification and consider stripping it here.
 *
 * @param {string} string - an input string
 * @returns a cleaned up string
 */
module.exports.clean = (string) => {
  let text = string.replace(/[\x02\x05\x07\x08\x0a\x0b\x0c\x0d\x1e]/g, (match) => {
    const replacement = replaceTable[match.charCodeAt(0)];
    // Defensive: never let a missing table entry turn into the text "undefined".
    return replacement === undefined ? match : replacement;
  });
  // Each pass replaces every \x13 ... [\x14 ...] \x15 group that contains no
  // nested group with the text after its \x14 (or with all of its content when
  // there is no \x14). Repeating until nothing matches unwraps nested groups
  // from the inside out.
  let replacedAny = true;
  while (replacedAny) {
    replacedAny = false;
    text = text.replace(/(?:\x13[^\x13\x14\x15]*\x14?([^\x13\x14\x15]*)\x15)/g, (match, fieldResult) => {
      replacedAny = true;
      return fieldResult;
    });
  }
  // Drops the NUL characters produced by the replacement table along with any
  // other code in the range 0x00-0x07.
  return text.replace(/[\x00-\x07]/g, "");
};
|
|
403
|
-
// Sparse lookup from a Unicode punctuation code point to its ASCII stand-in.
const filterTable = [];
for (const [codePoint, ascii] of [
  [0x2002, " "],
  [0x2003, " "],
  [0x2012, "-"],
  [0x2013, "-"],
  [0x2014, "-"],
  [0x2018, "'"],
  [0x2019, "'"],
  [0x201c, "\""],
  [0x201d, "\""],
]) {
  filterTable[codePoint] = ascii;
}
/**
 * Replaces a handful of Unicode punctuation characters with ASCII ones: the
 * two wide spaces become a plain space, the three dashes become a hyphen-minus,
 * and curly single and double quotes become their straight forms.
 * @param {string} string - the input string
 * @returns a filtered string
 */
module.exports.filter = (string) => {
  const toAscii = (match) => filterTable[match.charCodeAt(0)];
  return string.replace(/[\u2002\u2003\u2012\u2013\u2014\u2018\u2019\u201c\u201d]/g, toAscii);
};
|
|
422
|
-
}));
|
|
423
|
-
//#endregion
|
|
424
|
-
//#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/document.js
|
|
425
|
-
var require_document = /* @__PURE__ */ __commonJSMin(((exports, module) => {
|
|
426
|
-
/**
|
|
427
|
-
* @module document
|
|
428
|
-
*
|
|
429
|
-
* @description
|
|
430
|
-
* Implements the main document returned when a Word file has been extracted. This exposes
|
|
431
|
-
* methods that allow the body, annotations, headers, footnotes, and endnotes, to be
|
|
432
|
-
* read and used.
|
|
433
|
-
*
|
|
434
|
-
* @author
|
|
435
|
-
* Stuart Watt <stuart@morungos.com>
|
|
436
|
-
*/
|
|
437
|
-
const { filter } = require_filters();
|
|
438
|
-
/**
 * Shared tail of every accessor: returns `value` unchanged when the caller
 * switched Unicode filtering off, otherwise passes it through `filter`.
 * The loose comparison is intentional and matches the original accessors: any
 * `filterUnicode` value that is == false (false, 0, "") disables filtering.
 */
const applyUnicodeOption = (value, options) => {
  const settings = options || {};
  return settings.filterUnicode == false ? value : filter(value);
};
/**
 * @class
 * Returned from all extractors, this class provides accessors to read the different
 * parts of a Word document. This also allows some options to be passed to the accessors,
 * so you can control some character conversion and filtering, as described in the methods
 * below.
 */
var Document = class {
  constructor() {
    this._body = "";
    this._footnotes = "";
    this._endnotes = "";
    this._headers = "";
    this._footers = "";
    this._annotations = "";
    this._textboxes = "";
    this._headerTextboxes = "";
  }
  /**
   * Accessor to read the main body part of a Word file
   * @param {Object} options - options for body data
   * @param {boolean} options.filterUnicode - if true (the default), converts common Unicode quotes
   *   to standard ASCII characters
   * @returns a string, containing the Word file body
   */
  getBody(options) {
    return applyUnicodeOption(this._body, options);
  }
  /**
   * Accessor to read the footnotes part of a Word file
   * @param {Object} options - options for footnote data
   * @param {boolean} options.filterUnicode - if true (the default), converts common Unicode quotes
   *   to standard ASCII characters
   * @returns a string, containing the Word file footnotes
   */
  getFootnotes(options) {
    return applyUnicodeOption(this._footnotes, options);
  }
  /**
   * Accessor to read the endnotes part of a Word file
   * @param {Object} options - options for endnote data
   * @param {boolean} options.filterUnicode - if true (the default), converts common Unicode quotes
   *   to standard ASCII characters
   * @returns a string, containing the Word file endnotes
   */
  getEndnotes(options) {
    return applyUnicodeOption(this._endnotes, options);
  }
  /**
   * Accessor to read the headers part of a Word file
   * @param {Object} options - options for header data
   * @param {boolean} options.filterUnicode - if true (the default), converts common Unicode quotes
   *   to standard ASCII characters
   * @param {boolean} options.includeFooters - if true (the default), returns headers and footers
   *   as a single string
   * @returns a string, containing the Word file headers
   */
  getHeaders(options) {
    const settings = options || {};
    const footers = settings.includeFooters == false ? "" : this._footers;
    return applyUnicodeOption(this._headers + footers, settings);
  }
  /**
   * Accessor to read the footers part of a Word file
   * @param {Object} options - options for footer data
   * @param {boolean} options.filterUnicode - if true (the default), converts common Unicode quotes
   *   to standard ASCII characters
   * @returns a string, containing the Word file footers
   */
  getFooters(options) {
    return applyUnicodeOption(this._footers, options);
  }
  /**
   * Accessor to read the annotations part of a Word file
   * @param {Object} options - options for annotation data
   * @param {boolean} options.filterUnicode - if true (the default), converts common Unicode quotes
   *   to standard ASCII characters
   * @returns a string, containing the Word file annotations
   */
  getAnnotations(options) {
    return applyUnicodeOption(this._annotations, options);
  }
  /**
   * Accessor to read the textboxes from a Word file. The text box content is aggregated as a
   * single long string. When both the body and header content exists, they will be separated
   * by a newline.
   *
   * Empty parts are skipped, so the separator only appears when both parts
   * have content. Previously an empty part still took part in the join, which
   * produced a stray leading or trailing newline (and "\n" for a document
   * without any text boxes).
   *
   * @param {Object} options - options for text box data
   * @param {boolean} options.filterUnicode - if true (the default), converts common Unicode quotes
   *   to standard ASCII characters
   * @param {boolean} options.includeHeadersAndFooters - if true (the default), includes text box
   *   content in headers and footers
   * @param {boolean} options.includeBody - if true (the default), includes text box
   *   content in the document body
   * @returns a string, containing the Word file text box content
   */
  getTextboxes(options) {
    const settings = options || {};
    const segments = [];
    if (settings.includeBody != false && this._textboxes) segments.push(this._textboxes);
    if (settings.includeHeadersAndFooters != false && this._headerTextboxes) segments.push(this._headerTextboxes);
    return applyUnicodeOption(segments.join("\n"), settings);
  }
};
|
|
552
|
-
module.exports = Document;
|
|
553
|
-
}));
|
|
554
|
-
//#endregion
|
|
555
|
-
//#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/word-ole-extractor.js
|
|
556
|
-
var require_word_ole_extractor = /* @__PURE__ */ __commonJSMin(((exports, module) => {
|
|
557
|
-
/**
|
|
558
|
-
* @module word-ole-extractor
|
|
559
|
-
*
|
|
560
|
-
* @description
|
|
561
|
-
* Implements the main logic of extracting text from "classic" OLE-based Word files.
|
|
562
|
-
* Depends on [OleCompoundDoc]{@link module:ole-compound-doc~OleCompoundDoc}
|
|
563
|
-
* for most of the underlying OLE logic. Note that
|
|
564
|
-
* [OpenOfficeExtractor]{@link module:open-office-extractor~OpenOfficeExtractor} is
|
|
565
|
-
* used for newer, Open Office-style, files.
|
|
566
|
-
*/
|
|
567
|
-
const OleCompoundDoc = require_ole_compound_doc();
|
|
568
|
-
const Document = require_document();
|
|
569
|
-
const { binaryToUnicode, clean } = require_filters();
|
|
570
|
-
/**
|
|
571
|
-
* Constant for the deletion character SPRM.
|
|
572
|
-
*/
|
|
573
|
-
const sprmCFRMarkDel = 0;
|
|
574
|
-
/**
 * Finds the first piece whose `endCp` is at or beyond a cp-style position.
 * @param {Array} pieces the piece table, scanned in order
 * @param {number} position the character position to look up
 * @returns the index of that piece, or undefined when every piece ends before
 * the position
 *
 * @todo
 * Might be better using a binary search
 */
const getPieceIndexByCP = (pieces, position) => {
  const index = pieces.findIndex((piece) => position <= piece.endCp);
  // Callers rely on undefined (not -1) to mean "no containing piece".
  return index === -1 ? undefined : index;
};
|
|
585
|
-
/**
 * Finds the first piece whose `endFilePos` is at or beyond a file-style offset.
 * @param {Array} pieces the piece table, scanned in order
 * @param {number} position the file offset to look up
 * @returns the index of that piece, or undefined when every piece ends before
 * the offset
 *
 * @todo
 * Might be better using a binary search
 */
const getPieceIndexByFilePos = (pieces, position) => {
  const index = pieces.findIndex((piece) => position <= piece.endFilePos);
  // Callers rely on undefined (not -1) to mean "no containing piece".
  return index === -1 ? undefined : index;
};
|
|
596
|
-
/**
 * Extracts the plain text lying between two cp-style positions, concatenating
 * the relevant part of every piece the range touches.
 * @param {Array} pieces the piece table
 * @param {number} start the start offset
 * @param {number} end the end offset
 * @returns a character string; empty when either offset has no containing piece
 */
function getTextRangeByCP(pieces, start, end) {
  const firstIndex = getPieceIndexByCP(pieces, start);
  const lastIndex = getPieceIndexByCP(pieces, end);
  let text = "";
  // When either index is undefined the loop condition is false straight away.
  for (let index = firstIndex; index <= lastIndex; index++) {
    const piece = pieces[index];
    const from = index === firstIndex ? start - piece.startCp : 0;
    // For pieces before the last one the upper bound is the piece's endCp;
    // substring clamps any bound that exceeds the text length.
    const to = index === lastIndex ? end - piece.startCp : piece.endCp;
    text += piece.text.substring(from, to);
  }
  return text;
}
|
|
615
|
-
/**
 * Overwrites part of a piece's text with a repeated character. The range is
 * given as cp-style offsets and is clamped to the span the piece covers
 * (`startCp` to `startCp + length`).
 * @param {*} piece the piece; its `text` is replaced in place
 * @param {*} start the starting character offset
 * @param {*} end the ending character offset
 * @param {*} character the replacement character
 */
function fillPieceRange(piece, start, end, character) {
  const pieceStart = piece.startCp;
  const pieceEnd = pieceStart + piece.length;
  const text = piece.text;
  const from = start < pieceStart ? pieceStart : start;
  const to = end > pieceEnd ? pieceEnd : end;
  const head = from === pieceStart ? "" : text.slice(0, from - pieceStart);
  const filler = "".padStart(to - from, character);
  // `to - pieceEnd` is negative here, so slice keeps that many trailing characters.
  const tail = to === pieceEnd ? "" : text.slice(to - pieceEnd);
  piece.text = head + filler + tail;
}
|
|
632
|
-
/**
 * Overwrites part of a piece's text with a repeated character. The range is
 * given as filePos-style offsets, clamped to the span the piece covers
 * (`startFilePos` to `startFilePos + size`) and divided by the piece's `bpc`
 * to obtain character positions. This is used when applying character styles,
 * which use filePos values rather than cp values.
 *
 * @param {*} piece the piece; its `text` is replaced in place
 * @param {*} start the starting file offset
 * @param {*} end the ending file offset
 * @param {*} character the replacement character
 */
function fillPieceRangeByFilePos(piece, start, end, character) {
  const pieceStart = piece.startFilePos;
  const pieceEnd = pieceStart + piece.size;
  const text = piece.text;
  const from = start < pieceStart ? pieceStart : start;
  const to = end > pieceEnd ? pieceEnd : end;
  const head = from === pieceStart ? "" : text.slice(0, (from - pieceStart) / piece.bpc);
  const filler = "".padStart((to - from) / piece.bpc, character);
  // The offset passed to slice is negative, so it keeps the trailing characters.
  const tail = to === pieceEnd ? "" : text.slice((to - pieceEnd) / piece.bpc);
  piece.text = head + filler + tail;
}
|
|
652
|
-
/**
 * Overwrites a cp-style range across the piece table with the given character.
 * Every piece from the one containing `start` to the one containing `end` is
 * filled over the part of the range it covers, so the length of each segment
 * never changes.
 * @param {*} pieces the piece table
 * @param {*} start the starting character offset
 * @param {*} end the ending character offset
 * @param {*} character the replacement character
 */
function replaceSelectedRange(pieces, start, end, character) {
  const firstIndex = getPieceIndexByCP(pieces, start);
  const lastIndex = getPieceIndexByCP(pieces, end);
  // Nothing happens when either lookup came back undefined.
  for (let index = firstIndex; index <= lastIndex; index++) {
    fillPieceRange(pieces[index], start, end, character);
  }
}
|
|
669
|
-
/**
 * Overwrites a selected range of the piece table with the given character,
 * where the range is given in file positions. Every piece from the index
 * that getPieceIndexByFilePos reports for start up to and including the
 * index it reports for end is handed to fillPieceRangeByFilePos. The length
 * of segments in the piece table must never be changed.
 *
 * @param {*} pieces the piece table
 * @param {*} start the starting file position
 * @param {*} end the ending file position
 * @param {*} character the replacement character
 */
function replaceSelectedRangeByFilePos(pieces, start, end, character) {
	const firstIndex = getPieceIndexByFilePos(pieces, start);
	const lastIndex = getPieceIndexByFilePos(pieces, end);
	for (let index = firstIndex; index <= lastIndex; index += 1) {
		fillPieceRangeByFilePos(pieces[index], start, end, character);
	}
}
|
|
686
|
-
/**
 * Marks a range as deleted by overwriting it with null characters. The range
 * is given in file positions, because the work is delegated to
 * replaceSelectedRangeByFilePos. According to the original note, the null
 * characters are removed afterwards by the cleaning process.
 *
 * @param {*} pieces the piece table
 * @param {*} start the starting file position
 * @param {*} end the ending file position
 */
function markDeletedRange(pieces, start, end) {
	const deletedMarker = "\0";
	replaceSelectedRangeByFilePos(pieces, start, end, deletedMarker);
}
|
|
696
|
-
/**
 * Walks a run of SPRMs stored back to back in a buffer, starting at a given
 * offset, and invokes the handler once per SPRM with the arguments:
 * buffer, offset, sprm, ispmd, fspec, sgc, spra.
 *
 * Each SPRM is a 16-bit little-endian code followed by its operand. The
 * offset passed to the handler is that of the operand, i.e. the first byte
 * after the code. The walk stops once fewer than two bytes remain.
 *
 * @param {*} buffer the buffer
 * @param {*} offset the starting offset
 * @param {*} handler the function to call for each SPRM
 */
const processSprms = (buffer, offset, handler) => {
	let cursor = offset;
	while (cursor < buffer.length - 1) {
		const sprm = buffer.readUInt16LE(cursor);
		const operandAt = cursor + 2;
		// spra (the top three bits) selects the operand size.
		const spra = sprm >> 13 & 7;
		handler(buffer, operandAt, sprm, sprm & 31, sprm >> 9 & 1, sprm >> 10 & 7, spra);
		switch (spra) {
			case 0:
			case 1:
				cursor = operandAt + 1;
				break;
			case 2:
			case 4:
			case 5:
				cursor = operandAt + 2;
				break;
			case 3:
				cursor = operandAt + 4;
				break;
			case 6:
				// Variable-length operand: a length byte followed by that many bytes.
				cursor = operandAt + buffer.readUInt8(operandAt) + 1;
				break;
			case 7:
				cursor = operandAt + 3;
				break;
			default: throw new Error("Unparsed sprm");
		}
	}
};
|
|
737
|
-
/**
 * @class
 * The main class implementing extraction from OLE-based Word files.
 * This handles all the extraction and conversion logic.
 */
var WordOleExtractor = class {
	constructor() {
		// Text pieces in piece-table order, filled by writePieces().
		this._pieces = [];
		// Bookmark name -> { start, end }, filled by writeBookmarks().
		this._bookmarks = {};
		// Counts read from the file header (fcMin, ccpText, ccpFtn, ...).
		this._boundaries = {};
		// Header/footer stories tagged with a type by normalizeHeaders().
		this._taggedHeaders = [];
	}
	/**
	 * The main extraction method. This creates an OLE compound document
	 * interface, then opens the "WordDocument" stream, reads it into a buffer
	 * and extracts the document from it.
	 * @param {*} reader passed to the OleCompoundDoc constructor
	 * @returns a Promise which resolves to the extracted document
	 */
	extract(reader) {
		const document = new OleCompoundDoc(reader);
		return document.read().then(() => this.documentStream(document, "WordDocument").then((stream) => this.streamBuffer(stream)).then((buffer) => this.extractWordDocument(document, buffer)));
	}
	/**
	 * Builds and returns a {@link Document} object corresponding to the text
	 * in the original document. This involves reading and retrieving the text
	 * ranges corresponding to the primary document parts. The text segments are
	 * read from the extracted table of text pieces, in the fixed order body,
	 * footnotes, headers/footers, annotations, endnotes, textboxes and header
	 * textboxes. Headers and footers are taken from the stories tagged by
	 * normalizeHeaders() instead of from a text range.
	 * @returns a {@link Document} object
	 */
	buildDocument() {
		const document = new Document();
		const pieces = this._pieces;
		// Running character position of the start of the next document part.
		let start = 0;
		document._body = clean(getTextRangeByCP(pieces, start, start + this._boundaries.ccpText));
		start += this._boundaries.ccpText;
		if (this._boundaries.ccpFtn) {
			document._footnotes = clean(getTextRangeByCP(pieces, start, start + this._boundaries.ccpFtn - 1));
			start += this._boundaries.ccpFtn;
		}
		if (this._boundaries.ccpHdd) {
			document._headers = clean(this._taggedHeaders.filter((s) => s.type === "headers").map((s) => s.text).join(""));
			document._footers = clean(this._taggedHeaders.filter((s) => s.type === "footers").map((s) => s.text).join(""));
			start += this._boundaries.ccpHdd;
		}
		if (this._boundaries.ccpAtn) {
			document._annotations = clean(getTextRangeByCP(pieces, start, start + this._boundaries.ccpAtn - 1));
			start += this._boundaries.ccpAtn;
		}
		if (this._boundaries.ccpEdn) {
			document._endnotes = clean(getTextRangeByCP(pieces, start, start + this._boundaries.ccpEdn - 1));
			start += this._boundaries.ccpEdn;
		}
		if (this._boundaries.ccpTxbx) {
			document._textboxes = clean(getTextRangeByCP(pieces, start, start + this._boundaries.ccpTxbx - 1));
			start += this._boundaries.ccpTxbx;
		}
		if (this._boundaries.ccpHdrTxbx) {
			document._headerTextboxes = clean(getTextRangeByCP(pieces, start, start + this._boundaries.ccpHdrTxbx - 1));
			start += this._boundaries.ccpHdrTxbx;
		}
		return document;
	}
	/**
	 * Main logic top level function for unpacking a Word document. Checks the
	 * magic number, picks the table stream, records the part boundaries and
	 * then runs the individual extraction passes in order.
	 * @param {*} document the OLE document
	 * @param {*} buffer a buffer holding the "WordDocument" stream
	 * @returns a Promise which resolves to a {@link Document}; it rejects when
	 * the magic number is not 42476 (0xA5EC)
	 */
	extractWordDocument(document, buffer) {
		const magic = buffer.readUInt16LE(0);
		if (magic !== 42476) return Promise.reject(/* @__PURE__ */ new Error(`This does not seem to be a Word document: Invalid magic number: ${magic.toString(16)}`));
		// Bit 512 of the flags word at offset 10 selects which table stream to read.
		const streamName = (buffer.readUInt16LE(10) & 512) !== 0 ? "1Table" : "0Table";
		return this.documentStream(document, streamName).then((stream) => this.streamBuffer(stream)).then((streamBuffer) => {
			this._boundaries.fcMin = buffer.readUInt32LE(24);
			this._boundaries.ccpText = buffer.readUInt32LE(76);
			this._boundaries.ccpFtn = buffer.readUInt32LE(80);
			this._boundaries.ccpHdd = buffer.readUInt32LE(84);
			this._boundaries.ccpAtn = buffer.readUInt32LE(92);
			this._boundaries.ccpEdn = buffer.readUInt32LE(96);
			this._boundaries.ccpTxbx = buffer.readUInt32LE(100);
			this._boundaries.ccpHdrTxbx = buffer.readUInt32LE(104);
			this.writeBookmarks(buffer, streamBuffer);
			this.writePieces(buffer, streamBuffer);
			this.writeCharacterProperties(buffer, streamBuffer);
			this.writeParagraphProperties(buffer, streamBuffer);
			this.normalizeHeaders(buffer, streamBuffer);
			return this.buildDocument();
		});
	}
	/**
	 * Returns a promise that resolves to the named stream.
	 * @param {*} document the OLE document
	 * @param {*} streamName the name of the stream to open
	 * @returns a promise that resolves to the named stream
	 */
	documentStream(document, streamName) {
		return Promise.resolve(document.stream(streamName));
	}
	/**
	 * Returns a promise that resolves to a Buffer containing the contents of
	 * the given stream. The promise rejects if the stream emits "error".
	 * @param {*} stream an emitter of "data", "error" and "end" events
	 * @returns a promise that resolves to the stream contents
	 */
	streamBuffer(stream) {
		return new Promise((resolve, reject) => {
			const chunks = [];
			stream.on("data", (chunk) => chunks.push(chunk));
			stream.on("error", (error) => reject(error));
			stream.on("end", () => resolve(Buffer.concat(chunks)));
		});
	}
	/**
	 * Walks the field table referenced at header offsets 282/286. The method
	 * currently only reads each entry and stores nothing; the result argument
	 * is unused.
	 * @param {*} buffer the "WordDocument" stream buffer
	 * @param {*} tableBuffer the table stream buffer
	 * @param {*} result unused
	 */
	writeFields(buffer, tableBuffer, result) {
		const fcPlcffldMom = buffer.readInt32LE(282);
		const lcbPlcffldMom = buffer.readUInt32LE(286);
		if (lcbPlcffldMom === 0) return;
		const fieldCount = (lcbPlcffldMom - 4) / 6;
		const dataOffset = (fieldCount + 1) * 4;
		const plcffldMom = tableBuffer.slice(fcPlcffldMom, fcPlcffldMom + lcbPlcffldMom);
		for (let i = 0; i < fieldCount; i++) {
			// Values are discarded; the reads still throw on a truncated table.
			plcffldMom.readUInt32LE(i * 4);
			plcffldMom.readUInt16LE(dataOffset + i * 2);
		}
	}
	/**
	 * Extracts and stores the document bookmarks into a local field, keyed by
	 * bookmark name, each with a start and end character position.
	 *
	 * NOTE(review): the n-th name is paired with the n-th entry of both
	 * position tables. [MS-DOC] describes an explicit end-index (ibkl) per
	 * bookmark; confirm whether positional pairing is sufficient.
	 * @param {*} buffer the "WordDocument" stream buffer
	 * @param {*} tableBuffer the table stream buffer
	 * @throws {Error} when the name table is not flagged as two-byte strings
	 */
	writeBookmarks(buffer, tableBuffer) {
		const fcSttbfBkmk = buffer.readUInt32LE(322);
		const lcbSttbfBkmk = buffer.readUInt32LE(326);
		const fcPlcfBkf = buffer.readUInt32LE(330);
		const lcbPlcfBkf = buffer.readUInt32LE(334);
		const fcPlcfBkl = buffer.readUInt32LE(338);
		const lcbPlcfBkl = buffer.readUInt32LE(342);
		if (lcbSttbfBkmk === 0) return;
		const sttbfBkmk = tableBuffer.slice(fcSttbfBkmk, fcSttbfBkmk + lcbSttbfBkmk);
		const plcfBkf = tableBuffer.slice(fcPlcfBkf, fcPlcfBkf + lcbPlcfBkf);
		const plcfBkl = tableBuffer.slice(fcPlcfBkl, fcPlcfBkl + lcbPlcfBkl);
		const fcExtend = sttbfBkmk.readUInt16LE(0);
		// The next two header words are not used, but reading them still
		// rejects a name table that is too short to hold its own header.
		sttbfBkmk.readUInt16LE(2);
		sttbfBkmk.readUInt16LE(4);
		if (fcExtend !== 65535) throw new Error("Internal error: unexpected single-byte bookmark data");
		let offset = 6;
		let index = 0;
		while (offset < lcbSttbfBkmk) {
			// Stop quietly when the position tables have no entry for this name.
			const entryEnd = (index + 1) * 4;
			if (entryEnd > plcfBkf.length || entryEnd > plcfBkl.length) break;
			// Each name is a 16-bit character count followed by two-byte characters.
			const length = sttbfBkmk.readUInt16LE(offset) * 2;
			const name = sttbfBkmk.slice(offset + 2, offset + 2 + length).toString("ucs2");
			this._bookmarks[name] = {
				start: plcfBkf.readUInt32LE(index * 4),
				end: plcfBkl.readUInt32LE(index * 4)
			};
			offset = offset + length + 2;
			index += 1;
		}
	}
	/**
	 * Extracts and stores the document text pieces into a local field. This is
	 * probably the most crucial part of text extraction, as it is where we
	 * get text corresponding to character positions. These may be stored in a
	 * different order in the file compared to the order we want them.
	 *
	 * @param {*} buffer the "WordDocument" stream buffer
	 * @param {*} tableBuffer the table stream buffer
	 * @throws {Error} when the piece table marker (2) is not found
	 */
	writePieces(buffer, tableBuffer) {
		let flag;
		let pos = buffer.readUInt32LE(418);
		// Skip over leading entries flagged 1; each carries a 16-bit length.
		while (true) {
			flag = tableBuffer.readUInt8(pos);
			if (flag !== 1) break;
			pos = pos + 1;
			const skip = tableBuffer.readUInt16LE(pos);
			pos = pos + 2 + skip;
		}
		flag = tableBuffer.readUInt8(pos);
		pos = pos + 1;
		if (flag !== 2) throw new Error("Internal error: ccorrupted Word file");
		const pieceTableSize = tableBuffer.readUInt32LE(pos);
		pos = pos + 4;
		const pieces = (pieceTableSize - 4) / 12;
		let startCp = 0;
		let startStream = 0;
		for (let x = 0, end = pieces - 1; x <= end; x++) {
			// Descriptors follow the (pieces + 1) positions; the file offset
			// sits 2 bytes into each 8-byte descriptor.
			const offset = pos + (pieces + 1) * 4 + x * 8 + 2;
			let startFilePos = tableBuffer.readUInt32LE(offset);
			let unicode = false;
			// Bit 30 clear: two-byte text. Bit 30 set: one-byte text whose
			// real offset is the remaining value halved.
			if ((startFilePos & 1073741824) === 0) unicode = true;
			else {
				startFilePos = startFilePos & -1073741825;
				startFilePos = Math.floor(startFilePos / 2);
			}
			const lStart = tableBuffer.readUInt32LE(pos + x * 4);
			const lEnd = tableBuffer.readUInt32LE(pos + (x + 1) * 4);
			const totLength = lEnd - lStart;
			const piece = {
				startCp,
				startStream,
				totLength,
				startFilePos,
				unicode,
				bpc: unicode ? 2 : 1
			};
			piece.size = piece.bpc * (lEnd - lStart);
			const textBuffer = buffer.slice(startFilePos, startFilePos + piece.size);
			if (unicode) piece.text = textBuffer.toString("ucs2");
			else piece.text = binaryToUnicode(textBuffer.toString("binary"));
			piece.length = piece.text.length;
			piece.endCp = piece.startCp + piece.length;
			piece.endStream = piece.startStream + piece.size;
			piece.endFilePos = piece.startFilePos + piece.size;
			startCp = piece.endCp;
			startStream = piece.endStream;
			this._pieces.push(piece);
		}
	}
	/**
	 * Processes the headers and footers. The main logic here is that we might have a mix
	 * of "real" and "pseudo" headers. For example, a footnote generates some footnote
	 * separator footer elements, which, unless they contain something interesting, we
	 * can dispense with. In fact, we want to dispense with anything which is made up of
	 * whitespace and control characters, in general. This means locating the segments of
	 * text in the extracted pieces, and conditionally replacing them with nulls.
	 *
	 * Each story is also recorded in the tagged header list with a type chosen
	 * from its index.
	 *
	 * @param {*} buffer the "WordDocument" stream buffer
	 * @param {*} tableBuffer the table stream buffer
	 */
	normalizeHeaders(buffer, tableBuffer) {
		const pieces = this._pieces;
		const fcPlcfhdd = buffer.readUInt32LE(242);
		const lcbPlcfhdd = buffer.readUInt32LE(246);
		if (lcbPlcfhdd < 8) return;
		// Header stories start after the body text and the footnote text.
		const offset = this._boundaries.ccpText + this._boundaries.ccpFtn;
		const ccpHdd = this._boundaries.ccpHdd;
		const plcHdd = tableBuffer.slice(fcPlcfhdd, fcPlcfhdd + lcbPlcfhdd);
		const plcHddCount = lcbPlcfhdd / 4;
		let start = offset + plcHdd.readUInt32LE(0);
		for (let i = 1; i < plcHddCount; i++) {
			let end = offset + plcHdd.readUInt32LE(i * 4);
			if (end > offset + ccpHdd) end = offset + ccpHdd;
			const string = getTextRangeByCP(pieces, start, end);
			const story = i - 1;
			// The first six stories are separators; after that the type
			// repeats in groups of six.
			if ([
				0,
				1,
				2
			].includes(story)) this._taggedHeaders.push({
				type: "footnoteSeparators",
				text: string
			});
			else if ([
				3,
				4,
				5
			].includes(story)) this._taggedHeaders.push({
				type: "endSeparators",
				text: string
			});
			else if ([
				0,
				1,
				4
			].includes(story % 6)) this._taggedHeaders.push({
				type: "headers",
				text: string
			});
			else if ([
				2,
				3,
				5
			].includes(story % 6)) this._taggedHeaders.push({
				type: "footers",
				text: string
			});
			// A story with nothing but CR, LF and U+0002-U+0008 is blanked
			// entirely; otherwise only its last character is blanked.
			if (!/[^\r\n\u0002-\u0008]/.test(string)) replaceSelectedRange(pieces, start, end, "\0");
			else replaceSelectedRange(pieces, end - 1, end, "\0");
			start = end;
		}
	}
	/**
	 * Scans the paragraph property pages and, for every paragraph run that
	 * carries sprm 9239 (0x2417), overwrites that run's text with newlines.
	 * Runs whose property offset is 0 have no stored properties and are
	 * skipped, matching what writeCharacterProperties does for its runs.
	 * @param {*} buffer the "WordDocument" stream buffer
	 * @param {*} tableBuffer the table stream buffer
	 */
	writeParagraphProperties(buffer, tableBuffer) {
		const pieces = this._pieces;
		const fcPlcfbtePapx = buffer.readUInt32LE(258);
		const lcbPlcfbtePapx = buffer.readUInt32LE(262);
		const plcBtePapxCount = (lcbPlcfbtePapx - 4) / 8;
		const dataOffset = (plcBtePapxCount + 1) * 4;
		const plcBtePapx = tableBuffer.slice(fcPlcfbtePapx, fcPlcfbtePapx + lcbPlcfbtePapx);
		for (let i = 0; i < plcBtePapxCount; i++) {
			// Value is discarded; the read still throws on a truncated table.
			plcBtePapx.readUInt32LE(i * 4);
			const papxFkpBlock = plcBtePapx.readUInt32LE(dataOffset + i * 4);
			// Property pages are 512 bytes; the last byte holds the run count.
			const papxFkpBlockBuffer = buffer.slice(papxFkpBlock * 512, (papxFkpBlock + 1) * 512);
			const crun = papxFkpBlockBuffer.readUInt8(511);
			for (let j = 0; j < crun; j++) {
				const rgfc = papxFkpBlockBuffer.readUInt32LE(j * 4);
				const rgfcNext = papxFkpBlockBuffer.readUInt32LE((j + 1) * 4);
				const cbLocation = (crun + 1) * 4 + j * 13;
				const cbIndex = papxFkpBlockBuffer.readUInt8(cbLocation) * 2;
				// Offset 0 would point at the page's own position table, not at
				// a property record, so there is nothing to parse for this run.
				if (cbIndex === 0) continue;
				const cb = papxFkpBlockBuffer.readUInt8(cbIndex);
				let grpPrlAndIstd = null;
				if (cb !== 0) grpPrlAndIstd = papxFkpBlockBuffer.slice(cbIndex + 1, cbIndex + 1 + 2 * cb - 1);
				else {
					// A zero size byte means the real size is in the next byte.
					const cb2 = papxFkpBlockBuffer.readUInt8(cbIndex + 1);
					grpPrlAndIstd = papxFkpBlockBuffer.slice(cbIndex + 2, cbIndex + 2 + 2 * cb2);
				}
				// The leading 16-bit value is not used; the read still throws
				// when the record is shorter than two bytes.
				grpPrlAndIstd.readUInt16LE(0);
				processSprms(grpPrlAndIstd, 2, (sprmBuffer, operandOffset, sprm) => {
					if (sprm === 9239) replaceSelectedRangeByFilePos(pieces, rgfc, rgfcNext, "\n");
				});
			}
		}
	}
	/**
	 * Scans the character property pages and marks as deleted every run whose
	 * properties contain the sprmCFRMarkDel entry with its low operand bit set.
	 * @param {*} buffer the "WordDocument" stream buffer
	 * @param {*} tableBuffer the table stream buffer
	 */
	writeCharacterProperties(buffer, tableBuffer) {
		const pieces = this._pieces;
		const fcPlcfbteChpx = buffer.readUInt32LE(250);
		const lcbPlcfbteChpx = buffer.readUInt32LE(254);
		const plcBteChpxCount = (lcbPlcfbteChpx - 4) / 8;
		const dataOffset = (plcBteChpxCount + 1) * 4;
		const plcBteChpx = tableBuffer.slice(fcPlcfbteChpx, fcPlcfbteChpx + lcbPlcfbteChpx);
		for (let i = 0; i < plcBteChpxCount; i++) {
			// Value is discarded; the read still throws on a truncated table.
			plcBteChpx.readUInt32LE(i * 4);
			const chpxFkpBlock = plcBteChpx.readUInt32LE(dataOffset + i * 4);
			// Property pages are 512 bytes; the last byte holds the run count.
			const chpxFkpBlockBuffer = buffer.slice(chpxFkpBlock * 512, (chpxFkpBlock + 1) * 512);
			const crun = chpxFkpBlockBuffer.readUInt8(511);
			for (let j = 0; j < crun; j++) {
				const rgfc = chpxFkpBlockBuffer.readUInt32LE(j * 4);
				const rgfcNext = chpxFkpBlockBuffer.readUInt32LE((j + 1) * 4);
				const rgb = chpxFkpBlockBuffer.readUInt8((crun + 1) * 4 + j);
				// Offset 0: no properties stored for this run.
				if (rgb === 0) continue;
				const chpxOffset = rgb * 2;
				const cb = chpxFkpBlockBuffer.readUInt8(chpxOffset);
				processSprms(chpxFkpBlockBuffer.slice(chpxOffset + 1, chpxOffset + 1 + cb), 0, (sprmBuffer, operandOffset, sprm, ispmd) => {
					if (ispmd !== sprmCFRMarkDel) return;
					if ((sprmBuffer[operandOffset] & 1) !== 1) return;
					markDeletedRange(pieces, rgfc, rgfcNext);
				});
			}
		}
	}
};
|
|
1086
|
-
module.exports = WordOleExtractor;
|
|
1087
|
-
}));
|
|
1088
|
-
//#endregion
|
|
1089
|
-
//#region node_modules/.pnpm/xmlchars@2.2.0/node_modules/xmlchars/xml/1.0/ed5.js
|
|
1090
|
-
var require_ed5 = /* @__PURE__ */ __commonJSMin(((exports) => {
|
|
1091
|
-
/**
|
|
1092
|
-
* Character classes and associated utilities for the 5th edition of XML 1.0.
|
|
1093
|
-
*
|
|
1094
|
-
* @author Louis-Dominique Dubeau
|
|
1095
|
-
* @license MIT
|
|
1096
|
-
* @copyright Louis-Dominique Dubeau
|
|
1097
|
-
*/
|
|
1098
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
1099
|
-
exports.CHAR = " \n\r --�𐀀-";
|
|
1100
|
-
exports.S = " \r\n";
|
|
1101
|
-
exports.NAME_START_CHAR = ":A-Z_a-zÀ-ÖØ-öø-˿Ͱ-ͽͿ-⁰-Ⰰ-、-豈-﷏ﷰ-�𐀀-";
|
|
1102
|
-
exports.NAME_CHAR = "-" + exports.NAME_START_CHAR + ".0-9·̀-ͯ‿-⁀";
|
|
1103
|
-
exports.CHAR_RE = new RegExp("^[" + exports.CHAR + "]$", "u");
|
|
1104
|
-
exports.S_RE = new RegExp("^[" + exports.S + "]+$", "u");
|
|
1105
|
-
exports.NAME_START_CHAR_RE = new RegExp("^[" + exports.NAME_START_CHAR + "]$", "u");
|
|
1106
|
-
exports.NAME_CHAR_RE = new RegExp("^[" + exports.NAME_CHAR + "]$", "u");
|
|
1107
|
-
exports.NAME_RE = new RegExp("^[" + exports.NAME_START_CHAR + "][" + exports.NAME_CHAR + "]*$", "u");
|
|
1108
|
-
exports.NMTOKEN_RE = new RegExp("^[" + exports.NAME_CHAR + "]+$", "u");
|
|
1109
|
-
var TAB = 9;
|
|
1110
|
-
var NL = 10;
|
|
1111
|
-
var CR = 13;
|
|
1112
|
-
var SPACE = 32;
|
|
1113
|
-
/** All characters in the ``S`` production. */
|
|
1114
|
-
exports.S_LIST = [
|
|
1115
|
-
SPACE,
|
|
1116
|
-
NL,
|
|
1117
|
-
CR,
|
|
1118
|
-
TAB
|
|
1119
|
-
];
|
|
1120
|
-
/**
 * Tests whether a codepoint matches the ``CHAR`` production.
 *
 * @param c The code point.
 *
 * @returns ``true`` if the codepoint matches ``CHAR``.
 */
function isChar(c) {
	if (c >= SPACE && c <= 55295) return true;
	if (c === NL || c === CR || c === TAB) return true;
	if (c >= 57344 && c <= 65533) return true;
	return c >= 65536 && c <= 1114111;
}
|
|
1130
|
-
exports.isChar = isChar;
|
|
1131
|
-
/**
 * Tests whether a codepoint matches the ``S`` (space) production.
 *
 * @param c The code point.
 *
 * @returns ``true`` if the codepoint matches ``S``.
 */
function isS(c) {
	switch (c) {
		case SPACE:
		case NL:
		case CR:
		case TAB: return true;
		default: return false;
	}
}
|
|
1141
|
-
exports.isS = isS;
|
|
1142
|
-
/**
 * Tests whether a codepoint matches the ``NAME_START_CHAR`` production.
 *
 * @param c The code point.
 *
 * @returns ``true`` if the codepoint matches ``NAME_START_CHAR``.
 */
function isNameStartChar(c) {
	// A-Z and a-z.
	if (c >= 65 && c <= 90) return true;
	if (c >= 97 && c <= 122) return true;
	// ":" (58), "_" (95), U+200C and U+200D.
	if (c === 58 || c === 95 || c === 8204 || c === 8205) return true;
	if (c >= 192 && c <= 214) return true;
	if (c >= 216 && c <= 246) return true;
	if (c >= 248 && c <= 767) return true;
	if (c >= 880 && c <= 893) return true;
	if (c >= 895 && c <= 8191) return true;
	if (c >= 8304 && c <= 8591) return true;
	if (c >= 11264 && c <= 12271) return true;
	if (c >= 12289 && c <= 55295) return true;
	if (c >= 63744 && c <= 64975) return true;
	if (c >= 65008 && c <= 65533) return true;
	return c >= 65536 && c <= 983039;
}
|
|
1152
|
-
exports.isNameStartChar = isNameStartChar;
|
|
1153
|
-
/**
 * Tests whether a codepoint matches the ``NAME_CHAR`` production: anything
 * accepted by isNameStartChar, plus the extra characters checked here.
 *
 * @param c The code point.
 *
 * @returns ``true`` if the codepoint matches ``NAME_CHAR``.
 */
function isNameChar(c) {
	const startMatch = isNameStartChar(c);
	if (startMatch) return startMatch;
	// Digits 0-9.
	if (c >= 48 && c <= 57) return true;
	// "-" (45), "." (46) and U+00B7 (183).
	if (c === 45 || c === 46 || c === 183) return true;
	if (c >= 768 && c <= 879) return true;
	return c >= 8255 && c <= 8256;
}
|
|
1163
|
-
exports.isNameChar = isNameChar;
|
|
1164
|
-
}));
|
|
1165
|
-
//#endregion
|
|
1166
|
-
//#region node_modules/.pnpm/xmlchars@2.2.0/node_modules/xmlchars/xml/1.1/ed2.js
|
|
1167
|
-
var require_ed2 = /* @__PURE__ */ __commonJSMin(((exports) => {
|
|
1168
|
-
/**
|
|
1169
|
-
* Character classes and associated utilities for the 2nd edition of XML 1.1.
|
|
1170
|
-
*
|
|
1171
|
-
* @author Louis-Dominique Dubeau
|
|
1172
|
-
* @license MIT
|
|
1173
|
-
* @copyright Louis-Dominique Dubeau
|
|
1174
|
-
*/
|
|
1175
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
1176
|
-
exports.CHAR = "--�𐀀-";
|
|
1177
|
-
exports.RESTRICTED_CHAR = "-\b\v\f---";
|
|
1178
|
-
exports.S = " \r\n";
|
|
1179
|
-
exports.NAME_START_CHAR = ":A-Z_a-zÀ-ÖØ-öø-˿Ͱ-ͽͿ-⁰-Ⰰ-、-豈-﷏ﷰ-�𐀀-";
|
|
1180
|
-
exports.NAME_CHAR = "-" + exports.NAME_START_CHAR + ".0-9·̀-ͯ‿-⁀";
|
|
1181
|
-
exports.CHAR_RE = new RegExp("^[" + exports.CHAR + "]$", "u");
|
|
1182
|
-
exports.RESTRICTED_CHAR_RE = new RegExp("^[" + exports.RESTRICTED_CHAR + "]$", "u");
|
|
1183
|
-
exports.S_RE = new RegExp("^[" + exports.S + "]+$", "u");
|
|
1184
|
-
exports.NAME_START_CHAR_RE = new RegExp("^[" + exports.NAME_START_CHAR + "]$", "u");
|
|
1185
|
-
exports.NAME_CHAR_RE = new RegExp("^[" + exports.NAME_CHAR + "]$", "u");
|
|
1186
|
-
exports.NAME_RE = new RegExp("^[" + exports.NAME_START_CHAR + "][" + exports.NAME_CHAR + "]*$", "u");
|
|
1187
|
-
exports.NMTOKEN_RE = new RegExp("^[" + exports.NAME_CHAR + "]+$", "u");
|
|
1188
|
-
var TAB = 9;
|
|
1189
|
-
var NL = 10;
|
|
1190
|
-
var CR = 13;
|
|
1191
|
-
var SPACE = 32;
|
|
1192
|
-
/** All characters in the ``S`` production. */
|
|
1193
|
-
exports.S_LIST = [
|
|
1194
|
-
SPACE,
|
|
1195
|
-
NL,
|
|
1196
|
-
CR,
|
|
1197
|
-
TAB
|
|
1198
|
-
];
|
|
1199
|
-
/**
 * Tests whether a codepoint matches the ``CHAR`` production.
 *
 * @param c The code point.
 *
 * @returns ``true`` if the codepoint matches ``CHAR``.
 */
function isChar(c) {
	if (c >= 1 && c <= 55295) return true;
	if (c >= 57344 && c <= 65533) return true;
	return c >= 65536 && c <= 1114111;
}
|
|
1209
|
-
exports.isChar = isChar;
|
|
1210
|
-
/**
 * Tests whether a codepoint matches the ``RESTRICTED_CHAR`` production.
 *
 * @param c The code point.
 *
 * @returns ``true`` if the codepoint matches ``RESTRICTED_CHAR``.
 */
function isRestrictedChar(c) {
	if (c >= 1 && c <= 8) return true;
	if (c === 11 || c === 12) return true;
	if (c >= 14 && c <= 31) return true;
	if (c >= 127 && c <= 132) return true;
	return c >= 134 && c <= 159;
}
|
|
1220
|
-
exports.isRestrictedChar = isRestrictedChar;
|
|
1221
|
-
/**
 * Tests whether a codepoint matches the ``CHAR`` production without also
 * matching the ``RESTRICTED_CHAR`` production. ``isCharAndNotRestricted(x)``
 * is equivalent to ``isChar(x) && !isRestrictedChar(x)``; the original note
 * states that this single function is faster than making the two calls.
 *
 * @param c The code point.
 *
 * @returns ``true`` if the codepoint matches ``CHAR`` and does not match
 * ``RESTRICTED_CHAR``.
 */
function isCharAndNotRestricted(c) {
	// Tab, line feed and carriage return.
	if (c === 9 || c === 10 || c === 13) return true;
	if (c > 31 && c < 127) return true;
	if (c === 133) return true;
	if (c > 159 && c <= 55295) return true;
	if (c >= 57344 && c <= 65533) return true;
	return c >= 65536 && c <= 1114111;
}
|
|
1235
|
-
exports.isCharAndNotRestricted = isCharAndNotRestricted;
|
|
1236
|
-
/**
 * Tests whether a codepoint matches the ``S`` (space) production.
 *
 * @param c The code point.
 *
 * @returns ``true`` if the codepoint matches ``S``.
 */
function isS(c) {
	if (c === SPACE) return true;
	if (c === NL) return true;
	if (c === CR) return true;
	return c === TAB;
}
|
|
1246
|
-
exports.isS = isS;
|
|
1247
|
-
/**
 * Tests whether a codepoint matches the ``NAME_START_CHAR`` production.
 *
 * @param c The code point.
 *
 * @returns ``true`` if the codepoint matches ``NAME_START_CHAR``.
 */
function isNameStartChar(c) {
	// A-Z and a-z.
	if (c >= 65 && c <= 90) return true;
	if (c >= 97 && c <= 122) return true;
	// ":" (58), "_" (95), U+200C and U+200D.
	if (c === 58 || c === 95 || c === 8204 || c === 8205) return true;
	if (c >= 192 && c <= 214) return true;
	if (c >= 216 && c <= 246) return true;
	if (c >= 248 && c <= 767) return true;
	if (c >= 880 && c <= 893) return true;
	if (c >= 895 && c <= 8191) return true;
	if (c >= 8304 && c <= 8591) return true;
	if (c >= 11264 && c <= 12271) return true;
	if (c >= 12289 && c <= 55295) return true;
	if (c >= 63744 && c <= 64975) return true;
	if (c >= 65008 && c <= 65533) return true;
	return c >= 65536 && c <= 983039;
}
|
|
1257
|
-
exports.isNameStartChar = isNameStartChar;
|
|
1258
|
-
/**
 * Tests whether a codepoint matches the ``NAME_CHAR`` production: anything
 * accepted by isNameStartChar, plus the extra characters checked here.
 *
 * @param c The code point.
 *
 * @returns ``true`` if the codepoint matches ``NAME_CHAR``.
 */
function isNameChar(c) {
	const startMatch = isNameStartChar(c);
	if (startMatch) return startMatch;
	// Digits 0-9.
	if (c >= 48 && c <= 57) return true;
	// "-" (45), "." (46) and U+00B7 (183).
	if (c === 45 || c === 46 || c === 183) return true;
	if (c >= 768 && c <= 879) return true;
	return c >= 8255 && c <= 8256;
}
|
|
1268
|
-
exports.isNameChar = isNameChar;
|
|
1269
|
-
}));
|
|
1270
|
-
//#endregion
|
|
1271
|
-
//#region node_modules/.pnpm/xmlchars@2.2.0/node_modules/xmlchars/xmlns/1.0/ed3.js
|
|
1272
|
-
var require_ed3 = /* @__PURE__ */ __commonJSMin(((exports) => {
|
|
1273
|
-
/**
|
|
1274
|
-
* Character class utilities for XML NS 1.0 edition 3.
|
|
1275
|
-
*
|
|
1276
|
-
* @author Louis-Dominique Dubeau
|
|
1277
|
-
* @license MIT
|
|
1278
|
-
* @copyright Louis-Dominique Dubeau
|
|
1279
|
-
*/
|
|
1280
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
1281
|
-
exports.NC_NAME_START_CHAR = "A-Z_a-zÀ-ÖØ-öø-˿Ͱ-ͽͿ--⁰-Ⰰ-、-豈-﷏ﷰ-�𐀀-";
|
|
1282
|
-
exports.NC_NAME_CHAR = "-" + exports.NC_NAME_START_CHAR + ".0-9·̀-ͯ‿-⁀";
|
|
1283
|
-
exports.NC_NAME_START_CHAR_RE = new RegExp("^[" + exports.NC_NAME_START_CHAR + "]$", "u");
|
|
1284
|
-
exports.NC_NAME_CHAR_RE = new RegExp("^[" + exports.NC_NAME_CHAR + "]$", "u");
|
|
1285
|
-
exports.NC_NAME_RE = new RegExp("^[" + exports.NC_NAME_START_CHAR + "][" + exports.NC_NAME_CHAR + "]*$", "u");
|
|
1286
|
-
/**
 * Tests whether a codepoint matches [[NC_NAME_START_CHAR]].
 *
 * @param c The code point.
 *
 * @returns ``true`` if the codepoint matches.
 */
function isNCNameStartChar(c) {
	// A-Z, "_" (95) and a-z; ":" is not accepted here.
	if (c >= 65 && c <= 90) return true;
	if (c === 95) return true;
	if (c >= 97 && c <= 122) return true;
	if (c >= 192 && c <= 214) return true;
	if (c >= 216 && c <= 246) return true;
	if (c >= 248 && c <= 767) return true;
	if (c >= 880 && c <= 893) return true;
	if (c >= 895 && c <= 8191) return true;
	if (c >= 8204 && c <= 8205) return true;
	if (c >= 8304 && c <= 8591) return true;
	if (c >= 11264 && c <= 12271) return true;
	if (c >= 12289 && c <= 55295) return true;
	if (c >= 63744 && c <= 64975) return true;
	if (c >= 65008 && c <= 65533) return true;
	return c >= 65536 && c <= 983039;
}
|
|
1296
|
-
exports.isNCNameStartChar = isNCNameStartChar;
|
|
1297
|
-
/**
 * Tests whether a codepoint matches [[NC_NAME_CHAR]]: anything accepted by
 * isNCNameStartChar, plus the extra characters checked here.
 *
 * @param c The code point.
 *
 * @returns ``true`` if the codepoint matches.
 */
function isNCNameChar(c) {
	const startMatch = isNCNameStartChar(c);
	if (startMatch) return startMatch;
	// "-" (45) and "." (46).
	if (c === 45 || c === 46) return true;
	// Digits 0-9.
	if (c >= 48 && c <= 57) return true;
	// U+00B7.
	if (c === 183) return true;
	if (c >= 768 && c <= 879) return true;
	return c >= 8255 && c <= 8256;
}
|
|
1307
|
-
exports.isNCNameChar = isNCNameChar;
|
|
1308
|
-
}));
|
|
1309
|
-
//#endregion
|
|
1310
|
-
//#region node_modules/.pnpm/saxes@5.0.1/node_modules/saxes/saxes.js
|
|
1311
|
-
var require_saxes = /* @__PURE__ */ __commonJSMin(((exports) => {
|
|
1312
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
1313
|
-
const ed5 = require_ed5();
|
|
1314
|
-
const ed2 = require_ed2();
|
|
1315
|
-
const NSed3 = require_ed3();
|
|
1316
|
-
var isS = ed5.isS;
|
|
1317
|
-
var isChar10 = ed5.isChar;
|
|
1318
|
-
var isNameStartChar = ed5.isNameStartChar;
|
|
1319
|
-
var isNameChar = ed5.isNameChar;
|
|
1320
|
-
var S_LIST = ed5.S_LIST;
|
|
1321
|
-
var NAME_RE = ed5.NAME_RE;
|
|
1322
|
-
var isChar11 = ed2.isChar;
|
|
1323
|
-
var isNCNameStartChar = NSed3.isNCNameStartChar;
|
|
1324
|
-
var isNCNameChar = NSed3.isNCNameChar;
|
|
1325
|
-
var NC_NAME_RE = NSed3.NC_NAME_RE;
|
|
1326
|
-
// Namespace URI that the "xml" prefix must be bound to.
const XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace";
// Namespace URI that the "xmlns" prefix must be bound to.
const XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/";
// Initial prefix -> URI bindings. The table has no prototype, so a lookup can
// only ever find the keys listed here.
const rootNS = Object.assign(Object.create(null), {
  xml: XML_NAMESPACE,
  xmlns: XMLNS_NAMESPACE,
});
// Entity name -> replacement text for the entities that are always defined.
// Also prototype-less, for the same reason.
const XML_ENTITIES = Object.assign(Object.create(null), {
  amp: "&",
  gt: ">",
  lt: "<",
  quot: "\"",
  apos: "'",
});
|
|
1341
|
-
// Sentinels returned by the code-reading helpers instead of a real code point.
const EOC = -1; // the end of the current chunk was reached
const NL_LIKE = -2; // a line terminator that callers normalize to NL

// Parser states. Each value is the index of the matching handler in the
// parser's ``stateTable`` array, so the numbering must stay dense and in step
// with that array.

// Document start.
const S_BEGIN = 0;
const S_BEGIN_WHITESPACE = 1;

// Doctype declaration and its internal subset.
const S_DOCTYPE = 2;
const S_DOCTYPE_QUOTE = 3;
const S_DTD = 4;
const S_DTD_QUOTED = 5;
const S_DTD_OPEN_WAKA = 6;
const S_DTD_OPEN_WAKA_BANG = 7;
const S_DTD_COMMENT = 8;
const S_DTD_COMMENT_ENDING = 9;
const S_DTD_COMMENT_ENDED = 10;
const S_DTD_PI = 11;
const S_DTD_PI_ENDING = 12;

// Character data and entity references.
const S_TEXT = 13;
const S_ENTITY = 14;

// Markup introduced by "<" and "<!".
const S_OPEN_WAKA = 15;
const S_OPEN_WAKA_BANG = 16;
const S_COMMENT = 17;
const S_COMMENT_ENDING = 18;
const S_COMMENT_ENDED = 19;
const S_CDATA = 20;
const S_CDATA_ENDING = 21;
const S_CDATA_ENDING_2 = 22;

// Processing instructions.
const S_PI_FIRST_CHAR = 23;
const S_PI_REST = 24;
const S_PI_BODY = 25;
const S_PI_ENDING = 26;

// XML declaration.
const S_XML_DECL_NAME_START = 27;
const S_XML_DECL_NAME = 28;
const S_XML_DECL_EQ = 29;
const S_XML_DECL_VALUE_START = 30;
const S_XML_DECL_VALUE = 31;
const S_XML_DECL_SEPARATOR = 32;
const S_XML_DECL_ENDING = 33;

// Start tags and their attributes.
const S_OPEN_TAG = 34;
const S_OPEN_TAG_SLASH = 35;
const S_ATTRIB = 36;
const S_ATTRIB_NAME = 37;
const S_ATTRIB_NAME_SAW_WHITE = 38;
const S_ATTRIB_VALUE = 39;
const S_ATTRIB_VALUE_QUOTED = 40;
const S_ATTRIB_VALUE_CLOSED = 41;
const S_ATTRIB_VALUE_UNQUOTED = 42;

// End tags.
const S_CLOSE_TAG = 43;
const S_CLOSE_TAG_SAW_WHITE = 44;
|
|
1388
|
-
// Code points of the characters the state handlers compare against.
const TAB = 0x9;
const NL = 0xa; // line feed
const CR = 0xd; // carriage return
const SPACE = 0x20;
const BANG = 0x21; // !
const DQUOTE = 0x22; // "
const AMP = 0x26; // &
const SQUOTE = 0x27; // '
const MINUS = 0x2d; // -
const FORWARD_SLASH = 0x2f; // /
const SEMICOLON = 0x3b; // ;
const LESS = 0x3c; // <
const EQUAL = 0x3d; // =
const GREATER = 0x3e; // >
const QUESTION = 0x3f; // ?
const OPEN_BRACKET = 0x5b; // [
const CLOSE_BRACKET = 0x5d; // ]
const NEL = 0x85; // next line
const LS = 0x2028; // line separator
|
|
1407
|
-
// True when ``c`` is the code point of a single or a double quote.
const isQuote = (c) => {
  return c === DQUOTE || c === SQUOTE;
};
const QUOTES = [DQUOTE, SQUOTE];
// Code points that end a capture in each context; these lists are handed to
// ``captureTo``.
const DOCTYPE_TERMINATOR = [...QUOTES, OPEN_BRACKET, GREATER];
const DTD_TERMINATOR = [...QUOTES, LESS, CLOSE_BRACKET];
const XML_DECL_NAME_TERMINATOR = [EQUAL, QUESTION, ...S_LIST];
const ATTRIB_VALUE_UNQUOTED_TERMINATOR = [...S_LIST, GREATER, AMP, LESS];
|
|
1430
|
-
/**
 * Checks one prefix/URI binding against the rules for the reserved ``xml``
 * and ``xmlns`` prefixes and their namespace URIs. Each violation is reported
 * through ``parser.fail``.
 *
 * @param parser The parser used to report problems.
 *
 * @param prefix The prefix being bound. The empty string stands for the
 * default namespace.
 *
 * @param uri The URI the prefix is bound to.
 */
function nsPairCheck(parser, prefix, uri) {
  // Reserved prefixes must point at their own URI.
  if (prefix === "xml") {
    if (uri !== XML_NAMESPACE) parser.fail(`xml prefix must be bound to ${XML_NAMESPACE}.`);
  } else if (prefix === "xmlns") {
    if (uri !== XMLNS_NAMESPACE) parser.fail(`xmlns prefix must be bound to ${XMLNS_NAMESPACE}.`);
  }
  // Reserved URIs may not be handed out freely.
  if (uri === XMLNS_NAMESPACE) {
    // No prefix at all, not even "xmlns", may be bound to this URI.
    if (prefix === "") parser.fail(`the default namespace may not be set to ${uri}.`);
    else parser.fail(`may not assign a prefix (even "xmlns") to the URI ${XMLNS_NAMESPACE}.`);
    return;
  }
  if (uri !== XML_NAMESPACE) return;
  if (prefix === "xml") return;
  if (prefix === "") parser.fail(`the default namespace may not be set to ${uri}.`);
  else parser.fail("may not assign the xml namespace to another prefix.");
}
|
|
1457
|
-
/**
 * Runs ``nsPairCheck`` on every own enumerable binding of a prefix-to-URI
 * mapping.
 *
 * @param parser The parser used to report problems.
 *
 * @param mapping The prefix-to-URI mapping to check.
 */
function nsMappingCheck(parser, mapping) {
  Object.keys(mapping).forEach((prefix) => {
    nsPairCheck(parser, prefix, mapping[prefix]);
  });
}
|
|
1460
|
-
// Whole-string name checks backed by the imported regular expressions.
const isNCName = (name) => {
  return NC_NAME_RE.test(name);
};
const isName = (name) => {
  return NAME_RE.test(name);
};
// Values for the parser's ``forbiddenState`` field.
// NOTE(review): the names suggest this tracks how much of a "]]" run has been
// seen in text; confirm against the text handlers.
const FORBIDDEN_START = 0;
const FORBIDDEN_BRACKET = 1;
const FORBIDDEN_BRACKET_BRACKET = 2;
|
|
1465
|
-
// Maps each event name to the parser property that stores its handler.
const EVENT_NAME_TO_HANDLER_NAME = {
  xmldecl: "xmldeclHandler",
  text: "textHandler",
  processinginstruction: "piHandler",
  doctype: "doctypeHandler",
  comment: "commentHandler",
  opentagstart: "openTagStartHandler",
  attribute: "attributeHandler",
  opentag: "openTagHandler",
  closetag: "closeTagHandler",
  cdata: "cdataHandler",
  error: "errorHandler",
  end: "endHandler",
  ready: "readyHandler"
};
/**
 * The list of supported events, in the order they are declared in the
 * handler table above.
 */
exports.EVENTS = Object.keys(EVENT_NAME_TO_HANDLER_NAME);
|
|
1498
|
-
var SaxesParser = class {
|
|
1499
|
-
/**
|
|
1500
|
-
* @param opt The parser options.
|
|
1501
|
-
*/
|
|
1502
|
-
constructor(opt) {
|
|
1503
|
-
this.opt = opt !== null && opt !== void 0 ? opt : {};
|
|
1504
|
-
this.fragmentOpt = !!this.opt.fragment;
|
|
1505
|
-
const xmlnsOpt = this.xmlnsOpt = !!this.opt.xmlns;
|
|
1506
|
-
this.trackPosition = this.opt.position !== false;
|
|
1507
|
-
this.fileName = this.opt.fileName;
|
|
1508
|
-
if (xmlnsOpt) {
|
|
1509
|
-
this.nameStartCheck = isNCNameStartChar;
|
|
1510
|
-
this.nameCheck = isNCNameChar;
|
|
1511
|
-
this.isName = isNCName;
|
|
1512
|
-
this.processAttribs = this.processAttribsNS;
|
|
1513
|
-
this.pushAttrib = this.pushAttribNS;
|
|
1514
|
-
this.ns = Object.assign({ __proto__: null }, rootNS);
|
|
1515
|
-
const additional = this.opt.additionalNamespaces;
|
|
1516
|
-
if (additional != null) {
|
|
1517
|
-
nsMappingCheck(this, additional);
|
|
1518
|
-
Object.assign(this.ns, additional);
|
|
1519
|
-
}
|
|
1520
|
-
} else {
|
|
1521
|
-
this.nameStartCheck = isNameStartChar;
|
|
1522
|
-
this.nameCheck = isNameChar;
|
|
1523
|
-
this.isName = isName;
|
|
1524
|
-
this.processAttribs = this.processAttribsPlain;
|
|
1525
|
-
this.pushAttrib = this.pushAttribPlain;
|
|
1526
|
-
}
|
|
1527
|
-
this.stateTable = [
|
|
1528
|
-
this.sBegin,
|
|
1529
|
-
this.sBeginWhitespace,
|
|
1530
|
-
this.sDoctype,
|
|
1531
|
-
this.sDoctypeQuote,
|
|
1532
|
-
this.sDTD,
|
|
1533
|
-
this.sDTDQuoted,
|
|
1534
|
-
this.sDTDOpenWaka,
|
|
1535
|
-
this.sDTDOpenWakaBang,
|
|
1536
|
-
this.sDTDComment,
|
|
1537
|
-
this.sDTDCommentEnding,
|
|
1538
|
-
this.sDTDCommentEnded,
|
|
1539
|
-
this.sDTDPI,
|
|
1540
|
-
this.sDTDPIEnding,
|
|
1541
|
-
this.sText,
|
|
1542
|
-
this.sEntity,
|
|
1543
|
-
this.sOpenWaka,
|
|
1544
|
-
this.sOpenWakaBang,
|
|
1545
|
-
this.sComment,
|
|
1546
|
-
this.sCommentEnding,
|
|
1547
|
-
this.sCommentEnded,
|
|
1548
|
-
this.sCData,
|
|
1549
|
-
this.sCDataEnding,
|
|
1550
|
-
this.sCDataEnding2,
|
|
1551
|
-
this.sPIFirstChar,
|
|
1552
|
-
this.sPIRest,
|
|
1553
|
-
this.sPIBody,
|
|
1554
|
-
this.sPIEnding,
|
|
1555
|
-
this.sXMLDeclNameStart,
|
|
1556
|
-
this.sXMLDeclName,
|
|
1557
|
-
this.sXMLDeclEq,
|
|
1558
|
-
this.sXMLDeclValueStart,
|
|
1559
|
-
this.sXMLDeclValue,
|
|
1560
|
-
this.sXMLDeclSeparator,
|
|
1561
|
-
this.sXMLDeclEnding,
|
|
1562
|
-
this.sOpenTag,
|
|
1563
|
-
this.sOpenTagSlash,
|
|
1564
|
-
this.sAttrib,
|
|
1565
|
-
this.sAttribName,
|
|
1566
|
-
this.sAttribNameSawWhite,
|
|
1567
|
-
this.sAttribValue,
|
|
1568
|
-
this.sAttribValueQuoted,
|
|
1569
|
-
this.sAttribValueClosed,
|
|
1570
|
-
this.sAttribValueUnquoted,
|
|
1571
|
-
this.sCloseTag,
|
|
1572
|
-
this.sCloseTagSawWhite
|
|
1573
|
-
];
|
|
1574
|
-
this._init();
|
|
1575
|
-
}
|
|
1576
|
-
/**
|
|
1577
|
-
* Indicates whether or not the parser is closed. If ``true``, wait for
|
|
1578
|
-
* the ``ready`` event to write again.
|
|
1579
|
-
*/
|
|
1580
|
-
get closed() {
|
|
1581
|
-
return this._closed;
|
|
1582
|
-
}
|
|
1583
|
-
_init() {
|
|
1584
|
-
var _a;
|
|
1585
|
-
this.openWakaBang = "";
|
|
1586
|
-
this.text = "";
|
|
1587
|
-
this.name = "";
|
|
1588
|
-
this.piTarget = "";
|
|
1589
|
-
this.entity = "";
|
|
1590
|
-
this.q = null;
|
|
1591
|
-
this.tags = [];
|
|
1592
|
-
this.tag = null;
|
|
1593
|
-
this.topNS = null;
|
|
1594
|
-
this.chunk = "";
|
|
1595
|
-
this.chunkPosition = 0;
|
|
1596
|
-
this.i = 0;
|
|
1597
|
-
this.prevI = 0;
|
|
1598
|
-
this.carriedFromPrevious = void 0;
|
|
1599
|
-
this.forbiddenState = FORBIDDEN_START;
|
|
1600
|
-
this.attribList = [];
|
|
1601
|
-
const { fragmentOpt } = this;
|
|
1602
|
-
this.state = fragmentOpt ? S_TEXT : S_BEGIN;
|
|
1603
|
-
this.reportedTextBeforeRoot = this.reportedTextAfterRoot = this.closedRoot = this.sawRoot = fragmentOpt;
|
|
1604
|
-
this.xmlDeclPossible = !fragmentOpt;
|
|
1605
|
-
this.xmlDeclExpects = ["version"];
|
|
1606
|
-
this.entityReturnState = void 0;
|
|
1607
|
-
let { defaultXMLVersion } = this.opt;
|
|
1608
|
-
if (defaultXMLVersion === void 0) {
|
|
1609
|
-
if (this.opt.forceXMLVersion === true) throw new Error("forceXMLVersion set but defaultXMLVersion is not set");
|
|
1610
|
-
defaultXMLVersion = "1.0";
|
|
1611
|
-
}
|
|
1612
|
-
this.setXMLVersion(defaultXMLVersion);
|
|
1613
|
-
this.positionAtNewLine = 0;
|
|
1614
|
-
this.doctype = false;
|
|
1615
|
-
this._closed = false;
|
|
1616
|
-
this.xmlDecl = {
|
|
1617
|
-
version: void 0,
|
|
1618
|
-
encoding: void 0,
|
|
1619
|
-
standalone: void 0
|
|
1620
|
-
};
|
|
1621
|
-
this.line = 1;
|
|
1622
|
-
this.column = 0;
|
|
1623
|
-
this.ENTITIES = Object.create(XML_ENTITIES);
|
|
1624
|
-
(_a = this.readyHandler) === null || _a === void 0 || _a.call(this);
|
|
1625
|
-
}
|
|
1626
|
-
/**
|
|
1627
|
-
* The stream position the parser is currently looking at. This field is
|
|
1628
|
-
* zero-based.
|
|
1629
|
-
*
|
|
1630
|
-
* This field is not based on counting Unicode characters but is to be
|
|
1631
|
-
* interpreted as a plain index into a JavaScript string.
|
|
1632
|
-
*/
|
|
1633
|
-
get position() {
|
|
1634
|
-
return this.chunkPosition + this.i;
|
|
1635
|
-
}
|
|
1636
|
-
/**
|
|
1637
|
-
* The column number of the next character to be read by the parser. *
|
|
1638
|
-
* This field is zero-based. (The first column in a line is 0.)
|
|
1639
|
-
*
|
|
1640
|
-
* This field reports the index at which the next character would be in the
|
|
1641
|
-
* line if the line were represented as a JavaScript string. Note that this
|
|
1642
|
-
* *can* be different to a count based on the number of *Unicode characters*
|
|
1643
|
-
* due to how JavaScript handles astral plane characters.
|
|
1644
|
-
*
|
|
1645
|
-
* See [[column]] for a number that corresponds to a count of Unicode
|
|
1646
|
-
* characters.
|
|
1647
|
-
*/
|
|
1648
|
-
get columnIndex() {
|
|
1649
|
-
return this.position - this.positionAtNewLine;
|
|
1650
|
-
}
|
|
1651
|
-
/**
|
|
1652
|
-
* Set an event listener on an event. The parser supports one handler per
|
|
1653
|
-
* event type. If you try to set an event handler over an existing handler,
|
|
1654
|
-
* the old handler is silently overwritten.
|
|
1655
|
-
*
|
|
1656
|
-
* @param name The event to listen to.
|
|
1657
|
-
*
|
|
1658
|
-
* @param handler The handler to set.
|
|
1659
|
-
*/
|
|
1660
|
-
on(name, handler) {
|
|
1661
|
-
this[EVENT_NAME_TO_HANDLER_NAME[name]] = handler;
|
|
1662
|
-
}
|
|
1663
|
-
/**
|
|
1664
|
-
* Unset an event handler.
|
|
1665
|
-
*
|
|
1666
|
-
* @parma name The event to stop listening to.
|
|
1667
|
-
*/
|
|
1668
|
-
off(name) {
|
|
1669
|
-
this[EVENT_NAME_TO_HANDLER_NAME[name]] = void 0;
|
|
1670
|
-
}
|
|
1671
|
-
/**
|
|
1672
|
-
* Make an error object. The error object will have a message that contains
|
|
1673
|
-
* the ``fileName`` option passed at the creation of the parser. If position
|
|
1674
|
-
* tracking was turned on, it will also have line and column number
|
|
1675
|
-
* information.
|
|
1676
|
-
*
|
|
1677
|
-
* @param message The message describing the error to report.
|
|
1678
|
-
*
|
|
1679
|
-
* @returns An error object with a properly formatted message.
|
|
1680
|
-
*/
|
|
1681
|
-
makeError(message) {
|
|
1682
|
-
var _a;
|
|
1683
|
-
let msg = (_a = this.fileName) !== null && _a !== void 0 ? _a : "";
|
|
1684
|
-
if (this.trackPosition) {
|
|
1685
|
-
if (msg.length > 0) msg += ":";
|
|
1686
|
-
msg += `${this.line}:${this.column}`;
|
|
1687
|
-
}
|
|
1688
|
-
if (msg.length > 0) msg += ": ";
|
|
1689
|
-
return new Error(msg + message);
|
|
1690
|
-
}
|
|
1691
|
-
/**
|
|
1692
|
-
* Report a parsing error. This method is made public so that client code may
|
|
1693
|
-
* check for issues that are outside the scope of this project and can report
|
|
1694
|
-
* errors.
|
|
1695
|
-
*
|
|
1696
|
-
* @param message The error to report.
|
|
1697
|
-
*
|
|
1698
|
-
* @returns this
|
|
1699
|
-
*/
|
|
1700
|
-
fail(message) {
|
|
1701
|
-
const err = this.makeError(message);
|
|
1702
|
-
const handler = this.errorHandler;
|
|
1703
|
-
if (handler === void 0) throw err;
|
|
1704
|
-
else handler(err);
|
|
1705
|
-
return this;
|
|
1706
|
-
}
|
|
1707
|
-
/**
|
|
1708
|
-
* Write a XML data to the parser.
|
|
1709
|
-
*
|
|
1710
|
-
* @param chunk The XML data to write.
|
|
1711
|
-
*
|
|
1712
|
-
* @returns this
|
|
1713
|
-
*/
|
|
1714
|
-
write(chunk) {
|
|
1715
|
-
if (this.closed) return this.fail("cannot write after close; assign an onready handler.");
|
|
1716
|
-
let end = false;
|
|
1717
|
-
if (chunk === null) {
|
|
1718
|
-
end = true;
|
|
1719
|
-
chunk = "";
|
|
1720
|
-
} else if (typeof chunk === "object") chunk = chunk.toString();
|
|
1721
|
-
if (this.carriedFromPrevious !== void 0) {
|
|
1722
|
-
chunk = `${this.carriedFromPrevious}${chunk}`;
|
|
1723
|
-
this.carriedFromPrevious = void 0;
|
|
1724
|
-
}
|
|
1725
|
-
let limit = chunk.length;
|
|
1726
|
-
const lastCode = chunk.charCodeAt(limit - 1);
|
|
1727
|
-
if (!end && (lastCode === CR || lastCode >= 55296 && lastCode <= 56319)) {
|
|
1728
|
-
this.carriedFromPrevious = chunk[limit - 1];
|
|
1729
|
-
limit--;
|
|
1730
|
-
chunk = chunk.slice(0, limit);
|
|
1731
|
-
}
|
|
1732
|
-
const { stateTable } = this;
|
|
1733
|
-
this.chunk = chunk;
|
|
1734
|
-
this.i = 0;
|
|
1735
|
-
while (this.i < limit) stateTable[this.state].call(this);
|
|
1736
|
-
this.chunkPosition += limit;
|
|
1737
|
-
return end ? this.end() : this;
|
|
1738
|
-
}
|
|
1739
|
-
/**
|
|
1740
|
-
* Close the current stream. Perform final well-formedness checks and reset
|
|
1741
|
-
* the parser tstate.
|
|
1742
|
-
*
|
|
1743
|
-
* @returns this
|
|
1744
|
-
*/
|
|
1745
|
-
close() {
|
|
1746
|
-
return this.write(null);
|
|
1747
|
-
}
|
|
1748
|
-
/**
|
|
1749
|
-
* Get a single code point out of the current chunk. This updates the current
|
|
1750
|
-
* position if we do position tracking.
|
|
1751
|
-
*
|
|
1752
|
-
* This is the algorithm to use for XML 1.0.
|
|
1753
|
-
*
|
|
1754
|
-
* @returns The character read.
|
|
1755
|
-
*/
|
|
1756
|
-
getCode10() {
|
|
1757
|
-
const { chunk, i } = this;
|
|
1758
|
-
this.prevI = i;
|
|
1759
|
-
this.i = i + 1;
|
|
1760
|
-
if (i >= chunk.length) return EOC;
|
|
1761
|
-
const code = chunk.charCodeAt(i);
|
|
1762
|
-
this.column++;
|
|
1763
|
-
if (code < 55296) {
|
|
1764
|
-
if (code >= SPACE || code === TAB) return code;
|
|
1765
|
-
switch (code) {
|
|
1766
|
-
case NL:
|
|
1767
|
-
this.line++;
|
|
1768
|
-
this.column = 0;
|
|
1769
|
-
this.positionAtNewLine = this.position;
|
|
1770
|
-
return NL;
|
|
1771
|
-
case CR:
|
|
1772
|
-
if (chunk.charCodeAt(i + 1) === NL) this.i = i + 2;
|
|
1773
|
-
this.line++;
|
|
1774
|
-
this.column = 0;
|
|
1775
|
-
this.positionAtNewLine = this.position;
|
|
1776
|
-
return NL_LIKE;
|
|
1777
|
-
default:
|
|
1778
|
-
this.fail("disallowed character.");
|
|
1779
|
-
return code;
|
|
1780
|
-
}
|
|
1781
|
-
}
|
|
1782
|
-
if (code > 56319) {
|
|
1783
|
-
if (!(code >= 57344 && code <= 65533)) this.fail("disallowed character.");
|
|
1784
|
-
return code;
|
|
1785
|
-
}
|
|
1786
|
-
const final = 65536 + (code - 55296) * 1024 + (chunk.charCodeAt(i + 1) - 56320);
|
|
1787
|
-
this.i = i + 2;
|
|
1788
|
-
if (final > 1114111) this.fail("disallowed character.");
|
|
1789
|
-
return final;
|
|
1790
|
-
}
|
|
1791
|
-
/**
|
|
1792
|
-
* Get a single code point out of the current chunk. This updates the current
|
|
1793
|
-
* position if we do position tracking.
|
|
1794
|
-
*
|
|
1795
|
-
* This is the algorithm to use for XML 1.1.
|
|
1796
|
-
*
|
|
1797
|
-
* @returns {number} The character read.
|
|
1798
|
-
*/
|
|
1799
|
-
getCode11() {
|
|
1800
|
-
const { chunk, i } = this;
|
|
1801
|
-
this.prevI = i;
|
|
1802
|
-
this.i = i + 1;
|
|
1803
|
-
if (i >= chunk.length) return EOC;
|
|
1804
|
-
const code = chunk.charCodeAt(i);
|
|
1805
|
-
this.column++;
|
|
1806
|
-
if (code < 55296) {
|
|
1807
|
-
if (code > 31 && code < 127 || code > 159 && code !== LS || code === TAB) return code;
|
|
1808
|
-
switch (code) {
|
|
1809
|
-
case NL:
|
|
1810
|
-
this.line++;
|
|
1811
|
-
this.column = 0;
|
|
1812
|
-
this.positionAtNewLine = this.position;
|
|
1813
|
-
return NL;
|
|
1814
|
-
case CR: {
|
|
1815
|
-
const next = chunk.charCodeAt(i + 1);
|
|
1816
|
-
if (next === NL || next === NEL) this.i = i + 2;
|
|
1817
|
-
}
|
|
1818
|
-
case NEL:
|
|
1819
|
-
case LS:
|
|
1820
|
-
this.line++;
|
|
1821
|
-
this.column = 0;
|
|
1822
|
-
this.positionAtNewLine = this.position;
|
|
1823
|
-
return NL_LIKE;
|
|
1824
|
-
default:
|
|
1825
|
-
this.fail("disallowed character.");
|
|
1826
|
-
return code;
|
|
1827
|
-
}
|
|
1828
|
-
}
|
|
1829
|
-
if (code > 56319) {
|
|
1830
|
-
if (!(code >= 57344 && code <= 65533)) this.fail("disallowed character.");
|
|
1831
|
-
return code;
|
|
1832
|
-
}
|
|
1833
|
-
const final = 65536 + (code - 55296) * 1024 + (chunk.charCodeAt(i + 1) - 56320);
|
|
1834
|
-
this.i = i + 2;
|
|
1835
|
-
if (final > 1114111) this.fail("disallowed character.");
|
|
1836
|
-
return final;
|
|
1837
|
-
}
|
|
1838
|
-
/**
|
|
1839
|
-
* Like ``getCode`` but with the return value normalized so that ``NL`` is
|
|
1840
|
-
* returned for ``NL_LIKE``.
|
|
1841
|
-
*/
|
|
1842
|
-
getCodeNorm() {
|
|
1843
|
-
const c = this.getCode();
|
|
1844
|
-
return c === NL_LIKE ? NL : c;
|
|
1845
|
-
}
|
|
1846
|
-
unget() {
|
|
1847
|
-
this.i = this.prevI;
|
|
1848
|
-
this.column--;
|
|
1849
|
-
}
|
|
1850
|
-
/**
|
|
1851
|
-
* Capture characters into a buffer until encountering one of a set of
|
|
1852
|
-
* characters.
|
|
1853
|
-
*
|
|
1854
|
-
* @param chars An array of codepoints. Encountering a character in the array
|
|
1855
|
-
* ends the capture. (``chars`` may safely contain ``NL``.)
|
|
1856
|
-
*
|
|
1857
|
-
* @return The character code that made the capture end, or ``EOC`` if we hit
|
|
1858
|
-
* the end of the chunk. The return value cannot be NL_LIKE: NL is returned
|
|
1859
|
-
* instead.
|
|
1860
|
-
*/
|
|
1861
|
-
captureTo(chars) {
|
|
1862
|
-
let { i: start } = this;
|
|
1863
|
-
const { chunk } = this;
|
|
1864
|
-
while (true) {
|
|
1865
|
-
const c = this.getCode();
|
|
1866
|
-
const isNLLike = c === NL_LIKE;
|
|
1867
|
-
const final = isNLLike ? NL : c;
|
|
1868
|
-
if (final === EOC || chars.includes(final)) {
|
|
1869
|
-
this.text += chunk.slice(start, this.prevI);
|
|
1870
|
-
return final;
|
|
1871
|
-
}
|
|
1872
|
-
if (isNLLike) {
|
|
1873
|
-
this.text += `${chunk.slice(start, this.prevI)}\n`;
|
|
1874
|
-
start = this.i;
|
|
1875
|
-
}
|
|
1876
|
-
}
|
|
1877
|
-
}
|
|
1878
|
-
/**
|
|
1879
|
-
* Capture characters into a buffer until encountering a character.
|
|
1880
|
-
*
|
|
1881
|
-
* @param char The codepoint that ends the capture. **NOTE ``char`` MAY NOT
|
|
1882
|
-
* CONTAIN ``NL``.** Passing ``NL`` will result in buggy behavior.
|
|
1883
|
-
*
|
|
1884
|
-
* @return ``true`` if we ran into the character. Otherwise, we ran into the
|
|
1885
|
-
* end of the current chunk.
|
|
1886
|
-
*/
|
|
1887
|
-
captureToChar(char) {
|
|
1888
|
-
let { i: start } = this;
|
|
1889
|
-
const { chunk } = this;
|
|
1890
|
-
while (true) {
|
|
1891
|
-
let c = this.getCode();
|
|
1892
|
-
switch (c) {
|
|
1893
|
-
case NL_LIKE:
|
|
1894
|
-
this.text += `${chunk.slice(start, this.prevI)}\n`;
|
|
1895
|
-
start = this.i;
|
|
1896
|
-
c = NL;
|
|
1897
|
-
break;
|
|
1898
|
-
case EOC:
|
|
1899
|
-
this.text += chunk.slice(start);
|
|
1900
|
-
return false;
|
|
1901
|
-
default:
|
|
1902
|
-
}
|
|
1903
|
-
if (c === char) {
|
|
1904
|
-
this.text += chunk.slice(start, this.prevI);
|
|
1905
|
-
return true;
|
|
1906
|
-
}
|
|
1907
|
-
}
|
|
1908
|
-
}
|
|
1909
|
-
/**
|
|
1910
|
-
* Capture characters that satisfy ``isNameChar`` into the ``name`` field of
|
|
1911
|
-
* this parser.
|
|
1912
|
-
*
|
|
1913
|
-
* @return The character code that made the test fail, or ``EOC`` if we hit
|
|
1914
|
-
* the end of the chunk. The return value cannot be NL_LIKE: NL is returned
|
|
1915
|
-
* instead.
|
|
1916
|
-
*/
|
|
1917
|
-
captureNameChars() {
|
|
1918
|
-
const { chunk, i: start } = this;
|
|
1919
|
-
while (true) {
|
|
1920
|
-
const c = this.getCode();
|
|
1921
|
-
if (c === EOC) {
|
|
1922
|
-
this.name += chunk.slice(start);
|
|
1923
|
-
return EOC;
|
|
1924
|
-
}
|
|
1925
|
-
if (!isNameChar(c)) {
|
|
1926
|
-
this.name += chunk.slice(start, this.prevI);
|
|
1927
|
-
return c === NL_LIKE ? NL : c;
|
|
1928
|
-
}
|
|
1929
|
-
}
|
|
1930
|
-
}
|
|
1931
|
-
/**
|
|
1932
|
-
* Skip white spaces.
|
|
1933
|
-
*
|
|
1934
|
-
* @return The character that ended the skip, or ``EOC`` if we hit
|
|
1935
|
-
* the end of the chunk. The return value cannot be NL_LIKE: NL is returned
|
|
1936
|
-
* instead.
|
|
1937
|
-
*/
|
|
1938
|
-
skipSpaces() {
|
|
1939
|
-
while (true) {
|
|
1940
|
-
const c = this.getCodeNorm();
|
|
1941
|
-
if (c === EOC || !isS(c)) return c;
|
|
1942
|
-
}
|
|
1943
|
-
}
|
|
1944
|
-
setXMLVersion(version) {
|
|
1945
|
-
this.currentXMLVersion = version;
|
|
1946
|
-
if (version === "1.0") {
|
|
1947
|
-
this.isChar = isChar10;
|
|
1948
|
-
this.getCode = this.getCode10;
|
|
1949
|
-
} else {
|
|
1950
|
-
this.isChar = isChar11;
|
|
1951
|
-
this.getCode = this.getCode11;
|
|
1952
|
-
}
|
|
1953
|
-
}
|
|
1954
|
-
sBegin() {
|
|
1955
|
-
if (this.chunk.charCodeAt(0) === 65279) {
|
|
1956
|
-
this.i++;
|
|
1957
|
-
this.column++;
|
|
1958
|
-
}
|
|
1959
|
-
this.state = S_BEGIN_WHITESPACE;
|
|
1960
|
-
}
|
|
1961
|
-
sBeginWhitespace() {
|
|
1962
|
-
const iBefore = this.i;
|
|
1963
|
-
const c = this.skipSpaces();
|
|
1964
|
-
if (this.prevI !== iBefore) this.xmlDeclPossible = false;
|
|
1965
|
-
switch (c) {
|
|
1966
|
-
case LESS:
|
|
1967
|
-
this.state = S_OPEN_WAKA;
|
|
1968
|
-
if (this.text.length !== 0) throw new Error("no-empty text at start");
|
|
1969
|
-
break;
|
|
1970
|
-
case EOC: break;
|
|
1971
|
-
default:
|
|
1972
|
-
this.unget();
|
|
1973
|
-
this.state = S_TEXT;
|
|
1974
|
-
this.xmlDeclPossible = false;
|
|
1975
|
-
}
|
|
1976
|
-
}
|
|
1977
|
-
sDoctype() {
|
|
1978
|
-
var _a;
|
|
1979
|
-
const c = this.captureTo(DOCTYPE_TERMINATOR);
|
|
1980
|
-
switch (c) {
|
|
1981
|
-
case GREATER:
|
|
1982
|
-
(_a = this.doctypeHandler) === null || _a === void 0 || _a.call(this, this.text);
|
|
1983
|
-
this.text = "";
|
|
1984
|
-
this.state = S_TEXT;
|
|
1985
|
-
this.doctype = true;
|
|
1986
|
-
break;
|
|
1987
|
-
case EOC: break;
|
|
1988
|
-
default:
|
|
1989
|
-
this.text += String.fromCodePoint(c);
|
|
1990
|
-
if (c === OPEN_BRACKET) this.state = S_DTD;
|
|
1991
|
-
else if (isQuote(c)) {
|
|
1992
|
-
this.state = S_DOCTYPE_QUOTE;
|
|
1993
|
-
this.q = c;
|
|
1994
|
-
}
|
|
1995
|
-
}
|
|
1996
|
-
}
|
|
1997
|
-
sDoctypeQuote() {
|
|
1998
|
-
const q = this.q;
|
|
1999
|
-
if (this.captureToChar(q)) {
|
|
2000
|
-
this.text += String.fromCodePoint(q);
|
|
2001
|
-
this.q = null;
|
|
2002
|
-
this.state = S_DOCTYPE;
|
|
2003
|
-
}
|
|
2004
|
-
}
|
|
2005
|
-
sDTD() {
|
|
2006
|
-
const c = this.captureTo(DTD_TERMINATOR);
|
|
2007
|
-
if (c === EOC) return;
|
|
2008
|
-
this.text += String.fromCodePoint(c);
|
|
2009
|
-
if (c === CLOSE_BRACKET) this.state = S_DOCTYPE;
|
|
2010
|
-
else if (c === LESS) this.state = S_DTD_OPEN_WAKA;
|
|
2011
|
-
else if (isQuote(c)) {
|
|
2012
|
-
this.state = S_DTD_QUOTED;
|
|
2013
|
-
this.q = c;
|
|
2014
|
-
}
|
|
2015
|
-
}
|
|
2016
|
-
sDTDQuoted() {
|
|
2017
|
-
const q = this.q;
|
|
2018
|
-
if (this.captureToChar(q)) {
|
|
2019
|
-
this.text += String.fromCodePoint(q);
|
|
2020
|
-
this.state = S_DTD;
|
|
2021
|
-
this.q = null;
|
|
2022
|
-
}
|
|
2023
|
-
}
|
|
2024
|
-
sDTDOpenWaka() {
|
|
2025
|
-
const c = this.getCodeNorm();
|
|
2026
|
-
this.text += String.fromCodePoint(c);
|
|
2027
|
-
switch (c) {
|
|
2028
|
-
case BANG:
|
|
2029
|
-
this.state = S_DTD_OPEN_WAKA_BANG;
|
|
2030
|
-
this.openWakaBang = "";
|
|
2031
|
-
break;
|
|
2032
|
-
case QUESTION:
|
|
2033
|
-
this.state = S_DTD_PI;
|
|
2034
|
-
break;
|
|
2035
|
-
default: this.state = S_DTD;
|
|
2036
|
-
}
|
|
2037
|
-
}
|
|
2038
|
-
sDTDOpenWakaBang() {
|
|
2039
|
-
const char = String.fromCodePoint(this.getCodeNorm());
|
|
2040
|
-
const owb = this.openWakaBang += char;
|
|
2041
|
-
this.text += char;
|
|
2042
|
-
if (owb !== "-") {
|
|
2043
|
-
this.state = owb === "--" ? S_DTD_COMMENT : S_DTD;
|
|
2044
|
-
this.openWakaBang = "";
|
|
2045
|
-
}
|
|
2046
|
-
}
|
|
2047
|
-
sDTDComment() {
|
|
2048
|
-
if (this.captureToChar(MINUS)) {
|
|
2049
|
-
this.text += "-";
|
|
2050
|
-
this.state = S_DTD_COMMENT_ENDING;
|
|
2051
|
-
}
|
|
2052
|
-
}
|
|
2053
|
-
sDTDCommentEnding() {
|
|
2054
|
-
const c = this.getCodeNorm();
|
|
2055
|
-
this.text += String.fromCodePoint(c);
|
|
2056
|
-
this.state = c === MINUS ? S_DTD_COMMENT_ENDED : S_DTD_COMMENT;
|
|
2057
|
-
}
|
|
2058
|
-
sDTDCommentEnded() {
|
|
2059
|
-
const c = this.getCodeNorm();
|
|
2060
|
-
this.text += String.fromCodePoint(c);
|
|
2061
|
-
if (c === GREATER) this.state = S_DTD;
|
|
2062
|
-
else {
|
|
2063
|
-
this.fail("malformed comment.");
|
|
2064
|
-
this.state = S_DTD_COMMENT;
|
|
2065
|
-
}
|
|
2066
|
-
}
|
|
2067
|
-
sDTDPI() {
|
|
2068
|
-
if (this.captureToChar(QUESTION)) {
|
|
2069
|
-
this.text += "?";
|
|
2070
|
-
this.state = S_DTD_PI_ENDING;
|
|
2071
|
-
}
|
|
2072
|
-
}
|
|
2073
|
-
sDTDPIEnding() {
|
|
2074
|
-
const c = this.getCodeNorm();
|
|
2075
|
-
this.text += String.fromCodePoint(c);
|
|
2076
|
-
if (c === GREATER) this.state = S_DTD;
|
|
2077
|
-
}
|
|
2078
|
-
sText() {
|
|
2079
|
-
if (this.tags.length !== 0) this.handleTextInRoot();
|
|
2080
|
-
else this.handleTextOutsideRoot();
|
|
2081
|
-
}
|
|
2082
|
-
sEntity() {
|
|
2083
|
-
let { i: start } = this;
|
|
2084
|
-
const { chunk } = this;
|
|
2085
|
-
loop: while (true) switch (this.getCode()) {
|
|
2086
|
-
case NL_LIKE:
|
|
2087
|
-
this.entity += `${chunk.slice(start, this.prevI)}\n`;
|
|
2088
|
-
start = this.i;
|
|
2089
|
-
break;
|
|
2090
|
-
case SEMICOLON: {
|
|
2091
|
-
const { entityReturnState } = this;
|
|
2092
|
-
const entity = this.entity + chunk.slice(start, this.prevI);
|
|
2093
|
-
this.state = entityReturnState;
|
|
2094
|
-
let parsed;
|
|
2095
|
-
if (entity === "") {
|
|
2096
|
-
this.fail("empty entity name.");
|
|
2097
|
-
parsed = "&;";
|
|
2098
|
-
} else {
|
|
2099
|
-
parsed = this.parseEntity(entity);
|
|
2100
|
-
this.entity = "";
|
|
2101
|
-
}
|
|
2102
|
-
if (entityReturnState !== S_TEXT || this.textHandler !== void 0) this.text += parsed;
|
|
2103
|
-
break loop;
|
|
2104
|
-
}
|
|
2105
|
-
case EOC:
|
|
2106
|
-
this.entity += chunk.slice(start);
|
|
2107
|
-
break loop;
|
|
2108
|
-
default:
|
|
2109
|
-
}
|
|
2110
|
-
}
|
|
2111
|
-
sOpenWaka() {
|
|
2112
|
-
const c = this.getCode();
|
|
2113
|
-
if (isNameStartChar(c)) {
|
|
2114
|
-
this.state = S_OPEN_TAG;
|
|
2115
|
-
this.unget();
|
|
2116
|
-
this.xmlDeclPossible = false;
|
|
2117
|
-
} else switch (c) {
|
|
2118
|
-
case FORWARD_SLASH:
|
|
2119
|
-
this.state = S_CLOSE_TAG;
|
|
2120
|
-
this.xmlDeclPossible = false;
|
|
2121
|
-
break;
|
|
2122
|
-
case BANG:
|
|
2123
|
-
this.state = S_OPEN_WAKA_BANG;
|
|
2124
|
-
this.openWakaBang = "";
|
|
2125
|
-
this.xmlDeclPossible = false;
|
|
2126
|
-
break;
|
|
2127
|
-
case QUESTION:
|
|
2128
|
-
this.state = S_PI_FIRST_CHAR;
|
|
2129
|
-
break;
|
|
2130
|
-
default:
|
|
2131
|
-
this.fail("disallowed character in tag name");
|
|
2132
|
-
this.state = S_TEXT;
|
|
2133
|
-
this.xmlDeclPossible = false;
|
|
2134
|
-
}
|
|
2135
|
-
}
|
|
2136
|
-
sOpenWakaBang() {
|
|
2137
|
-
this.openWakaBang += String.fromCodePoint(this.getCodeNorm());
|
|
2138
|
-
switch (this.openWakaBang) {
|
|
2139
|
-
case "[CDATA[":
|
|
2140
|
-
if (!this.sawRoot && !this.reportedTextBeforeRoot) {
|
|
2141
|
-
this.fail("text data outside of root node.");
|
|
2142
|
-
this.reportedTextBeforeRoot = true;
|
|
2143
|
-
}
|
|
2144
|
-
if (this.closedRoot && !this.reportedTextAfterRoot) {
|
|
2145
|
-
this.fail("text data outside of root node.");
|
|
2146
|
-
this.reportedTextAfterRoot = true;
|
|
2147
|
-
}
|
|
2148
|
-
this.state = S_CDATA;
|
|
2149
|
-
this.openWakaBang = "";
|
|
2150
|
-
break;
|
|
2151
|
-
case "--":
|
|
2152
|
-
this.state = S_COMMENT;
|
|
2153
|
-
this.openWakaBang = "";
|
|
2154
|
-
break;
|
|
2155
|
-
case "DOCTYPE":
|
|
2156
|
-
this.state = S_DOCTYPE;
|
|
2157
|
-
if (this.doctype || this.sawRoot) this.fail("inappropriately located doctype declaration.");
|
|
2158
|
-
this.openWakaBang = "";
|
|
2159
|
-
break;
|
|
2160
|
-
default: if (this.openWakaBang.length >= 7) this.fail("incorrect syntax.");
|
|
2161
|
-
}
|
|
2162
|
-
}
|
|
2163
|
-
sComment() {
|
|
2164
|
-
if (this.captureToChar(MINUS)) this.state = S_COMMENT_ENDING;
|
|
2165
|
-
}
|
|
2166
|
-
sCommentEnding() {
|
|
2167
|
-
var _a;
|
|
2168
|
-
const c = this.getCodeNorm();
|
|
2169
|
-
if (c === MINUS) {
|
|
2170
|
-
this.state = S_COMMENT_ENDED;
|
|
2171
|
-
(_a = this.commentHandler) === null || _a === void 0 || _a.call(this, this.text);
|
|
2172
|
-
this.text = "";
|
|
2173
|
-
} else {
|
|
2174
|
-
this.text += `-${String.fromCodePoint(c)}`;
|
|
2175
|
-
this.state = S_COMMENT;
|
|
2176
|
-
}
|
|
2177
|
-
}
|
|
2178
|
-
sCommentEnded() {
|
|
2179
|
-
const c = this.getCodeNorm();
|
|
2180
|
-
if (c !== GREATER) {
|
|
2181
|
-
this.fail("malformed comment.");
|
|
2182
|
-
this.text += `--${String.fromCodePoint(c)}`;
|
|
2183
|
-
this.state = S_COMMENT;
|
|
2184
|
-
} else this.state = S_TEXT;
|
|
2185
|
-
}
|
|
2186
|
-
sCData() {
|
|
2187
|
-
if (this.captureToChar(CLOSE_BRACKET)) this.state = S_CDATA_ENDING;
|
|
2188
|
-
}
|
|
2189
|
-
sCDataEnding() {
|
|
2190
|
-
const c = this.getCodeNorm();
|
|
2191
|
-
if (c === CLOSE_BRACKET) this.state = S_CDATA_ENDING_2;
|
|
2192
|
-
else {
|
|
2193
|
-
this.text += `]${String.fromCodePoint(c)}`;
|
|
2194
|
-
this.state = S_CDATA;
|
|
2195
|
-
}
|
|
2196
|
-
}
|
|
2197
|
-
sCDataEnding2() {
|
|
2198
|
-
var _a;
|
|
2199
|
-
const c = this.getCodeNorm();
|
|
2200
|
-
switch (c) {
|
|
2201
|
-
case GREATER:
|
|
2202
|
-
(_a = this.cdataHandler) === null || _a === void 0 || _a.call(this, this.text);
|
|
2203
|
-
this.text = "";
|
|
2204
|
-
this.state = S_TEXT;
|
|
2205
|
-
break;
|
|
2206
|
-
case CLOSE_BRACKET:
|
|
2207
|
-
this.text += "]";
|
|
2208
|
-
break;
|
|
2209
|
-
default:
|
|
2210
|
-
this.text += `]]${String.fromCodePoint(c)}`;
|
|
2211
|
-
this.state = S_CDATA;
|
|
2212
|
-
}
|
|
2213
|
-
}
|
|
2214
|
-
sPIFirstChar() {
|
|
2215
|
-
const c = this.getCodeNorm();
|
|
2216
|
-
if (this.nameStartCheck(c)) {
|
|
2217
|
-
this.piTarget += String.fromCodePoint(c);
|
|
2218
|
-
this.state = S_PI_REST;
|
|
2219
|
-
} else if (c === QUESTION || isS(c)) {
|
|
2220
|
-
this.fail("processing instruction without a target.");
|
|
2221
|
-
this.state = c === QUESTION ? S_PI_ENDING : S_PI_BODY;
|
|
2222
|
-
} else {
|
|
2223
|
-
this.fail("disallowed character in processing instruction name.");
|
|
2224
|
-
this.piTarget += String.fromCodePoint(c);
|
|
2225
|
-
this.state = S_PI_REST;
|
|
2226
|
-
}
|
|
2227
|
-
}
|
|
2228
|
-
sPIRest() {
|
|
2229
|
-
const { chunk, i: start } = this;
|
|
2230
|
-
while (true) {
|
|
2231
|
-
const c = this.getCodeNorm();
|
|
2232
|
-
if (c === EOC) {
|
|
2233
|
-
this.piTarget += chunk.slice(start);
|
|
2234
|
-
return;
|
|
2235
|
-
}
|
|
2236
|
-
if (!this.nameCheck(c)) {
|
|
2237
|
-
this.piTarget += chunk.slice(start, this.prevI);
|
|
2238
|
-
const isQuestion = c === QUESTION;
|
|
2239
|
-
if (isQuestion || isS(c)) if (this.piTarget === "xml") {
|
|
2240
|
-
if (!this.xmlDeclPossible) this.fail("an XML declaration must be at the start of the document.");
|
|
2241
|
-
this.state = isQuestion ? S_XML_DECL_ENDING : S_XML_DECL_NAME_START;
|
|
2242
|
-
} else this.state = isQuestion ? S_PI_ENDING : S_PI_BODY;
|
|
2243
|
-
else {
|
|
2244
|
-
this.fail("disallowed character in processing instruction name.");
|
|
2245
|
-
this.piTarget += String.fromCodePoint(c);
|
|
2246
|
-
}
|
|
2247
|
-
break;
|
|
2248
|
-
}
|
|
2249
|
-
}
|
|
2250
|
-
}
|
|
2251
|
-
sPIBody() {
|
|
2252
|
-
if (this.text.length === 0) {
|
|
2253
|
-
const c = this.getCodeNorm();
|
|
2254
|
-
if (c === QUESTION) this.state = S_PI_ENDING;
|
|
2255
|
-
else if (!isS(c)) this.text = String.fromCodePoint(c);
|
|
2256
|
-
} else if (this.captureToChar(QUESTION)) this.state = S_PI_ENDING;
|
|
2257
|
-
}
|
|
2258
|
-
sPIEnding() {
|
|
2259
|
-
var _a;
|
|
2260
|
-
const c = this.getCodeNorm();
|
|
2261
|
-
if (c === GREATER) {
|
|
2262
|
-
const { piTarget } = this;
|
|
2263
|
-
if (piTarget.toLowerCase() === "xml") this.fail("the XML declaration must appear at the start of the document.");
|
|
2264
|
-
(_a = this.piHandler) === null || _a === void 0 || _a.call(this, {
|
|
2265
|
-
target: piTarget,
|
|
2266
|
-
body: this.text
|
|
2267
|
-
});
|
|
2268
|
-
this.piTarget = this.text = "";
|
|
2269
|
-
this.state = S_TEXT;
|
|
2270
|
-
} else if (c === QUESTION) this.text += "?";
|
|
2271
|
-
else {
|
|
2272
|
-
this.text += `?${String.fromCodePoint(c)}`;
|
|
2273
|
-
this.state = S_PI_BODY;
|
|
2274
|
-
}
|
|
2275
|
-
this.xmlDeclPossible = false;
|
|
2276
|
-
}
|
|
2277
|
-
sXMLDeclNameStart() {
|
|
2278
|
-
const c = this.skipSpaces();
|
|
2279
|
-
if (c === QUESTION) {
|
|
2280
|
-
this.state = S_XML_DECL_ENDING;
|
|
2281
|
-
return;
|
|
2282
|
-
}
|
|
2283
|
-
if (c !== EOC) {
|
|
2284
|
-
this.state = S_XML_DECL_NAME;
|
|
2285
|
-
this.name = String.fromCodePoint(c);
|
|
2286
|
-
}
|
|
2287
|
-
}
|
|
2288
|
-
sXMLDeclName() {
|
|
2289
|
-
const c = this.captureTo(XML_DECL_NAME_TERMINATOR);
|
|
2290
|
-
if (c === QUESTION) {
|
|
2291
|
-
this.state = S_XML_DECL_ENDING;
|
|
2292
|
-
this.name += this.text;
|
|
2293
|
-
this.text = "";
|
|
2294
|
-
this.fail("XML declaration is incomplete.");
|
|
2295
|
-
return;
|
|
2296
|
-
}
|
|
2297
|
-
if (!(isS(c) || c === EQUAL)) return;
|
|
2298
|
-
this.name += this.text;
|
|
2299
|
-
this.text = "";
|
|
2300
|
-
if (!this.xmlDeclExpects.includes(this.name)) switch (this.name.length) {
|
|
2301
|
-
case 0:
|
|
2302
|
-
this.fail("did not expect any more name/value pairs.");
|
|
2303
|
-
break;
|
|
2304
|
-
case 1:
|
|
2305
|
-
this.fail(`expected the name ${this.xmlDeclExpects[0]}.`);
|
|
2306
|
-
break;
|
|
2307
|
-
default: this.fail(`expected one of ${this.xmlDeclExpects.join(", ")}`);
|
|
2308
|
-
}
|
|
2309
|
-
this.state = c === EQUAL ? S_XML_DECL_VALUE_START : S_XML_DECL_EQ;
|
|
2310
|
-
}
|
|
2311
|
-
sXMLDeclEq() {
|
|
2312
|
-
const c = this.getCodeNorm();
|
|
2313
|
-
if (c === QUESTION) {
|
|
2314
|
-
this.state = S_XML_DECL_ENDING;
|
|
2315
|
-
this.fail("XML declaration is incomplete.");
|
|
2316
|
-
return;
|
|
2317
|
-
}
|
|
2318
|
-
if (isS(c)) return;
|
|
2319
|
-
if (c !== EQUAL) this.fail("value required.");
|
|
2320
|
-
this.state = S_XML_DECL_VALUE_START;
|
|
2321
|
-
}
|
|
2322
|
-
sXMLDeclValueStart() {
|
|
2323
|
-
const c = this.getCodeNorm();
|
|
2324
|
-
if (c === QUESTION) {
|
|
2325
|
-
this.state = S_XML_DECL_ENDING;
|
|
2326
|
-
this.fail("XML declaration is incomplete.");
|
|
2327
|
-
return;
|
|
2328
|
-
}
|
|
2329
|
-
if (isS(c)) return;
|
|
2330
|
-
if (!isQuote(c)) {
|
|
2331
|
-
this.fail("value must be quoted.");
|
|
2332
|
-
this.q = SPACE;
|
|
2333
|
-
} else this.q = c;
|
|
2334
|
-
this.state = S_XML_DECL_VALUE;
|
|
2335
|
-
}
|
|
2336
|
-
sXMLDeclValue() {
|
|
2337
|
-
const c = this.captureTo([this.q, QUESTION]);
|
|
2338
|
-
if (c === QUESTION) {
|
|
2339
|
-
this.state = S_XML_DECL_ENDING;
|
|
2340
|
-
this.text = "";
|
|
2341
|
-
this.fail("XML declaration is incomplete.");
|
|
2342
|
-
return;
|
|
2343
|
-
}
|
|
2344
|
-
if (c === EOC) return;
|
|
2345
|
-
const value = this.text;
|
|
2346
|
-
this.text = "";
|
|
2347
|
-
switch (this.name) {
|
|
2348
|
-
case "version": {
|
|
2349
|
-
this.xmlDeclExpects = ["encoding", "standalone"];
|
|
2350
|
-
const version = value;
|
|
2351
|
-
this.xmlDecl.version = version;
|
|
2352
|
-
if (!/^1\.[0-9]+$/.test(version)) this.fail("version number must match /^1\\.[0-9]+$/.");
|
|
2353
|
-
else if (!this.opt.forceXMLVersion) this.setXMLVersion(version);
|
|
2354
|
-
break;
|
|
2355
|
-
}
|
|
2356
|
-
case "encoding":
|
|
2357
|
-
if (!/^[A-Za-z][A-Za-z0-9._-]*$/.test(value)) this.fail("encoding value must match /^[A-Za-z0-9][A-Za-z0-9._-]*$/.");
|
|
2358
|
-
this.xmlDeclExpects = ["standalone"];
|
|
2359
|
-
this.xmlDecl.encoding = value;
|
|
2360
|
-
break;
|
|
2361
|
-
case "standalone":
|
|
2362
|
-
if (value !== "yes" && value !== "no") this.fail("standalone value must match \"yes\" or \"no\".");
|
|
2363
|
-
this.xmlDeclExpects = [];
|
|
2364
|
-
this.xmlDecl.standalone = value;
|
|
2365
|
-
break;
|
|
2366
|
-
default:
|
|
2367
|
-
}
|
|
2368
|
-
this.name = "";
|
|
2369
|
-
this.state = S_XML_DECL_SEPARATOR;
|
|
2370
|
-
}
|
|
2371
|
-
sXMLDeclSeparator() {
|
|
2372
|
-
const c = this.getCodeNorm();
|
|
2373
|
-
if (c === QUESTION) {
|
|
2374
|
-
this.state = S_XML_DECL_ENDING;
|
|
2375
|
-
return;
|
|
2376
|
-
}
|
|
2377
|
-
if (!isS(c)) {
|
|
2378
|
-
this.fail("whitespace required.");
|
|
2379
|
-
this.unget();
|
|
2380
|
-
}
|
|
2381
|
-
this.state = S_XML_DECL_NAME_START;
|
|
2382
|
-
}
|
|
2383
|
-
sXMLDeclEnding() {
|
|
2384
|
-
var _a;
|
|
2385
|
-
if (this.getCodeNorm() === GREATER) {
|
|
2386
|
-
if (this.piTarget !== "xml") this.fail("processing instructions are not allowed before root.");
|
|
2387
|
-
else if (this.name !== "version" && this.xmlDeclExpects.includes("version")) this.fail("XML declaration must contain a version.");
|
|
2388
|
-
(_a = this.xmldeclHandler) === null || _a === void 0 || _a.call(this, this.xmlDecl);
|
|
2389
|
-
this.name = "";
|
|
2390
|
-
this.piTarget = this.text = "";
|
|
2391
|
-
this.state = S_TEXT;
|
|
2392
|
-
} else this.fail("The character ? is disallowed anywhere in XML declarations.");
|
|
2393
|
-
this.xmlDeclPossible = false;
|
|
2394
|
-
}
|
|
2395
|
-
sOpenTag() {
|
|
2396
|
-
var _a;
|
|
2397
|
-
const c = this.captureNameChars();
|
|
2398
|
-
if (c === EOC) return;
|
|
2399
|
-
const tag = this.tag = {
|
|
2400
|
-
name: this.name,
|
|
2401
|
-
attributes: Object.create(null)
|
|
2402
|
-
};
|
|
2403
|
-
this.name = "";
|
|
2404
|
-
if (this.xmlnsOpt) this.topNS = tag.ns = Object.create(null);
|
|
2405
|
-
(_a = this.openTagStartHandler) === null || _a === void 0 || _a.call(this, tag);
|
|
2406
|
-
this.sawRoot = true;
|
|
2407
|
-
if (!this.fragmentOpt && this.closedRoot) this.fail("documents may contain only one root.");
|
|
2408
|
-
switch (c) {
|
|
2409
|
-
case GREATER:
|
|
2410
|
-
this.openTag();
|
|
2411
|
-
break;
|
|
2412
|
-
case FORWARD_SLASH:
|
|
2413
|
-
this.state = S_OPEN_TAG_SLASH;
|
|
2414
|
-
break;
|
|
2415
|
-
default:
|
|
2416
|
-
if (!isS(c)) this.fail("disallowed character in tag name.");
|
|
2417
|
-
this.state = S_ATTRIB;
|
|
2418
|
-
}
|
|
2419
|
-
}
|
|
2420
|
-
sOpenTagSlash() {
|
|
2421
|
-
if (this.getCode() === GREATER) this.openSelfClosingTag();
|
|
2422
|
-
else {
|
|
2423
|
-
this.fail("forward-slash in opening tag not followed by >.");
|
|
2424
|
-
this.state = S_ATTRIB;
|
|
2425
|
-
}
|
|
2426
|
-
}
|
|
2427
|
-
sAttrib() {
|
|
2428
|
-
const c = this.skipSpaces();
|
|
2429
|
-
if (c === EOC) return;
|
|
2430
|
-
if (isNameStartChar(c)) {
|
|
2431
|
-
this.unget();
|
|
2432
|
-
this.state = S_ATTRIB_NAME;
|
|
2433
|
-
} else if (c === GREATER) this.openTag();
|
|
2434
|
-
else if (c === FORWARD_SLASH) this.state = S_OPEN_TAG_SLASH;
|
|
2435
|
-
else this.fail("disallowed character in attribute name.");
|
|
2436
|
-
}
|
|
2437
|
-
sAttribName() {
|
|
2438
|
-
const c = this.captureNameChars();
|
|
2439
|
-
if (c === EQUAL) this.state = S_ATTRIB_VALUE;
|
|
2440
|
-
else if (isS(c)) this.state = S_ATTRIB_NAME_SAW_WHITE;
|
|
2441
|
-
else if (c === GREATER) {
|
|
2442
|
-
this.fail("attribute without value.");
|
|
2443
|
-
this.pushAttrib(this.name, this.name);
|
|
2444
|
-
this.name = this.text = "";
|
|
2445
|
-
this.openTag();
|
|
2446
|
-
} else if (c !== EOC) this.fail("disallowed character in attribute name.");
|
|
2447
|
-
}
|
|
2448
|
-
sAttribNameSawWhite() {
|
|
2449
|
-
const c = this.skipSpaces();
|
|
2450
|
-
switch (c) {
|
|
2451
|
-
case EOC: return;
|
|
2452
|
-
case EQUAL:
|
|
2453
|
-
this.state = S_ATTRIB_VALUE;
|
|
2454
|
-
break;
|
|
2455
|
-
default:
|
|
2456
|
-
this.fail("attribute without value.");
|
|
2457
|
-
this.text = "";
|
|
2458
|
-
this.name = "";
|
|
2459
|
-
if (c === GREATER) this.openTag();
|
|
2460
|
-
else if (isNameStartChar(c)) {
|
|
2461
|
-
this.unget();
|
|
2462
|
-
this.state = S_ATTRIB_NAME;
|
|
2463
|
-
} else {
|
|
2464
|
-
this.fail("disallowed character in attribute name.");
|
|
2465
|
-
this.state = S_ATTRIB;
|
|
2466
|
-
}
|
|
2467
|
-
}
|
|
2468
|
-
}
|
|
2469
|
-
sAttribValue() {
|
|
2470
|
-
const c = this.getCodeNorm();
|
|
2471
|
-
if (isQuote(c)) {
|
|
2472
|
-
this.q = c;
|
|
2473
|
-
this.state = S_ATTRIB_VALUE_QUOTED;
|
|
2474
|
-
} else if (!isS(c)) {
|
|
2475
|
-
this.fail("unquoted attribute value.");
|
|
2476
|
-
this.state = S_ATTRIB_VALUE_UNQUOTED;
|
|
2477
|
-
this.unget();
|
|
2478
|
-
}
|
|
2479
|
-
}
|
|
2480
|
-
sAttribValueQuoted() {
|
|
2481
|
-
const { q, chunk } = this;
|
|
2482
|
-
let { i: start } = this;
|
|
2483
|
-
while (true) switch (this.getCode()) {
|
|
2484
|
-
case q:
|
|
2485
|
-
this.pushAttrib(this.name, this.text + chunk.slice(start, this.prevI));
|
|
2486
|
-
this.name = this.text = "";
|
|
2487
|
-
this.q = null;
|
|
2488
|
-
this.state = S_ATTRIB_VALUE_CLOSED;
|
|
2489
|
-
return;
|
|
2490
|
-
case AMP:
|
|
2491
|
-
this.text += chunk.slice(start, this.prevI);
|
|
2492
|
-
this.state = S_ENTITY;
|
|
2493
|
-
this.entityReturnState = S_ATTRIB_VALUE_QUOTED;
|
|
2494
|
-
return;
|
|
2495
|
-
case NL:
|
|
2496
|
-
case NL_LIKE:
|
|
2497
|
-
case TAB:
|
|
2498
|
-
this.text += `${chunk.slice(start, this.prevI)} `;
|
|
2499
|
-
start = this.i;
|
|
2500
|
-
break;
|
|
2501
|
-
case LESS:
|
|
2502
|
-
this.text += chunk.slice(start, this.prevI);
|
|
2503
|
-
this.fail("disallowed character.");
|
|
2504
|
-
return;
|
|
2505
|
-
case EOC:
|
|
2506
|
-
this.text += chunk.slice(start);
|
|
2507
|
-
return;
|
|
2508
|
-
default:
|
|
2509
|
-
}
|
|
2510
|
-
}
|
|
2511
|
-
sAttribValueClosed() {
|
|
2512
|
-
const c = this.getCodeNorm();
|
|
2513
|
-
if (isS(c)) this.state = S_ATTRIB;
|
|
2514
|
-
else if (c === GREATER) this.openTag();
|
|
2515
|
-
else if (c === FORWARD_SLASH) this.state = S_OPEN_TAG_SLASH;
|
|
2516
|
-
else if (isNameStartChar(c)) {
|
|
2517
|
-
this.fail("no whitespace between attributes.");
|
|
2518
|
-
this.unget();
|
|
2519
|
-
this.state = S_ATTRIB_NAME;
|
|
2520
|
-
} else this.fail("disallowed character in attribute name.");
|
|
2521
|
-
}
|
|
2522
|
-
sAttribValueUnquoted() {
|
|
2523
|
-
const c = this.captureTo(ATTRIB_VALUE_UNQUOTED_TERMINATOR);
|
|
2524
|
-
switch (c) {
|
|
2525
|
-
case AMP:
|
|
2526
|
-
this.state = S_ENTITY;
|
|
2527
|
-
this.entityReturnState = S_ATTRIB_VALUE_UNQUOTED;
|
|
2528
|
-
break;
|
|
2529
|
-
case LESS:
|
|
2530
|
-
this.fail("disallowed character.");
|
|
2531
|
-
break;
|
|
2532
|
-
case EOC: break;
|
|
2533
|
-
default:
|
|
2534
|
-
if (this.text.includes("]]>")) this.fail("the string \"]]>\" is disallowed in char data.");
|
|
2535
|
-
this.pushAttrib(this.name, this.text);
|
|
2536
|
-
this.name = this.text = "";
|
|
2537
|
-
if (c === GREATER) this.openTag();
|
|
2538
|
-
else this.state = S_ATTRIB;
|
|
2539
|
-
}
|
|
2540
|
-
}
|
|
2541
|
-
sCloseTag() {
|
|
2542
|
-
const c = this.captureNameChars();
|
|
2543
|
-
if (c === GREATER) this.closeTag();
|
|
2544
|
-
else if (isS(c)) this.state = S_CLOSE_TAG_SAW_WHITE;
|
|
2545
|
-
else if (c !== EOC) this.fail("disallowed character in closing tag.");
|
|
2546
|
-
}
|
|
2547
|
-
sCloseTagSawWhite() {
|
|
2548
|
-
switch (this.skipSpaces()) {
|
|
2549
|
-
case GREATER:
|
|
2550
|
-
this.closeTag();
|
|
2551
|
-
break;
|
|
2552
|
-
case EOC: break;
|
|
2553
|
-
default: this.fail("disallowed character in closing tag.");
|
|
2554
|
-
}
|
|
2555
|
-
}
|
|
2556
|
-
handleTextInRoot() {
|
|
2557
|
-
let { i: start, forbiddenState } = this;
|
|
2558
|
-
const { chunk, textHandler: handler } = this;
|
|
2559
|
-
scanLoop: while (true) switch (this.getCode()) {
|
|
2560
|
-
case LESS:
|
|
2561
|
-
this.state = S_OPEN_WAKA;
|
|
2562
|
-
if (handler !== void 0) {
|
|
2563
|
-
const { text } = this;
|
|
2564
|
-
const slice = chunk.slice(start, this.prevI);
|
|
2565
|
-
if (text.length !== 0) {
|
|
2566
|
-
handler(text + slice);
|
|
2567
|
-
this.text = "";
|
|
2568
|
-
} else if (slice.length !== 0) handler(slice);
|
|
2569
|
-
}
|
|
2570
|
-
forbiddenState = FORBIDDEN_START;
|
|
2571
|
-
break scanLoop;
|
|
2572
|
-
case AMP:
|
|
2573
|
-
this.state = S_ENTITY;
|
|
2574
|
-
this.entityReturnState = S_TEXT;
|
|
2575
|
-
if (handler !== void 0) this.text += chunk.slice(start, this.prevI);
|
|
2576
|
-
forbiddenState = FORBIDDEN_START;
|
|
2577
|
-
break scanLoop;
|
|
2578
|
-
case CLOSE_BRACKET:
|
|
2579
|
-
switch (forbiddenState) {
|
|
2580
|
-
case FORBIDDEN_START:
|
|
2581
|
-
forbiddenState = FORBIDDEN_BRACKET;
|
|
2582
|
-
break;
|
|
2583
|
-
case FORBIDDEN_BRACKET:
|
|
2584
|
-
forbiddenState = FORBIDDEN_BRACKET_BRACKET;
|
|
2585
|
-
break;
|
|
2586
|
-
case FORBIDDEN_BRACKET_BRACKET: break;
|
|
2587
|
-
default: throw new Error("impossible state");
|
|
2588
|
-
}
|
|
2589
|
-
break;
|
|
2590
|
-
case GREATER:
|
|
2591
|
-
if (forbiddenState === FORBIDDEN_BRACKET_BRACKET) this.fail("the string \"]]>\" is disallowed in char data.");
|
|
2592
|
-
forbiddenState = FORBIDDEN_START;
|
|
2593
|
-
break;
|
|
2594
|
-
case NL_LIKE:
|
|
2595
|
-
if (handler !== void 0) this.text += `${chunk.slice(start, this.prevI)}\n`;
|
|
2596
|
-
start = this.i;
|
|
2597
|
-
forbiddenState = FORBIDDEN_START;
|
|
2598
|
-
break;
|
|
2599
|
-
case EOC:
|
|
2600
|
-
if (handler !== void 0) this.text += chunk.slice(start);
|
|
2601
|
-
break scanLoop;
|
|
2602
|
-
default: forbiddenState = FORBIDDEN_START;
|
|
2603
|
-
}
|
|
2604
|
-
this.forbiddenState = forbiddenState;
|
|
2605
|
-
}
|
|
2606
|
-
handleTextOutsideRoot() {
|
|
2607
|
-
let { i: start } = this;
|
|
2608
|
-
const { chunk, textHandler: handler } = this;
|
|
2609
|
-
let nonSpace = false;
|
|
2610
|
-
outRootLoop: while (true) {
|
|
2611
|
-
const code = this.getCode();
|
|
2612
|
-
switch (code) {
|
|
2613
|
-
case LESS:
|
|
2614
|
-
this.state = S_OPEN_WAKA;
|
|
2615
|
-
if (handler !== void 0) {
|
|
2616
|
-
const { text } = this;
|
|
2617
|
-
const slice = chunk.slice(start, this.prevI);
|
|
2618
|
-
if (text.length !== 0) {
|
|
2619
|
-
handler(text + slice);
|
|
2620
|
-
this.text = "";
|
|
2621
|
-
} else if (slice.length !== 0) handler(slice);
|
|
2622
|
-
}
|
|
2623
|
-
break outRootLoop;
|
|
2624
|
-
case AMP:
|
|
2625
|
-
this.state = S_ENTITY;
|
|
2626
|
-
this.entityReturnState = S_TEXT;
|
|
2627
|
-
if (handler !== void 0) this.text += chunk.slice(start, this.prevI);
|
|
2628
|
-
nonSpace = true;
|
|
2629
|
-
break outRootLoop;
|
|
2630
|
-
case NL_LIKE:
|
|
2631
|
-
if (handler !== void 0) this.text += `${chunk.slice(start, this.prevI)}\n`;
|
|
2632
|
-
start = this.i;
|
|
2633
|
-
break;
|
|
2634
|
-
case EOC:
|
|
2635
|
-
if (handler !== void 0) this.text += chunk.slice(start);
|
|
2636
|
-
break outRootLoop;
|
|
2637
|
-
default: if (!isS(code)) nonSpace = true;
|
|
2638
|
-
}
|
|
2639
|
-
}
|
|
2640
|
-
if (!nonSpace) return;
|
|
2641
|
-
if (!this.sawRoot && !this.reportedTextBeforeRoot) {
|
|
2642
|
-
this.fail("text data outside of root node.");
|
|
2643
|
-
this.reportedTextBeforeRoot = true;
|
|
2644
|
-
}
|
|
2645
|
-
if (this.closedRoot && !this.reportedTextAfterRoot) {
|
|
2646
|
-
this.fail("text data outside of root node.");
|
|
2647
|
-
this.reportedTextAfterRoot = true;
|
|
2648
|
-
}
|
|
2649
|
-
}
|
|
2650
|
-
pushAttribNS(name, value) {
|
|
2651
|
-
var _a;
|
|
2652
|
-
const { prefix, local } = this.qname(name);
|
|
2653
|
-
const attr = {
|
|
2654
|
-
name,
|
|
2655
|
-
prefix,
|
|
2656
|
-
local,
|
|
2657
|
-
value
|
|
2658
|
-
};
|
|
2659
|
-
this.attribList.push(attr);
|
|
2660
|
-
(_a = this.attributeHandler) === null || _a === void 0 || _a.call(this, attr);
|
|
2661
|
-
if (prefix === "xmlns") {
|
|
2662
|
-
const trimmed = value.trim();
|
|
2663
|
-
if (this.currentXMLVersion === "1.0" && trimmed === "") this.fail("invalid attempt to undefine prefix in XML 1.0");
|
|
2664
|
-
this.topNS[local] = trimmed;
|
|
2665
|
-
nsPairCheck(this, local, trimmed);
|
|
2666
|
-
} else if (name === "xmlns") {
|
|
2667
|
-
const trimmed = value.trim();
|
|
2668
|
-
this.topNS[""] = trimmed;
|
|
2669
|
-
nsPairCheck(this, "", trimmed);
|
|
2670
|
-
}
|
|
2671
|
-
}
|
|
2672
|
-
pushAttribPlain(name, value) {
|
|
2673
|
-
var _a;
|
|
2674
|
-
const attr = {
|
|
2675
|
-
name,
|
|
2676
|
-
value
|
|
2677
|
-
};
|
|
2678
|
-
this.attribList.push(attr);
|
|
2679
|
-
(_a = this.attributeHandler) === null || _a === void 0 || _a.call(this, attr);
|
|
2680
|
-
}
|
|
2681
|
-
/**
|
|
2682
|
-
* End parsing. This performs final well-formedness checks and resets the
|
|
2683
|
-
* parser to a clean state.
|
|
2684
|
-
*
|
|
2685
|
-
* @returns this
|
|
2686
|
-
*/
|
|
2687
|
-
end() {
|
|
2688
|
-
var _a, _b;
|
|
2689
|
-
if (!this.sawRoot) this.fail("document must contain a root element.");
|
|
2690
|
-
const { tags } = this;
|
|
2691
|
-
while (tags.length > 0) {
|
|
2692
|
-
const tag = tags.pop();
|
|
2693
|
-
this.fail(`unclosed tag: ${tag.name}`);
|
|
2694
|
-
}
|
|
2695
|
-
if (this.state !== S_BEGIN && this.state !== S_TEXT) this.fail("unexpected end.");
|
|
2696
|
-
const { text } = this;
|
|
2697
|
-
if (text.length !== 0) {
|
|
2698
|
-
(_a = this.textHandler) === null || _a === void 0 || _a.call(this, text);
|
|
2699
|
-
this.text = "";
|
|
2700
|
-
}
|
|
2701
|
-
this._closed = true;
|
|
2702
|
-
(_b = this.endHandler) === null || _b === void 0 || _b.call(this);
|
|
2703
|
-
this._init();
|
|
2704
|
-
return this;
|
|
2705
|
-
}
|
|
2706
|
-
/**
|
|
2707
|
-
* Resolve a namespace prefix.
|
|
2708
|
-
*
|
|
2709
|
-
* @param prefix The prefix to resolve.
|
|
2710
|
-
*
|
|
2711
|
-
* @returns The namespace URI or ``undefined`` if the prefix is not defined.
|
|
2712
|
-
*/
|
|
2713
|
-
resolve(prefix) {
|
|
2714
|
-
var _a, _b;
|
|
2715
|
-
let uri = this.topNS[prefix];
|
|
2716
|
-
if (uri !== void 0) return uri;
|
|
2717
|
-
const { tags } = this;
|
|
2718
|
-
for (let index = tags.length - 1; index >= 0; index--) {
|
|
2719
|
-
uri = tags[index].ns[prefix];
|
|
2720
|
-
if (uri !== void 0) return uri;
|
|
2721
|
-
}
|
|
2722
|
-
uri = this.ns[prefix];
|
|
2723
|
-
if (uri !== void 0) return uri;
|
|
2724
|
-
return (_b = (_a = this.opt).resolvePrefix) === null || _b === void 0 ? void 0 : _b.call(_a, prefix);
|
|
2725
|
-
}
|
|
2726
|
-
/**
|
|
2727
|
-
* Parse a qname into its prefix and local name parts.
|
|
2728
|
-
*
|
|
2729
|
-
* @param name The name to parse
|
|
2730
|
-
*
|
|
2731
|
-
* @returns
|
|
2732
|
-
*/
|
|
2733
|
-
qname(name) {
|
|
2734
|
-
const colon = name.indexOf(":");
|
|
2735
|
-
if (colon === -1) return {
|
|
2736
|
-
prefix: "",
|
|
2737
|
-
local: name
|
|
2738
|
-
};
|
|
2739
|
-
const local = name.slice(colon + 1);
|
|
2740
|
-
const prefix = name.slice(0, colon);
|
|
2741
|
-
if (prefix === "" || local === "" || local.includes(":")) this.fail(`malformed name: ${name}.`);
|
|
2742
|
-
return {
|
|
2743
|
-
prefix,
|
|
2744
|
-
local
|
|
2745
|
-
};
|
|
2746
|
-
}
|
|
2747
|
-
processAttribsNS() {
|
|
2748
|
-
var _a;
|
|
2749
|
-
const { attribList } = this;
|
|
2750
|
-
const tag = this.tag;
|
|
2751
|
-
{
|
|
2752
|
-
const { prefix, local } = this.qname(tag.name);
|
|
2753
|
-
tag.prefix = prefix;
|
|
2754
|
-
tag.local = local;
|
|
2755
|
-
const uri = tag.uri = (_a = this.resolve(prefix)) !== null && _a !== void 0 ? _a : "";
|
|
2756
|
-
if (prefix !== "") {
|
|
2757
|
-
if (prefix === "xmlns") this.fail("tags may not have \"xmlns\" as prefix.");
|
|
2758
|
-
if (uri === "") {
|
|
2759
|
-
this.fail(`unbound namespace prefix: ${JSON.stringify(prefix)}.`);
|
|
2760
|
-
tag.uri = prefix;
|
|
2761
|
-
}
|
|
2762
|
-
}
|
|
2763
|
-
}
|
|
2764
|
-
if (attribList.length === 0) return;
|
|
2765
|
-
const { attributes } = tag;
|
|
2766
|
-
const seen = /* @__PURE__ */ new Set();
|
|
2767
|
-
for (const attr of attribList) {
|
|
2768
|
-
const { name, prefix, local } = attr;
|
|
2769
|
-
let uri;
|
|
2770
|
-
let eqname;
|
|
2771
|
-
if (prefix === "") {
|
|
2772
|
-
uri = name === "xmlns" ? XMLNS_NAMESPACE : "";
|
|
2773
|
-
eqname = name;
|
|
2774
|
-
} else {
|
|
2775
|
-
uri = this.resolve(prefix);
|
|
2776
|
-
if (uri === void 0) {
|
|
2777
|
-
this.fail(`unbound namespace prefix: ${JSON.stringify(prefix)}.`);
|
|
2778
|
-
uri = prefix;
|
|
2779
|
-
}
|
|
2780
|
-
eqname = `{${uri}}${local}`;
|
|
2781
|
-
}
|
|
2782
|
-
if (seen.has(eqname)) this.fail(`duplicate attribute: ${eqname}.`);
|
|
2783
|
-
seen.add(eqname);
|
|
2784
|
-
attr.uri = uri;
|
|
2785
|
-
attributes[name] = attr;
|
|
2786
|
-
}
|
|
2787
|
-
this.attribList = [];
|
|
2788
|
-
}
|
|
2789
|
-
processAttribsPlain() {
|
|
2790
|
-
const { attribList } = this;
|
|
2791
|
-
const attributes = this.tag.attributes;
|
|
2792
|
-
for (const { name, value } of attribList) {
|
|
2793
|
-
if (attributes[name] !== void 0) this.fail(`duplicate attribute: ${name}.`);
|
|
2794
|
-
attributes[name] = value;
|
|
2795
|
-
}
|
|
2796
|
-
this.attribList = [];
|
|
2797
|
-
}
|
|
2798
|
-
/**
|
|
2799
|
-
* Handle a complete open tag. This parser code calls this once it has seen
|
|
2800
|
-
* the whole tag. This method checks for well-formeness and then emits
|
|
2801
|
-
* ``onopentag``.
|
|
2802
|
-
*/
|
|
2803
|
-
openTag() {
|
|
2804
|
-
var _a;
|
|
2805
|
-
this.processAttribs();
|
|
2806
|
-
const { tags } = this;
|
|
2807
|
-
const tag = this.tag;
|
|
2808
|
-
tag.isSelfClosing = false;
|
|
2809
|
-
(_a = this.openTagHandler) === null || _a === void 0 || _a.call(this, tag);
|
|
2810
|
-
tags.push(tag);
|
|
2811
|
-
this.state = S_TEXT;
|
|
2812
|
-
this.name = "";
|
|
2813
|
-
}
|
|
2814
|
-
/**
|
|
2815
|
-
* Handle a complete self-closing tag. This parser code calls this once it has
|
|
2816
|
-
* seen the whole tag. This method checks for well-formeness and then emits
|
|
2817
|
-
* ``onopentag`` and ``onclosetag``.
|
|
2818
|
-
*/
|
|
2819
|
-
openSelfClosingTag() {
|
|
2820
|
-
var _a, _b, _c;
|
|
2821
|
-
this.processAttribs();
|
|
2822
|
-
const { tags } = this;
|
|
2823
|
-
const tag = this.tag;
|
|
2824
|
-
tag.isSelfClosing = true;
|
|
2825
|
-
(_a = this.openTagHandler) === null || _a === void 0 || _a.call(this, tag);
|
|
2826
|
-
(_b = this.closeTagHandler) === null || _b === void 0 || _b.call(this, tag);
|
|
2827
|
-
if ((this.tag = (_c = tags[tags.length - 1]) !== null && _c !== void 0 ? _c : null) === null) this.closedRoot = true;
|
|
2828
|
-
this.state = S_TEXT;
|
|
2829
|
-
this.name = "";
|
|
2830
|
-
}
|
|
2831
|
-
/**
|
|
2832
|
-
* Handle a complete close tag. This parser code calls this once it has seen
|
|
2833
|
-
* the whole tag. This method checks for well-formeness and then emits
|
|
2834
|
-
* ``onclosetag``.
|
|
2835
|
-
*/
|
|
2836
|
-
closeTag() {
|
|
2837
|
-
const { tags, name } = this;
|
|
2838
|
-
this.state = S_TEXT;
|
|
2839
|
-
this.name = "";
|
|
2840
|
-
if (name === "") {
|
|
2841
|
-
this.fail("weird empty close tag.");
|
|
2842
|
-
this.text += "</>";
|
|
2843
|
-
return;
|
|
2844
|
-
}
|
|
2845
|
-
const handler = this.closeTagHandler;
|
|
2846
|
-
let l = tags.length;
|
|
2847
|
-
while (l-- > 0) {
|
|
2848
|
-
const tag = this.tag = tags.pop();
|
|
2849
|
-
this.topNS = tag.ns;
|
|
2850
|
-
handler === null || handler === void 0 || handler(tag);
|
|
2851
|
-
if (tag.name === name) break;
|
|
2852
|
-
this.fail("unexpected close tag.");
|
|
2853
|
-
}
|
|
2854
|
-
if (l === 0) this.closedRoot = true;
|
|
2855
|
-
else if (l < 0) {
|
|
2856
|
-
this.fail(`unmatched closing tag: ${name}.`);
|
|
2857
|
-
this.text += `</${name}>`;
|
|
2858
|
-
}
|
|
2859
|
-
}
|
|
2860
|
-
/**
|
|
2861
|
-
* Resolves an entity. Makes any necessary well-formedness checks.
|
|
2862
|
-
*
|
|
2863
|
-
* @param entity The entity to resolve.
|
|
2864
|
-
*
|
|
2865
|
-
* @returns The parsed entity.
|
|
2866
|
-
*/
|
|
2867
|
-
parseEntity(entity) {
|
|
2868
|
-
if (entity[0] !== "#") {
|
|
2869
|
-
const defined = this.ENTITIES[entity];
|
|
2870
|
-
if (defined !== void 0) return defined;
|
|
2871
|
-
this.fail(this.isName(entity) ? "undefined entity." : "disallowed character in entity name.");
|
|
2872
|
-
return `&${entity};`;
|
|
2873
|
-
}
|
|
2874
|
-
let num = NaN;
|
|
2875
|
-
if (entity[1] === "x" && /^#x[0-9a-f]+$/i.test(entity)) num = parseInt(entity.slice(2), 16);
|
|
2876
|
-
else if (/^#[0-9]+$/.test(entity)) num = parseInt(entity.slice(1), 10);
|
|
2877
|
-
if (!this.isChar(num)) {
|
|
2878
|
-
this.fail("malformed character entity.");
|
|
2879
|
-
return `&${entity};`;
|
|
2880
|
-
}
|
|
2881
|
-
return String.fromCodePoint(num);
|
|
2882
|
-
}
|
|
2883
|
-
};
|
|
2884
|
-
exports.SaxesParser = SaxesParser;
|
|
2885
|
-
}));
|
|
2886
|
-
//#endregion
|
|
2887
|
-
//#region node_modules/.pnpm/fd-slicer@1.1.0/node_modules/fd-slicer/index.js
|
|
2888
|
-
var require_fd_slicer = /* @__PURE__ */ __commonJSMin(((exports) => {
|
|
2889
|
-
var fs$2 = __require("fs");
|
|
2890
|
-
var util$1 = __require("util");
|
|
2891
|
-
var stream = __require("stream");
|
|
2892
|
-
var Readable = stream.Readable;
|
|
2893
|
-
var Writable = stream.Writable;
|
|
2894
|
-
var PassThrough = stream.PassThrough;
|
|
2895
|
-
var Pend = require_pend();
|
|
2896
|
-
var EventEmitter$1 = __require("events").EventEmitter;
|
|
2897
|
-
exports.createFromBuffer = createFromBuffer;
|
|
2898
|
-
exports.createFromFd = createFromFd;
|
|
2899
|
-
exports.BufferSlicer = BufferSlicer;
|
|
2900
|
-
exports.FdSlicer = FdSlicer;
|
|
2901
|
-
util$1.inherits(FdSlicer, EventEmitter$1);
/**
 * Event emitter wrapping a file descriptor. Operations are queued through a
 * `Pend` instance limited to one job at a time.
 *
 * @param fd The file descriptor to operate on.
 * @param options Optional settings; a truthy `autoClose` makes `unref()`
 *   close the descriptor once the reference count drops to zero.
 */
function FdSlicer(fd, options) {
  const settings = options || {};
  EventEmitter$1.call(this);
  this.fd = fd;
  const queue = new Pend();
  this.pend = queue;
  queue.max = 1;
  this.refCount = 0;
  this.autoClose = Boolean(settings.autoClose);
}
|
|
2911
|
-
/**
 * Queued equivalent of `fs.read` on the wrapped descriptor. The queue slot
 * is released before `callback` is invoked with `(err, bytesRead, buffer)`.
 */
FdSlicer.prototype.read = function(buffer, offset, length, position, callback) {
  this.pend.go((release) => {
    fs$2.read(this.fd, buffer, offset, length, position, (err, bytesRead, filled) => {
      release();
      callback(err, bytesRead, filled);
    });
  });
};
|
|
2920
|
-
/**
 * Queued equivalent of `fs.write` on the wrapped descriptor. The queue slot
 * is released before `callback` is invoked with `(err, written, buffer)`.
 */
FdSlicer.prototype.write = function(buffer, offset, length, position, callback) {
  this.pend.go((release) => {
    fs$2.write(this.fd, buffer, offset, length, position, (err, written, source) => {
      release();
      callback(err, written, source);
    });
  });
};
|
|
2929
|
-
/**
 * Creates a `ReadStream` bound to this slicer.
 *
 * @param options Passed through to the `ReadStream` constructor.
 */
FdSlicer.prototype.createReadStream = function(options) {
  const reader = new ReadStream(this, options);
  return reader;
};
|
|
2932
|
-
/**
 * Creates a `WriteStream` bound to this slicer.
 *
 * @param options Passed through to the `WriteStream` constructor.
 */
FdSlicer.prototype.createWriteStream = function(options) {
  const writer = new WriteStream(this, options);
  return writer;
};
|
|
2935
|
-
/** Increments the reference count by one. */
FdSlicer.prototype.ref = function() {
  this.refCount = this.refCount + 1;
};
|
|
2938
|
-
/**
 * Decrements the reference count. When it reaches zero and `autoClose` is
 * set, the descriptor is closed and "close" (or "error" on failure) is
 * emitted.
 *
 * @throws {Error} "invalid unref" when the count would drop below zero.
 */
FdSlicer.prototype.unref = function() {
  this.refCount -= 1;
  if (this.refCount > 0) return;
  if (this.refCount < 0) throw new Error("invalid unref");
  if (!this.autoClose) return;
  fs$2.close(this.fd, (err) => {
    if (err) {
      this.emit("error", err);
    } else {
      this.emit("close");
    }
  });
};
|
|
2949
|
-
util$1.inherits(ReadStream, Readable);
/**
 * Readable stream over a range of the descriptor held by `context`. Takes a
 * reference on the context for as long as the stream is alive.
 *
 * @param context The owning slicer (must provide `ref()`).
 * @param options Readable options plus `start` (defaults to 0) and `end`
 *   (offset at which reading stops; unbounded when absent).
 */
function ReadStream(context, options) {
  const settings = options || {};
  Readable.call(this, settings);
  this.context = context;
  context.ref();
  const begin = settings.start || 0;
  this.start = begin;
  this.endOffset = settings.end;
  this.pos = begin;
  this.destroyed = false;
}
|
|
2960
|
-
/**
 * Readable `_read` implementation: reads the next piece of the range from
 * the context's file descriptor and pushes it downstream.
 *
 * The amount requested is capped by the stream's high-water mark and, when
 * an end offset is set, by the bytes remaining before it. When nothing is
 * left to read, or the descriptor reports 0 bytes, the stream is ended
 * (`push(null)`) and the reference on the context is released. The actual
 * read is queued through the context's `pend` so it is serialized with other
 * operations on the same descriptor.
 *
 * @param n Number of bytes requested by the stream machinery.
 */
ReadStream.prototype._read = function(n) {
  const self = this;
  if (self.destroyed) return;
  let toRead = Math.min(self._readableState.highWaterMark, n);
  if (self.endOffset != null) toRead = Math.min(toRead, self.endOffset - self.pos);
  if (toRead <= 0) {
    self.destroyed = true;
    self.push(null);
    self.context.unref();
    return;
  }
  self.context.pend.go(function(cb) {
    // The stream may have been destroyed while this job sat in the queue.
    if (self.destroyed) return cb();
    // Buffer.alloc replaces the deprecated `new Buffer(size)` constructor
    // (DEP0005) and guarantees zero-filled memory.
    const buffer = Buffer.alloc(toRead);
    fs$2.read(self.context.fd, buffer, 0, toRead, self.pos, function(err, bytesRead) {
      if (err) self.destroy(err);
      else if (bytesRead === 0) {
        self.destroyed = true;
        self.push(null);
        self.context.unref();
      } else {
        self.pos += bytesRead;
        // Only expose the bytes that were actually read.
        self.push(buffer.slice(0, bytesRead));
      }
      cb();
    });
  });
};
|
|
2988
|
-
/**
 * Destroys the stream once: marks it destroyed, emits "error" with the given
 * error (or a generic "stream destroyed" error) and releases the reference
 * on the context. Further calls are ignored.
 *
 * @param err Optional error to emit.
 */
ReadStream.prototype.destroy = function(err) {
  if (this.destroyed) return;
  const reason = err || /* @__PURE__ */ new Error("stream destroyed");
  this.destroyed = true;
  this.emit("error", reason);
  this.context.unref();
};
|
|
2995
|
-
util$1.inherits(WriteStream, Writable);
|
|
2996
|
-
/**
 * Writable stream that writes into the descriptor owned by `context`,
 * starting at `options.start`.
 *
 * The constructor takes one reference on `context`; the "finish" event is
 * wired to `destroy()`.
 *
 * @param {object} context - slicer exposing `ref()`; stored as `this.context`.
 * @param {object} [options] - forwarded to `Writable`; `start` defaults to 0 and
 *   `end` (coerced to a number) defaults to `Infinity`.
 */
function WriteStream(context, options) {
  const settings = options || {};
  Writable.call(this, settings);

  this.context = context;
  context.ref();

  const hasEnd = settings.end != null;
  this.start = settings.start || 0;
  this.endOffset = hasEnd ? +settings.end : Infinity;
  this.bytesWritten = 0;
  this.pos = this.start;
  this.destroyed = false;

  this.on("finish", this.destroy.bind(this));
}
|
|
3008
|
-
/**
 * `Writable#_write` implementation: writes `buffer` to `context.fd` at the
 * current position.
 *
 * A chunk that would go past `endOffset` destroys the stream and fails with an
 * `ETOOBIG` error. On success `bytesWritten` and `pos` advance by the number
 * of bytes reported by `fs.write` and a "progress" event is emitted.
 *
 * @param {Buffer} buffer - chunk to write.
 * @param {string} encoding - unused.
 * @param {function(Error=)} callback - completion callback. Not invoked when
 *   the stream is already destroyed on entry.
 */
WriteStream.prototype._write = function(buffer, encoding, callback) {
  const stream = this;
  if (stream.destroyed) return;

  const wouldOverflow = stream.pos + buffer.length > stream.endOffset;
  if (wouldOverflow) {
    const tooBig = /* @__PURE__ */ new Error("maximum file length exceeded");
    tooBig.code = "ETOOBIG";
    stream.destroy();
    callback(tooBig);
    return;
  }

  stream.context.pend.go(function(release) {
    if (stream.destroyed) return release();

    const onWritten = function(writeErr, bytes) {
      if (writeErr) {
        stream.destroy();
        release();
        callback(writeErr);
        return;
      }
      stream.bytesWritten += bytes;
      stream.pos += bytes;
      stream.emit("progress");
      release();
      callback();
    };

    fs$2.write(stream.context.fd, buffer, 0, buffer.length, stream.pos, onWritten);
  });
};
|
|
3035
|
-
/**
 * Marks the stream destroyed and releases its reference on the context.
 * Only the first call has any effect.
 */
WriteStream.prototype.destroy = function() {
  if (!this.destroyed) {
    this.destroyed = true;
    this.context.unref();
  }
};
|
|
3040
|
-
util$1.inherits(BufferSlicer, EventEmitter$1);
|
|
3041
|
-
/**
 * In-memory counterpart of the fd-backed slicer: serves reads, writes and
 * streams out of a single Buffer.
 *
 * @param {Buffer} buffer - backing storage; stored as-is, not copied.
 * @param {object} [options]
 * @param {number} [options.maxChunkSize] - largest chunk written per call by
 *   `createReadStream`; any falsy value means `Number.MAX_SAFE_INTEGER`.
 */
function BufferSlicer(buffer, options) {
  EventEmitter$1.call(this);
  const settings = options || {};
  this.refCount = 0;
  this.buffer = buffer;
  this.maxChunkSize = settings.maxChunkSize || Number.MAX_SAFE_INTEGER;
}
|
|
3048
|
-
/**
 * `fs.read`-style read from the backing buffer.
 *
 * Copies up to `length` bytes starting at `position` of the backing buffer
 * into `buffer` at `offset`, then reports the number of bytes actually copied
 * on a later tick.
 *
 * The previous implementation reported the overshoot past the end of the
 * backing buffer instead of the copied byte count, and reported a full read
 * when `position` was exactly at the end, so callers comparing the result
 * with `length` could miss an EOF.
 *
 * @param {Buffer} buffer - destination.
 * @param {number} offset - start index in `buffer`.
 * @param {number} length - number of bytes requested.
 * @param {number} position - start index in the backing buffer.
 * @param {function(null, number)} callback - receives `(null, bytesRead)`.
 */
BufferSlicer.prototype.read = function(buffer, offset, length, position, callback) {
  var end = position + length;
  // Buffer#copy clamps to the bounds of both buffers and returns how many
  // bytes it really copied, which is exactly the "bytesRead" contract.
  var bytesRead = this.buffer.copy(buffer, offset, position, end);
  setImmediate(function() {
    callback(null, bytesRead);
  });
};
|
|
3057
|
-
/**
 * `fs.write`-style write into the backing buffer.
 *
 * Copies `buffer[offset, offset + length)` into the backing buffer at
 * `position` and, on a later tick, calls back with `(null, length, buffer)`.
 *
 * @param {Buffer} buffer - source data.
 * @param {number} offset - start index in `buffer`.
 * @param {number} length - number of bytes to take from `buffer`.
 * @param {number} position - destination index in the backing buffer.
 * @param {function(null, number, Buffer)} callback - completion callback.
 */
BufferSlicer.prototype.write = function(buffer, offset, length, position, callback) {
  const sourceEnd = offset + length;
  buffer.copy(this.buffer, position, offset, sourceEnd);
  setImmediate(() => {
    callback(null, length, buffer);
  });
};
|
|
3063
|
-
/**
 * Creates a stream over `[options.start, options.end)` of the backing buffer.
 *
 * The whole range is written into a `PassThrough` up front, split into pieces
 * of at most `maxChunkSize` bytes, and the stream is then ended.
 *
 * `end` is now tested with an explicit null check: the previous
 * `options.end || buffer.length` treated a legitimate `end: 0` as "read to the
 * end of the buffer", unlike the fd-backed stream, which uses `!= null`.
 *
 * @param {object} [options] - forwarded to `PassThrough`; `start` defaults to 0
 *   and `end` to the buffer length.
 * @returns {PassThrough} stream carrying `start`, `endOffset`, `pos` and
 *   `destroyed` properties; its `destroy()` only sets the `destroyed` flag.
 */
BufferSlicer.prototype.createReadStream = function(options) {
  options = options || {};
  var readStream = new PassThrough(options);
  readStream.destroyed = false;
  readStream.start = options.start || 0;
  readStream.endOffset = options.end;
  // Only a missing end falls back to the buffer length; 0 is a valid end.
  readStream.pos = readStream.endOffset != null ? readStream.endOffset : this.buffer.length;

  var entireSlice = this.buffer.slice(readStream.start, readStream.pos);
  var offset = 0;
  while (true) {
    var nextOffset = offset + this.maxChunkSize;
    if (nextOffset >= entireSlice.length) {
      // Final (possibly short) piece; skipped entirely for an empty range.
      if (offset < entireSlice.length) readStream.write(entireSlice.slice(offset, entireSlice.length));
      break;
    }
    readStream.write(entireSlice.slice(offset, nextOffset));
    offset = nextOffset;
  }
  readStream.end();

  readStream.destroy = function() {
    readStream.destroyed = true;
  };
  return readStream;
};
|
|
3087
|
-
/**
 * Creates a `Writable` that copies incoming chunks into the backing buffer,
 * starting at `options.start`.
 *
 * A chunk that would go past `endOffset` marks the stream destroyed and fails
 * with an `ETOOBIG` error; otherwise `bytesWritten` / `pos` advance and a
 * "progress" event is emitted.
 *
 * @param {object} [options] - forwarded to `Writable`; `start` defaults to 0
 *   and `end` (coerced to a number) defaults to the buffer length.
 * @returns {Writable} stream whose `destroy()` only sets the `destroyed` flag.
 */
BufferSlicer.prototype.createWriteStream = function(options) {
  const slicer = this;
  const settings = options || {};
  const sink = new Writable(settings);

  sink.start = settings.start || 0;
  sink.endOffset = settings.end == null ? slicer.buffer.length : +settings.end;
  sink.bytesWritten = 0;
  sink.pos = sink.start;
  sink.destroyed = false;

  sink._write = function(buffer, encoding, callback) {
    if (sink.destroyed) return;

    const nextPos = sink.pos + buffer.length;
    if (nextPos > sink.endOffset) {
      const tooBig = /* @__PURE__ */ new Error("maximum file length exceeded");
      tooBig.code = "ETOOBIG";
      sink.destroyed = true;
      callback(tooBig);
      return;
    }

    buffer.copy(slicer.buffer, sink.pos, 0, buffer.length);
    sink.bytesWritten += buffer.length;
    sink.pos = nextPos;
    sink.emit("progress");
    callback();
  };

  sink.destroy = function() {
    sink.destroyed = true;
  };

  return sink;
};
|
|
3117
|
-
/** Registers one more holder of this slicer by incrementing `refCount`. */
BufferSlicer.prototype.ref = function() {
  this.refCount = this.refCount + 1;
};
|
|
3120
|
-
/**
 * Drops one holder of this slicer by decrementing `refCount`.
 *
 * @throws {Error} "invalid unref" when the count goes below zero (the count is
 *   left decremented in that case).
 */
BufferSlicer.prototype.unref = function() {
  const remaining = this.refCount - 1;
  this.refCount = remaining;
  if (remaining < 0) {
    throw new Error("invalid unref");
  }
};
|
|
3124
|
-
/**
 * Factory for a buffer-backed slicer.
 *
 * @param {Buffer} buffer - backing storage.
 * @param {object} [options] - passed through to `BufferSlicer`.
 * @returns {BufferSlicer}
 */
function createFromBuffer(buffer, options) {
  const slicer = new BufferSlicer(buffer, options);
  return slicer;
}
|
|
3127
|
-
/**
 * Factory for a file-descriptor-backed slicer.
 *
 * @param {number} fd - open file descriptor.
 * @param {object} [options] - passed through to `FdSlicer`.
 * @returns {FdSlicer}
 */
function createFromFd(fd, options) {
  const slicer = new FdSlicer(fd, options);
  return slicer;
}
|
|
3130
|
-
}));
|
|
3131
|
-
//#endregion
|
|
3132
|
-
//#region node_modules/.pnpm/yauzl@2.10.0/node_modules/yauzl/index.js
|
|
3133
|
-
var require_yauzl = /* @__PURE__ */ __commonJSMin(((exports) => {
|
|
3134
|
-
var fs$1 = __require("fs");
|
|
3135
|
-
var zlib = __require("zlib");
|
|
3136
|
-
var fd_slicer = require_fd_slicer();
|
|
3137
|
-
var crc32 = require_buffer_crc32();
|
|
3138
|
-
var util = __require("util");
|
|
3139
|
-
var EventEmitter = __require("events").EventEmitter;
|
|
3140
|
-
var Transform = __require("stream").Transform;
|
|
3141
|
-
var PassThrough = __require("stream").PassThrough;
|
|
3142
|
-
var Writable = __require("stream").Writable;
|
|
3143
|
-
exports.open = open;
|
|
3144
|
-
exports.fromFd = fromFd;
|
|
3145
|
-
exports.fromBuffer = fromBuffer;
|
|
3146
|
-
exports.fromRandomAccessReader = fromRandomAccessReader;
|
|
3147
|
-
exports.dosDateTimeToDate = dosDateTimeToDate;
|
|
3148
|
-
exports.validateFileName = validateFileName;
|
|
3149
|
-
exports.ZipFile = ZipFile;
|
|
3150
|
-
exports.Entry = Entry;
|
|
3151
|
-
exports.RandomAccessReader = RandomAccessReader;
|
|
3152
|
-
/**
 * Opens the zip file at `path` for reading and hands a ZipFile to `callback`.
 *
 * Missing options are filled in on the caller's `options` object itself:
 * autoClose=true, lazyEntries=false, decodeStrings=true,
 * validateEntrySizes=true, strictFileNames=false. If `fromFd` fails, the
 * descriptor opened here is closed before the error is reported.
 *
 * @param {string} path - file to open (read-only).
 * @param {object|function} [options] - options, or the callback when omitted.
 * @param {function(Error, ZipFile=)} [callback] - defaults to `defaultCallback`.
 */
function open(path, options, callback) {
  if (typeof options === "function") {
    callback = options;
    options = null;
  }
  if (options == null) options = {};

  const fallbacks = {
    autoClose: true,
    lazyEntries: false,
    decodeStrings: true,
    validateEntrySizes: true,
    strictFileNames: false
  };
  for (const name of Object.keys(fallbacks)) {
    if (options[name] == null) options[name] = fallbacks[name];
  }
  if (callback == null) callback = defaultCallback;

  fs$1.open(path, "r", function(openErr, fd) {
    if (openErr) return callback(openErr);
    fromFd(fd, options, function(zipErr, zipfile) {
      // Do not leak the descriptor we opened when parsing fails.
      if (zipErr) fs$1.close(fd, defaultCallback);
      callback(zipErr, zipfile);
    });
  });
}
|
|
3172
|
-
/**
 * Builds a ZipFile on top of an already-open file descriptor.
 *
 * Missing options are filled in on the caller's `options` object itself:
 * autoClose=false, lazyEntries=false, decodeStrings=true,
 * validateEntrySizes=true, strictFileNames=false. The file size comes from
 * `fstat`, and the descriptor is wrapped in an fd slicer created with
 * `{ autoClose: true }`.
 *
 * @param {number} fd - open file descriptor.
 * @param {object|function} [options] - options, or the callback when omitted.
 * @param {function(Error, ZipFile=)} [callback] - defaults to `defaultCallback`.
 */
function fromFd(fd, options, callback) {
  if (typeof options === "function") {
    callback = options;
    options = null;
  }
  if (options == null) options = {};

  const fallbacks = {
    autoClose: false,
    lazyEntries: false,
    decodeStrings: true,
    validateEntrySizes: true,
    strictFileNames: false
  };
  for (const name of Object.keys(fallbacks)) {
    if (options[name] == null) options[name] = fallbacks[name];
  }
  if (callback == null) callback = defaultCallback;

  fs$1.fstat(fd, function(statErr, stats) {
    if (statErr) return callback(statErr);
    const reader = fd_slicer.createFromFd(fd, { autoClose: true });
    fromRandomAccessReader(reader, stats.size, options, callback);
  });
}
|
|
3189
|
-
/**
 * Builds a ZipFile from a zip archive held entirely in memory.
 *
 * `options.autoClose` is always forced to false. The other missing options
 * are filled in on the caller's `options` object itself: lazyEntries=false,
 * decodeStrings=true, validateEntrySizes=true, strictFileNames=false. The
 * buffer is wrapped in a buffer slicer with a 65536-byte max chunk size.
 *
 * @param {Buffer} buffer - complete zip file contents.
 * @param {object|function} [options] - options, or the callback when omitted.
 * @param {function(Error, ZipFile=)} [callback] - passed on unchanged.
 */
function fromBuffer(buffer, options, callback) {
  if (typeof options === "function") {
    callback = options;
    options = null;
  }
  if (options == null) options = {};
  options.autoClose = false;

  const fallbacks = {
    lazyEntries: false,
    decodeStrings: true,
    validateEntrySizes: true,
    strictFileNames: false
  };
  for (const name of Object.keys(fallbacks)) {
    if (options[name] == null) options[name] = fallbacks[name];
  }

  const reader = fd_slicer.createFromBuffer(buffer, { maxChunkSize: 65536 });
  fromRandomAccessReader(reader, buffer.length, options, callback);
}
|
|
3202
|
-
/**
 * Locates the End of Central Directory Record through `reader` and hands a
 * ZipFile to `callback`.
 *
 * The last `22 + 65535` bytes (or the whole file when smaller) are read and
 * scanned backwards for the EOCDR signature. When the record's entry count is
 * 0xffff or its central directory offset is 0xffffffff, the ZIP64 locator and
 * ZIP64 EOCDR are read and their 64-bit values are used instead.
 *
 * Missing options are filled in on the caller's `options` object itself:
 * autoClose=true, lazyEntries=false, decodeStrings=true,
 * validateEntrySizes=true, strictFileNames=false.
 *
 * @param {object} reader - random access reader; `ref()` is called on it.
 * @param {number} totalSize - size of the zip file in bytes.
 * @param {object|function} [options] - options, or the callback when omitted.
 * @param {function(Error, ZipFile=)} [callback] - defaults to `defaultCallback`.
 * @throws {Error} synchronously when `totalSize` is not a number or exceeds
 *   `Number.MAX_SAFE_INTEGER`.
 */
function fromRandomAccessReader(reader, totalSize, options, callback) {
  if (typeof options === "function") {
    callback = options;
    options = null;
  }
  if (options == null) options = {};

  const fallbacks = {
    autoClose: true,
    lazyEntries: false,
    decodeStrings: true,
    validateEntrySizes: true,
    strictFileNames: false
  };
  for (const name of Object.keys(fallbacks)) {
    if (options[name] == null) options[name] = fallbacks[name];
  }
  const decodeStrings = !!options.decodeStrings;
  if (callback == null) callback = defaultCallback;

  if (typeof totalSize !== "number") throw new Error("expected totalSize parameter to be a number");
  if (totalSize > Number.MAX_SAFE_INTEGER) throw new Error("zip file too large. only file sizes up to 2^52 are supported due to JavaScript's Number type being an IEEE 754 double.");

  reader.ref();

  const EOCDR_SIGNATURE = 101010256;
  const ZIP64_EOCDL_SIGNATURE = 117853008;
  const ZIP64_EOCDR_SIGNATURE = 101075792;
  const eocdrWithoutCommentSize = 22;

  // Hands the finished ZipFile to the caller; options are read at this point.
  const deliver = function(centralDirectoryOffset, entryCount, comment) {
    return callback(null, new ZipFile(reader, centralDirectoryOffset, totalSize, entryCount, comment, options.autoClose, options.lazyEntries, decodeStrings, options.validateEntrySizes, options.strictFileNames));
  };

  // The comment is at most 0xffff bytes, so the record must start within
  // this many bytes of the end of the file.
  const bufferSize = Math.min(eocdrWithoutCommentSize + 65535, totalSize);
  const tail = newBuffer(bufferSize);
  const tailStart = totalSize - tail.length;

  readAndAssertNoEof(reader, tail, 0, bufferSize, tailStart, function(tailErr) {
    if (tailErr) return callback(tailErr);

    // Scan backwards; the first hit from the end is the one that is used.
    let eocdrIndex = -1;
    for (let i = bufferSize - eocdrWithoutCommentSize; i >= 0; i -= 1) {
      if (tail.readUInt32LE(i) === EOCDR_SIGNATURE) {
        eocdrIndex = i;
        break;
      }
    }
    if (eocdrIndex === -1) {
      callback(/* @__PURE__ */ new Error("end of central directory record signature not found"));
      return;
    }

    const eocdr = tail.slice(eocdrIndex);
    const diskNumber = eocdr.readUInt16LE(4);
    if (diskNumber !== 0) return callback(/* @__PURE__ */ new Error("multi-disk zip files are not supported: found disk number: " + diskNumber));

    const entryCount = eocdr.readUInt16LE(10);
    const centralDirectoryOffset = eocdr.readUInt32LE(16);
    const commentLength = eocdr.readUInt16LE(20);
    const expectedCommentLength = eocdr.length - eocdrWithoutCommentSize;
    if (commentLength !== expectedCommentLength) return callback(/* @__PURE__ */ new Error("invalid comment length. expected: " + expectedCommentLength + ". found: " + commentLength));

    const comment = decodeStrings ? decodeBuffer(eocdr, 22, eocdr.length, false) : eocdr.slice(22);

    const needsZip64 = entryCount === 65535 || centralDirectoryOffset === 4294967295;
    if (!needsZip64) return deliver(centralDirectoryOffset, entryCount, comment);

    // ZIP64: the 20-byte locator sits immediately before the EOCDR.
    const locator = newBuffer(20);
    const locatorOffset = tailStart + eocdrIndex - locator.length;
    readAndAssertNoEof(reader, locator, 0, locator.length, locatorOffset, function(locatorErr) {
      if (locatorErr) return callback(locatorErr);
      if (locator.readUInt32LE(0) !== ZIP64_EOCDL_SIGNATURE) return callback(/* @__PURE__ */ new Error("invalid zip64 end of central directory locator signature"));

      const zip64EocdrOffset = readUInt64LE(locator, 8);
      const zip64Eocdr = newBuffer(56);
      readAndAssertNoEof(reader, zip64Eocdr, 0, zip64Eocdr.length, zip64EocdrOffset, function(recordErr) {
        if (recordErr) return callback(recordErr);
        if (zip64Eocdr.readUInt32LE(0) !== ZIP64_EOCDR_SIGNATURE) return callback(/* @__PURE__ */ new Error("invalid zip64 end of central directory record signature"));

        const entryCount64 = readUInt64LE(zip64Eocdr, 32);
        const centralDirectoryOffset64 = readUInt64LE(zip64Eocdr, 48);
        return deliver(centralDirectoryOffset64, entryCount64, comment);
      });
    });
  });
}
|
|
3256
|
-
util.inherits(ZipFile, EventEmitter);
|
|
3257
|
-
/**
 * Event-emitting handle on an opened zip archive.
 *
 * Forwards the reader's "error" events (through `emitError`) and its first
 * "close" event, records where the central directory starts, and begins
 * reading entries immediately unless `lazyEntries` is set.
 *
 * @param {object} reader - random access reader (must support `on` / `once`).
 * @param {number} centralDirectoryOffset - offset of the first central
 *   directory header; used as the initial entry cursor.
 * @param {number} fileSize - total size of the archive.
 * @param {number} entryCount - number of entries in the central directory.
 * @param {string|Buffer} comment - archive comment.
 * @param {boolean} autoClose - coerced to boolean.
 * @param {boolean} lazyEntries - coerced to boolean.
 * @param {boolean} decodeStrings - coerced to boolean.
 * @param {boolean} validateEntrySizes - coerced to boolean.
 * @param {boolean} strictFileNames - coerced to boolean.
 */
function ZipFile(reader, centralDirectoryOffset, fileSize, entryCount, comment, autoClose, lazyEntries, decodeStrings, validateEntrySizes, strictFileNames) {
  const zipfile = this;
  EventEmitter.call(zipfile);

  zipfile.reader = reader;
  zipfile.reader.on("error", (err) => {
    emitError(zipfile, err);
  });
  zipfile.reader.once("close", () => {
    zipfile.emit("close");
  });

  zipfile.readEntryCursor = centralDirectoryOffset;
  zipfile.fileSize = fileSize;
  zipfile.entryCount = entryCount;
  zipfile.comment = comment;
  zipfile.entriesRead = 0;

  zipfile.autoClose = Boolean(autoClose);
  zipfile.lazyEntries = Boolean(lazyEntries);
  zipfile.decodeStrings = Boolean(decodeStrings);
  zipfile.validateEntrySizes = Boolean(validateEntrySizes);
  zipfile.strictFileNames = Boolean(strictFileNames);

  zipfile.isOpen = true;
  zipfile.emittedError = false;

  if (!zipfile.lazyEntries) zipfile._readEntry();
}
|
|
3281
|
-
/**
 * Closes the archive once: clears `isOpen` and releases this ZipFile's
 * reference on the reader. Further calls do nothing.
 */
ZipFile.prototype.close = function() {
  if (this.isOpen) {
    this.isOpen = false;
    this.reader.unref();
  }
};
|
|
3286
|
-
/**
 * Reports `err` on `self`, closing the archive first when `autoClose` is set.
 *
 * @param {ZipFile} self - archive the error belongs to.
 * @param {Error} err - error to report.
 */
function emitErrorAndAutoClose(self, err) {
  const shouldClose = self.autoClose;
  if (shouldClose) {
    self.close();
  }
  emitError(self, err);
}
|
|
3290
|
-
/**
 * Emits "error" on `self` at most once; later errors are dropped.
 *
 * @param {ZipFile} self - emitter carrying the `emittedError` latch.
 * @param {Error} err - error to emit.
 */
function emitError(self, err) {
  if (!self.emittedError) {
    self.emittedError = true;
    self.emit("error", err);
  }
}
|
|
3295
|
-
/**
 * Requests the next entry. Only valid when the archive was opened with
 * `lazyEntries: true`.
 *
 * @throws {Error} when `lazyEntries` is not set.
 */
ZipFile.prototype.readEntry = function() {
  if (this.lazyEntries) {
    this._readEntry();
    return;
  }
  throw new Error("readEntry() called without lazyEntries:true");
};
|
|
3299
|
-
/**
 * Reads the central directory header at `readEntryCursor`, builds an Entry
 * from it and emits "entry".
 *
 * Once every entry has been read, "end" is emitted on a later tick instead
 * (after closing the archive when `autoClose` is set), unless an error was
 * already emitted. Any failure goes through `emitErrorAndAutoClose`.
 * When `lazyEntries` is off, the next entry is read right after the emit.
 */
ZipFile.prototype._readEntry = function() {
  const zipfile = this;
  const FIXED_HEADER_SIZE = 46;
  const HEADER_SIGNATURE = 33639248;
  const ZIP64_PLACEHOLDER = 4294967295;
  const fail = function(error) {
    return emitErrorAndAutoClose(zipfile, error);
  };

  if (zipfile.entryCount === zipfile.entriesRead) {
    setImmediate(function() {
      if (zipfile.autoClose) zipfile.close();
      if (!zipfile.emittedError) zipfile.emit("end");
    });
    return;
  }
  if (zipfile.emittedError) return;

  const fixedHeader = newBuffer(FIXED_HEADER_SIZE);
  readAndAssertNoEof(zipfile.reader, fixedHeader, 0, fixedHeader.length, zipfile.readEntryCursor, function(headerErr) {
    if (headerErr) return fail(headerErr);
    if (zipfile.emittedError) return;

    const entry = new Entry();
    const signature = fixedHeader.readUInt32LE(0);
    if (signature !== HEADER_SIGNATURE) return fail(/* @__PURE__ */ new Error("invalid central directory file header signature: 0x" + signature.toString(16)));

    // [field name, byte offset, width in bytes] for the fixed-size part.
    const layout = [
      ["versionMadeBy", 4, 2],
      ["versionNeededToExtract", 6, 2],
      ["generalPurposeBitFlag", 8, 2],
      ["compressionMethod", 10, 2],
      ["lastModFileTime", 12, 2],
      ["lastModFileDate", 14, 2],
      ["crc32", 16, 4],
      ["compressedSize", 20, 4],
      ["uncompressedSize", 24, 4],
      ["fileNameLength", 28, 2],
      ["extraFieldLength", 30, 2],
      ["fileCommentLength", 32, 2],
      ["internalFileAttributes", 36, 2],
      ["externalFileAttributes", 38, 4],
      ["relativeOffsetOfLocalHeader", 42, 4]
    ];
    for (const [field, at, width] of layout) {
      entry[field] = width === 2 ? fixedHeader.readUInt16LE(at) : fixedHeader.readUInt32LE(at);
    }

    if (entry.generalPurposeBitFlag & 64) return fail(/* @__PURE__ */ new Error("strong encryption is not supported"));

    zipfile.readEntryCursor += FIXED_HEADER_SIZE;

    // Variable-size part: file name, extra fields, file comment (in that order).
    const variable = newBuffer(entry.fileNameLength + entry.extraFieldLength + entry.fileCommentLength);
    readAndAssertNoEof(zipfile.reader, variable, 0, variable.length, zipfile.readEntryCursor, function(variableErr) {
      if (variableErr) return fail(variableErr);
      if (zipfile.emittedError) return;

      const isUtf8 = (entry.generalPurposeBitFlag & 2048) !== 0;
      const nameEnd = entry.fileNameLength;
      entry.fileName = zipfile.decodeStrings ? decodeBuffer(variable, 0, nameEnd, isUtf8) : variable.slice(0, nameEnd);

      const commentStart = nameEnd + entry.extraFieldLength;
      const extraBytes = variable.slice(nameEnd, commentStart);
      entry.extraFields = [];
      let cursor = 0;
      while (cursor < extraBytes.length - 3) {
        const headerId = extraBytes.readUInt16LE(cursor + 0);
        const dataSize = extraBytes.readUInt16LE(cursor + 2);
        const dataStart = cursor + 4;
        const dataEnd = dataStart + dataSize;
        if (dataEnd > extraBytes.length) return fail(/* @__PURE__ */ new Error("extra field length exceeds extra field buffer size"));
        const fieldData = newBuffer(dataSize);
        extraBytes.copy(fieldData, 0, dataStart, dataEnd);
        entry.extraFields.push({
          id: headerId,
          data: fieldData
        });
        cursor = dataEnd;
      }

      const commentEnd = commentStart + entry.fileCommentLength;
      entry.fileComment = zipfile.decodeStrings ? decodeBuffer(variable, commentStart, commentEnd, isUtf8) : variable.slice(commentStart, commentEnd);
      entry.comment = entry.fileComment;

      zipfile.readEntryCursor += variable.length;
      zipfile.entriesRead += 1;

      // 0xffffffff in any of these three fields means the real value lives
      // in the ZIP64 extended information extra field (id 1).
      const usesZip64 = entry.uncompressedSize === ZIP64_PLACEHOLDER || entry.compressedSize === ZIP64_PLACEHOLDER || entry.relativeOffsetOfLocalHeader === ZIP64_PLACEHOLDER;
      if (usesZip64) {
        let zip64Data = null;
        for (const field of entry.extraFields) {
          if (field.id === 1) {
            zip64Data = field.data;
            break;
          }
        }
        if (zip64Data == null) return fail(/* @__PURE__ */ new Error("expected zip64 extended information extra field"));

        let index = 0;
        if (entry.uncompressedSize === ZIP64_PLACEHOLDER) {
          if (index + 8 > zip64Data.length) return fail(/* @__PURE__ */ new Error("zip64 extended information extra field does not include uncompressed size"));
          entry.uncompressedSize = readUInt64LE(zip64Data, index);
          index += 8;
        }
        if (entry.compressedSize === ZIP64_PLACEHOLDER) {
          if (index + 8 > zip64Data.length) return fail(/* @__PURE__ */ new Error("zip64 extended information extra field does not include compressed size"));
          entry.compressedSize = readUInt64LE(zip64Data, index);
          index += 8;
        }
        if (entry.relativeOffsetOfLocalHeader === ZIP64_PLACEHOLDER) {
          if (index + 8 > zip64Data.length) return fail(/* @__PURE__ */ new Error("zip64 extended information extra field does not include relative header offset"));
          entry.relativeOffsetOfLocalHeader = readUInt64LE(zip64Data, index);
          index += 8;
        }
      }

      // Extra field 0x7075 (28789): a UTF-8 file name, honoured only when its
      // version byte is 1 and its CRC matches the raw header file name.
      if (zipfile.decodeStrings) {
        for (const field of entry.extraFields) {
          if (field.id !== 28789) continue;
          if (field.data.length < 6) continue;
          if (field.data.readUInt8(0) !== 1) continue;
          const oldNameCrc32 = field.data.readUInt32LE(1);
          if (crc32.unsigned(variable.slice(0, entry.fileNameLength)) !== oldNameCrc32) continue;
          entry.fileName = decodeBuffer(field.data, 5, field.data.length, true);
          break;
        }
      }

      // Stored (method 0) entries must have matching sizes, allowing for the
      // 12 extra bytes added when the entry is encrypted.
      if (zipfile.validateEntrySizes && entry.compressionMethod === 0) {
        let expectedCompressedSize = entry.uncompressedSize;
        if (entry.isEncrypted()) expectedCompressedSize += 12;
        if (entry.compressedSize !== expectedCompressedSize) {
          const msg = "compressed/uncompressed size mismatch for stored file: " + entry.compressedSize + " != " + entry.uncompressedSize;
          return fail(new Error(msg));
        }
      }

      if (zipfile.decodeStrings) {
        if (!zipfile.strictFileNames) entry.fileName = entry.fileName.replace(/\\/g, "/");
        const errorMessage = validateFileName(entry.fileName, zipfile.validateFileNameOptions);
        if (errorMessage != null) return fail(new Error(errorMessage));
      }

      zipfile.emit("entry", entry);
      if (!zipfile.lazyEntries) zipfile._readEntry();
    });
  });
};
|
|
3418
|
-
/**
 * Opens a stream over the contents of `entry`.
 *
 * Reads the entry's 30-byte local file header to find where the file data
 * starts, then streams the compressed range from the reader. For deflated
 * entries (method 8) the data is piped through raw inflate unless
 * `options.decompress === false`, and through a byte-count check when
 * `validateEntrySizes` is set.
 *
 * @param {Entry} entry - entry obtained from this archive.
 * @param {object|function} [options] - `decrypt`, `decompress`, `start`, `end`;
 *   or the callback when omitted.
 * @param {function(Error, stream=)} callback - receives the readable stream.
 * @throws {Error} synchronously for invalid option combinations (only checked
 *   when both `options` and `callback` are supplied).
 */
ZipFile.prototype.openReadStream = function(entry, options, callback) {
  const zipfile = this;
  const LOCAL_HEADER_SIGNATURE = 67324752;
  let relativeStart = 0;
  let relativeEnd = entry.compressedSize;

  if (callback == null) {
    callback = options;
    options = {};
  } else {
    if (options.decrypt != null) {
      if (!entry.isEncrypted()) throw new Error("options.decrypt can only be specified for encrypted entries");
      if (options.decrypt !== false) throw new Error("invalid options.decrypt value: " + options.decrypt);
      if (entry.isCompressed() && options.decompress !== false) throw new Error("entry is encrypted and compressed, and options.decompress !== false");
    }
    if (options.decompress != null) {
      if (!entry.isCompressed()) throw new Error("options.decompress can only be specified for compressed entries");
      if (options.decompress !== false && options.decompress !== true) throw new Error("invalid options.decompress value: " + options.decompress);
    }
    const hasRange = options.start != null || options.end != null;
    if (hasRange) {
      // Ranges address raw stored bytes, so they need raw access.
      if (entry.isCompressed() && options.decompress !== false) throw new Error("start/end range not allowed for compressed entry without options.decompress === false");
      if (entry.isEncrypted() && options.decrypt !== false) throw new Error("start/end range not allowed for encrypted entry without options.decrypt === false");
    }
    if (options.start != null) {
      relativeStart = options.start;
      if (relativeStart < 0) throw new Error("options.start < 0");
      if (relativeStart > entry.compressedSize) throw new Error("options.start > entry.compressedSize");
    }
    if (options.end != null) {
      relativeEnd = options.end;
      if (relativeEnd < 0) throw new Error("options.end < 0");
      if (relativeEnd > entry.compressedSize) throw new Error("options.end > entry.compressedSize");
      if (relativeEnd < relativeStart) throw new Error("options.end < options.start");
    }
  }

  if (!zipfile.isOpen) return callback(/* @__PURE__ */ new Error("closed"));
  if (entry.isEncrypted() && options.decrypt !== false) return callback(/* @__PURE__ */ new Error("entry is encrypted, and options.decrypt !== false"));

  // Keep the reader alive while the local header is being read.
  zipfile.reader.ref();
  const localHeader = newBuffer(30);
  readAndAssertNoEof(zipfile.reader, localHeader, 0, localHeader.length, entry.relativeOffsetOfLocalHeader, function(readErr) {
    try {
      if (readErr) return callback(readErr);

      const signature = localHeader.readUInt32LE(0);
      if (signature !== LOCAL_HEADER_SIGNATURE) return callback(/* @__PURE__ */ new Error("invalid local file header signature: 0x" + signature.toString(16)));

      const fileNameLength = localHeader.readUInt16LE(26);
      const extraFieldLength = localHeader.readUInt16LE(28);
      const fileDataStart = entry.relativeOffsetOfLocalHeader + localHeader.length + fileNameLength + extraFieldLength;

      let decompress;
      switch (entry.compressionMethod) {
        case 0:
          decompress = false;
          break;
        case 8:
          decompress = options.decompress != null ? options.decompress : true;
          break;
        default:
          return callback(/* @__PURE__ */ new Error("unsupported compression method: " + entry.compressionMethod));
      }

      const fileDataEnd = fileDataStart + entry.compressedSize;
      if (entry.compressedSize !== 0 && fileDataEnd > zipfile.fileSize) {
        return callback(/* @__PURE__ */ new Error("file data overflows file bounds: " + fileDataStart + " + " + entry.compressedSize + " > " + zipfile.fileSize));
      }

      const readStream = zipfile.reader.createReadStream({
        start: fileDataStart + relativeStart,
        end: fileDataStart + relativeEnd
      });
      let endpointStream = readStream;

      if (decompress) {
        let destroyed = false;
        const inflateFilter = zlib.createInflateRaw();
        // Errors are re-emitted downstream on a later tick, unless the
        // consumer has destroyed the pipeline in the meantime.
        readStream.on("error", function(err) {
          setImmediate(function() {
            if (!destroyed) inflateFilter.emit("error", err);
          });
        });
        readStream.pipe(inflateFilter);

        if (zipfile.validateEntrySizes) {
          endpointStream = new AssertByteCountStream(entry.uncompressedSize);
          inflateFilter.on("error", function(err) {
            setImmediate(function() {
              if (!destroyed) endpointStream.emit("error", err);
            });
          });
          inflateFilter.pipe(endpointStream);
        } else {
          endpointStream = inflateFilter;
        }

        endpointStream.destroy = function() {
          destroyed = true;
          if (inflateFilter !== endpointStream) inflateFilter.unpipe(endpointStream);
          readStream.unpipe(inflateFilter);
          readStream.destroy();
        };
      }

      callback(null, endpointStream);
    } finally {
      zipfile.reader.unref();
    }
  });
};
|
|
3512
|
-
/**
 * One central directory record. Instances start empty; their fields are
 * assigned by the code that parses the central directory.
 */
function Entry() {}

/**
 * @returns {Date} last-modified timestamp decoded from the DOS date/time fields.
 */
Entry.prototype.getLastModDate = function() {
  const dosDate = this.lastModFileDate;
  const dosTime = this.lastModFileTime;
  return dosDateTimeToDate(dosDate, dosTime);
};

/**
 * @returns {boolean} true when bit 0 of the general purpose bit flag is set.
 */
Entry.prototype.isEncrypted = function() {
  const ENCRYPTED_BIT = 1;
  return (this.generalPurposeBitFlag & ENCRYPTED_BIT) !== 0;
};

/**
 * @returns {boolean} true when the compression method is 8.
 */
Entry.prototype.isCompressed = function() {
  const DEFLATE_METHOD = 8;
  return this.compressionMethod === DEFLATE_METHOD;
};
|
|
3522
|
-
/**
 * Converts packed DOS date and time words into a Date in the local time zone.
 *
 * date: bits 0-4 day, bits 5-8 month (1-based), bits 9-15 years since 1980.
 * time: bits 0-4 seconds / 2, bits 5-10 minutes, bits 11-15 hours.
 *
 * @param {number} date - packed DOS date.
 * @param {number} time - packed DOS time.
 * @returns {Date}
 */
function dosDateTimeToDate(date, time) {
  const year = ((date >> 9) & 0x7f) + 1980;
  const monthIndex = ((date >> 5) & 0x0f) - 1; // Date months are 0-based
  const day = date & 0x1f;

  const hour = (time >> 11) & 0x1f;
  const minute = (time >> 5) & 0x3f;
  const second = (time & 0x1f) * 2; // stored with 2-second resolution

  return new Date(year, monthIndex, day, hour, minute, second, 0);
}
|
|
3532
|
-
/**
 * Checks an entry file name for unsafe shapes.
 *
 * Rejects, in this order: any backslash, an absolute path (leading drive
 * letter plus colon, or a leading slash), and any ".." path segment.
 *
 * @param {string} fileName - name to check.
 * @returns {string|null} a description of the problem, or null when acceptable.
 */
function validateFileName(fileName) {
  const hasBackslash = fileName.indexOf("\\") !== -1;
  if (hasBackslash) {
    return "invalid characters in fileName: " + fileName;
  }

  const isAbsolute = /^(?:[a-zA-Z]:|\/)/.test(fileName);
  if (isAbsolute) {
    return "absolute path: " + fileName;
  }

  const segments = fileName.split("/");
  if (segments.includes("..")) {
    return "invalid relative path: " + fileName;
  }

  return null;
}
|
|
3538
|
-
/**
 * Reads exactly `length` bytes through `reader.read`, treating a short read
 * as an error.
 *
 * A zero-length read never touches the reader: the callback is invoked on a
 * later tick with `(null, <empty buffer>)`. A successful non-empty read calls
 * back with no arguments.
 *
 * @param {object} reader - object exposing `read(buffer, offset, length, position, cb)`.
 * @param {Buffer} buffer - destination.
 * @param {number} offset - start index in `buffer`.
 * @param {number} length - number of bytes required.
 * @param {number} position - offset in the underlying source.
 * @param {function(Error=)} callback - completion callback.
 */
function readAndAssertNoEof(reader, buffer, offset, length, position, callback) {
  if (length === 0) {
    return setImmediate(() => {
      callback(null, newBuffer(0));
    });
  }

  reader.read(buffer, offset, length, position, (err, bytesRead) => {
    if (err) return callback(err);
    const truncated = bytesRead < length;
    if (truncated) return callback(/* @__PURE__ */ new Error("unexpected EOF"));
    callback();
  });
}
|
|
3548
|
-
util.inherits(AssertByteCountStream, Transform);

/**
 * Pass-through transform that fails unless exactly `byteCount` bytes flow
 * through it.
 *
 * @param {number} byteCount - number of bytes the stream must carry.
 */
function AssertByteCountStream(byteCount) {
  Transform.call(this);
  this.expectedByteCount = byteCount;
  this.actualByteCount = 0;
}

/**
 * Forwards `chunk` unchanged, failing as soon as the running total exceeds
 * the expected byte count.
 */
AssertByteCountStream.prototype._transform = function(chunk, encoding, cb) {
  this.actualByteCount += chunk.length;
  const overflowed = this.actualByteCount > this.expectedByteCount;
  if (!overflowed) {
    cb(null, chunk);
    return;
  }
  const msg = "too many bytes in the stream. expected " + this.expectedByteCount + ". got at least " + this.actualByteCount;
  return cb(new Error(msg));
};

/**
 * At end of input, fails when fewer bytes than expected were seen.
 */
AssertByteCountStream.prototype._flush = function(cb) {
  const missingBytes = this.actualByteCount < this.expectedByteCount;
  if (!missingBytes) {
    cb();
    return;
  }
  const msg = "not enough bytes in the stream. expected " + this.expectedByteCount + ". got only " + this.actualByteCount;
  return cb(new Error(msg));
};
|
|
3569
|
-
util.inherits(RandomAccessReader, EventEmitter);
|
|
3570
|
-
/**
 * Base constructor for custom random access readers: initialises the event
 * emitter state and starts the reference count at zero.
 */
function RandomAccessReader() {
  EventEmitter.call(this);
  const initialRefCount = 0;
  this.refCount = initialRefCount;
}
|
|
3574
|
-
/** Registers one more holder of this reader by incrementing `refCount`. */
RandomAccessReader.prototype.ref = function() {
  this.refCount = this.refCount + 1;
};
|
|
3577
|
-
/**
 * Drops one holder of this reader. When the count reaches zero the reader is
 * closed via `this.close(cb)`, and the outcome is reported as an "error"
 * event (close failed) or a "close" event.
 *
 * @throws {Error} "invalid unref" when the count goes below zero.
 */
RandomAccessReader.prototype.unref = function() {
  const reader = this;
  reader.refCount -= 1;

  if (reader.refCount > 0) return;
  if (reader.refCount < 0) throw new Error("invalid unref");

  // Last holder gone: close and report the outcome as an event.
  reader.close(function(err) {
    if (err) return reader.emit("error", err);
    reader.emit("close");
  });
};
|
|
3588
|
-
/**
 * Creates a stream over `[options.start, options.end)` of the underlying
 * source.
 *
 * An empty range yields a `PassThrough` that is ended on a later tick.
 * Otherwise the stream from `_readStreamForRange` is piped through a
 * `RefUnrefFilter` and then through an `AssertByteCountStream` expecting
 * `end - start` bytes; the byte counter is returned.
 *
 * @param {{start: number, end: number}} options - range to read.
 * @returns {stream.Readable}
 */
RandomAccessReader.prototype.createReadStream = function(options) {
  const rangeStart = options.start;
  const rangeEnd = options.end;

  if (rangeStart === rangeEnd) {
    const emptyStream = new PassThrough();
    setImmediate(() => {
      emptyStream.end();
    });
    return emptyStream;
  }

  const source = this._readStreamForRange(rangeStart, rangeEnd);
  let destroyed = false;

  const refFilter = new RefUnrefFilter(this);
  // Upstream errors are re-emitted downstream on a later tick, unless the
  // returned stream has been destroyed in the meantime.
  source.on("error", (err) => {
    setImmediate(() => {
      if (!destroyed) refFilter.emit("error", err);
    });
  });
  refFilter.destroy = function() {
    source.unpipe(refFilter);
    refFilter.unref();
    source.destroy();
  };

  const byteCounter = new AssertByteCountStream(rangeEnd - rangeStart);
  refFilter.on("error", (err) => {
    setImmediate(() => {
      if (!destroyed) byteCounter.emit("error", err);
    });
  });
  byteCounter.destroy = function() {
    destroyed = true;
    refFilter.unpipe(byteCounter);
    refFilter.destroy();
  };

  source.pipe(refFilter);
  return refFilter.pipe(byteCounter);
};
|
|
3624
|
-
RandomAccessReader.prototype._readStreamForRange = function(start, end) {
|
|
3625
|
-
throw new Error("not implemented");
|
|
3626
|
-
};
|
|
3627
|
-
RandomAccessReader.prototype.read = function(buffer, offset, length, position, callback) {
|
|
3628
|
-
var readStream = this.createReadStream({
|
|
3629
|
-
start: position,
|
|
3630
|
-
end: position + length
|
|
3631
|
-
});
|
|
3632
|
-
var writeStream = new Writable();
|
|
3633
|
-
var written = 0;
|
|
3634
|
-
writeStream._write = function(chunk, encoding, cb) {
|
|
3635
|
-
chunk.copy(buffer, offset + written, 0, chunk.length);
|
|
3636
|
-
written += chunk.length;
|
|
3637
|
-
cb();
|
|
3638
|
-
};
|
|
3639
|
-
writeStream.on("finish", callback);
|
|
3640
|
-
readStream.on("error", function(error) {
|
|
3641
|
-
callback(error);
|
|
3642
|
-
});
|
|
3643
|
-
readStream.pipe(writeStream);
|
|
3644
|
-
};
|
|
3645
|
-
RandomAccessReader.prototype.close = function(callback) {
|
|
3646
|
-
setImmediate(callback);
|
|
3647
|
-
};
|
|
3648
|
-
/**
 * PassThrough stream that takes a reference on `context` when constructed
 * and releases it exactly once: on flush, or on an explicit `unref()`.
 *
 * @param {{ref: Function, unref: Function}} context - reference-counted owner
 */
function RefUnrefFilter(context) {
  PassThrough.call(this);
  this.context = context;
  this.context.ref();
  this.unreffedYet = false;
}
util.inherits(RefUnrefFilter, PassThrough);

// End of data: drop the reference before signalling completion.
RefUnrefFilter.prototype._flush = function(cb) {
  this.unref();
  cb();
};

// Idempotent release; the `cb` parameter is accepted but not used.
RefUnrefFilter.prototype.unref = function(cb) {
  if (!this.unreffedYet) {
    this.unreffedYet = true;
    this.context.unref();
  }
};
|
|
3664
|
-
// Code page 437 lookup table: the character at index i is the decoding of
// byte value i, so the string must hold exactly 256 characters.
// Bytes 0xA5 and 0xA6 ("Ñ" and "ª") are written as escapes so they cannot be
// fused into one character by a mis-decoding step; losing one shifts every
// later entry by one position.
var cp437 = "\0☺☻♥♦♣♠•◘○◙♂♀♪♫☼►◄↕‼¶§▬↨↑↓→←∟↔▲▼ !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~⌂ÇüéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜ¢£¥₧ƒáíóúñ\xD1\xAAº¿⌐¬½¼¡«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■\xA0";

/**
 * Decodes the bytes buffer[start, end) to a string.
 *
 * @param {Buffer} buffer - source bytes
 * @param {number} start - index of the first byte to decode
 * @param {number} end - index one past the last byte to decode
 * @param {boolean} isUtf8 - decode as UTF-8 when truthy, otherwise as CP437
 * @returns {string} the decoded text
 */
function decodeBuffer(buffer, start, end, isUtf8) {
  if (isUtf8) return buffer.toString("utf8", start, end);
  var result = "";
  for (var i = start; i < end; i++) result += cp437[buffer[i]];
  return result;
}
|
|
3673
|
-
/**
 * Reads an unsigned 64-bit little-endian integer as a JavaScript number.
 * The value is assembled from two 32-bit halves with floating-point
 * arithmetic.
 *
 * @param {Buffer} buffer - source bytes
 * @param {number} offset - index of the least significant byte
 * @returns {number} high half * 2^32 + low half
 */
function readUInt64LE(buffer, offset) {
  const TWO_POW_32 = 4294967296;
  const low = buffer.readUInt32LE(offset);
  const high = buffer.readUInt32LE(offset + 4);
  return high * TWO_POW_32 + low;
}
|
|
3677
|
-
/**
 * Allocates a Buffer of `len` bytes whose contents are NOT zero-filled;
 * callers must overwrite it before reading.
 *
 * The former fallback to the deprecated `new Buffer(len)` constructor is
 * removed: `Buffer.allocUnsafe` exists on every runtime that can load this
 * ES-module bundle, so that branch could never run.
 *
 * @param {number} len - number of bytes to allocate
 * @returns {Buffer}
 */
var newBuffer = function(len) {
  return Buffer.allocUnsafe(len);
};
|
|
3684
|
-
/**
 * Fallback completion callback: rethrows any error it is given and
 * otherwise does nothing.
 */
function defaultCallback(err) {
  if (!err) return;
  throw err;
}
|
|
3687
|
-
}));
|
|
3688
|
-
//#endregion
|
|
3689
|
-
//#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/buffer-reader.js
|
|
3690
|
-
var require_buffer_reader = /* @__PURE__ */ __commonJSMin(((exports, module) => {
|
|
3691
|
-
module.exports = class BufferReader {
|
|
3692
|
-
constructor(buffer) {
|
|
3693
|
-
this._buffer = buffer;
|
|
3694
|
-
}
|
|
3695
|
-
open() {
|
|
3696
|
-
return Promise.resolve();
|
|
3697
|
-
}
|
|
3698
|
-
close() {
|
|
3699
|
-
return Promise.resolve();
|
|
3700
|
-
}
|
|
3701
|
-
read(buffer, offset, length, position) {
|
|
3702
|
-
this._buffer.copy(buffer, offset, position, position + length);
|
|
3703
|
-
return Promise.resolve(buffer);
|
|
3704
|
-
}
|
|
3705
|
-
buffer() {
|
|
3706
|
-
return this._buffer;
|
|
3707
|
-
}
|
|
3708
|
-
static isBufferReader(instance) {
|
|
3709
|
-
return instance instanceof BufferReader;
|
|
3710
|
-
}
|
|
3711
|
-
};
|
|
3712
|
-
}));
|
|
3713
|
-
//#endregion
|
|
3714
|
-
//#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/file-reader.js
|
|
3715
|
-
var require_file_reader = /* @__PURE__ */ __commonJSMin(((exports, module) => {
|
|
3716
|
-
/**
|
|
3717
|
-
* @module file-reader
|
|
3718
|
-
*
|
|
3719
|
-
* @description
|
|
3720
|
-
* Exports a class {@link FileReader}, used internally to handle
|
|
3721
|
-
* access when a string filename is passed. This provides a consistent
|
|
3722
|
-
* interface between reading from files and buffers, so that in-memory
|
|
3723
|
-
* files can be handled efficiently.
|
|
3724
|
-
*/
|
|
3725
|
-
const fs = __require("fs");
|
|
3726
|
-
module.exports = class FileReader {
|
|
3727
|
-
/**
|
|
3728
|
-
* Creates a new file reader instance, using the given filename.
|
|
3729
|
-
* @param {*} filename
|
|
3730
|
-
*/
|
|
3731
|
-
constructor(filename) {
|
|
3732
|
-
this._filename = filename;
|
|
3733
|
-
}
|
|
3734
|
-
/**
|
|
3735
|
-
* Opens the file descriptor for a file, and returns a promise that resolves
|
|
3736
|
-
* when the file is open. After this, {@link FileReader#read} can be called
|
|
3737
|
-
* to read file content into a buffer.
|
|
3738
|
-
* @returns a promise
|
|
3739
|
-
*/
|
|
3740
|
-
open() {
|
|
3741
|
-
return new Promise((resolve, reject) => {
|
|
3742
|
-
fs.open(this._filename, "r", 438, (err, fd) => {
|
|
3743
|
-
if (err) return reject(err);
|
|
3744
|
-
this._fd = fd;
|
|
3745
|
-
resolve();
|
|
3746
|
-
});
|
|
3747
|
-
});
|
|
3748
|
-
}
|
|
3749
|
-
/**
|
|
3750
|
-
* Closes the file descriptor associated with an open document, if there
|
|
3751
|
-
* is one, and returns a promise that resolves when the file handle is closed.
|
|
3752
|
-
* @returns a promise
|
|
3753
|
-
*/
|
|
3754
|
-
close() {
|
|
3755
|
-
return new Promise((resolve, reject) => {
|
|
3756
|
-
if (this._fd) fs.close(this._fd, (err) => {
|
|
3757
|
-
if (err) return reject(err);
|
|
3758
|
-
delete this._fd;
|
|
3759
|
-
resolve();
|
|
3760
|
-
});
|
|
3761
|
-
else resolve();
|
|
3762
|
-
});
|
|
3763
|
-
}
|
|
3764
|
-
/**
|
|
3765
|
-
* Reads a buffer of `length` bytes into the `buffer`. The new data will
|
|
3766
|
-
* be added to the buffer at offset `offset`, and will be read from the
|
|
3767
|
-
* file starting at position `position`
|
|
3768
|
-
* @param {*} buffer
|
|
3769
|
-
* @param {*} offset
|
|
3770
|
-
* @param {*} length
|
|
3771
|
-
* @param {*} position
|
|
3772
|
-
* @returns a promise that resolves to the buffer when the data is present
|
|
3773
|
-
*/
|
|
3774
|
-
read(buffer, offset, length, position) {
|
|
3775
|
-
return new Promise((resolve, reject) => {
|
|
3776
|
-
if (!this._fd) return reject(/* @__PURE__ */ new Error("file not open"));
|
|
3777
|
-
fs.read(this._fd, buffer, offset, length, position, (err, bytesRead, buffer) => {
|
|
3778
|
-
if (err) return reject(err);
|
|
3779
|
-
resolve(buffer);
|
|
3780
|
-
});
|
|
3781
|
-
});
|
|
3782
|
-
}
|
|
3783
|
-
/**
|
|
3784
|
-
* Returns the open file descriptor
|
|
3785
|
-
* @returns the file descriptor
|
|
3786
|
-
*/
|
|
3787
|
-
fd() {
|
|
3788
|
-
return this._fd;
|
|
3789
|
-
}
|
|
3790
|
-
/**
|
|
3791
|
-
* Returns true if the passed instance is an instance of this class.
|
|
3792
|
-
* @param {*} instance
|
|
3793
|
-
* @returns true if `instance` is an instance of {@link FileReader}.
|
|
3794
|
-
*/
|
|
3795
|
-
static isFileReader(instance) {
|
|
3796
|
-
return instance instanceof FileReader;
|
|
3797
|
-
}
|
|
3798
|
-
};
|
|
3799
|
-
}));
|
|
3800
|
-
//#endregion
|
|
3801
|
-
//#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/open-office-extractor.js
|
|
3802
|
-
var require_open_office_extractor = /* @__PURE__ */ __commonJSMin(((exports, module) => {
|
|
3803
|
-
/**
|
|
3804
|
-
* @module open-office-extractor
|
|
3805
|
-
*
|
|
3806
|
-
* @description
|
|
3807
|
-
* Implements the main Open Office format extractor. Open Office .docx files
|
|
3808
|
-
* are essentially zip files containing streams, and each of these streams contains
|
|
3809
|
-
* XML content in one form or another. So we need to use {@link zlib} to extract
|
|
3810
|
-
* the streams, and something like `sax-js` to parse the XML that we find
|
|
3811
|
-
* there.
|
|
3812
|
-
*
|
|
3813
|
-
* We probably don't need the whole of the Open Office data, we're only likely
|
|
3814
|
-
* to need a few streams. Sadly, the documentation for the file format is literally
|
|
3815
|
-
* 5000 pages.
|
|
3816
|
-
* Note that [WordOleExtractor]{@link module:word-ole-extractor~WordOleExtractor} is
|
|
3817
|
-
* used for older, OLE-style, compound document files.
|
|
3818
|
-
*/
|
|
3819
|
-
const path = __require("path");
|
|
3820
|
-
const SAXES = require_saxes();
|
|
3821
|
-
const yauzl = require_yauzl();
|
|
3822
|
-
const BufferReader = require_buffer_reader();
|
|
3823
|
-
const FileReader = require_file_reader();
|
|
3824
|
-
const Document = require_document();
|
|
3825
|
-
/**
 * Runs `callback` on array[index], array[index + 1], ... strictly one after
 * another, waiting for each returned value/promise to settle before moving
 * on.
 *
 * @param {Function} callback - invoked with one array element at a time
 * @param {Array} array - elements to visit
 * @param {number} index - position of the first element to visit
 * @returns {Promise<void>} resolves after the last element was handled
 */
function each(callback, array, index) {
  if (index !== array.length) {
    const current = Promise.resolve(callback(array[index]));
    return current.then(() => each(callback, array, index + 1));
  }
  return Promise.resolve();
}
|
|
3829
|
-
/**
 * @class
 * The main class implementing extraction from Open Office Word files.
 *
 * The archive's entries are listed, `[Content_Types].xml` is parsed first to
 * learn which parts carry interesting content, and every matching part is
 * then streamed through a SAX parser whose tag handlers accumulate text into
 * the resulting document.
 */
var OpenOfficeExtractor = class {
  constructor() {
    // Lookup tables keyed by names taken from the archive are created without
    // a prototype, so that names such as "constructor" or "toString" cannot
    // match inherited Object properties.
    this._streamTypes = Object.assign(Object.create(null), {
      "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml": true,
      "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml": true,
      "application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml": true,
      "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml": true,
      "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml": true,
      "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml": true,
      "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml": true,
      "application/vnd.openxmlformats-package.relationships+xml": true
    });
    this._headerTypes = {
      "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header": true,
      "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer": true
    };
    // Part name -> action, filled from <Override> elements.
    this._actions = Object.create(null);
    // File extension -> content type, filled from <Default> elements.
    this._defaults = Object.create(null);
  }

  /**
   * Decides whether the archive entry `filename` should be parsed: either it
   * was registered explicitly, or its extension's default content type is one
   * of the known stream types.
   * @param {string} filename - entry name inside the archive
   * @returns {boolean}
   */
  shouldProcess(filename) {
    if (this._actions[filename]) return true;
    const extension = path.posix.extname(filename).replace(/^\./, "");
    if (!extension) return false;
    const defaultType = this._defaults[extension];
    if (defaultType && this._streamTypes[defaultType]) return true;
    return false;
  }

  /**
   * Opens the zip archive behind `reader` with lazy entry reading.
   * @param reader - a BufferReader or FileReader
   * @returns a promise for the yauzl zipfile; rejects for any other reader
   */
  openArchive(reader) {
    if (BufferReader.isBufferReader(reader)) {
      return new Promise((resolve, reject) => {
        yauzl.fromBuffer(reader.buffer(), { lazyEntries: true }, (err, zipfile) => {
          if (err) return reject(err);
          resolve(zipfile);
        });
      });
    }
    if (FileReader.isFileReader(reader)) {
      return new Promise((resolve, reject) => {
        // autoClose is off: the descriptor belongs to the reader, which
        // closes it itself.
        yauzl.fromFd(reader.fd(), {
          lazyEntries: true,
          autoClose: false
        }, (err, zipfile) => {
          if (err) return reject(err);
          resolve(zipfile);
        });
      });
    }
    // Report the problem through the promise (and survive null/undefined)
    // rather than throwing synchronously from a promise-returning method.
    const typeName = reader?.constructor?.name ?? String(reader);
    return Promise.reject(new Error("Unexpected reader type: " + typeName));
  }

  /**
   * Lists every entry of `zipfile`, then parses the relevant ones one at a
   * time with `[Content_Types].xml` moved to the front.
   * @returns a promise that resolves when all selected entries are parsed
   */
  processEntries(zipfile) {
    const entryTable = new Map();
    const entryNames = [];
    return new Promise((resolve, reject) => {
      zipfile.readEntry();
      zipfile.on("error", reject);
      zipfile.on("entry", (entry) => {
        const filename = entry.fileName;
        entryTable.set(filename, entry);
        entryNames.push(filename);
        zipfile.readEntry();
      });
      zipfile.on("end", () => resolve(this._document));
    }).then(() => {
      const index = entryNames.indexOf("[Content_Types].xml");
      if (index === -1) throw new Error("Invalid Open Office XML: missing content types");
      // The content types must be known before shouldProcess can judge the
      // remaining entries, so that part is handled first.
      entryNames.splice(index, 1);
      entryNames.unshift("[Content_Types].xml");
      this._actions["[Content_Types].xml"] = true;
      return each((name) => {
        if (this.shouldProcess(name)) return this.handleEntry(zipfile, entryTable.get(name));
      }, entryNames, 0);
    });
  }

  /**
   * Extracts the document behind `reader`.
   * @returns a promise for the populated Document
   */
  extract(reader) {
    const archive = this.openArchive(reader);
    this._document = new Document();
    this._relationships = {};
    this._entryTable = {};
    this._entries = [];
    return archive.then((zipfile) => this.processEntries(zipfile)).then(() => {
      const document = this._document;
      if (document._textboxes && document._textboxes.length > 0) document._textboxes = document._textboxes + "\n";
      if (document._headerTextboxes && document._headerTextboxes.length > 0) document._headerTextboxes = document._headerTextboxes + "\n";
      return document;
    });
  }

  /**
   * SAX open-tag handler. Records content-type and relationship metadata,
   * and maintains `_context` (a stack whose head names the kind of region
   * being read) and `_pieces` (the text collected for the current part).
   */
  handleOpenTag(node) {
    switch (node.name) {
      case "Override": {
        const contentType = node.attributes["ContentType"];
        const actionFunction = this._streamTypes[contentType];
        if (actionFunction) {
          const partName = node.attributes["PartName"].replace(/^[/]+/, "");
          this._actions[partName] = {
            action: actionFunction,
            type: contentType
          };
        }
        break;
      }
      case "Default":
        this._defaults[node.attributes["Extension"]] = node.attributes["ContentType"];
        break;
      case "Relationship":
        this._relationships[node.attributes["Id"]] = {
          type: node.attributes["Type"],
          target: node.attributes["Target"]
        };
        break;
      case "w:document":
      case "w:footnotes":
      case "w:endnotes":
      case "w:comments":
        this._context = ["content", "body"];
        this._pieces = [];
        break;
      case "w:hdr":
      case "w:ftr":
        this._context = ["content", "header"];
        this._pieces = [];
        break;
      case "w:endnote":
      case "w:footnote":
        // A note carrying a w:type attribute pushes that type, which stops
        // the text handlers from collecting it; plain notes re-push the
        // current context.
        this._context.unshift(node.attributes["w:type"] || this._context[0]);
        break;
      case "w:tab":
        // Written as an escape so the tab cannot be mangled by reformatting.
        if (this._context[0] === "content") this._pieces.push("\t");
        break;
      case "w:br":
        // Page breaks and ordinary breaks both become a newline.
        if (this._context[0] === "content") this._pieces.push("\n");
        break;
      case "w:del":
      case "w:instrText":
        this._context.unshift("deleted");
        break;
      case "w:tabs":
        this._context.unshift("tabs");
        break;
      case "w:tc":
        this._context.unshift("cell");
        break;
      case "w:drawing":
        this._context.unshift("drawing");
        break;
      case "w:txbxContent":
        // Park the enclosing text on the context stack and collect the
        // textbox separately; the close handler restores it.
        this._context.unshift(this._pieces);
        this._context.unshift("textbox");
        this._pieces = [];
        break;
      default:
        break;
    }
  }

  /**
   * SAX close-tag handler. Pops contexts pushed by the open handler and, at
   * the end of each part's root element, stores the collected text on the
   * document.
   */
  handleCloseTag(node) {
    switch (node.name) {
      case "w:document":
        this._context = null;
        this._document._body = this._pieces.join("");
        break;
      case "w:footnote":
      case "w:endnote":
        this._context.shift();
        break;
      case "w:footnotes":
        this._context = null;
        this._document._footnotes = this._pieces.join("");
        break;
      case "w:endnotes":
        this._context = null;
        this._document._endnotes = this._pieces.join("");
        break;
      case "w:comments":
        this._context = null;
        this._document._annotations = this._pieces.join("");
        break;
      case "w:hdr":
        this._context = null;
        this._document._headers = this._document._headers + this._pieces.join("");
        break;
      case "w:ftr":
        this._context = null;
        this._document._footers = this._document._footers + this._pieces.join("");
        break;
      case "w:p": {
        const current = this._context[0];
        if (current === "content" || current === "cell" || current === "textbox") this._pieces.push("\n");
        break;
      }
      case "w:del":
      case "w:instrText":
      case "w:tabs":
      case "w:drawing":
        this._context.shift();
        break;
      case "w:tc":
        // Replace the last collected piece (the newline of the cell's final
        // paragraph) with a tab separator.
        this._pieces.pop();
        this._pieces.push("\t");
        this._context.shift();
        break;
      case "w:tr":
        this._pieces.push("\n");
        break;
      case "w:txbxContent": {
        const textBox = this._pieces.join("");
        if (this._context.shift() !== "textbox") throw new Error("Invalid textbox context");
        this._pieces = this._context.shift();
        // Textboxes inside a drawing and empty textboxes are not recorded.
        if (this._context[0] === "drawing") return;
        if (textBox.length == 0) return;
        const documentField = this._context.includes("header") ? "_headerTextboxes" : "_textboxes";
        if (this._document[documentField]) this._document[documentField] = this._document[documentField] + "\n" + textBox;
        else this._document[documentField] = textBox;
        break;
      }
      default:
        break;
    }
  }

  /**
   * Builds a SAX parser wired to this extractor's tag and text handlers.
   * Exceptions thrown by a handler are reported through `parser.fail` so
   * they reach the parser's "error" listeners instead of escaping.
   */
  createXmlParser() {
    const parser = new SAXES.SaxesParser();
    const guarded = (handler) => (payload) => {
      try {
        handler(payload);
      } catch (e) {
        parser.fail(e.message);
      }
    };
    parser.on("opentag", guarded((node) => this.handleOpenTag(node)));
    parser.on("closetag", guarded((node) => this.handleCloseTag(node)));
    parser.on("text", guarded((string) => {
      if (!this._context) return;
      const current = this._context[0];
      if (current === "content" || current === "cell" || current === "textbox") this._pieces.push(string);
    }));
    return parser;
  }

  /**
   * Streams one archive entry through a fresh XML parser.
   * @returns a promise that resolves when the parser reports the end of the
   *   entry and rejects on a stream or parse error
   */
  handleEntry(zipfile, entry) {
    return new Promise((resolve, reject) => {
      zipfile.openReadStream(entry, (err, readStream) => {
        if (err) return reject(err);
        this._source = entry.fileName;
        const parser = this.createXmlParser();
        parser.on("error", (e) => {
          readStream.destroy(e);
          reject(e);
        });
        parser.on("end", () => resolve());
        // Let the stream decode UTF-8 so that a multi-byte character split
        // across two chunks is reassembled before it reaches the parser;
        // read() then returns strings instead of Buffers.
        readStream.setEncoding("utf8");
        readStream.on("end", () => parser.close());
        readStream.on("error", (e) => reject(e));
        readStream.on("readable", () => {
          while (true) {
            const chunk = readStream.read(4096);
            if (chunk === null) return;
            parser.write(chunk);
          }
        });
      });
    });
  }
};
|
|
4047
|
-
module.exports = OpenOfficeExtractor;
|
|
4048
|
-
}));
|
|
4049
|
-
//#endregion
|
|
4050
|
-
//#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/word.js
|
|
4051
|
-
var require_word = /* @__PURE__ */ __commonJSMin(((exports, module) => {
|
|
4052
|
-
/**
|
|
4053
|
-
* @module word
|
|
4054
|
-
*
|
|
4055
|
-
* @description
|
|
4056
|
-
* The main module for the package. This exports an extractor class, which
|
|
4057
|
-
* provides a single `extract` method that can be called with either a
|
|
4058
|
-
* string (filename) or a buffer.
|
|
4059
|
-
*/
|
|
4060
|
-
const { Buffer: Buffer$1 } = __require("buffer");
|
|
4061
|
-
const WordOleExtractor = require_word_ole_extractor();
|
|
4062
|
-
const OpenOfficeExtractor = require_open_office_extractor();
|
|
4063
|
-
const BufferReader = require_buffer_reader();
|
|
4064
|
-
const FileReader = require_file_reader();
|
|
4065
|
-
/**
 * The main class for the word extraction package. Typically, people will make
 * an instance of this class, and call the {@link #extract} method to transform
 * a Word file into a {@link Document} instance, which provides the accessors
 * needed to read its body, and so on.
 */
var WordExtractor = class {
  constructor() {}

  /**
   * Extracts the main contents of the file. If a Buffer is passed, that
   * is used instead. Opens the source, reads the first 512 bytes, and uses
   * the leading signature to choose between the OLE (.doc) extractor and
   * the Open Office (.docx) extractor. The reader is closed again whether
   * extraction succeeds or fails.
   *
   * @param {string|Buffer} source - either a string filename, or a Buffer containing the file content
   * @returns a promise for a {@link Document} providing accessors onto the
   *   text; rejects when the source type or file signature is not supported
   */
  extract(source) {
    let reader = null;
    if (Buffer$1.isBuffer(source)) reader = new BufferReader(source);
    else if (typeof source === "string") reader = new FileReader(source);
    // Any other source used to surface as a synchronous TypeError on a null
    // reader; report it through the returned promise instead.
    if (reader === null) {
      return Promise.reject(new TypeError("Unable to read this source: expected a filename string or a Buffer"));
    }

    const header = Buffer$1.alloc(512);
    return reader.open().then(() => reader.read(header, 0, 512, 0)).then((buffer) => {
      let extractor = null;
      const signature = buffer.readUInt16BE(0);
      if (signature === 53455) {
        // 0xD0CF
        extractor = WordOleExtractor;
      } else if (signature === 20555) {
        // 0x504B is ASCII "PK"; the next two bytes must be one of
        // 0x0304, 0x0506 or 0x0708.
        const next = buffer.readUInt16BE(2);
        if (next === 772 || next === 1286 || next === 1800) extractor = OpenOfficeExtractor;
      }
      if (!extractor) throw new Error("Unable to read this type of file");
      return new extractor().extract(reader);
    }).finally(() => reader.close());
  }
};
|
|
4100
|
-
module.exports = WordExtractor;
|
|
4101
|
-
}));
|
|
4102
|
-
//#endregion
|
|
4103
|
-
//#region ../../packages/file-loaders/src/loaders/doc/index.ts
|
|
4104
|
-
var import_src = /* @__PURE__ */ __toESM(require_src());
|
|
4105
|
-
var import_word = /* @__PURE__ */ __toESM(require_word());
|
|
4106
|
-
const log = (0, import_src.default)("file-loaders:doc");
|
|
4107
|
-
/**
 * Loads legacy Word documents (.doc) using word-extractor.
 * Extracts plain text content and basic metadata from DOC files.
 */
var DocLoader = class {
  /**
   * Extracts the text of the document at `filePath` as a single page.
   * Never rejects: on failure the error is logged and a single empty page
   * carrying the error message in its metadata is returned.
   *
   * @param {string} filePath - path of the document to load
   * @returns {Promise<Array>} a one-element array holding the page
   */
  async loadPages(filePath) {
    log("Loading DOC file:", filePath);
    try {
      const extracted = await new import_word.default().extract(filePath);
      // Prefer the document's getBody() accessor; otherwise fall back to a
      // `text` property, and finally to an empty string.
      const pageContent = extracted && typeof extracted.getBody === "function" ? extracted.getBody() : extracted?.text ?? "";
      const lineCount = pageContent.split("\n").length;
      const page = {
        charCount: pageContent.length,
        lineCount,
        metadata: { pageNumber: 1 },
        pageContent
      };
      log("DOC loading completed");
      return [page];
    } catch (e) {
      // Anything can be thrown or used as a rejection reason, so do not
      // assume the value has a `message` property.
      const message = e instanceof Error ? e.message : String(e);
      log("Error encountered while loading DOC file");
      console.error(`Error loading DOC file ${filePath}: ${message}`);
      return [{
        charCount: 0,
        lineCount: 0,
        metadata: { error: `Failed to load DOC file: ${message}` },
        pageContent: ""
      }];
    }
  }

  /**
   * Joins the content of all pages, separated by a blank line.
   *
   * @param {Array} pages - pages as returned by loadPages
   * @returns {Promise<string>} the combined text
   */
  async aggregateContent(pages) {
    log("Aggregating content from", pages.length, "DOC pages");
    return pages.map((p) => p.pageContent).join("\n\n");
  }
};
|
|
4143
|
-
//#endregion
|
|
4144
|
-
export { DocLoader };
|