@lobehub/cli 0.0.1-canary.9 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -0
- package/dist/buffer-crc32-BlUV1nEz.js +342 -0
- package/dist/doc-CR0Zm5t8.js +4144 -0
- package/dist/docx-73xqMWN6.js +33097 -0
- package/dist/excel-3NB56vnT.js +23451 -0
- package/dist/index.js +30387 -6995
- package/dist/node-BbXtxEF5.js +48 -0
- package/dist/pdf-B5Ukgxfs.js +108386 -0
- package/dist/pptx-DwZ8X2Hk.js +9929 -0
- package/dist/text-LU5pflEU.js +59 -0
- package/man/man1/lh.1 +166 -0
- package/man/man1/lobe.1 +1 -0
- package/man/man1/lobehub.1 +1 -0
- package/package.json +27 -21
- package/dist/chunk-LMMQX4CK.js +0 -49
- package/dist/chunk-OHUXGRBO.js +0 -66
- package/dist/chunk-SIPW4ILE.js +0 -374
- package/dist/doc-GP5GLKH3.js +0 -4361
- package/dist/docx-WBQXROXK.js +0 -31061
- package/dist/excel-7OIQJBE4.js +0 -26983
- package/dist/pdf-DR5MENXH.js +0 -94752
- package/dist/pptx-TR7ZMKBG.js +0 -10784
- package/dist/text-6IT6PEWE.js +0 -60
package/dist/doc-GP5GLKH3.js
DELETED
|
@@ -1,4361 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
import {
|
|
3
|
-
require_buffer_crc32,
|
|
4
|
-
require_pend
|
|
5
|
-
} from "./chunk-SIPW4ILE.js";
|
|
6
|
-
import {
|
|
7
|
-
__commonJS,
|
|
8
|
-
__require,
|
|
9
|
-
__toESM
|
|
10
|
-
} from "./chunk-LMMQX4CK.js";
|
|
11
|
-
|
|
12
|
-
// ../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-header.js
|
|
13
|
-
var require_ole_header = __commonJS({
|
|
14
|
-
"../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-header.js"(exports, module) {
|
|
15
|
-
"use strict";
|
|
16
|
-
var HEADER_DATA = Buffer.from("D0CF11E0A1B11AE1", "hex");
// load() reads 109 32-bit entries starting at offset 76, so it needs
// 76 + 109 * 4 bytes before any field can be read safely.
var HEADER_MIN_LENGTH = 76 + 109 * 4;
/**
 * Holder for the fixed-position fields read from the start of a compound
 * document. All fields are populated by {@link Header#load}.
 */
var Header = class {
  constructor() {
  }
  /**
   * Checks the 8-byte signature and, when it matches, copies the header
   * fields from `buffer` onto this instance.
   *
   * @param {Buffer} buffer - bytes starting at the first byte of the header
   * @returns {boolean} `true` when the header was loaded; `false` when the
   *   signature does not match or the buffer is too short to hold every
   *   field (previously a short buffer threw a `RangeError`).
   */
  load(buffer) {
    for (let i = 0; i < HEADER_DATA.length; i++) {
      if (HEADER_DATA[i] !== buffer[i]) {
        return false;
      }
    }
    // A matching signature on a truncated buffer is still not a usable header.
    if (buffer.length < HEADER_MIN_LENGTH) {
      return false;
    }
    // Both sizes are stored as exponents of two.
    this.secSize = 1 << buffer.readInt16LE(30);
    this.shortSecSize = 1 << buffer.readInt16LE(32);
    this.SATSize = buffer.readInt32LE(44);
    this.dirSecId = buffer.readInt32LE(48);
    this.shortStreamMax = buffer.readInt32LE(56);
    this.SSATSecId = buffer.readInt32LE(60);
    this.SSATSize = buffer.readInt32LE(64);
    this.MSATSecId = buffer.readInt32LE(68);
    this.MSATSize = buffer.readInt32LE(72);
    this.partialMSAT = new Array(109);
    for (let i = 0; i < 109; i++) {
      this.partialMSAT[i] = buffer.readInt32LE(76 + i * 4);
    }
    return true;
  }
};
|
|
40
|
-
module.exports = Header;
|
|
41
|
-
}
|
|
42
|
-
});
|
|
43
|
-
|
|
44
|
-
// ../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-allocation-table.js
|
|
45
|
-
var require_ole_allocation_table = __commonJS({
|
|
46
|
-
"../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-allocation-table.js"(exports, module) {
|
|
47
|
-
"use strict";
|
|
48
|
-
var ALLOCATION_TABLE_SEC_ID_FREE = -1;
/**
 * A table of 32-bit sector ids in which the value stored at index `i` is the
 * id of the sector that follows sector `i` in its chain.
 */
var AllocationTable = class {
  /**
   * @param {Object} doc - owning document; must expose `_header.secSize` and
   *   `_readSectors(secIds)` returning a Promise of a Buffer.
   */
  constructor(doc) {
    this._doc = doc;
  }
  /**
   * Reads the given sectors and decodes them as little-endian 32-bit ids.
   *
   * @param {number[]} secIds - sectors that hold the table
   * @returns {Promise<void>} resolves once `_table` is populated
   */
  load(secIds) {
    const doc = this._doc;
    const header = doc._header;
    this._table = new Array(secIds.length * (header.secSize / 4));
    return doc._readSectors(secIds).then((buffer) => {
      for (let i = 0; i < buffer.length / 4; i++) {
        this._table[i] = buffer.readInt32LE(i * 4);
      }
    });
  }
  /**
   * Follows the chain that starts at `startSecId` until a negative id or an
   * index outside the table is reached.
   *
   * A corrupt table can contain a cycle of any length; the walk stops as soon
   * as a sector would be visited a second time, so it always terminates.
   * (The previous version only detected a sector pointing at itself.)
   *
   * @param {number} startSecId - first sector of the chain
   * @returns {number[]} sector ids in chain order, each at most once
   */
  getSecIdChain(startSecId) {
    const secIds = [];
    const visited = new Set();
    let secId = startSecId;
    while (secId > ALLOCATION_TABLE_SEC_ID_FREE && !visited.has(secId)) {
      visited.add(secId);
      secIds.push(secId);
      secId = this._table[secId];
    }
    return secIds;
  }
};
|
|
77
|
-
module.exports = AllocationTable;
|
|
78
|
-
}
|
|
79
|
-
});
|
|
80
|
-
|
|
81
|
-
// ../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-directory-tree.js
|
|
82
|
-
var require_ole_directory_tree = __commonJS({
|
|
83
|
-
"../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-directory-tree.js"(exports, module) {
|
|
84
|
-
"use strict";
|
|
85
|
-
var DIRECTORY_TREE_ENTRY_TYPE_STORAGE = 1;
var DIRECTORY_TREE_ENTRY_TYPE_STREAM = 2;
var DIRECTORY_TREE_ENTRY_TYPE_ROOT = 5;
var DIRECTORY_TREE_LEAF = -1;
/**
 * Decodes the 128-byte directory entries of a compound document and links
 * them into a hierarchy: every storage entry gets a `storages` and a
 * `streams` map keyed by child name.
 */
var DirectoryTree = class {
  /**
   * @param {Object} doc - owning document; must expose `_readSectors(secIds)`
   *   returning a Promise of a Buffer.
   */
  constructor(doc) {
    this._doc = doc;
  }
  /**
   * Reads the directory sectors, decodes every entry and builds the hierarchy
   * below the root entry.
   *
   * @param {number[]} secIds - sectors that hold the directory
   * @returns {Promise<void>} resolves once `root` and `_entries` are set
   * @throws {Error} (as a rejection) when no root entry exists or an entry
   *   references an id outside the directory
   */
  load(secIds) {
    const doc = this._doc;
    return doc._readSectors(secIds).then((buffer) => {
      const count = buffer.length / 128;
      this._entries = new Array(count);
      for (let i = 0; i < count; i++) {
        const offset = i * 128;
        // The stored length is reduced by one before decoding; an odd trailing
        // byte is ignored by the utf16le decoder.
        const nameLength = Math.max(buffer.readInt16LE(64 + offset) - 1, 0);
        const entry = {};
        entry.name = buffer.toString("utf16le", 0 + offset, nameLength + offset);
        entry.type = buffer.readInt8(66 + offset);
        entry.nodeColor = buffer.readInt8(67 + offset);
        entry.left = buffer.readInt32LE(68 + offset);
        entry.right = buffer.readInt32LE(72 + offset);
        entry.storageDirId = buffer.readInt32LE(76 + offset);
        entry.secId = buffer.readInt32LE(116 + offset);
        entry.size = buffer.readInt32LE(120 + offset);
        this._entries[i] = entry;
      }
      this.root = this._entries.find((entry) => entry.type === DIRECTORY_TREE_ENTRY_TYPE_ROOT);
      if (this.root === undefined) {
        // Previously surfaced as a TypeError from a property read on undefined.
        throw new Error("Compound document has no root directory entry");
      }
      this._buildHierarchy(this.root);
    });
  }
  /**
   * Populates `storages` and `streams` on `storageEntry` and recurses into
   * every child storage.
   *
   * @param {Object} storageEntry - entry whose children are to be linked
   * @param {Set<Object>} [visited] - storages already processed; guards
   *   against a corrupt directory in which a storage contains an ancestor
   */
  _buildHierarchy(storageEntry, visited = new Set()) {
    visited.add(storageEntry);
    const childIds = this._getChildIds(storageEntry);
    storageEntry.storages = {};
    storageEntry.streams = {};
    for (const childId of childIds) {
      const childEntry = this._entries[childId];
      const name = childEntry.name;
      if (childEntry.type === DIRECTORY_TREE_ENTRY_TYPE_STORAGE) {
        storageEntry.storages[name] = childEntry;
      }
      if (childEntry.type === DIRECTORY_TREE_ENTRY_TYPE_STREAM) {
        storageEntry.streams[name] = childEntry;
      }
    }
    for (const name in storageEntry.storages) {
      const childStorage = storageEntry.storages[name];
      if (!visited.has(childStorage)) {
        this._buildHierarchy(childStorage, visited);
      }
    }
  }
  /**
   * Collects the ids of all entries reachable from `storageEntry.storageDirId`
   * through `left`/`right` links, in node-left-right order.
   *
   * @param {Object} storageEntry - storage whose children are wanted
   * @returns {number[]} child entry ids, each at most once
   * @throws {Error} when a link points at an id that has no entry
   */
  _getChildIds(storageEntry) {
    const childIds = [];
    const seen = new Set();
    const visit = (entryId) => {
      // Stop at leaf markers and at ids already collected; the latter keeps
      // a cyclic (corrupt) sibling tree from recursing without end.
      if (entryId === DIRECTORY_TREE_LEAF || seen.has(entryId)) {
        return;
      }
      const entry = this._entries[entryId];
      if (entry === undefined) {
        throw new Error(`Invalid directory entry id: ${entryId}`);
      }
      seen.add(entryId);
      childIds.push(entryId);
      visit(entry.left);
      visit(entry.right);
    };
    if (storageEntry.storageDirId > -1) {
      visit(storageEntry.storageDirId);
    }
    return childIds;
  }
};
|
|
154
|
-
module.exports = DirectoryTree;
|
|
155
|
-
}
|
|
156
|
-
});
|
|
157
|
-
|
|
158
|
-
// ../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-storage-stream.js
|
|
159
|
-
var require_ole_storage_stream = __commonJS({
|
|
160
|
-
"../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-storage-stream.js"(exports, module) {
|
|
161
|
-
"use strict";
|
|
162
|
-
var { Readable } = __require("stream");
|
|
163
|
-
/**
 * Readable stream over the content of one directory stream entry. Data is
 * delivered one sector per `_read` call, trimmed to the entry's byte size.
 */
var StorageStream = class extends Readable {
  /**
   * @param {Object} doc - owning document; must expose `_header`, `_SAT`,
   *   `_SSAT`, `_readSector` and `_readShortSector`
   * @param {Object} [streamEntry] - directory entry (`size`, `secId`); when
   *   missing the stream ends immediately without data
   */
  constructor(doc, streamEntry) {
    super();
    this._doc = doc;
    this._streamEntry = streamEntry;
    this.initialize();
  }
  /**
   * Resolves the sector chain for the entry and resets the read position.
   * Entries smaller than `shortStreamMax` use the short allocation table.
   */
  initialize() {
    this._index = 0;
    this._done = true;
    if (!this._streamEntry) {
      return;
    }
    const doc = this._doc;
    this._bytes = this._streamEntry.size;
    this._allocationTable = doc._SAT;
    this._shortStream = false;
    if (this._bytes < doc._header.shortStreamMax) {
      this._shortStream = true;
      this._allocationTable = doc._SSAT;
    }
    this._secIds = this._allocationTable.getSecIdChain(this._streamEntry.secId);
    this._done = false;
  }
  /**
   * Reads one sector using the reader that matches the stream kind.
   * @param {number} sector - sector id
   * @returns {Promise<Buffer>}
   */
  _readSector(sector) {
    if (this._shortStream) {
      return this._doc._readShortSector(sector);
    } else {
      return this._doc._readSector(sector);
    }
  }
  /**
   * Readable implementation: pushes the next sector, or `null` once the chain
   * is exhausted. A failed sector read destroys the stream with that error so
   * consumers see an `error` event instead of an unhandled rejection and a
   * stream that never ends.
   */
  _read() {
    if (this._done) {
      return this.push(null);
    }
    if (this._index >= this._secIds.length) {
      this._done = true;
      return this.push(null);
    }
    return this._readSector(this._secIds[this._index]).then((buffer) => {
      // The last sector usually extends past the end of the stream data.
      if (this._bytes - buffer.length < 0) {
        buffer = buffer.subarray(0, this._bytes);
      }
      this._bytes -= buffer.length;
      this._index++;
      this.push(buffer);
    }).catch((error) => {
      this.destroy(error);
    });
  }
};
|
|
212
|
-
module.exports = StorageStream;
|
|
213
|
-
}
|
|
214
|
-
});
|
|
215
|
-
|
|
216
|
-
// ../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-storage.js
|
|
217
|
-
var require_ole_storage = __commonJS({
|
|
218
|
-
"../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-storage.js"(exports, module) {
|
|
219
|
-
"use strict";
|
|
220
|
-
var StorageStream = require_ole_storage_stream();
|
|
221
|
-
/**
 * Thin handle on one storage entry of a compound document, used to reach
 * nested storages and streams by name.
 */
var Storage = class _Storage {
  /**
   * @param {Object} doc - owning compound document
   * @param {Object} dirEntry - directory entry carrying `storages` and
   *   `streams` maps
   */
  constructor(doc, dirEntry) {
    this._doc = doc;
    this._dirEntry = dirEntry;
  }
  /**
   * @param {string} storageName - name of a child storage
   * @returns {Storage} handle wrapping the child entry (the entry is
   *   `undefined` when no child has that name)
   */
  storage(storageName) {
    const owner = this._doc;
    const childEntry = this._dirEntry.storages[storageName];
    return new _Storage(owner, childEntry);
  }
  /**
   * @param {string} streamName - name of a child stream
   * @returns {StorageStream} readable stream over the child entry
   */
  stream(streamName) {
    const owner = this._doc;
    const childEntry = this._dirEntry.streams[streamName];
    return new StorageStream(owner, childEntry);
  }
};
|
|
233
|
-
module.exports = Storage;
|
|
234
|
-
}
|
|
235
|
-
});
|
|
236
|
-
|
|
237
|
-
// ../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-compound-doc.js
|
|
238
|
-
var require_ole_compound_doc = __commonJS({
|
|
239
|
-
"../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-compound-doc.js"(exports, module) {
|
|
240
|
-
"use strict";
|
|
241
|
-
var Header = require_ole_header();
|
|
242
|
-
var AllocationTable = require_ole_allocation_table();
|
|
243
|
-
var DirectoryTree = require_ole_directory_tree();
|
|
244
|
-
var Storage = require_ole_storage();
|
|
245
|
-
/**
 * Reader for an OLE compound document. `read()` loads the header, the
 * allocation tables and the directory; `storage()` / `stream()` then give
 * access to the content by name.
 */
var OleCompoundDoc = class {
  /**
   * @param {Object} reader - object exposing
   *   `read(buffer, bufferOffset, length, filePosition)` returning a Promise
   */
  constructor(reader) {
    this._reader = reader;
    this._skipBytes = 0;
  }
  /**
   * Runs every loading step in order.
   * @returns {Promise<OleCompoundDoc>} resolves to this instance
   */
  read() {
    const stages = [
      () => this._readHeader(),
      () => this._readMSAT(),
      () => this._readSAT(),
      () => this._readSSAT(),
      () => this._readDirectoryTree(),
      () => {
        // Only documents with a non-zero prefix carry a custom header.
        if (this._skipBytes != 0) {
          return this._readCustomHeader();
        }
      },
      () => this,
    ];
    return stages.reduce((pending, stage) => pending.then(stage), Promise.resolve());
  }
  /**
   * Reads the first `_skipBytes` bytes of the file and hands them to
   * `_customHeaderCallback`. The callback's result is not used.
   * @returns {Promise<void>}
   */
  _readCustomHeader() {
    const prefix = Buffer.alloc(this._skipBytes);
    return this._reader.read(prefix, 0, this._skipBytes, 0).then((loaded) => {
      this._customHeaderCallback(loaded);
    });
  }
  /**
   * Reads and parses the 512-byte header located after the skipped prefix.
   * @returns {Promise<void>} rejects with "Not a valid compound document"
   *   when the header does not load
   */
  _readHeader() {
    const raw = Buffer.alloc(512);
    const position = 0 + this._skipBytes;
    return this._reader.read(raw, 0, 512, position).then((loaded) => {
      this._header = new Header();
      if (!this._header.load(loaded)) {
        throw new Error("Not a valid compound document");
      }
    });
  }
  /**
   * Builds `_MSAT` from the 109 entries stored in the header and, when the
   * table is larger, from the chain of additional MSAT sectors. The last
   * 32-bit slot of each such sector holds the id of the next one.
   * @returns {Promise<void>}
   */
  _readMSAT() {
    const header = this._header;
    this._MSAT = header.partialMSAT.slice(0);
    this._MSAT.length = header.SATSize;
    const fitsInHeader = header.SATSize <= 109 || header.MSATSize == 0;
    if (fitsInHeader) {
      return Promise.resolve();
    }
    const linkOffset = header.secSize - 4;
    const readNext = (sectorsRead, writeIndex, secId) => {
      if (sectorsRead >= header.MSATSize) {
        return Promise.resolve();
      }
      return this._readSector(secId).then((sector) => {
        let cursor = writeIndex;
        for (let pos = 0; pos < linkOffset && cursor < header.SATSize; pos += 4) {
          this._MSAT[cursor] = sector.readInt32LE(pos);
          cursor++;
        }
        return readNext(sectorsRead + 1, cursor, sector.readInt32LE(linkOffset));
      });
    };
    return readNext(0, 109, header.MSATSecId);
  }
  /**
   * @param {number} secId - sector id
   * @returns {Promise<Buffer>} content of that sector
   */
  _readSector(secId) {
    return this._readSectors([secId]);
  }
  /**
   * Reads the given sectors one after another into a single buffer.
   * @param {number[]} secIds - sector ids, in output order
   * @returns {Promise<Buffer>} concatenated sector contents
   */
  _readSectors(secIds) {
    const { secSize } = this._header;
    const combined = Buffer.alloc(secIds.length * secSize);
    const fill = (index) => {
      if (index >= secIds.length) {
        return Promise.resolve(combined);
      }
      const writeAt = index * secSize;
      const readFrom = this._getFileOffsetForSec(secIds[index]);
      return this._reader.read(combined, writeAt, secSize, readFrom).then(() => fill(index + 1));
    };
    return fill(0);
  }
  /**
   * @param {number} secId - short-sector id
   * @returns {Promise<Buffer>} content of that short sector
   */
  _readShortSector(secId) {
    return this._readShortSectors([secId]);
  }
  /**
   * Reads the given short sectors one after another into a single buffer.
   * @param {number[]} secIds - short-sector ids, in output order
   * @returns {Promise<Buffer>} concatenated short-sector contents
   */
  _readShortSectors(secIds) {
    const { shortSecSize } = this._header;
    const combined = Buffer.alloc(secIds.length * shortSecSize);
    const fill = (index) => {
      if (index >= secIds.length) {
        return Promise.resolve(combined);
      }
      const writeAt = index * shortSecSize;
      const readFrom = this._getFileOffsetForShortSec(secIds[index]);
      return this._reader.read(combined, writeAt, shortSecSize, readFrom).then(() => fill(index + 1));
    };
    return fill(0);
  }
  /**
   * Loads the sector allocation table from the sectors listed in `_MSAT`.
   * @returns {Promise<void>}
   */
  _readSAT() {
    const table = new AllocationTable(this);
    this._SAT = table;
    return table.load(this._MSAT);
  }
  /**
   * Loads the short-sector allocation table.
   * @returns {Promise<void>} rejects when the chain length differs from the
   *   size recorded in the header
   */
  _readSSAT() {
    const { SSATSecId, SSATSize } = this._header;
    const chain = this._SAT.getSecIdChain(SSATSecId);
    if (chain.length != SSATSize) {
      return Promise.reject(new Error("Invalid Short Sector Allocation Table"));
    }
    const table = new AllocationTable(this);
    this._SSAT = table;
    return table.load(chain);
  }
  /**
   * Loads the directory, wraps its root in a Storage and records the sector
   * chain of the root entry, which is used to locate short sectors.
   * @returns {Promise<void>}
   */
  _readDirectoryTree() {
    const header = this._header;
    const tree = new DirectoryTree(this);
    this._directoryTree = tree;
    const chain = this._SAT.getSecIdChain(header.dirSecId);
    return tree.load(chain).then(() => {
      const { root } = this._directoryTree;
      this._rootStorage = new Storage(this, root);
      this._shortStreamSecIds = this._SAT.getSecIdChain(root.secId);
    });
  }
  /**
   * @param {number} secId - sector id
   * @returns {number} file position of the sector; sector 0 starts one
   *   sector size after the skipped prefix
   */
  _getFileOffsetForSec(secId) {
    const { secSize } = this._header;
    return this._skipBytes + (secId + 1) * secSize;
  }
  /**
   * Maps a short-sector id to a file position by locating it inside the
   * sectors recorded in `_shortStreamSecIds`.
   * @param {number} shortSecId - short-sector id
   * @returns {number} file position of the short sector
   */
  _getFileOffsetForShortSec(shortSecId) {
    const { shortSecSize, secSize } = this._header;
    const positionInShortStream = shortSecId * shortSecSize;
    const containerIndex = Math.floor(positionInShortStream / secSize);
    const withinContainer = positionInShortStream % secSize;
    const containerSecId = this._shortStreamSecIds[containerIndex];
    return this._getFileOffsetForSec(containerSecId) + withinContainer;
  }
  /**
   * @param {string} storageName - name of a storage directly under the root
   * @returns {Storage}
   */
  storage(storageName) {
    return this._rootStorage.storage(storageName);
  }
  /**
   * @param {string} streamName - name of a stream directly under the root
   * @returns {StorageStream}
   */
  stream(streamName) {
    return this._rootStorage.stream(streamName);
  }
};
|
|
376
|
-
module.exports = OleCompoundDoc;
|
|
377
|
-
}
|
|
378
|
-
});
|
|
379
|
-
|
|
380
|
-
// ../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/filters.js
|
|
381
|
-
var require_filters = __commonJS({
|
|
382
|
-
"../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/filters.js"(exports, module) {
|
|
383
|
-
"use strict";
|
|
384
|
-
var replaceTable = [];
|
|
385
|
-
replaceTable[2] = "\0";
|
|
386
|
-
replaceTable[5] = "\0";
|
|
387
|
-
replaceTable[7] = " ";
|
|
388
|
-
replaceTable[8] = "\0";
|
|
389
|
-
replaceTable[10] = "\n";
|
|
390
|
-
replaceTable[11] = "\n";
|
|
391
|
-
replaceTable[12] = "\n";
|
|
392
|
-
replaceTable[13] = "\n";
|
|
393
|
-
replaceTable[30] = "\u2011";
|
|
394
|
-
// Mapping for the 0x80-0x9F range of single-byte text to the Unicode
// characters that Windows-1252 assigns to those bytes. Indexes 129, 141, 143,
// 144 and 157 are intentionally absent: Windows-1252 leaves them undefined.
var binaryToUnicodeTable = [];
binaryToUnicodeTable[128] = "\u20AC";
binaryToUnicodeTable[130] = "\u201A";
binaryToUnicodeTable[131] = "\u0192";
binaryToUnicodeTable[132] = "\u201E";
binaryToUnicodeTable[133] = "\u2026";
binaryToUnicodeTable[134] = "\u2020";
binaryToUnicodeTable[135] = "\u2021";
binaryToUnicodeTable[136] = "\u02C6";
binaryToUnicodeTable[137] = "\u2030";
binaryToUnicodeTable[138] = "\u0160";
binaryToUnicodeTable[139] = "\u2039";
binaryToUnicodeTable[140] = "\u0152";
binaryToUnicodeTable[142] = "\u017D";
binaryToUnicodeTable[145] = "\u2018";
binaryToUnicodeTable[146] = "\u2019";
binaryToUnicodeTable[147] = "\u201C";
binaryToUnicodeTable[148] = "\u201D";
binaryToUnicodeTable[149] = "\u2022";
binaryToUnicodeTable[150] = "\u2013";
binaryToUnicodeTable[151] = "\u2014";
binaryToUnicodeTable[152] = "\u02DC";
binaryToUnicodeTable[153] = "\u2122";
binaryToUnicodeTable[154] = "\u0161";
binaryToUnicodeTable[155] = "\u203A";
binaryToUnicodeTable[156] = "\u0153";
binaryToUnicodeTable[158] = "\u017E";
binaryToUnicodeTable[159] = "\u0178";
/**
 * Replaces every character in the range U+0080..U+009F with its table entry.
 *
 * Characters without an entry are kept unchanged. Previously the replacement
 * callback returned `undefined` for them, which `String.prototype.replace`
 * turns into the literal text "undefined"; this also affected 0x80 (the Euro
 * sign), which had no entry.
 *
 * @param {string} string - text in which each char code is one source byte
 * @returns {string} text with the mapped characters substituted
 */
module.exports.binaryToUnicode = (string) => {
  return string.replace(/[\x80-\x9f]/g, (match) => {
    const mapped = binaryToUnicodeTable[match.charCodeAt(0)];
    return mapped === undefined ? match : mapped;
  });
};
|
|
424
|
-
/**
 * Normalises raw Word text: substitutes control characters using
 * `replaceTable`, unwraps field markup and strips remaining low control
 * characters.
 *
 * The first pass now matches 0x1E, which `replaceTable` maps but the pattern
 * never selected, and drops any matched character that has no table entry.
 * Before, 0x1F was matched without an entry, so the callback returned
 * `undefined` and the literal text "undefined" was inserted into the output.
 *
 * @param {string} string - raw text
 * @returns {string} cleaned text
 */
module.exports.clean = (string) => {
  string = string.replace(/[\x02\x05\x07\x08\x0a\x0b\x0c\x0d\x1e\x1f]/g, (match) => {
    const replacement = replaceTable[match.charCodeAt(0)];
    return replacement === undefined ? "" : replacement;
  });
  // Fields are delimited by 0x13 ... 0x14 ... 0x15 and may nest. Each pass
  // replaces the innermost fields by the text after the 0x14 separator;
  // repeat until a pass makes no replacement.
  let called = true;
  while (called) {
    called = false;
    string = string.replace(/(?:\x13[^\x13\x14\x15]*\x14?([^\x13\x14\x15]*)\x15)/g, (match, p1) => {
      called = true;
      return p1;
    });
  }
  return string.replace(/[\x00-\x07]/g, "");
};
|
|
436
|
-
var filterTable = [];
|
|
437
|
-
filterTable[8194] = " ";
|
|
438
|
-
filterTable[8195] = " ";
|
|
439
|
-
filterTable[8210] = "-";
|
|
440
|
-
filterTable[8211] = "-";
|
|
441
|
-
filterTable[8212] = "-";
|
|
442
|
-
filterTable[8216] = "'";
|
|
443
|
-
filterTable[8217] = "'";
|
|
444
|
-
filterTable[8220] = '"';
|
|
445
|
-
filterTable[8221] = '"';
|
|
446
|
-
/**
 * Substitutes each of the listed Unicode space, dash and quote characters
 * with the value stored for its char code in `filterTable`.
 *
 * @param {string} string - text to filter
 * @returns {string} filtered text
 */
module.exports.filter = (string) => {
  const lookup = (character) => filterTable[character.charCodeAt(0)];
  return string.replace(/[\u2002\u2003\u2012\u2013\u2014\u2018\u2019\u201c\u201d]/g, lookup);
};
|
|
449
|
-
}
|
|
450
|
-
});
|
|
451
|
-
|
|
452
|
-
// ../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/document.js
|
|
453
|
-
var require_document = __commonJS({
|
|
454
|
-
"../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/document.js"(exports, module) {
|
|
455
|
-
"use strict";
|
|
456
|
-
var { filter } = require_filters();
|
|
457
|
-
/**
 * Container for the text parts extracted from a Word file. Each accessor
 * returns the stored text, passed through `filter` unless the caller sets
 * `options.filterUnicode` to a value loosely equal to `false`.
 */
var Document = class {
  constructor() {
    this._body = "";
    this._footnotes = "";
    this._endnotes = "";
    this._headers = "";
    this._footers = "";
    this._annotations = "";
    this._textboxes = "";
    this._headerTextboxes = "";
  }
  /**
   * Returns the main body text.
   * @param {Object} [options]
   * @param {boolean} [options.filterUnicode] - pass `false` to skip `filter`
   * @returns {string} the body text
   */
  getBody(options) {
    const { filterUnicode } = options || {};
    const text = this._body;
    if (filterUnicode == false) {
      return text;
    }
    return filter(text);
  }
  /**
   * Returns the footnote text.
   * @param {Object} [options]
   * @param {boolean} [options.filterUnicode] - pass `false` to skip `filter`
   * @returns {string} the footnote text
   */
  getFootnotes(options) {
    const { filterUnicode } = options || {};
    const text = this._footnotes;
    if (filterUnicode == false) {
      return text;
    }
    return filter(text);
  }
  /**
   * Returns the endnote text.
   * @param {Object} [options]
   * @param {boolean} [options.filterUnicode] - pass `false` to skip `filter`
   * @returns {string} the endnote text
   */
  getEndnotes(options) {
    const { filterUnicode } = options || {};
    const text = this._endnotes;
    if (filterUnicode == false) {
      return text;
    }
    return filter(text);
  }
  /**
   * Returns the header text, followed by the footer text unless disabled.
   * @param {Object} [options]
   * @param {boolean} [options.filterUnicode] - pass `false` to skip `filter`
   * @param {boolean} [options.includeFooters] - pass `false` to leave the
   *   footer text out
   * @returns {string} the header (and footer) text
   */
  getHeaders(options) {
    const { filterUnicode, includeFooters } = options || {};
    const footerPart = includeFooters == false ? "" : this._footers;
    const text = this._headers + footerPart;
    if (filterUnicode == false) {
      return text;
    }
    return filter(text);
  }
  /**
   * Returns the footer text.
   * @param {Object} [options]
   * @param {boolean} [options.filterUnicode] - pass `false` to skip `filter`
   * @returns {string} the footer text
   */
  getFooters(options) {
    const { filterUnicode } = options || {};
    const text = this._footers;
    if (filterUnicode == false) {
      return text;
    }
    return filter(text);
  }
  /**
   * Returns the annotation text.
   * @param {Object} [options]
   * @param {boolean} [options.filterUnicode] - pass `false` to skip `filter`
   * @returns {string} the annotation text
   */
  getAnnotations(options) {
    const { filterUnicode } = options || {};
    const text = this._annotations;
    if (filterUnicode == false) {
      return text;
    }
    return filter(text);
  }
  /**
   * Returns the text box content. Body and header/footer text boxes are
   * joined with a newline when both are included.
   * @param {Object} [options]
   * @param {boolean} [options.filterUnicode] - pass `false` to skip `filter`
   * @param {boolean} [options.includeHeadersAndFooters] - pass `false` to
   *   leave header/footer text boxes out
   * @param {boolean} [options.includeBody] - pass `false` to leave body text
   *   boxes out
   * @returns {string} the text box content
   */
  getTextboxes(options) {
    const { filterUnicode, includeBody, includeHeadersAndFooters } = options || {};
    const parts = [];
    if (includeBody != false) {
      parts.push(this._textboxes);
    }
    if (includeHeadersAndFooters != false) {
      parts.push(this._headerTextboxes);
    }
    const text = parts.join("\n");
    if (filterUnicode == false) {
      return text;
    }
    return filter(text);
  }
};
|
|
566
|
-
module.exports = Document;
|
|
567
|
-
}
|
|
568
|
-
});
|
|
569
|
-
|
|
570
|
-
// ../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/word-ole-extractor.js
|
|
571
|
-
var require_word_ole_extractor = __commonJS({
|
|
572
|
-
"../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/word-ole-extractor.js"(exports, module) {
|
|
573
|
-
"use strict";
|
|
574
|
-
var OleCompoundDoc = require_ole_compound_doc();
|
|
575
|
-
var Document = require_document();
|
|
576
|
-
var { binaryToUnicode, clean } = require_filters();
|
|
577
|
-
var sprmCFRMarkDel = 0;
|
|
578
|
-
/**
 * Finds the first piece whose `endCp` is at or after `position`.
 *
 * @param {Object[]} pieces - text pieces, each with an `endCp`
 * @param {number} position - character position to locate
 * @returns {number|undefined} index of the piece, or `undefined` when no
 *   piece qualifies
 */
var getPieceIndexByCP = (pieces, position) => {
  const found = pieces.findIndex((piece) => position <= piece.endCp);
  // Callers rely on `undefined`, not -1, for "not found".
  return found === -1 ? undefined : found;
};
|
|
586
|
-
/**
 * Finds the first piece whose `endFilePos` is at or after `position`.
 *
 * @param {Object[]} pieces - text pieces, each with an `endFilePos`
 * @param {number} position - file position to locate
 * @returns {number|undefined} index of the piece, or `undefined` when no
 *   piece qualifies
 */
var getPieceIndexByFilePos = (pieces, position) => {
  const found = pieces.findIndex((piece) => position <= piece.endFilePos);
  // Callers rely on `undefined`, not -1, for "not found".
  return found === -1 ? undefined : found;
};
|
|
594
|
-
/**
 * Concatenates the text between two character positions, which may span
 * several pieces.
 *
 * @param {Object[]} pieces - text pieces with `startCp`, `endCp` and `text`
 * @param {number} start - first character position
 * @param {number} end - end character position
 * @returns {string} the collected text; empty when `start` lies beyond the
 *   last piece
 */
function getTextRangeByCP(pieces, start, end) {
  const firstIndex = getPieceIndexByCP(pieces, start);
  const lastIndex = getPieceIndexByCP(pieces, end);
  let collected = "";
  for (let index = firstIndex; index <= lastIndex; index++) {
    const piece = pieces[index];
    // Only the first and last piece are cut; `substring` clamps the upper
    // bound used for the pieces in between.
    const from = index === firstIndex ? start - piece.startCp : 0;
    const to = index === lastIndex ? end - piece.startCp : piece.endCp;
    collected += piece.text.substring(from, to);
  }
  return collected;
}
|
|
606
|
-
/**
 * Overwrites the part of `piece.text` that falls inside the character range
 * `[start, end)` with repetitions of `character`. The range is clamped to
 * the piece; `piece.text` is replaced in place.
 *
 * @param {Object} piece - piece with `startCp`, `length` and `text`
 * @param {number} start - first character position to overwrite
 * @param {number} end - character position after the last one to overwrite
 * @param {string} character - fill character
 */
function fillPieceRange(piece, start, end, character) {
  const lower = piece.startCp;
  const upper = lower + piece.length;
  const text = piece.text;
  const from = start < lower ? lower : start;
  const to = end > upper ? upper : end;
  const head = from == lower ? "" : text.slice(0, from - lower);
  const filler = "".padStart(to - from, character);
  // `to - upper` is negative here, so the slice counts from the end.
  const tail = to == upper ? "" : text.slice(to - upper);
  piece.text = head + filler + tail;
}
|
|
615
|
-
/**
 * Overwrites the part of `piece.text` that falls inside the file-position
 * range `[start, end)` with repetitions of `character`. Positions are
 * converted to character offsets by dividing by `piece.bpc`. The range is
 * clamped to the piece; `piece.text` is replaced in place.
 *
 * @param {Object} piece - piece with `startFilePos`, `size`, `bpc`, `text`
 * @param {number} start - first file position to overwrite
 * @param {number} end - file position after the last one to overwrite
 * @param {string} character - fill character
 */
function fillPieceRangeByFilePos(piece, start, end, character) {
  const lower = piece.startFilePos;
  const upper = lower + piece.size;
  const text = piece.text;
  const from = start < lower ? lower : start;
  const to = end > upper ? upper : end;
  const head = from == lower ? "" : text.slice(0, (from - lower) / piece.bpc);
  const filler = "".padStart((to - from) / piece.bpc, character);
  // `to - upper` is negative here, so the slice counts from the end.
  const tail = to == upper ? "" : text.slice((to - upper) / piece.bpc);
  piece.text = head + filler + tail;
}
|
|
624
|
-
/**
 * Overwrites the character range `[start, end)` across every piece it
 * touches by calling `fillPieceRange` on each of them.
 *
 * @param {Object[]} pieces - text pieces
 * @param {number} start - first character position to overwrite
 * @param {number} end - end character position
 * @param {string} character - fill character
 */
function replaceSelectedRange(pieces, start, end, character) {
  const firstIndex = getPieceIndexByCP(pieces, start);
  const lastIndex = getPieceIndexByCP(pieces, end);
  let index = firstIndex;
  while (index <= lastIndex) {
    fillPieceRange(pieces[index], start, end, character);
    index++;
  }
}
|
|
632
|
-
/**
 * Overwrites the file-position range `[start, end)` across every piece it
 * touches by calling `fillPieceRangeByFilePos` on each of them.
 *
 * @param {Object[]} pieces - text pieces
 * @param {number} start - first file position to overwrite
 * @param {number} end - end file position
 * @param {string} character - fill character
 */
function replaceSelectedRangeByFilePos(pieces, start, end, character) {
  const firstIndex = getPieceIndexByFilePos(pieces, start);
  const lastIndex = getPieceIndexByFilePos(pieces, end);
  let index = firstIndex;
  while (index <= lastIndex) {
    fillPieceRangeByFilePos(pieces[index], start, end, character);
    index++;
  }
}
|
|
640
|
-
/**
 * Blanks out the file-position range `[start, end)` by overwriting it with
 * NUL characters.
 *
 * @param {Object[]} pieces - text pieces
 * @param {number} start - first file position of the range
 * @param {number} end - end file position of the range
 */
function markDeletedRange(pieces, start, end) {
  const deletedMarker = "\0";
  replaceSelectedRangeByFilePos(pieces, start, end, deletedMarker);
}
|
|
643
|
-
/**
 * Walks a run of sprm records starting at `offset`, invoking `handler` once
 * per record and skipping each record's operand.
 *
 * Every record starts with a 16-bit little-endian code from which the
 * `ispmd` (bits 0-4), `fspec` (bit 9), `sgc` (bits 10-12) and `spra`
 * (bits 13-15) fields are extracted. `spra` selects the operand size.
 *
 * @param {Buffer} buffer - data containing the records
 * @param {number} offset - position of the first record
 * @param {Function} handler - called as
 *   `handler(buffer, operandOffset, sprm, ispmd, fspec, sgc, spra)`
 */
var processSprms = (buffer, offset, handler) => {
  let cursor = offset;
  while (cursor < buffer.length - 1) {
    const sprm = buffer.readUInt16LE(cursor);
    const ispmd = sprm & 31;
    const fspec = sprm >> 9 & 1;
    const sgc = sprm >> 10 & 7;
    const spra = sprm >> 13 & 7;
    cursor += 2;
    handler(buffer, cursor, sprm, ispmd, fspec, sgc, spra);
    switch (spra) {
      case 0:
      case 1:
        cursor += 1;
        break;
      case 2:
      case 4:
      case 5:
        cursor += 2;
        break;
      case 3:
        cursor += 4;
        break;
      case 6:
        // Variable size: a length byte followed by that many operand bytes.
        cursor += buffer.readUInt8(cursor) + 1;
        break;
      case 7:
        cursor += 3;
        break;
      default:
        throw new Error("Unparsed sprm");
    }
  }
};
|
|
678
|
-
/**
 * Extracts text from a binary Word (OLE compound) document.
 *
 * State collected while parsing:
 *  - `_pieces`: text pieces in character-position order (see writePieces)
 *  - `_bookmarks`: bookmark name -> `{ start, end }` character positions
 *  - `_boundaries`: character counts read from the WordDocument stream header
 *  - `_taggedHeaders`: header/footer story texts tagged by type
 */
var WordOleExtractor = class {
  constructor() {
    this._pieces = [];
    this._bookmarks = {};
    this._boundaries = {};
    this._taggedHeaders = [];
  }

  /**
   * The main extraction method. Opens the OLE compound document behind
   * `reader`, buffers its "WordDocument" stream and hands it to
   * {@link extractWordDocument}.
   * @param {*} reader passed straight to the OleCompoundDoc constructor
   * @returns a Promise for whatever extractWordDocument resolves to
   */
  extract(reader) {
    const document = new OleCompoundDoc(reader);
    return document.read().then(
      () => this.documentStream(document, "WordDocument")
        .then((stream) => this.streamBuffer(stream))
        .then((buffer) => this.extractWordDocument(document, buffer))
    );
  }

  /**
   * Builds and returns a {@link Document} from the already extracted pieces.
   * The stories are laid out back to back in character-position space, in the
   * order body, footnotes, headers/footers, annotations, endnotes, textboxes,
   * header textboxes; each one is only read when its count is non-zero.
   * @returns a {@link Document} object
   */
  buildDocument() {
    const document = new Document();
    const pieces = this._pieces;
    const bounds = this._boundaries;
    let start = 0;

    // Reads one non-body story; the final character position of the story is
    // left out of the range (hence the `- 1`), then the cursor moves past it.
    const takeStory = (length) => {
      const text = clean(getTextRangeByCP(pieces, start, start + length - 1));
      start += length;
      return text;
    };
    // Joins the texts of all tagged header stories of the given type.
    const taggedText = (type) =>
      clean(this._taggedHeaders.filter((s) => s.type === type).map((s) => s.text).join(""));

    // The body uses the full range, without the `- 1` applied to other stories.
    document._body = clean(getTextRangeByCP(pieces, start, start + bounds.ccpText));
    start += bounds.ccpText;

    if (bounds.ccpFtn) {
      document._footnotes = takeStory(bounds.ccpFtn);
    }
    if (bounds.ccpHdd) {
      // Header/footer text comes from normalizeHeaders, not from the raw range.
      document._headers = taggedText("headers");
      document._footers = taggedText("footers");
      start += bounds.ccpHdd;
    }
    if (bounds.ccpAtn) {
      document._annotations = takeStory(bounds.ccpAtn);
    }
    if (bounds.ccpEdn) {
      document._endnotes = takeStory(bounds.ccpEdn);
    }
    if (bounds.ccpTxbx) {
      document._textboxes = takeStory(bounds.ccpTxbx);
    }
    if (bounds.ccpHdrTxbx) {
      document._headerTextboxes = takeStory(bounds.ccpHdrTxbx);
    }
    return document;
  }

  /**
   * Main logic top level function for unpacking a Word document.
   * @param {*} document the OLE document
   * @param {*} buffer contents of the "WordDocument" stream
   * @returns a Promise which resolves to a {@link Document}; it rejects when
   *   the first 16-bit word of `buffer` is not 42476 (0xA5EC)
   */
  extractWordDocument(document, buffer) {
    const magic = buffer.readUInt16LE(0);
    if (magic !== 42476) {
      return Promise.reject(new Error(`This does not seem to be a Word document: Invalid magic number: ${magic.toString(16)}`));
    }

    // Bit 0x0200 of the flags word at offset 10 selects which table stream to use.
    const flags = buffer.readUInt16LE(10);
    const streamName = (flags & 512) !== 0 ? "1Table" : "0Table";

    return this.documentStream(document, streamName)
      .then((stream) => this.streamBuffer(stream))
      .then((streamBuffer) => {
        this._boundaries.fcMin = buffer.readUInt32LE(24);
        this._boundaries.ccpText = buffer.readUInt32LE(76);
        this._boundaries.ccpFtn = buffer.readUInt32LE(80);
        this._boundaries.ccpHdd = buffer.readUInt32LE(84);
        this._boundaries.ccpAtn = buffer.readUInt32LE(92);
        this._boundaries.ccpEdn = buffer.readUInt32LE(96);
        this._boundaries.ccpTxbx = buffer.readUInt32LE(100);
        this._boundaries.ccpHdrTxbx = buffer.readUInt32LE(104);

        // Order matters: the property and header passes rewrite the pieces
        // produced by writePieces, and buildDocument reads the final result.
        this.writeBookmarks(buffer, streamBuffer);
        this.writePieces(buffer, streamBuffer);
        this.writeCharacterProperties(buffer, streamBuffer);
        this.writeParagraphProperties(buffer, streamBuffer);
        this.normalizeHeaders(buffer, streamBuffer);
        return this.buildDocument();
      });
  }

  /**
   * Returns a promise that resolves to the named stream.
   * @param {*} document
   * @param {*} streamName
   * @returns a promise that resolves to the named stream
   */
  documentStream(document, streamName) {
    return Promise.resolve(document.stream(streamName));
  }

  /**
   * Returns a promise that resolves to a Buffer containing the contents of
   * the given stream.
   * @param {*} stream an emitter of "data", "error" and "end" events
   * @returns a promise that resolves to the stream contents
   */
  streamBuffer(stream) {
    return new Promise((resolve, reject) => {
      const chunks = [];
      stream.on("data", (chunk) => chunks.push(chunk));
      stream.on("error", (error) => reject(error));
      stream.on("end", () => resolve(Buffer.concat(chunks)));
      return stream;
    });
  }

  /**
   * Walks the field table referenced at offsets 282/286 of `buffer`.
   * Field decoding is not implemented: the entries are read and discarded, so
   * the only observable effect is a RangeError on an out-of-bounds table.
   * @param {*} buffer
   * @param {*} tableBuffer
   * @param {*} result unused
   */
  writeFields(buffer, tableBuffer, result) {
    const fcPlcffldMom = buffer.readInt32LE(282);
    const lcbPlcffldMom = buffer.readUInt32LE(286);
    if (lcbPlcffldMom === 0) {
      return;
    }
    const fieldCount = (lcbPlcffldMom - 4) / 6;
    const dataOffset = (fieldCount + 1) * 4;
    const plcffldMom = tableBuffer.slice(fcPlcffldMom, fcPlcffldMom + lcbPlcffldMom);
    for (let i = 0; i < fieldCount; i++) {
      plcffldMom.readUInt32LE(i * 4);
      plcffldMom.readUInt16LE(dataOffset + i * 2);
    }
  }

  /**
   * Extracts and stores the document bookmarks into `_bookmarks`, keyed by
   * bookmark name, each with its `start` and `end` character positions.
   *
   * Names are stored as length-prefixed UTF-16LE strings; the n-th name is
   * paired with the n-th start position and the n-th end position.
   * NOTE(review): the start table also carries per-entry data after its
   * positions which may hold an explicit index into the end table - confirm
   * against the file format specification before relying on `end`.
   * @param {*} buffer
   * @param {*} tableBuffer
   * @throws {Error} when the name table is not flagged as two-byte (0xFFFF)
   */
  writeBookmarks(buffer, tableBuffer) {
    const fcSttbfBkmk = buffer.readUInt32LE(322);
    const lcbSttbfBkmk = buffer.readUInt32LE(326);
    const fcPlcfBkf = buffer.readUInt32LE(330);
    const lcbPlcfBkf = buffer.readUInt32LE(334);
    const fcPlcfBkl = buffer.readUInt32LE(338);
    const lcbPlcfBkl = buffer.readUInt32LE(342);
    if (lcbSttbfBkmk === 0) {
      return;
    }

    const sttbfBkmk = tableBuffer.slice(fcSttbfBkmk, fcSttbfBkmk + lcbSttbfBkmk);
    const plcfBkf = tableBuffer.slice(fcPlcfBkf, fcPlcfBkf + lcbPlcfBkf);
    const plcfBkl = tableBuffer.slice(fcPlcfBkl, fcPlcfBkl + lcbPlcfBkl);

    const fcExtend = sttbfBkmk.readUInt16LE(0);
    if (fcExtend !== 65535) {
      throw new Error("Internal error: unexpected single-byte bookmark data");
    }

    // Names start after the 6-byte table header.
    let offset = 6;
    let index = 0;
    while (offset < lcbSttbfBkmk) {
      const position = index * 4;
      // Stop once either position table has no entry left for this name.
      if (position + 4 > plcfBkf.length || position + 4 > plcfBkl.length) {
        break;
      }
      // The length prefix counts UTF-16 code units, i.e. two bytes each.
      const length = sttbfBkmk.readUInt16LE(offset) * 2;
      const name = sttbfBkmk.toString("ucs2", offset + 2, offset + 2 + length);
      this._bookmarks[name] = {
        start: plcfBkf.readUInt32LE(position),
        end: plcfBkl.readUInt32LE(position)
      };
      offset = offset + length + 2;
      index += 1;
    }
  }

  /**
   * Extracts and stores the document text pieces into `_pieces`. This is
   * probably the most crucial part of text extraction, as it is where we
   * get text corresponding to character positions. These may be stored in a
   * different order in the file compared to the order we want them.
   *
   * @param {*} buffer
   * @param {*} tableBuffer
   * @throws {Error} when the piece table marker (2) is not found
   */
  writePieces(buffer, tableBuffer) {
    let flag;
    let pos = buffer.readUInt32LE(418);

    // Skip any leading records marked 1: one marker byte, a 16-bit length and
    // that many bytes of payload.
    while (true) {
      flag = tableBuffer.readUInt8(pos);
      if (flag !== 1) {
        break;
      }
      pos = pos + 1;
      const skip = tableBuffer.readUInt16LE(pos);
      pos = pos + 2 + skip;
    }

    flag = tableBuffer.readUInt8(pos);
    pos = pos + 1;
    if (flag !== 2) {
      throw new Error("Internal error: ccorrupted Word file");
    }

    const pieceTableSize = tableBuffer.readUInt32LE(pos);
    pos = pos + 4;
    // The table holds (pieces + 1) 4-byte positions plus one 8-byte descriptor per piece.
    const pieces = (pieceTableSize - 4) / 12;
    let startCp = 0;
    let startStream = 0;

    for (let x = 0, end = pieces - 1; x <= end; x++) {
      // The file position sits 2 bytes into the x-th descriptor.
      const offset = pos + (pieces + 1) * 4 + x * 8 + 2;
      let startFilePos = tableBuffer.readUInt32LE(offset);
      let unicode = false;
      if ((startFilePos & 1073741824) === 0) {
        unicode = true;
      } else {
        // Bit 30 set: single-byte text; clear the flag and halve the position.
        startFilePos = startFilePos & ~1073741824;
        startFilePos = Math.floor(startFilePos / 2);
      }

      const lStart = tableBuffer.readUInt32LE(pos + x * 4);
      const lEnd = tableBuffer.readUInt32LE(pos + (x + 1) * 4);
      const totLength = lEnd - lStart;
      const piece = {
        startCp,
        startStream,
        totLength,
        startFilePos,
        unicode,
        bpc: unicode ? 2 : 1
      };
      piece.size = piece.bpc * (lEnd - lStart);

      const textBuffer = buffer.slice(startFilePos, startFilePos + piece.size);
      if (unicode) {
        piece.text = textBuffer.toString("ucs2");
      } else {
        piece.text = binaryToUnicode(textBuffer.toString("binary"));
      }

      piece.length = piece.text.length;
      piece.endCp = piece.startCp + piece.length;
      piece.endStream = piece.startStream + piece.size;
      piece.endFilePos = piece.startFilePos + piece.size;
      startCp = piece.endCp;
      startStream = piece.endStream;
      this._pieces.push(piece);
    }
  }

  /**
   * Processes the headers and footers. The main logic here is that we might have a mix
   * of "real" and "pseudo" headers. For example, a footnote generates some footnote
   * separator footer elements, which, unless they contain something interesting, we
   * can dispense with. In fact, we want to dispense with anything which is made up of
   * whitespace and control characters, in general. This means locating the segments of
   * text in the extracted pieces, and conditionally replacing them with nulls.
   *
   * @param {*} buffer
   * @param {*} tableBuffer
   */
  normalizeHeaders(buffer, tableBuffer) {
    const pieces = this._pieces;
    const fcPlcfhdd = buffer.readUInt32LE(242);
    const lcbPlcfhdd = buffer.readUInt32LE(246);
    // Fewer than two positions means there is no story to delimit.
    if (lcbPlcfhdd < 8) {
      return;
    }

    // Header stories follow the body and footnote text.
    const offset = this._boundaries.ccpText + this._boundaries.ccpFtn;
    const ccpHdd = this._boundaries.ccpHdd;
    const plcHdd = tableBuffer.slice(fcPlcfhdd, fcPlcfhdd + lcbPlcfhdd);
    const plcHddCount = lcbPlcfhdd / 4;

    let start = offset + plcHdd.readUInt32LE(0);
    for (let i = 1; i < plcHddCount; i++) {
      let end = offset + plcHdd.readUInt32LE(i * 4);
      if (end > offset + ccpHdd) {
        end = offset + ccpHdd;
      }

      const string = getTextRangeByCP(pieces, start, end);
      const story = i - 1;
      // The first six stories are separators; after that the type repeats every six.
      if ([0, 1, 2].includes(story)) {
        this._taggedHeaders.push({ type: "footnoteSeparators", text: string });
      } else if ([3, 4, 5].includes(story)) {
        this._taggedHeaders.push({ type: "endSeparators", text: string });
      } else if ([0, 1, 4].includes(story % 6)) {
        this._taggedHeaders.push({ type: "headers", text: string });
      } else if ([2, 3, 5].includes(story % 6)) {
        this._taggedHeaders.push({ type: "footers", text: string });
      }

      if (!/[^\r\n\u0002-\u0008]/.test(string)) {
        // Nothing but line breaks and control characters: blank the whole story.
        replaceSelectedRange(pieces, start, end, "\0");
      } else {
        // Otherwise only the story's final character is blanked.
        replaceSelectedRange(pieces, end - 1, end, "\0");
      }
      start = end;
    }
  }

  /**
   * Scans the paragraph property pages referenced at offsets 258/262 of
   * `buffer` and, for every run carrying sprm 9239, replaces the run's file
   * range in the pieces with "\n".
   * @param {*} buffer
   * @param {*} tableBuffer
   */
  writeParagraphProperties(buffer, tableBuffer) {
    const pieces = this._pieces;
    const fcPlcfbtePapx = buffer.readUInt32LE(258);
    const lcbPlcfbtePapx = buffer.readUInt32LE(262);
    const plcBtePapxCount = (lcbPlcfbtePapx - 4) / 8;
    const dataOffset = (plcBtePapxCount + 1) * 4;
    const plcBtePapx = tableBuffer.slice(fcPlcfbtePapx, fcPlcfbtePapx + lcbPlcfbtePapx);

    for (let i = 0; i < plcBtePapxCount; i++) {
      // Each entry names a 512-byte page of the main stream.
      const papxFkpBlock = plcBtePapx.readUInt32LE(dataOffset + i * 4);
      const papxFkpBlockBuffer = buffer.slice(papxFkpBlock * 512, (papxFkpBlock + 1) * 512);
      // The last byte of the page is the number of runs it describes.
      const crun = papxFkpBlockBuffer.readUInt8(511);

      for (let j = 0; j < crun; j++) {
        const rgfc = papxFkpBlockBuffer.readUInt32LE(j * 4);
        const rgfcNext = papxFkpBlockBuffer.readUInt32LE((j + 1) * 4);
        // Run descriptors are 13 bytes each, after the (crun + 1) file positions.
        const cbLocation = (crun + 1) * 4 + j * 13;
        const cbIndex = papxFkpBlockBuffer.readUInt8(cbLocation) * 2;
        const cb = papxFkpBlockBuffer.readUInt8(cbIndex);

        let grpPrlAndIstd = null;
        if (cb !== 0) {
          grpPrlAndIstd = papxFkpBlockBuffer.slice(cbIndex + 1, cbIndex + 1 + 2 * cb - 1);
        } else {
          // A zero count means the real count is stored in the following byte.
          const cb2 = papxFkpBlockBuffer.readUInt8(cbIndex + 1);
          grpPrlAndIstd = papxFkpBlockBuffer.slice(cbIndex + 2, cbIndex + 2 + 2 * cb2);
        }

        // The value is not used; the read throws if fewer than two bytes are present.
        const istd = grpPrlAndIstd.readUInt16LE(0);
        processSprms(grpPrlAndIstd, 2, (buffer2, offset, sprm, ispmd, fspec, sgc, spra) => {
          if (sprm === 9239) {
            replaceSelectedRangeByFilePos(pieces, rgfc, rgfcNext, "\n");
          }
        });
      }
    }
  }

  /**
   * Scans the character property pages referenced at offsets 250/254 of
   * `buffer` and marks as deleted every run whose sprmCFRMarkDel operand has
   * its low bit set.
   * @param {*} buffer
   * @param {*} tableBuffer
   */
  writeCharacterProperties(buffer, tableBuffer) {
    const pieces = this._pieces;
    const fcPlcfbteChpx = buffer.readUInt32LE(250);
    const lcbPlcfbteChpx = buffer.readUInt32LE(254);
    const plcBteChpxCount = (lcbPlcfbteChpx - 4) / 8;
    const dataOffset = (plcBteChpxCount + 1) * 4;
    const plcBteChpx = tableBuffer.slice(fcPlcfbteChpx, fcPlcfbteChpx + lcbPlcfbteChpx);
    let lastDeletionEnd = null;

    for (let i = 0; i < plcBteChpxCount; i++) {
      // Each entry names a 512-byte page of the main stream.
      const chpxFkpBlock = plcBteChpx.readUInt32LE(dataOffset + i * 4);
      const chpxFkpBlockBuffer = buffer.slice(chpxFkpBlock * 512, (chpxFkpBlock + 1) * 512);
      // The last byte of the page is the number of runs it describes.
      const crun = chpxFkpBlockBuffer.readUInt8(511);

      for (let j = 0; j < crun; j++) {
        const rgfc = chpxFkpBlockBuffer.readUInt32LE(j * 4);
        const rgfcNext = chpxFkpBlockBuffer.readUInt32LE((j + 1) * 4);
        const rgb = chpxFkpBlockBuffer.readUInt8((crun + 1) * 4 + j);
        // Zero means the run has no property data.
        if (rgb === 0) {
          continue;
        }

        const chpxOffset = rgb * 2;
        const cb = chpxFkpBlockBuffer.readUInt8(chpxOffset);
        const grpprl = chpxFkpBlockBuffer.slice(chpxOffset + 1, chpxOffset + 1 + cb);
        processSprms(grpprl, 0, (buffer2, offset, sprm, ispmd) => {
          if (ispmd === sprmCFRMarkDel) {
            if ((buffer2[offset] & 1) !== 1) {
              return;
            }
            if (lastDeletionEnd === rgfc) {
              markDeletedRange(pieces, lastDeletionEnd, rgfcNext);
            } else {
              markDeletedRange(pieces, rgfc, rgfcNext);
            }
            lastDeletionEnd = rgfcNext;
          }
        });
      }
    }
  }
};
|
|
1035
|
-
module.exports = WordOleExtractor;
|
|
1036
|
-
}
|
|
1037
|
-
});
|
|
1038
|
-
|
|
1039
|
-
// ../../node_modules/.pnpm/xmlchars@2.2.0/node_modules/xmlchars/xml/1.0/ed5.js
|
|
1040
|
-
var require_ed5 = __commonJS({
|
|
1041
|
-
"../../node_modules/.pnpm/xmlchars@2.2.0/node_modules/xmlchars/xml/1.0/ed5.js"(exports) {
|
|
1042
|
-
"use strict";
|
|
1043
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
1044
|
-
exports.CHAR = " \n\r -\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}";
|
|
1045
|
-
exports.S = " \r\n";
|
|
1046
|
-
exports.NAME_START_CHAR = ":A-Z_a-z\xC0-\xD6\xD8-\xF6\xF8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u{10000}-\u{EFFFF}";
|
|
1047
|
-
exports.NAME_CHAR = "-" + exports.NAME_START_CHAR + ".0-9\xB7\u0300-\u036F\u203F-\u2040";
|
|
1048
|
-
exports.CHAR_RE = new RegExp("^[" + exports.CHAR + "]$", "u");
|
|
1049
|
-
exports.S_RE = new RegExp("^[" + exports.S + "]+$", "u");
|
|
1050
|
-
exports.NAME_START_CHAR_RE = new RegExp("^[" + exports.NAME_START_CHAR + "]$", "u");
|
|
1051
|
-
exports.NAME_CHAR_RE = new RegExp("^[" + exports.NAME_CHAR + "]$", "u");
|
|
1052
|
-
exports.NAME_RE = new RegExp("^[" + exports.NAME_START_CHAR + "][" + exports.NAME_CHAR + "]*$", "u");
|
|
1053
|
-
exports.NMTOKEN_RE = new RegExp("^[" + exports.NAME_CHAR + "]+$", "u");
|
|
1054
|
-
var TAB = 9;
|
|
1055
|
-
var NL = 10;
|
|
1056
|
-
var CR = 13;
|
|
1057
|
-
var SPACE = 32;
|
|
1058
|
-
exports.S_LIST = [SPACE, NL, CR, TAB];
|
|
1059
|
-
/**
 * Tells whether a code point is accepted as a character here: tab, newline,
 * carriage return, or anything in SPACE-0xD7FF, 0xE000-0xFFFD or
 * 0x10000-0x10FFFF.
 * @param c The code point to test.
 * @returns true when the code point is in one of those sets.
 */
function isChar(c) {
  if (c >= SPACE && c <= 0xD7FF) {
    return true;
  }
  if (c === NL || c === CR || c === TAB) {
    return true;
  }
  if (c >= 0xE000 && c <= 0xFFFD) {
    return true;
  }
  return c >= 0x10000 && c <= 0x10FFFF;
}
|
|
1062
|
-
exports.isChar = isChar;
|
|
1063
|
-
/**
 * Tells whether a code point is one of the four whitespace code points
 * (space, newline, carriage return, tab).
 * @param c The code point to test.
 * @returns true when the code point is whitespace.
 */
function isS(c) {
  switch (c) {
    case SPACE:
    case NL:
    case CR:
    case TAB:
      return true;
    default:
      return false;
  }
}
|
|
1066
|
-
exports.isS = isS;
|
|
1067
|
-
/**
 * Tells whether a code point may start a name: ASCII letters, ":" and "_",
 * U+200C, U+200D, and the non-ASCII ranges listed below.
 * @param c The code point to test.
 * @returns true when the code point may start a name.
 */
function isNameStartChar(c) {
  // ASCII letters.
  if (c >= 65 && c <= 90) return true;
  if (c >= 97 && c <= 122) return true;
  // ":" and "_", then U+200C and U+200D.
  if (c === 58 || c === 95) return true;
  if (c === 0x200C || c === 0x200D) return true;
  if (c >= 0xC0 && c <= 0xD6) return true;
  if (c >= 0xD8 && c <= 0xF6) return true;
  if (c >= 0xF8 && c <= 0x2FF) return true;
  if (c >= 0x370 && c <= 0x37D) return true;
  if (c >= 0x37F && c <= 0x1FFF) return true;
  if (c >= 0x2070 && c <= 0x218F) return true;
  if (c >= 0x2C00 && c <= 0x2FEF) return true;
  if (c >= 0x3001 && c <= 0xD7FF) return true;
  if (c >= 0xF900 && c <= 0xFDCF) return true;
  if (c >= 0xFDF0 && c <= 0xFFFD) return true;
  return c >= 0x10000 && c <= 0xEFFFF;
}
|
|
1070
|
-
exports.isNameStartChar = isNameStartChar;
|
|
1071
|
-
/**
 * Tells whether a code point may appear inside a name: anything that may
 * start one, plus digits, "-", ".", U+00B7, U+0300-U+036F and U+203F-U+2040.
 * @param c The code point to test.
 * @returns true when the code point may appear in a name.
 */
function isNameChar(c) {
  if (isNameStartChar(c)) {
    return true;
  }
  // Digits.
  if (c >= 48 && c <= 57) {
    return true;
  }
  // "-", "." and U+00B7.
  if (c === 45 || c === 46 || c === 0xB7) {
    return true;
  }
  if (c >= 0x300 && c <= 0x36F) {
    return true;
  }
  return c >= 0x203F && c <= 0x2040;
}
|
|
1074
|
-
exports.isNameChar = isNameChar;
|
|
1075
|
-
}
|
|
1076
|
-
});
|
|
1077
|
-
|
|
1078
|
-
// ../../node_modules/.pnpm/xmlchars@2.2.0/node_modules/xmlchars/xml/1.1/ed2.js
|
|
1079
|
-
var require_ed2 = __commonJS({
|
|
1080
|
-
"../../node_modules/.pnpm/xmlchars@2.2.0/node_modules/xmlchars/xml/1.1/ed2.js"(exports) {
|
|
1081
|
-
"use strict";
|
|
1082
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
1083
|
-
exports.CHAR = "-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}";
|
|
1084
|
-
exports.RESTRICTED_CHAR = "-\b\v\f-\x7F-\x84\x86-\x9F";
|
|
1085
|
-
exports.S = " \r\n";
|
|
1086
|
-
exports.NAME_START_CHAR = ":A-Z_a-z\xC0-\xD6\xD8-\xF6\xF8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u{10000}-\u{EFFFF}";
|
|
1087
|
-
exports.NAME_CHAR = "-" + exports.NAME_START_CHAR + ".0-9\xB7\u0300-\u036F\u203F-\u2040";
|
|
1088
|
-
exports.CHAR_RE = new RegExp("^[" + exports.CHAR + "]$", "u");
|
|
1089
|
-
exports.RESTRICTED_CHAR_RE = new RegExp("^[" + exports.RESTRICTED_CHAR + "]$", "u");
|
|
1090
|
-
exports.S_RE = new RegExp("^[" + exports.S + "]+$", "u");
|
|
1091
|
-
exports.NAME_START_CHAR_RE = new RegExp("^[" + exports.NAME_START_CHAR + "]$", "u");
|
|
1092
|
-
exports.NAME_CHAR_RE = new RegExp("^[" + exports.NAME_CHAR + "]$", "u");
|
|
1093
|
-
exports.NAME_RE = new RegExp("^[" + exports.NAME_START_CHAR + "][" + exports.NAME_CHAR + "]*$", "u");
|
|
1094
|
-
exports.NMTOKEN_RE = new RegExp("^[" + exports.NAME_CHAR + "]+$", "u");
|
|
1095
|
-
var TAB = 9;
|
|
1096
|
-
var NL = 10;
|
|
1097
|
-
var CR = 13;
|
|
1098
|
-
var SPACE = 32;
|
|
1099
|
-
exports.S_LIST = [SPACE, NL, CR, TAB];
|
|
1100
|
-
/**
 * Tells whether a code point is accepted as a character here: anything in
 * 0x1-0xD7FF, 0xE000-0xFFFD or 0x10000-0x10FFFF.
 * @param c The code point to test.
 * @returns true when the code point is in one of those ranges.
 */
function isChar(c) {
  if (c >= 0x1 && c <= 0xD7FF) {
    return true;
  }
  if (c >= 0xE000 && c <= 0xFFFD) {
    return true;
  }
  return c >= 0x10000 && c <= 0x10FFFF;
}
|
|
1103
|
-
exports.isChar = isChar;
|
|
1104
|
-
/**
 * Tells whether a code point is in the restricted set: 0x1-0x8, 0xB, 0xC,
 * 0xE-0x1F, 0x7F-0x84 or 0x86-0x9F.
 * @param c The code point to test.
 * @returns true when the code point is restricted.
 */
function isRestrictedChar(c) {
  if (c >= 0x1 && c <= 0x8) {
    return true;
  }
  if (c === 0xB || c === 0xC) {
    return true;
  }
  if (c >= 0xE && c <= 0x1F) {
    return true;
  }
  if (c >= 0x7F && c <= 0x84) {
    return true;
  }
  return c >= 0x86 && c <= 0x9F;
}
|
|
1107
|
-
exports.isRestrictedChar = isRestrictedChar;
|
|
1108
|
-
/**
 * Tells whether a code point is an accepted character that is not in the
 * restricted set: tab, newline, carriage return, U+0085, and the open or
 * closed ranges listed below.
 * @param c The code point to test.
 * @returns true when the code point is accepted and not restricted.
 */
function isCharAndNotRestricted(c) {
  // Tab, newline, carriage return.
  if (c === 9 || c === 10 || c === 13) {
    return true;
  }
  // Strictly between 0x1F and 0x7F.
  if (c > 0x1F && c < 0x7F) {
    return true;
  }
  if (c === 0x85) {
    return true;
  }
  // Strictly above 0x9F, up to and including 0xD7FF.
  if (c > 0x9F && c <= 0xD7FF) {
    return true;
  }
  if (c >= 0xE000 && c <= 0xFFFD) {
    return true;
  }
  return c >= 0x10000 && c <= 0x10FFFF;
}
|
|
1111
|
-
exports.isCharAndNotRestricted = isCharAndNotRestricted;
|
|
1112
|
-
/**
 * Tells whether a code point is one of the four whitespace code points
 * (space, newline, carriage return, tab).
 * @param c The code point to test.
 * @returns true when the code point is whitespace.
 */
function isS(c) {
  switch (c) {
    case SPACE:
    case NL:
    case CR:
    case TAB:
      return true;
    default:
      return false;
  }
}
|
|
1115
|
-
exports.isS = isS;
|
|
1116
|
-
/**
 * Tells whether a code point may start a name: ASCII letters, ":" and "_",
 * U+200C, U+200D, and the non-ASCII ranges listed below.
 * @param c The code point to test.
 * @returns true when the code point may start a name.
 */
function isNameStartChar(c) {
  // ASCII letters.
  if (c >= 65 && c <= 90) return true;
  if (c >= 97 && c <= 122) return true;
  // ":" and "_", then U+200C and U+200D.
  if (c === 58 || c === 95) return true;
  if (c === 0x200C || c === 0x200D) return true;
  if (c >= 0xC0 && c <= 0xD6) return true;
  if (c >= 0xD8 && c <= 0xF6) return true;
  if (c >= 0xF8 && c <= 0x2FF) return true;
  if (c >= 0x370 && c <= 0x37D) return true;
  if (c >= 0x37F && c <= 0x1FFF) return true;
  if (c >= 0x2070 && c <= 0x218F) return true;
  if (c >= 0x2C00 && c <= 0x2FEF) return true;
  if (c >= 0x3001 && c <= 0xD7FF) return true;
  if (c >= 0xF900 && c <= 0xFDCF) return true;
  if (c >= 0xFDF0 && c <= 0xFFFD) return true;
  return c >= 0x10000 && c <= 0xEFFFF;
}
|
|
1119
|
-
exports.isNameStartChar = isNameStartChar;
|
|
1120
|
-
/**
 * Tells whether a code point may appear inside a name: anything that may
 * start one, plus digits, "-", ".", U+00B7, U+0300-U+036F and U+203F-U+2040.
 * @param c The code point to test.
 * @returns true when the code point may appear in a name.
 */
function isNameChar(c) {
  if (isNameStartChar(c)) {
    return true;
  }
  // Digits.
  if (c >= 48 && c <= 57) {
    return true;
  }
  // "-", "." and U+00B7.
  if (c === 45 || c === 46 || c === 0xB7) {
    return true;
  }
  if (c >= 0x300 && c <= 0x36F) {
    return true;
  }
  return c >= 0x203F && c <= 0x2040;
}
|
|
1123
|
-
exports.isNameChar = isNameChar;
|
|
1124
|
-
}
|
|
1125
|
-
});
|
|
1126
|
-
|
|
1127
|
-
// ../../node_modules/.pnpm/xmlchars@2.2.0/node_modules/xmlchars/xmlns/1.0/ed3.js
|
|
1128
|
-
var require_ed3 = __commonJS({
|
|
1129
|
-
"../../node_modules/.pnpm/xmlchars@2.2.0/node_modules/xmlchars/xmlns/1.0/ed3.js"(exports) {
|
|
1130
|
-
"use strict";
|
|
1131
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
1132
|
-
exports.NC_NAME_START_CHAR = "A-Z_a-z\xC0-\xD6\xD8-\xF6\xF8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u{10000}-\u{EFFFF}";
|
|
1133
|
-
exports.NC_NAME_CHAR = "-" + exports.NC_NAME_START_CHAR + ".0-9\xB7\u0300-\u036F\u203F-\u2040";
|
|
1134
|
-
exports.NC_NAME_START_CHAR_RE = new RegExp("^[" + exports.NC_NAME_START_CHAR + "]$", "u");
|
|
1135
|
-
exports.NC_NAME_CHAR_RE = new RegExp("^[" + exports.NC_NAME_CHAR + "]$", "u");
|
|
1136
|
-
exports.NC_NAME_RE = new RegExp("^[" + exports.NC_NAME_START_CHAR + "][" + exports.NC_NAME_CHAR + "]*$", "u");
|
|
1137
|
-
/**
 * Tells whether a code point may start a non-colonized name. The accepted set
 * is ASCII letters, "_" and the non-ASCII ranges listed below; ":" is not in it.
 * @param c The code point to test.
 * @returns true when the code point may start a non-colonized name.
 */
function isNCNameStartChar(c) {
  // ASCII upper-case letters, "_", ASCII lower-case letters.
  if (c >= 65 && c <= 90) return true;
  if (c === 95) return true;
  if (c >= 97 && c <= 122) return true;
  if (c >= 0xC0 && c <= 0xD6) return true;
  if (c >= 0xD8 && c <= 0xF6) return true;
  if (c >= 0xF8 && c <= 0x2FF) return true;
  if (c >= 0x370 && c <= 0x37D) return true;
  if (c >= 0x37F && c <= 0x1FFF) return true;
  if (c >= 0x200C && c <= 0x200D) return true;
  if (c >= 0x2070 && c <= 0x218F) return true;
  if (c >= 0x2C00 && c <= 0x2FEF) return true;
  if (c >= 0x3001 && c <= 0xD7FF) return true;
  if (c >= 0xF900 && c <= 0xFDCF) return true;
  if (c >= 0xFDF0 && c <= 0xFFFD) return true;
  return c >= 0x10000 && c <= 0xEFFFF;
}
|
|
1140
|
-
exports.isNCNameStartChar = isNCNameStartChar;
|
|
1141
|
-
/**
 * Tells whether a code point may appear inside a non-colonized name: anything
 * that may start one, plus "-", ".", digits, U+00B7, U+0300-U+036F and
 * U+203F-U+2040.
 * @param c The code point to test.
 * @returns true when the code point may appear in a non-colonized name.
 */
function isNCNameChar(c) {
  if (isNCNameStartChar(c)) {
    return true;
  }
  // "-" and ".".
  if (c === 45 || c === 46) {
    return true;
  }
  // Digits.
  if (c >= 48 && c <= 57) {
    return true;
  }
  if (c === 0xB7) {
    return true;
  }
  if (c >= 0x300 && c <= 0x36F) {
    return true;
  }
  return c >= 0x203F && c <= 0x2040;
}
|
|
1144
|
-
exports.isNCNameChar = isNCNameChar;
|
|
1145
|
-
}
|
|
1146
|
-
});
|
|
1147
|
-
|
|
1148
|
-
// ../../node_modules/.pnpm/saxes@5.0.1/node_modules/saxes/saxes.js
|
|
1149
|
-
var require_saxes = __commonJS({
|
|
1150
|
-
"../../node_modules/.pnpm/saxes@5.0.1/node_modules/saxes/saxes.js"(exports) {
|
|
1151
|
-
"use strict";
|
|
1152
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
1153
|
-
var ed5 = require_ed5();
|
|
1154
|
-
var ed2 = require_ed2();
|
|
1155
|
-
var NSed3 = require_ed3();
|
|
1156
|
-
var isS = ed5.isS;
|
|
1157
|
-
var isChar10 = ed5.isChar;
|
|
1158
|
-
var isNameStartChar = ed5.isNameStartChar;
|
|
1159
|
-
var isNameChar = ed5.isNameChar;
|
|
1160
|
-
var S_LIST = ed5.S_LIST;
|
|
1161
|
-
var NAME_RE = ed5.NAME_RE;
|
|
1162
|
-
var isChar11 = ed2.isChar;
|
|
1163
|
-
var isNCNameStartChar = NSed3.isNCNameStartChar;
|
|
1164
|
-
var isNCNameChar = NSed3.isNCNameChar;
|
|
1165
|
-
var NC_NAME_RE = NSed3.NC_NAME_RE;
|
|
1166
|
-
var XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace";
|
|
1167
|
-
var XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/";
|
|
1168
|
-
var rootNS = {
|
|
1169
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1170
|
-
__proto__: null,
|
|
1171
|
-
xml: XML_NAMESPACE,
|
|
1172
|
-
xmlns: XMLNS_NAMESPACE
|
|
1173
|
-
};
|
|
1174
|
-
var XML_ENTITIES = {
|
|
1175
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1176
|
-
__proto__: null,
|
|
1177
|
-
amp: "&",
|
|
1178
|
-
gt: ">",
|
|
1179
|
-
lt: "<",
|
|
1180
|
-
quot: '"',
|
|
1181
|
-
apos: "'"
|
|
1182
|
-
};
|
|
1183
|
-
var EOC = -1;
|
|
1184
|
-
var NL_LIKE = -2;
|
|
1185
|
-
var S_BEGIN = 0;
|
|
1186
|
-
var S_BEGIN_WHITESPACE = 1;
|
|
1187
|
-
var S_DOCTYPE = 2;
|
|
1188
|
-
var S_DOCTYPE_QUOTE = 3;
|
|
1189
|
-
var S_DTD = 4;
|
|
1190
|
-
var S_DTD_QUOTED = 5;
|
|
1191
|
-
var S_DTD_OPEN_WAKA = 6;
|
|
1192
|
-
var S_DTD_OPEN_WAKA_BANG = 7;
|
|
1193
|
-
var S_DTD_COMMENT = 8;
|
|
1194
|
-
var S_DTD_COMMENT_ENDING = 9;
|
|
1195
|
-
var S_DTD_COMMENT_ENDED = 10;
|
|
1196
|
-
var S_DTD_PI = 11;
|
|
1197
|
-
var S_DTD_PI_ENDING = 12;
|
|
1198
|
-
var S_TEXT = 13;
|
|
1199
|
-
var S_ENTITY = 14;
|
|
1200
|
-
var S_OPEN_WAKA = 15;
|
|
1201
|
-
var S_OPEN_WAKA_BANG = 16;
|
|
1202
|
-
var S_COMMENT = 17;
|
|
1203
|
-
var S_COMMENT_ENDING = 18;
|
|
1204
|
-
var S_COMMENT_ENDED = 19;
|
|
1205
|
-
var S_CDATA = 20;
|
|
1206
|
-
var S_CDATA_ENDING = 21;
|
|
1207
|
-
var S_CDATA_ENDING_2 = 22;
|
|
1208
|
-
var S_PI_FIRST_CHAR = 23;
|
|
1209
|
-
var S_PI_REST = 24;
|
|
1210
|
-
var S_PI_BODY = 25;
|
|
1211
|
-
var S_PI_ENDING = 26;
|
|
1212
|
-
var S_XML_DECL_NAME_START = 27;
|
|
1213
|
-
var S_XML_DECL_NAME = 28;
|
|
1214
|
-
var S_XML_DECL_EQ = 29;
|
|
1215
|
-
var S_XML_DECL_VALUE_START = 30;
|
|
1216
|
-
var S_XML_DECL_VALUE = 31;
|
|
1217
|
-
var S_XML_DECL_SEPARATOR = 32;
|
|
1218
|
-
var S_XML_DECL_ENDING = 33;
|
|
1219
|
-
var S_OPEN_TAG = 34;
|
|
1220
|
-
var S_OPEN_TAG_SLASH = 35;
|
|
1221
|
-
var S_ATTRIB = 36;
|
|
1222
|
-
var S_ATTRIB_NAME = 37;
|
|
1223
|
-
var S_ATTRIB_NAME_SAW_WHITE = 38;
|
|
1224
|
-
var S_ATTRIB_VALUE = 39;
|
|
1225
|
-
var S_ATTRIB_VALUE_QUOTED = 40;
|
|
1226
|
-
var S_ATTRIB_VALUE_CLOSED = 41;
|
|
1227
|
-
var S_ATTRIB_VALUE_UNQUOTED = 42;
|
|
1228
|
-
var S_CLOSE_TAG = 43;
|
|
1229
|
-
var S_CLOSE_TAG_SAW_WHITE = 44;
|
|
1230
|
-
var TAB = 9;
|
|
1231
|
-
var NL = 10;
|
|
1232
|
-
var CR = 13;
|
|
1233
|
-
var SPACE = 32;
|
|
1234
|
-
var BANG = 33;
|
|
1235
|
-
var DQUOTE = 34;
|
|
1236
|
-
var AMP = 38;
|
|
1237
|
-
var SQUOTE = 39;
|
|
1238
|
-
var MINUS = 45;
|
|
1239
|
-
var FORWARD_SLASH = 47;
|
|
1240
|
-
var SEMICOLON = 59;
|
|
1241
|
-
var LESS = 60;
|
|
1242
|
-
var EQUAL = 61;
|
|
1243
|
-
var GREATER = 62;
|
|
1244
|
-
var QUESTION = 63;
|
|
1245
|
-
var OPEN_BRACKET = 91;
|
|
1246
|
-
var CLOSE_BRACKET = 93;
|
|
1247
|
-
var NEL = 133;
|
|
1248
|
-
var LS = 8232;
|
|
1249
|
-
// True when the code point is a double quote or a single quote.
var isQuote = (c) => {
  if (c === DQUOTE) {
    return true;
  }
  return c === SQUOTE;
};
|
|
1250
|
-
var QUOTES = [DQUOTE, SQUOTE];
|
|
1251
|
-
var DOCTYPE_TERMINATOR = [...QUOTES, OPEN_BRACKET, GREATER];
|
|
1252
|
-
var DTD_TERMINATOR = [...QUOTES, LESS, CLOSE_BRACKET];
|
|
1253
|
-
var XML_DECL_NAME_TERMINATOR = [EQUAL, QUESTION, ...S_LIST];
|
|
1254
|
-
var ATTRIB_VALUE_UNQUOTED_TERMINATOR = [...S_LIST, GREATER, AMP, LESS];
|
|
1255
|
-
/**
 * Validates one prefix-to-URI namespace binding and reports every violation
 * through `parser.fail`. Two independent checks run in order: first that the
 * reserved prefixes "xml" and "xmlns" are bound to their own URIs, then that
 * the two reserved URIs are not bound to any other prefix.
 * @param parser Object whose `fail(message)` method receives the errors.
 * @param prefix The prefix being bound ("" for the default namespace).
 * @param uri The URI the prefix is bound to.
 */
function nsPairCheck(parser, prefix, uri) {
  // Check 1: a reserved prefix must point at its reserved URI.
  if (prefix === "xml") {
    if (uri !== XML_NAMESPACE) {
      parser.fail(`xml prefix must be bound to ${XML_NAMESPACE}.`);
    }
  } else if (prefix === "xmlns") {
    if (uri !== XMLNS_NAMESPACE) {
      parser.fail(`xmlns prefix must be bound to ${XMLNS_NAMESPACE}.`);
    }
  }

  // Check 2: a reserved URI must not be used by the wrong prefix.
  if (uri === XMLNS_NAMESPACE) {
    // The xmlns URI is never allowed, whatever the prefix.
    const message = prefix === ""
      ? `the default namespace may not be set to ${uri}.`
      : `may not assign a prefix (even "xmlns") to the URI ${XMLNS_NAMESPACE}.`;
    parser.fail(message);
    return;
  }
  if (uri !== XML_NAMESPACE) {
    return;
  }
  if (prefix === "xml") {
    return;
  }
  if (prefix === "") {
    parser.fail(`the default namespace may not be set to ${uri}.`);
  } else {
    parser.fail("may not assign the xml namespace to another prefix.");
  }
}
|
|
1287
|
-
/**
 * Runs nsPairCheck on every own enumerable prefix of a namespace mapping.
 * @param parser Passed through to nsPairCheck.
 * @param mapping Object mapping prefixes to namespace URIs.
 */
function nsMappingCheck(parser, mapping) {
  const prefixes = Object.keys(mapping);
  for (let i = 0; i < prefixes.length; i++) {
    const prefix = prefixes[i];
    nsPairCheck(parser, prefix, mapping[prefix]);
  }
}
|
|
1292
|
-
// True when NC_NAME_RE accepts the whole string.
var isNCName = (name) => {
  const accepted = NC_NAME_RE.test(name);
  return accepted;
};
|
|
1293
|
-
// True when NAME_RE accepts the whole string.
var isName = (name) => {
  const accepted = NAME_RE.test(name);
  return accepted;
};
|
|
1294
|
-
var FORBIDDEN_START = 0;
|
|
1295
|
-
var FORBIDDEN_BRACKET = 1;
|
|
1296
|
-
var FORBIDDEN_BRACKET_BRACKET = 2;
|
|
1297
|
-
exports.EVENTS = [
|
|
1298
|
-
"xmldecl",
|
|
1299
|
-
"text",
|
|
1300
|
-
"processinginstruction",
|
|
1301
|
-
"doctype",
|
|
1302
|
-
"comment",
|
|
1303
|
-
"opentagstart",
|
|
1304
|
-
"attribute",
|
|
1305
|
-
"opentag",
|
|
1306
|
-
"closetag",
|
|
1307
|
-
"cdata",
|
|
1308
|
-
"error",
|
|
1309
|
-
"end",
|
|
1310
|
-
"ready"
|
|
1311
|
-
];
|
|
1312
|
-
var EVENT_NAME_TO_HANDLER_NAME = {
|
|
1313
|
-
xmldecl: "xmldeclHandler",
|
|
1314
|
-
text: "textHandler",
|
|
1315
|
-
processinginstruction: "piHandler",
|
|
1316
|
-
doctype: "doctypeHandler",
|
|
1317
|
-
comment: "commentHandler",
|
|
1318
|
-
opentagstart: "openTagStartHandler",
|
|
1319
|
-
attribute: "attributeHandler",
|
|
1320
|
-
opentag: "openTagHandler",
|
|
1321
|
-
closetag: "closeTagHandler",
|
|
1322
|
-
cdata: "cdataHandler",
|
|
1323
|
-
error: "errorHandler",
|
|
1324
|
-
end: "endHandler",
|
|
1325
|
-
ready: "readyHandler"
|
|
1326
|
-
};
|
|
1327
|
-
var SaxesParser = class {
|
|
1328
|
-
/**
|
|
1329
|
-
* @param opt The parser options.
|
|
1330
|
-
*/
|
|
1331
|
-
constructor(opt) {
|
|
1332
|
-
this.opt = opt !== null && opt !== void 0 ? opt : {};
|
|
1333
|
-
this.fragmentOpt = !!this.opt.fragment;
|
|
1334
|
-
const xmlnsOpt = this.xmlnsOpt = !!this.opt.xmlns;
|
|
1335
|
-
this.trackPosition = this.opt.position !== false;
|
|
1336
|
-
this.fileName = this.opt.fileName;
|
|
1337
|
-
if (xmlnsOpt) {
|
|
1338
|
-
this.nameStartCheck = isNCNameStartChar;
|
|
1339
|
-
this.nameCheck = isNCNameChar;
|
|
1340
|
-
this.isName = isNCName;
|
|
1341
|
-
this.processAttribs = this.processAttribsNS;
|
|
1342
|
-
this.pushAttrib = this.pushAttribNS;
|
|
1343
|
-
this.ns = Object.assign({ __proto__: null }, rootNS);
|
|
1344
|
-
const additional = this.opt.additionalNamespaces;
|
|
1345
|
-
if (additional != null) {
|
|
1346
|
-
nsMappingCheck(this, additional);
|
|
1347
|
-
Object.assign(this.ns, additional);
|
|
1348
|
-
}
|
|
1349
|
-
} else {
|
|
1350
|
-
this.nameStartCheck = isNameStartChar;
|
|
1351
|
-
this.nameCheck = isNameChar;
|
|
1352
|
-
this.isName = isName;
|
|
1353
|
-
this.processAttribs = this.processAttribsPlain;
|
|
1354
|
-
this.pushAttrib = this.pushAttribPlain;
|
|
1355
|
-
}
|
|
1356
|
-
this.stateTable = [
|
|
1357
|
-
/* eslint-disable @typescript-eslint/unbound-method */
|
|
1358
|
-
this.sBegin,
|
|
1359
|
-
this.sBeginWhitespace,
|
|
1360
|
-
this.sDoctype,
|
|
1361
|
-
this.sDoctypeQuote,
|
|
1362
|
-
this.sDTD,
|
|
1363
|
-
this.sDTDQuoted,
|
|
1364
|
-
this.sDTDOpenWaka,
|
|
1365
|
-
this.sDTDOpenWakaBang,
|
|
1366
|
-
this.sDTDComment,
|
|
1367
|
-
this.sDTDCommentEnding,
|
|
1368
|
-
this.sDTDCommentEnded,
|
|
1369
|
-
this.sDTDPI,
|
|
1370
|
-
this.sDTDPIEnding,
|
|
1371
|
-
this.sText,
|
|
1372
|
-
this.sEntity,
|
|
1373
|
-
this.sOpenWaka,
|
|
1374
|
-
this.sOpenWakaBang,
|
|
1375
|
-
this.sComment,
|
|
1376
|
-
this.sCommentEnding,
|
|
1377
|
-
this.sCommentEnded,
|
|
1378
|
-
this.sCData,
|
|
1379
|
-
this.sCDataEnding,
|
|
1380
|
-
this.sCDataEnding2,
|
|
1381
|
-
this.sPIFirstChar,
|
|
1382
|
-
this.sPIRest,
|
|
1383
|
-
this.sPIBody,
|
|
1384
|
-
this.sPIEnding,
|
|
1385
|
-
this.sXMLDeclNameStart,
|
|
1386
|
-
this.sXMLDeclName,
|
|
1387
|
-
this.sXMLDeclEq,
|
|
1388
|
-
this.sXMLDeclValueStart,
|
|
1389
|
-
this.sXMLDeclValue,
|
|
1390
|
-
this.sXMLDeclSeparator,
|
|
1391
|
-
this.sXMLDeclEnding,
|
|
1392
|
-
this.sOpenTag,
|
|
1393
|
-
this.sOpenTagSlash,
|
|
1394
|
-
this.sAttrib,
|
|
1395
|
-
this.sAttribName,
|
|
1396
|
-
this.sAttribNameSawWhite,
|
|
1397
|
-
this.sAttribValue,
|
|
1398
|
-
this.sAttribValueQuoted,
|
|
1399
|
-
this.sAttribValueClosed,
|
|
1400
|
-
this.sAttribValueUnquoted,
|
|
1401
|
-
this.sCloseTag,
|
|
1402
|
-
this.sCloseTagSawWhite
|
|
1403
|
-
];
|
|
1404
|
-
this._init();
|
|
1405
|
-
}
|
|
1406
|
-
/**
|
|
1407
|
-
* Indicates whether or not the parser is closed. If ``true``, wait for
|
|
1408
|
-
* the ``ready`` event to write again.
|
|
1409
|
-
*/
|
|
1410
|
-
get closed() {
|
|
1411
|
-
return this._closed;
|
|
1412
|
-
}
|
|
1413
|
-
_init() {
|
|
1414
|
-
var _a;
|
|
1415
|
-
this.openWakaBang = "";
|
|
1416
|
-
this.text = "";
|
|
1417
|
-
this.name = "";
|
|
1418
|
-
this.piTarget = "";
|
|
1419
|
-
this.entity = "";
|
|
1420
|
-
this.q = null;
|
|
1421
|
-
this.tags = [];
|
|
1422
|
-
this.tag = null;
|
|
1423
|
-
this.topNS = null;
|
|
1424
|
-
this.chunk = "";
|
|
1425
|
-
this.chunkPosition = 0;
|
|
1426
|
-
this.i = 0;
|
|
1427
|
-
this.prevI = 0;
|
|
1428
|
-
this.carriedFromPrevious = void 0;
|
|
1429
|
-
this.forbiddenState = FORBIDDEN_START;
|
|
1430
|
-
this.attribList = [];
|
|
1431
|
-
const { fragmentOpt } = this;
|
|
1432
|
-
this.state = fragmentOpt ? S_TEXT : S_BEGIN;
|
|
1433
|
-
this.reportedTextBeforeRoot = this.reportedTextAfterRoot = this.closedRoot = this.sawRoot = fragmentOpt;
|
|
1434
|
-
this.xmlDeclPossible = !fragmentOpt;
|
|
1435
|
-
this.xmlDeclExpects = ["version"];
|
|
1436
|
-
this.entityReturnState = void 0;
|
|
1437
|
-
let { defaultXMLVersion } = this.opt;
|
|
1438
|
-
if (defaultXMLVersion === void 0) {
|
|
1439
|
-
if (this.opt.forceXMLVersion === true) {
|
|
1440
|
-
throw new Error("forceXMLVersion set but defaultXMLVersion is not set");
|
|
1441
|
-
}
|
|
1442
|
-
defaultXMLVersion = "1.0";
|
|
1443
|
-
}
|
|
1444
|
-
this.setXMLVersion(defaultXMLVersion);
|
|
1445
|
-
this.positionAtNewLine = 0;
|
|
1446
|
-
this.doctype = false;
|
|
1447
|
-
this._closed = false;
|
|
1448
|
-
this.xmlDecl = {
|
|
1449
|
-
version: void 0,
|
|
1450
|
-
encoding: void 0,
|
|
1451
|
-
standalone: void 0
|
|
1452
|
-
};
|
|
1453
|
-
this.line = 1;
|
|
1454
|
-
this.column = 0;
|
|
1455
|
-
this.ENTITIES = Object.create(XML_ENTITIES);
|
|
1456
|
-
(_a = this.readyHandler) === null || _a === void 0 ? void 0 : _a.call(this);
|
|
1457
|
-
}
|
|
1458
|
-
/**
|
|
1459
|
-
* The stream position the parser is currently looking at. This field is
|
|
1460
|
-
* zero-based.
|
|
1461
|
-
*
|
|
1462
|
-
* This field is not based on counting Unicode characters but is to be
|
|
1463
|
-
* interpreted as a plain index into a JavaScript string.
|
|
1464
|
-
*/
|
|
1465
|
-
get position() {
|
|
1466
|
-
return this.chunkPosition + this.i;
|
|
1467
|
-
}
|
|
1468
|
-
/**
|
|
1469
|
-
* The column number of the next character to be read by the parser.
|
|
1470
|
-
* This field is zero-based. (The first column in a line is 0.)
|
|
1471
|
-
*
|
|
1472
|
-
* This field reports the index at which the next character would be in the
|
|
1473
|
-
* line if the line were represented as a JavaScript string. Note that this
|
|
1474
|
-
* *can* be different to a count based on the number of *Unicode characters*
|
|
1475
|
-
* due to how JavaScript handles astral plane characters.
|
|
1476
|
-
*
|
|
1477
|
-
* See [[column]] for a number that corresponds to a count of Unicode
|
|
1478
|
-
* characters.
|
|
1479
|
-
*/
|
|
1480
|
-
get columnIndex() {
|
|
1481
|
-
return this.position - this.positionAtNewLine;
|
|
1482
|
-
}
|
|
1483
|
-
/**
|
|
1484
|
-
* Set an event listener on an event. The parser supports one handler per
|
|
1485
|
-
* event type. If you try to set an event handler over an existing handler,
|
|
1486
|
-
* the old handler is silently overwritten.
|
|
1487
|
-
*
|
|
1488
|
-
* @param name The event to listen to.
|
|
1489
|
-
*
|
|
1490
|
-
* @param handler The handler to set.
|
|
1491
|
-
*/
|
|
1492
|
-
on(name, handler) {
|
|
1493
|
-
this[EVENT_NAME_TO_HANDLER_NAME[name]] = handler;
|
|
1494
|
-
}
|
|
1495
|
-
/**
|
|
1496
|
-
* Unset an event handler.
|
|
1497
|
-
*
|
|
1498
|
-
* @param name The event to stop listening to.
|
|
1499
|
-
*/
|
|
1500
|
-
off(name) {
|
|
1501
|
-
this[EVENT_NAME_TO_HANDLER_NAME[name]] = void 0;
|
|
1502
|
-
}
|
|
1503
|
-
/**
|
|
1504
|
-
* Make an error object. The error object will have a message that contains
|
|
1505
|
-
* the ``fileName`` option passed at the creation of the parser. If position
|
|
1506
|
-
* tracking was turned on, it will also have line and column number
|
|
1507
|
-
* information.
|
|
1508
|
-
*
|
|
1509
|
-
* @param message The message describing the error to report.
|
|
1510
|
-
*
|
|
1511
|
-
* @returns An error object with a properly formatted message.
|
|
1512
|
-
*/
|
|
1513
|
-
makeError(message) {
|
|
1514
|
-
var _a;
|
|
1515
|
-
let msg = (_a = this.fileName) !== null && _a !== void 0 ? _a : "";
|
|
1516
|
-
if (this.trackPosition) {
|
|
1517
|
-
if (msg.length > 0) {
|
|
1518
|
-
msg += ":";
|
|
1519
|
-
}
|
|
1520
|
-
msg += `${this.line}:${this.column}`;
|
|
1521
|
-
}
|
|
1522
|
-
if (msg.length > 0) {
|
|
1523
|
-
msg += ": ";
|
|
1524
|
-
}
|
|
1525
|
-
return new Error(msg + message);
|
|
1526
|
-
}
|
|
1527
|
-
/**
|
|
1528
|
-
* Report a parsing error. This method is made public so that client code may
|
|
1529
|
-
* check for issues that are outside the scope of this project and can report
|
|
1530
|
-
* errors.
|
|
1531
|
-
*
|
|
1532
|
-
* @param message The error to report.
|
|
1533
|
-
*
|
|
1534
|
-
* @returns this
|
|
1535
|
-
*/
|
|
1536
|
-
fail(message) {
|
|
1537
|
-
const err = this.makeError(message);
|
|
1538
|
-
const handler = this.errorHandler;
|
|
1539
|
-
if (handler === void 0) {
|
|
1540
|
-
throw err;
|
|
1541
|
-
} else {
|
|
1542
|
-
handler(err);
|
|
1543
|
-
}
|
|
1544
|
-
return this;
|
|
1545
|
-
}
|
|
1546
|
-
/**
|
|
1547
|
-
* Write XML data to the parser.
|
|
1548
|
-
*
|
|
1549
|
-
* @param chunk The XML data to write.
|
|
1550
|
-
*
|
|
1551
|
-
* @returns this
|
|
1552
|
-
*/
|
|
1553
|
-
write(chunk) {
|
|
1554
|
-
if (this.closed) {
|
|
1555
|
-
return this.fail("cannot write after close; assign an onready handler.");
|
|
1556
|
-
}
|
|
1557
|
-
let end = false;
|
|
1558
|
-
if (chunk === null) {
|
|
1559
|
-
end = true;
|
|
1560
|
-
chunk = "";
|
|
1561
|
-
} else if (typeof chunk === "object") {
|
|
1562
|
-
chunk = chunk.toString();
|
|
1563
|
-
}
|
|
1564
|
-
if (this.carriedFromPrevious !== void 0) {
|
|
1565
|
-
chunk = `${this.carriedFromPrevious}${chunk}`;
|
|
1566
|
-
this.carriedFromPrevious = void 0;
|
|
1567
|
-
}
|
|
1568
|
-
let limit = chunk.length;
|
|
1569
|
-
const lastCode = chunk.charCodeAt(limit - 1);
|
|
1570
|
-
if (!end && // A trailing CR or surrogate must be carried over to the next
|
|
1571
|
-
// chunk.
|
|
1572
|
-
(lastCode === CR || lastCode >= 55296 && lastCode <= 56319)) {
|
|
1573
|
-
this.carriedFromPrevious = chunk[limit - 1];
|
|
1574
|
-
limit--;
|
|
1575
|
-
chunk = chunk.slice(0, limit);
|
|
1576
|
-
}
|
|
1577
|
-
const { stateTable } = this;
|
|
1578
|
-
this.chunk = chunk;
|
|
1579
|
-
this.i = 0;
|
|
1580
|
-
while (this.i < limit) {
|
|
1581
|
-
stateTable[this.state].call(this);
|
|
1582
|
-
}
|
|
1583
|
-
this.chunkPosition += limit;
|
|
1584
|
-
return end ? this.end() : this;
|
|
1585
|
-
}
|
|
1586
|
-
/**
|
|
1587
|
-
* Close the current stream. Perform final well-formedness checks and reset
|
|
1588
|
-
* the parser state.
|
|
1589
|
-
*
|
|
1590
|
-
* @returns this
|
|
1591
|
-
*/
|
|
1592
|
-
close() {
|
|
1593
|
-
return this.write(null);
|
|
1594
|
-
}
|
|
1595
|
-
/**
|
|
1596
|
-
* Get a single code point out of the current chunk. This updates the current
|
|
1597
|
-
* position if we do position tracking.
|
|
1598
|
-
*
|
|
1599
|
-
* This is the algorithm to use for XML 1.0.
|
|
1600
|
-
*
|
|
1601
|
-
* @returns The character read.
|
|
1602
|
-
*/
|
|
1603
|
-
getCode10() {
|
|
1604
|
-
const { chunk, i } = this;
|
|
1605
|
-
this.prevI = i;
|
|
1606
|
-
this.i = i + 1;
|
|
1607
|
-
if (i >= chunk.length) {
|
|
1608
|
-
return EOC;
|
|
1609
|
-
}
|
|
1610
|
-
const code = chunk.charCodeAt(i);
|
|
1611
|
-
this.column++;
|
|
1612
|
-
if (code < 55296) {
|
|
1613
|
-
if (code >= SPACE || code === TAB) {
|
|
1614
|
-
return code;
|
|
1615
|
-
}
|
|
1616
|
-
switch (code) {
|
|
1617
|
-
case NL:
|
|
1618
|
-
this.line++;
|
|
1619
|
-
this.column = 0;
|
|
1620
|
-
this.positionAtNewLine = this.position;
|
|
1621
|
-
return NL;
|
|
1622
|
-
case CR:
|
|
1623
|
-
if (chunk.charCodeAt(i + 1) === NL) {
|
|
1624
|
-
this.i = i + 2;
|
|
1625
|
-
}
|
|
1626
|
-
this.line++;
|
|
1627
|
-
this.column = 0;
|
|
1628
|
-
this.positionAtNewLine = this.position;
|
|
1629
|
-
return NL_LIKE;
|
|
1630
|
-
default:
|
|
1631
|
-
this.fail("disallowed character.");
|
|
1632
|
-
return code;
|
|
1633
|
-
}
|
|
1634
|
-
}
|
|
1635
|
-
if (code > 56319) {
|
|
1636
|
-
if (!(code >= 57344 && code <= 65533)) {
|
|
1637
|
-
this.fail("disallowed character.");
|
|
1638
|
-
}
|
|
1639
|
-
return code;
|
|
1640
|
-
}
|
|
1641
|
-
const final = 65536 + (code - 55296) * 1024 + (chunk.charCodeAt(i + 1) - 56320);
|
|
1642
|
-
this.i = i + 2;
|
|
1643
|
-
if (final > 1114111) {
|
|
1644
|
-
this.fail("disallowed character.");
|
|
1645
|
-
}
|
|
1646
|
-
return final;
|
|
1647
|
-
}
|
|
1648
|
-
/**
|
|
1649
|
-
* Get a single code point out of the current chunk. This updates the current
|
|
1650
|
-
* position if we do position tracking.
|
|
1651
|
-
*
|
|
1652
|
-
* This is the algorithm to use for XML 1.1.
|
|
1653
|
-
*
|
|
1654
|
-
* @returns {number} The character read.
|
|
1655
|
-
*/
|
|
1656
|
-
getCode11() {
|
|
1657
|
-
const { chunk, i } = this;
|
|
1658
|
-
this.prevI = i;
|
|
1659
|
-
this.i = i + 1;
|
|
1660
|
-
if (i >= chunk.length) {
|
|
1661
|
-
return EOC;
|
|
1662
|
-
}
|
|
1663
|
-
const code = chunk.charCodeAt(i);
|
|
1664
|
-
this.column++;
|
|
1665
|
-
if (code < 55296) {
|
|
1666
|
-
if (code > 31 && code < 127 || code > 159 && code !== LS || code === TAB) {
|
|
1667
|
-
return code;
|
|
1668
|
-
}
|
|
1669
|
-
switch (code) {
|
|
1670
|
-
case NL:
|
|
1671
|
-
this.line++;
|
|
1672
|
-
this.column = 0;
|
|
1673
|
-
this.positionAtNewLine = this.position;
|
|
1674
|
-
return NL;
|
|
1675
|
-
case CR: {
|
|
1676
|
-
const next = chunk.charCodeAt(i + 1);
|
|
1677
|
-
if (next === NL || next === NEL) {
|
|
1678
|
-
this.i = i + 2;
|
|
1679
|
-
}
|
|
1680
|
-
}
|
|
1681
|
-
/* yes, fall through */
|
|
1682
|
-
case NEL:
|
|
1683
|
-
// 0x85
|
|
1684
|
-
case LS:
|
|
1685
|
-
this.line++;
|
|
1686
|
-
this.column = 0;
|
|
1687
|
-
this.positionAtNewLine = this.position;
|
|
1688
|
-
return NL_LIKE;
|
|
1689
|
-
default:
|
|
1690
|
-
this.fail("disallowed character.");
|
|
1691
|
-
return code;
|
|
1692
|
-
}
|
|
1693
|
-
}
|
|
1694
|
-
if (code > 56319) {
|
|
1695
|
-
if (!(code >= 57344 && code <= 65533)) {
|
|
1696
|
-
this.fail("disallowed character.");
|
|
1697
|
-
}
|
|
1698
|
-
return code;
|
|
1699
|
-
}
|
|
1700
|
-
const final = 65536 + (code - 55296) * 1024 + (chunk.charCodeAt(i + 1) - 56320);
|
|
1701
|
-
this.i = i + 2;
|
|
1702
|
-
if (final > 1114111) {
|
|
1703
|
-
this.fail("disallowed character.");
|
|
1704
|
-
}
|
|
1705
|
-
return final;
|
|
1706
|
-
}
|
|
1707
|
-
/**
|
|
1708
|
-
* Like ``getCode`` but with the return value normalized so that ``NL`` is
|
|
1709
|
-
* returned for ``NL_LIKE``.
|
|
1710
|
-
*/
|
|
1711
|
-
getCodeNorm() {
|
|
1712
|
-
const c = this.getCode();
|
|
1713
|
-
return c === NL_LIKE ? NL : c;
|
|
1714
|
-
}
|
|
1715
|
-
unget() {
|
|
1716
|
-
this.i = this.prevI;
|
|
1717
|
-
this.column--;
|
|
1718
|
-
}
|
|
1719
|
-
/**
|
|
1720
|
-
* Capture characters into a buffer until encountering one of a set of
|
|
1721
|
-
* characters.
|
|
1722
|
-
*
|
|
1723
|
-
* @param chars An array of codepoints. Encountering a character in the array
|
|
1724
|
-
* ends the capture. (``chars`` may safely contain ``NL``.)
|
|
1725
|
-
*
|
|
1726
|
-
* @return The character code that made the capture end, or ``EOC`` if we hit
|
|
1727
|
-
* the end of the chunk. The return value cannot be NL_LIKE: NL is returned
|
|
1728
|
-
* instead.
|
|
1729
|
-
*/
|
|
1730
|
-
captureTo(chars) {
|
|
1731
|
-
let { i: start } = this;
|
|
1732
|
-
const { chunk } = this;
|
|
1733
|
-
while (true) {
|
|
1734
|
-
const c = this.getCode();
|
|
1735
|
-
const isNLLike = c === NL_LIKE;
|
|
1736
|
-
const final = isNLLike ? NL : c;
|
|
1737
|
-
if (final === EOC || chars.includes(final)) {
|
|
1738
|
-
this.text += chunk.slice(start, this.prevI);
|
|
1739
|
-
return final;
|
|
1740
|
-
}
|
|
1741
|
-
if (isNLLike) {
|
|
1742
|
-
this.text += `${chunk.slice(start, this.prevI)}
|
|
1743
|
-
`;
|
|
1744
|
-
start = this.i;
|
|
1745
|
-
}
|
|
1746
|
-
}
|
|
1747
|
-
}
|
|
1748
|
-
/**
|
|
1749
|
-
* Capture characters into a buffer until encountering a character.
|
|
1750
|
-
*
|
|
1751
|
-
* @param char The codepoint that ends the capture. **NOTE ``char`` MAY NOT
|
|
1752
|
-
* CONTAIN ``NL``.** Passing ``NL`` will result in buggy behavior.
|
|
1753
|
-
*
|
|
1754
|
-
* @return ``true`` if we ran into the character. Otherwise, we ran into the
|
|
1755
|
-
* end of the current chunk.
|
|
1756
|
-
*/
|
|
1757
|
-
captureToChar(char) {
|
|
1758
|
-
let { i: start } = this;
|
|
1759
|
-
const { chunk } = this;
|
|
1760
|
-
while (true) {
|
|
1761
|
-
let c = this.getCode();
|
|
1762
|
-
switch (c) {
|
|
1763
|
-
case NL_LIKE:
|
|
1764
|
-
this.text += `${chunk.slice(start, this.prevI)}
|
|
1765
|
-
`;
|
|
1766
|
-
start = this.i;
|
|
1767
|
-
c = NL;
|
|
1768
|
-
break;
|
|
1769
|
-
case EOC:
|
|
1770
|
-
this.text += chunk.slice(start);
|
|
1771
|
-
return false;
|
|
1772
|
-
default:
|
|
1773
|
-
}
|
|
1774
|
-
if (c === char) {
|
|
1775
|
-
this.text += chunk.slice(start, this.prevI);
|
|
1776
|
-
return true;
|
|
1777
|
-
}
|
|
1778
|
-
}
|
|
1779
|
-
}
|
|
1780
|
-
/**
|
|
1781
|
-
* Capture characters that satisfy ``isNameChar`` into the ``name`` field of
|
|
1782
|
-
* this parser.
|
|
1783
|
-
*
|
|
1784
|
-
* @return The character code that made the test fail, or ``EOC`` if we hit
|
|
1785
|
-
* the end of the chunk. The return value cannot be NL_LIKE: NL is returned
|
|
1786
|
-
* instead.
|
|
1787
|
-
*/
|
|
1788
|
-
captureNameChars() {
|
|
1789
|
-
const { chunk, i: start } = this;
|
|
1790
|
-
while (true) {
|
|
1791
|
-
const c = this.getCode();
|
|
1792
|
-
if (c === EOC) {
|
|
1793
|
-
this.name += chunk.slice(start);
|
|
1794
|
-
return EOC;
|
|
1795
|
-
}
|
|
1796
|
-
if (!isNameChar(c)) {
|
|
1797
|
-
this.name += chunk.slice(start, this.prevI);
|
|
1798
|
-
return c === NL_LIKE ? NL : c;
|
|
1799
|
-
}
|
|
1800
|
-
}
|
|
1801
|
-
}
|
|
1802
|
-
/**
|
|
1803
|
-
* Skip white spaces.
|
|
1804
|
-
*
|
|
1805
|
-
* @return The character that ended the skip, or ``EOC`` if we hit
|
|
1806
|
-
* the end of the chunk. The return value cannot be NL_LIKE: NL is returned
|
|
1807
|
-
* instead.
|
|
1808
|
-
*/
|
|
1809
|
-
skipSpaces() {
|
|
1810
|
-
while (true) {
|
|
1811
|
-
const c = this.getCodeNorm();
|
|
1812
|
-
if (c === EOC || !isS(c)) {
|
|
1813
|
-
return c;
|
|
1814
|
-
}
|
|
1815
|
-
}
|
|
1816
|
-
}
|
|
1817
|
-
setXMLVersion(version) {
|
|
1818
|
-
this.currentXMLVersion = version;
|
|
1819
|
-
if (version === "1.0") {
|
|
1820
|
-
this.isChar = isChar10;
|
|
1821
|
-
this.getCode = this.getCode10;
|
|
1822
|
-
} else {
|
|
1823
|
-
this.isChar = isChar11;
|
|
1824
|
-
this.getCode = this.getCode11;
|
|
1825
|
-
}
|
|
1826
|
-
}
|
|
1827
|
-
// STATE ENGINE METHODS
|
|
1828
|
-
// This needs to be a state separate from S_BEGIN_WHITESPACE because we want
|
|
1829
|
-
// to be sure never to come back to this state later.
|
|
1830
|
-
sBegin() {
|
|
1831
|
-
if (this.chunk.charCodeAt(0) === 65279) {
|
|
1832
|
-
this.i++;
|
|
1833
|
-
this.column++;
|
|
1834
|
-
}
|
|
1835
|
-
this.state = S_BEGIN_WHITESPACE;
|
|
1836
|
-
}
|
|
1837
|
-
sBeginWhitespace() {
|
|
1838
|
-
const iBefore = this.i;
|
|
1839
|
-
const c = this.skipSpaces();
|
|
1840
|
-
if (this.prevI !== iBefore) {
|
|
1841
|
-
this.xmlDeclPossible = false;
|
|
1842
|
-
}
|
|
1843
|
-
switch (c) {
|
|
1844
|
-
case LESS:
|
|
1845
|
-
this.state = S_OPEN_WAKA;
|
|
1846
|
-
if (this.text.length !== 0) {
|
|
1847
|
-
throw new Error("no-empty text at start");
|
|
1848
|
-
}
|
|
1849
|
-
break;
|
|
1850
|
-
case EOC:
|
|
1851
|
-
break;
|
|
1852
|
-
default:
|
|
1853
|
-
this.unget();
|
|
1854
|
-
this.state = S_TEXT;
|
|
1855
|
-
this.xmlDeclPossible = false;
|
|
1856
|
-
}
|
|
1857
|
-
}
|
|
1858
|
-
sDoctype() {
|
|
1859
|
-
var _a;
|
|
1860
|
-
const c = this.captureTo(DOCTYPE_TERMINATOR);
|
|
1861
|
-
switch (c) {
|
|
1862
|
-
case GREATER: {
|
|
1863
|
-
(_a = this.doctypeHandler) === null || _a === void 0 ? void 0 : _a.call(this, this.text);
|
|
1864
|
-
this.text = "";
|
|
1865
|
-
this.state = S_TEXT;
|
|
1866
|
-
this.doctype = true;
|
|
1867
|
-
break;
|
|
1868
|
-
}
|
|
1869
|
-
case EOC:
|
|
1870
|
-
break;
|
|
1871
|
-
default:
|
|
1872
|
-
this.text += String.fromCodePoint(c);
|
|
1873
|
-
if (c === OPEN_BRACKET) {
|
|
1874
|
-
this.state = S_DTD;
|
|
1875
|
-
} else if (isQuote(c)) {
|
|
1876
|
-
this.state = S_DOCTYPE_QUOTE;
|
|
1877
|
-
this.q = c;
|
|
1878
|
-
}
|
|
1879
|
-
}
|
|
1880
|
-
}
|
|
1881
|
-
sDoctypeQuote() {
|
|
1882
|
-
const q = this.q;
|
|
1883
|
-
if (this.captureToChar(q)) {
|
|
1884
|
-
this.text += String.fromCodePoint(q);
|
|
1885
|
-
this.q = null;
|
|
1886
|
-
this.state = S_DOCTYPE;
|
|
1887
|
-
}
|
|
1888
|
-
}
|
|
1889
|
-
sDTD() {
|
|
1890
|
-
const c = this.captureTo(DTD_TERMINATOR);
|
|
1891
|
-
if (c === EOC) {
|
|
1892
|
-
return;
|
|
1893
|
-
}
|
|
1894
|
-
this.text += String.fromCodePoint(c);
|
|
1895
|
-
if (c === CLOSE_BRACKET) {
|
|
1896
|
-
this.state = S_DOCTYPE;
|
|
1897
|
-
} else if (c === LESS) {
|
|
1898
|
-
this.state = S_DTD_OPEN_WAKA;
|
|
1899
|
-
} else if (isQuote(c)) {
|
|
1900
|
-
this.state = S_DTD_QUOTED;
|
|
1901
|
-
this.q = c;
|
|
1902
|
-
}
|
|
1903
|
-
}
|
|
1904
|
-
sDTDQuoted() {
|
|
1905
|
-
const q = this.q;
|
|
1906
|
-
if (this.captureToChar(q)) {
|
|
1907
|
-
this.text += String.fromCodePoint(q);
|
|
1908
|
-
this.state = S_DTD;
|
|
1909
|
-
this.q = null;
|
|
1910
|
-
}
|
|
1911
|
-
}
|
|
1912
|
-
sDTDOpenWaka() {
|
|
1913
|
-
const c = this.getCodeNorm();
|
|
1914
|
-
this.text += String.fromCodePoint(c);
|
|
1915
|
-
switch (c) {
|
|
1916
|
-
case BANG:
|
|
1917
|
-
this.state = S_DTD_OPEN_WAKA_BANG;
|
|
1918
|
-
this.openWakaBang = "";
|
|
1919
|
-
break;
|
|
1920
|
-
case QUESTION:
|
|
1921
|
-
this.state = S_DTD_PI;
|
|
1922
|
-
break;
|
|
1923
|
-
default:
|
|
1924
|
-
this.state = S_DTD;
|
|
1925
|
-
}
|
|
1926
|
-
}
|
|
1927
|
-
sDTDOpenWakaBang() {
|
|
1928
|
-
const char = String.fromCodePoint(this.getCodeNorm());
|
|
1929
|
-
const owb = this.openWakaBang += char;
|
|
1930
|
-
this.text += char;
|
|
1931
|
-
if (owb !== "-") {
|
|
1932
|
-
this.state = owb === "--" ? S_DTD_COMMENT : S_DTD;
|
|
1933
|
-
this.openWakaBang = "";
|
|
1934
|
-
}
|
|
1935
|
-
}
|
|
1936
|
-
sDTDComment() {
|
|
1937
|
-
if (this.captureToChar(MINUS)) {
|
|
1938
|
-
this.text += "-";
|
|
1939
|
-
this.state = S_DTD_COMMENT_ENDING;
|
|
1940
|
-
}
|
|
1941
|
-
}
|
|
1942
|
-
sDTDCommentEnding() {
|
|
1943
|
-
const c = this.getCodeNorm();
|
|
1944
|
-
this.text += String.fromCodePoint(c);
|
|
1945
|
-
this.state = c === MINUS ? S_DTD_COMMENT_ENDED : S_DTD_COMMENT;
|
|
1946
|
-
}
|
|
1947
|
-
sDTDCommentEnded() {
|
|
1948
|
-
const c = this.getCodeNorm();
|
|
1949
|
-
this.text += String.fromCodePoint(c);
|
|
1950
|
-
if (c === GREATER) {
|
|
1951
|
-
this.state = S_DTD;
|
|
1952
|
-
} else {
|
|
1953
|
-
this.fail("malformed comment.");
|
|
1954
|
-
this.state = S_DTD_COMMENT;
|
|
1955
|
-
}
|
|
1956
|
-
}
|
|
1957
|
-
sDTDPI() {
|
|
1958
|
-
if (this.captureToChar(QUESTION)) {
|
|
1959
|
-
this.text += "?";
|
|
1960
|
-
this.state = S_DTD_PI_ENDING;
|
|
1961
|
-
}
|
|
1962
|
-
}
|
|
1963
|
-
sDTDPIEnding() {
|
|
1964
|
-
const c = this.getCodeNorm();
|
|
1965
|
-
this.text += String.fromCodePoint(c);
|
|
1966
|
-
if (c === GREATER) {
|
|
1967
|
-
this.state = S_DTD;
|
|
1968
|
-
}
|
|
1969
|
-
}
|
|
1970
|
-
sText() {
|
|
1971
|
-
if (this.tags.length !== 0) {
|
|
1972
|
-
this.handleTextInRoot();
|
|
1973
|
-
} else {
|
|
1974
|
-
this.handleTextOutsideRoot();
|
|
1975
|
-
}
|
|
1976
|
-
}
|
|
1977
|
-
sEntity() {
|
|
1978
|
-
let { i: start } = this;
|
|
1979
|
-
const { chunk } = this;
|
|
1980
|
-
loop:
|
|
1981
|
-
while (true) {
|
|
1982
|
-
switch (this.getCode()) {
|
|
1983
|
-
case NL_LIKE:
|
|
1984
|
-
this.entity += `${chunk.slice(start, this.prevI)}
|
|
1985
|
-
`;
|
|
1986
|
-
start = this.i;
|
|
1987
|
-
break;
|
|
1988
|
-
case SEMICOLON: {
|
|
1989
|
-
const { entityReturnState } = this;
|
|
1990
|
-
const entity = this.entity + chunk.slice(start, this.prevI);
|
|
1991
|
-
this.state = entityReturnState;
|
|
1992
|
-
let parsed;
|
|
1993
|
-
if (entity === "") {
|
|
1994
|
-
this.fail("empty entity name.");
|
|
1995
|
-
parsed = "&;";
|
|
1996
|
-
} else {
|
|
1997
|
-
parsed = this.parseEntity(entity);
|
|
1998
|
-
this.entity = "";
|
|
1999
|
-
}
|
|
2000
|
-
if (entityReturnState !== S_TEXT || this.textHandler !== void 0) {
|
|
2001
|
-
this.text += parsed;
|
|
2002
|
-
}
|
|
2003
|
-
break loop;
|
|
2004
|
-
}
|
|
2005
|
-
case EOC:
|
|
2006
|
-
this.entity += chunk.slice(start);
|
|
2007
|
-
break loop;
|
|
2008
|
-
default:
|
|
2009
|
-
}
|
|
2010
|
-
}
|
|
2011
|
-
}
|
|
2012
|
-
sOpenWaka() {
|
|
2013
|
-
const c = this.getCode();
|
|
2014
|
-
if (isNameStartChar(c)) {
|
|
2015
|
-
this.state = S_OPEN_TAG;
|
|
2016
|
-
this.unget();
|
|
2017
|
-
this.xmlDeclPossible = false;
|
|
2018
|
-
} else {
|
|
2019
|
-
switch (c) {
|
|
2020
|
-
case FORWARD_SLASH:
|
|
2021
|
-
this.state = S_CLOSE_TAG;
|
|
2022
|
-
this.xmlDeclPossible = false;
|
|
2023
|
-
break;
|
|
2024
|
-
case BANG:
|
|
2025
|
-
this.state = S_OPEN_WAKA_BANG;
|
|
2026
|
-
this.openWakaBang = "";
|
|
2027
|
-
this.xmlDeclPossible = false;
|
|
2028
|
-
break;
|
|
2029
|
-
case QUESTION:
|
|
2030
|
-
this.state = S_PI_FIRST_CHAR;
|
|
2031
|
-
break;
|
|
2032
|
-
default:
|
|
2033
|
-
this.fail("disallowed character in tag name");
|
|
2034
|
-
this.state = S_TEXT;
|
|
2035
|
-
this.xmlDeclPossible = false;
|
|
2036
|
-
}
|
|
2037
|
-
}
|
|
2038
|
-
}
|
|
2039
|
-
sOpenWakaBang() {
|
|
2040
|
-
this.openWakaBang += String.fromCodePoint(this.getCodeNorm());
|
|
2041
|
-
switch (this.openWakaBang) {
|
|
2042
|
-
case "[CDATA[":
|
|
2043
|
-
if (!this.sawRoot && !this.reportedTextBeforeRoot) {
|
|
2044
|
-
this.fail("text data outside of root node.");
|
|
2045
|
-
this.reportedTextBeforeRoot = true;
|
|
2046
|
-
}
|
|
2047
|
-
if (this.closedRoot && !this.reportedTextAfterRoot) {
|
|
2048
|
-
this.fail("text data outside of root node.");
|
|
2049
|
-
this.reportedTextAfterRoot = true;
|
|
2050
|
-
}
|
|
2051
|
-
this.state = S_CDATA;
|
|
2052
|
-
this.openWakaBang = "";
|
|
2053
|
-
break;
|
|
2054
|
-
case "--":
|
|
2055
|
-
this.state = S_COMMENT;
|
|
2056
|
-
this.openWakaBang = "";
|
|
2057
|
-
break;
|
|
2058
|
-
case "DOCTYPE":
|
|
2059
|
-
this.state = S_DOCTYPE;
|
|
2060
|
-
if (this.doctype || this.sawRoot) {
|
|
2061
|
-
this.fail("inappropriately located doctype declaration.");
|
|
2062
|
-
}
|
|
2063
|
-
this.openWakaBang = "";
|
|
2064
|
-
break;
|
|
2065
|
-
default:
|
|
2066
|
-
if (this.openWakaBang.length >= 7) {
|
|
2067
|
-
this.fail("incorrect syntax.");
|
|
2068
|
-
}
|
|
2069
|
-
}
|
|
2070
|
-
}
|
|
2071
|
-
sComment() {
|
|
2072
|
-
if (this.captureToChar(MINUS)) {
|
|
2073
|
-
this.state = S_COMMENT_ENDING;
|
|
2074
|
-
}
|
|
2075
|
-
}
|
|
2076
|
-
sCommentEnding() {
|
|
2077
|
-
var _a;
|
|
2078
|
-
const c = this.getCodeNorm();
|
|
2079
|
-
if (c === MINUS) {
|
|
2080
|
-
this.state = S_COMMENT_ENDED;
|
|
2081
|
-
(_a = this.commentHandler) === null || _a === void 0 ? void 0 : _a.call(this, this.text);
|
|
2082
|
-
this.text = "";
|
|
2083
|
-
} else {
|
|
2084
|
-
this.text += `-${String.fromCodePoint(c)}`;
|
|
2085
|
-
this.state = S_COMMENT;
|
|
2086
|
-
}
|
|
2087
|
-
}
|
|
2088
|
-
sCommentEnded() {
|
|
2089
|
-
const c = this.getCodeNorm();
|
|
2090
|
-
if (c !== GREATER) {
|
|
2091
|
-
this.fail("malformed comment.");
|
|
2092
|
-
this.text += `--${String.fromCodePoint(c)}`;
|
|
2093
|
-
this.state = S_COMMENT;
|
|
2094
|
-
} else {
|
|
2095
|
-
this.state = S_TEXT;
|
|
2096
|
-
}
|
|
2097
|
-
}
|
|
2098
|
-
sCData() {
|
|
2099
|
-
if (this.captureToChar(CLOSE_BRACKET)) {
|
|
2100
|
-
this.state = S_CDATA_ENDING;
|
|
2101
|
-
}
|
|
2102
|
-
}
|
|
2103
|
-
sCDataEnding() {
|
|
2104
|
-
const c = this.getCodeNorm();
|
|
2105
|
-
if (c === CLOSE_BRACKET) {
|
|
2106
|
-
this.state = S_CDATA_ENDING_2;
|
|
2107
|
-
} else {
|
|
2108
|
-
this.text += `]${String.fromCodePoint(c)}`;
|
|
2109
|
-
this.state = S_CDATA;
|
|
2110
|
-
}
|
|
2111
|
-
}
|
|
2112
|
-
sCDataEnding2() {
|
|
2113
|
-
var _a;
|
|
2114
|
-
const c = this.getCodeNorm();
|
|
2115
|
-
switch (c) {
|
|
2116
|
-
case GREATER: {
|
|
2117
|
-
(_a = this.cdataHandler) === null || _a === void 0 ? void 0 : _a.call(this, this.text);
|
|
2118
|
-
this.text = "";
|
|
2119
|
-
this.state = S_TEXT;
|
|
2120
|
-
break;
|
|
2121
|
-
}
|
|
2122
|
-
case CLOSE_BRACKET:
|
|
2123
|
-
this.text += "]";
|
|
2124
|
-
break;
|
|
2125
|
-
default:
|
|
2126
|
-
this.text += `]]${String.fromCodePoint(c)}`;
|
|
2127
|
-
this.state = S_CDATA;
|
|
2128
|
-
}
|
|
2129
|
-
}
|
|
2130
|
-
// We need this separate state to check the first character of the PI target
|
|
2131
|
-
// with this.nameStartCheck, which allows fewer characters than this.nameCheck.
|
|
2132
|
-
sPIFirstChar() {
|
|
2133
|
-
const c = this.getCodeNorm();
|
|
2134
|
-
if (this.nameStartCheck(c)) {
|
|
2135
|
-
this.piTarget += String.fromCodePoint(c);
|
|
2136
|
-
this.state = S_PI_REST;
|
|
2137
|
-
} else if (c === QUESTION || isS(c)) {
|
|
2138
|
-
this.fail("processing instruction without a target.");
|
|
2139
|
-
this.state = c === QUESTION ? S_PI_ENDING : S_PI_BODY;
|
|
2140
|
-
} else {
|
|
2141
|
-
this.fail("disallowed character in processing instruction name.");
|
|
2142
|
-
this.piTarget += String.fromCodePoint(c);
|
|
2143
|
-
this.state = S_PI_REST;
|
|
2144
|
-
}
|
|
2145
|
-
}
|
|
2146
|
-
sPIRest() {
|
|
2147
|
-
const { chunk, i: start } = this;
|
|
2148
|
-
while (true) {
|
|
2149
|
-
const c = this.getCodeNorm();
|
|
2150
|
-
if (c === EOC) {
|
|
2151
|
-
this.piTarget += chunk.slice(start);
|
|
2152
|
-
return;
|
|
2153
|
-
}
|
|
2154
|
-
if (!this.nameCheck(c)) {
|
|
2155
|
-
this.piTarget += chunk.slice(start, this.prevI);
|
|
2156
|
-
const isQuestion = c === QUESTION;
|
|
2157
|
-
if (isQuestion || isS(c)) {
|
|
2158
|
-
if (this.piTarget === "xml") {
|
|
2159
|
-
if (!this.xmlDeclPossible) {
|
|
2160
|
-
this.fail("an XML declaration must be at the start of the document.");
|
|
2161
|
-
}
|
|
2162
|
-
this.state = isQuestion ? S_XML_DECL_ENDING : S_XML_DECL_NAME_START;
|
|
2163
|
-
} else {
|
|
2164
|
-
this.state = isQuestion ? S_PI_ENDING : S_PI_BODY;
|
|
2165
|
-
}
|
|
2166
|
-
} else {
|
|
2167
|
-
this.fail("disallowed character in processing instruction name.");
|
|
2168
|
-
this.piTarget += String.fromCodePoint(c);
|
|
2169
|
-
}
|
|
2170
|
-
break;
|
|
2171
|
-
}
|
|
2172
|
-
}
|
|
2173
|
-
}
|
|
2174
|
-
sPIBody() {
|
|
2175
|
-
if (this.text.length === 0) {
|
|
2176
|
-
const c = this.getCodeNorm();
|
|
2177
|
-
if (c === QUESTION) {
|
|
2178
|
-
this.state = S_PI_ENDING;
|
|
2179
|
-
} else if (!isS(c)) {
|
|
2180
|
-
this.text = String.fromCodePoint(c);
|
|
2181
|
-
}
|
|
2182
|
-
} else if (this.captureToChar(QUESTION)) {
|
|
2183
|
-
this.state = S_PI_ENDING;
|
|
2184
|
-
}
|
|
2185
|
-
}
|
|
2186
|
-
sPIEnding() {
|
|
2187
|
-
var _a;
|
|
2188
|
-
const c = this.getCodeNorm();
|
|
2189
|
-
if (c === GREATER) {
|
|
2190
|
-
const { piTarget } = this;
|
|
2191
|
-
if (piTarget.toLowerCase() === "xml") {
|
|
2192
|
-
this.fail("the XML declaration must appear at the start of the document.");
|
|
2193
|
-
}
|
|
2194
|
-
(_a = this.piHandler) === null || _a === void 0 ? void 0 : _a.call(this, {
|
|
2195
|
-
target: piTarget,
|
|
2196
|
-
body: this.text
|
|
2197
|
-
});
|
|
2198
|
-
this.piTarget = this.text = "";
|
|
2199
|
-
this.state = S_TEXT;
|
|
2200
|
-
} else if (c === QUESTION) {
|
|
2201
|
-
this.text += "?";
|
|
2202
|
-
} else {
|
|
2203
|
-
this.text += `?${String.fromCodePoint(c)}`;
|
|
2204
|
-
this.state = S_PI_BODY;
|
|
2205
|
-
}
|
|
2206
|
-
this.xmlDeclPossible = false;
|
|
2207
|
-
}
|
|
2208
|
-
sXMLDeclNameStart() {
|
|
2209
|
-
const c = this.skipSpaces();
|
|
2210
|
-
if (c === QUESTION) {
|
|
2211
|
-
this.state = S_XML_DECL_ENDING;
|
|
2212
|
-
return;
|
|
2213
|
-
}
|
|
2214
|
-
if (c !== EOC) {
|
|
2215
|
-
this.state = S_XML_DECL_NAME;
|
|
2216
|
-
this.name = String.fromCodePoint(c);
|
|
2217
|
-
}
|
|
2218
|
-
}
|
|
2219
|
-
sXMLDeclName() {
|
|
2220
|
-
const c = this.captureTo(XML_DECL_NAME_TERMINATOR);
|
|
2221
|
-
if (c === QUESTION) {
|
|
2222
|
-
this.state = S_XML_DECL_ENDING;
|
|
2223
|
-
this.name += this.text;
|
|
2224
|
-
this.text = "";
|
|
2225
|
-
this.fail("XML declaration is incomplete.");
|
|
2226
|
-
return;
|
|
2227
|
-
}
|
|
2228
|
-
if (!(isS(c) || c === EQUAL)) {
|
|
2229
|
-
return;
|
|
2230
|
-
}
|
|
2231
|
-
this.name += this.text;
|
|
2232
|
-
this.text = "";
|
|
2233
|
-
if (!this.xmlDeclExpects.includes(this.name)) {
|
|
2234
|
-
switch (this.name.length) {
|
|
2235
|
-
case 0:
|
|
2236
|
-
this.fail("did not expect any more name/value pairs.");
|
|
2237
|
-
break;
|
|
2238
|
-
case 1:
|
|
2239
|
-
this.fail(`expected the name ${this.xmlDeclExpects[0]}.`);
|
|
2240
|
-
break;
|
|
2241
|
-
default:
|
|
2242
|
-
this.fail(`expected one of ${this.xmlDeclExpects.join(", ")}`);
|
|
2243
|
-
}
|
|
2244
|
-
}
|
|
2245
|
-
this.state = c === EQUAL ? S_XML_DECL_VALUE_START : S_XML_DECL_EQ;
|
|
2246
|
-
}
|
|
2247
|
-
sXMLDeclEq() {
|
|
2248
|
-
const c = this.getCodeNorm();
|
|
2249
|
-
if (c === QUESTION) {
|
|
2250
|
-
this.state = S_XML_DECL_ENDING;
|
|
2251
|
-
this.fail("XML declaration is incomplete.");
|
|
2252
|
-
return;
|
|
2253
|
-
}
|
|
2254
|
-
if (isS(c)) {
|
|
2255
|
-
return;
|
|
2256
|
-
}
|
|
2257
|
-
if (c !== EQUAL) {
|
|
2258
|
-
this.fail("value required.");
|
|
2259
|
-
}
|
|
2260
|
-
this.state = S_XML_DECL_VALUE_START;
|
|
2261
|
-
}
|
|
2262
|
-
sXMLDeclValueStart() {
|
|
2263
|
-
const c = this.getCodeNorm();
|
|
2264
|
-
if (c === QUESTION) {
|
|
2265
|
-
this.state = S_XML_DECL_ENDING;
|
|
2266
|
-
this.fail("XML declaration is incomplete.");
|
|
2267
|
-
return;
|
|
2268
|
-
}
|
|
2269
|
-
if (isS(c)) {
|
|
2270
|
-
return;
|
|
2271
|
-
}
|
|
2272
|
-
if (!isQuote(c)) {
|
|
2273
|
-
this.fail("value must be quoted.");
|
|
2274
|
-
this.q = SPACE;
|
|
2275
|
-
} else {
|
|
2276
|
-
this.q = c;
|
|
2277
|
-
}
|
|
2278
|
-
this.state = S_XML_DECL_VALUE;
|
|
2279
|
-
}
|
|
2280
|
-
sXMLDeclValue() {
|
|
2281
|
-
const c = this.captureTo([this.q, QUESTION]);
|
|
2282
|
-
if (c === QUESTION) {
|
|
2283
|
-
this.state = S_XML_DECL_ENDING;
|
|
2284
|
-
this.text = "";
|
|
2285
|
-
this.fail("XML declaration is incomplete.");
|
|
2286
|
-
return;
|
|
2287
|
-
}
|
|
2288
|
-
if (c === EOC) {
|
|
2289
|
-
return;
|
|
2290
|
-
}
|
|
2291
|
-
const value = this.text;
|
|
2292
|
-
this.text = "";
|
|
2293
|
-
switch (this.name) {
|
|
2294
|
-
case "version": {
|
|
2295
|
-
this.xmlDeclExpects = ["encoding", "standalone"];
|
|
2296
|
-
const version = value;
|
|
2297
|
-
this.xmlDecl.version = version;
|
|
2298
|
-
if (!/^1\.[0-9]+$/.test(version)) {
|
|
2299
|
-
this.fail("version number must match /^1\\.[0-9]+$/.");
|
|
2300
|
-
} else if (!this.opt.forceXMLVersion) {
|
|
2301
|
-
this.setXMLVersion(version);
|
|
2302
|
-
}
|
|
2303
|
-
break;
|
|
2304
|
-
}
|
|
2305
|
-
case "encoding":
|
|
2306
|
-
if (!/^[A-Za-z][A-Za-z0-9._-]*$/.test(value)) {
|
|
2307
|
-
this.fail("encoding value must match /^[A-Za-z0-9][A-Za-z0-9._-]*$/.");
|
|
2308
|
-
}
|
|
2309
|
-
this.xmlDeclExpects = ["standalone"];
|
|
2310
|
-
this.xmlDecl.encoding = value;
|
|
2311
|
-
break;
|
|
2312
|
-
case "standalone":
|
|
2313
|
-
if (value !== "yes" && value !== "no") {
|
|
2314
|
-
this.fail('standalone value must match "yes" or "no".');
|
|
2315
|
-
}
|
|
2316
|
-
this.xmlDeclExpects = [];
|
|
2317
|
-
this.xmlDecl.standalone = value;
|
|
2318
|
-
break;
|
|
2319
|
-
default:
|
|
2320
|
-
}
|
|
2321
|
-
this.name = "";
|
|
2322
|
-
this.state = S_XML_DECL_SEPARATOR;
|
|
2323
|
-
}
|
|
2324
|
-
sXMLDeclSeparator() {
|
|
2325
|
-
const c = this.getCodeNorm();
|
|
2326
|
-
if (c === QUESTION) {
|
|
2327
|
-
this.state = S_XML_DECL_ENDING;
|
|
2328
|
-
return;
|
|
2329
|
-
}
|
|
2330
|
-
if (!isS(c)) {
|
|
2331
|
-
this.fail("whitespace required.");
|
|
2332
|
-
this.unget();
|
|
2333
|
-
}
|
|
2334
|
-
this.state = S_XML_DECL_NAME_START;
|
|
2335
|
-
}
|
|
2336
|
-
sXMLDeclEnding() {
|
|
2337
|
-
var _a;
|
|
2338
|
-
const c = this.getCodeNorm();
|
|
2339
|
-
if (c === GREATER) {
|
|
2340
|
-
if (this.piTarget !== "xml") {
|
|
2341
|
-
this.fail("processing instructions are not allowed before root.");
|
|
2342
|
-
} else if (this.name !== "version" && this.xmlDeclExpects.includes("version")) {
|
|
2343
|
-
this.fail("XML declaration must contain a version.");
|
|
2344
|
-
}
|
|
2345
|
-
(_a = this.xmldeclHandler) === null || _a === void 0 ? void 0 : _a.call(this, this.xmlDecl);
|
|
2346
|
-
this.name = "";
|
|
2347
|
-
this.piTarget = this.text = "";
|
|
2348
|
-
this.state = S_TEXT;
|
|
2349
|
-
} else {
|
|
2350
|
-
this.fail("The character ? is disallowed anywhere in XML declarations.");
|
|
2351
|
-
}
|
|
2352
|
-
this.xmlDeclPossible = false;
|
|
2353
|
-
}
|
|
2354
|
-
sOpenTag() {
|
|
2355
|
-
var _a;
|
|
2356
|
-
const c = this.captureNameChars();
|
|
2357
|
-
if (c === EOC) {
|
|
2358
|
-
return;
|
|
2359
|
-
}
|
|
2360
|
-
const tag = this.tag = {
|
|
2361
|
-
name: this.name,
|
|
2362
|
-
attributes: /* @__PURE__ */ Object.create(null)
|
|
2363
|
-
};
|
|
2364
|
-
this.name = "";
|
|
2365
|
-
if (this.xmlnsOpt) {
|
|
2366
|
-
this.topNS = tag.ns = /* @__PURE__ */ Object.create(null);
|
|
2367
|
-
}
|
|
2368
|
-
(_a = this.openTagStartHandler) === null || _a === void 0 ? void 0 : _a.call(this, tag);
|
|
2369
|
-
this.sawRoot = true;
|
|
2370
|
-
if (!this.fragmentOpt && this.closedRoot) {
|
|
2371
|
-
this.fail("documents may contain only one root.");
|
|
2372
|
-
}
|
|
2373
|
-
switch (c) {
|
|
2374
|
-
case GREATER:
|
|
2375
|
-
this.openTag();
|
|
2376
|
-
break;
|
|
2377
|
-
case FORWARD_SLASH:
|
|
2378
|
-
this.state = S_OPEN_TAG_SLASH;
|
|
2379
|
-
break;
|
|
2380
|
-
default:
|
|
2381
|
-
if (!isS(c)) {
|
|
2382
|
-
this.fail("disallowed character in tag name.");
|
|
2383
|
-
}
|
|
2384
|
-
this.state = S_ATTRIB;
|
|
2385
|
-
}
|
|
2386
|
-
}
|
|
2387
|
-
sOpenTagSlash() {
|
|
2388
|
-
if (this.getCode() === GREATER) {
|
|
2389
|
-
this.openSelfClosingTag();
|
|
2390
|
-
} else {
|
|
2391
|
-
this.fail("forward-slash in opening tag not followed by >.");
|
|
2392
|
-
this.state = S_ATTRIB;
|
|
2393
|
-
}
|
|
2394
|
-
}
|
|
2395
|
-
sAttrib() {
|
|
2396
|
-
const c = this.skipSpaces();
|
|
2397
|
-
if (c === EOC) {
|
|
2398
|
-
return;
|
|
2399
|
-
}
|
|
2400
|
-
if (isNameStartChar(c)) {
|
|
2401
|
-
this.unget();
|
|
2402
|
-
this.state = S_ATTRIB_NAME;
|
|
2403
|
-
} else if (c === GREATER) {
|
|
2404
|
-
this.openTag();
|
|
2405
|
-
} else if (c === FORWARD_SLASH) {
|
|
2406
|
-
this.state = S_OPEN_TAG_SLASH;
|
|
2407
|
-
} else {
|
|
2408
|
-
this.fail("disallowed character in attribute name.");
|
|
2409
|
-
}
|
|
2410
|
-
}
|
|
2411
|
-
sAttribName() {
|
|
2412
|
-
const c = this.captureNameChars();
|
|
2413
|
-
if (c === EQUAL) {
|
|
2414
|
-
this.state = S_ATTRIB_VALUE;
|
|
2415
|
-
} else if (isS(c)) {
|
|
2416
|
-
this.state = S_ATTRIB_NAME_SAW_WHITE;
|
|
2417
|
-
} else if (c === GREATER) {
|
|
2418
|
-
this.fail("attribute without value.");
|
|
2419
|
-
this.pushAttrib(this.name, this.name);
|
|
2420
|
-
this.name = this.text = "";
|
|
2421
|
-
this.openTag();
|
|
2422
|
-
} else if (c !== EOC) {
|
|
2423
|
-
this.fail("disallowed character in attribute name.");
|
|
2424
|
-
}
|
|
2425
|
-
}
|
|
2426
|
-
sAttribNameSawWhite() {
|
|
2427
|
-
const c = this.skipSpaces();
|
|
2428
|
-
switch (c) {
|
|
2429
|
-
case EOC:
|
|
2430
|
-
return;
|
|
2431
|
-
case EQUAL:
|
|
2432
|
-
this.state = S_ATTRIB_VALUE;
|
|
2433
|
-
break;
|
|
2434
|
-
default:
|
|
2435
|
-
this.fail("attribute without value.");
|
|
2436
|
-
this.text = "";
|
|
2437
|
-
this.name = "";
|
|
2438
|
-
if (c === GREATER) {
|
|
2439
|
-
this.openTag();
|
|
2440
|
-
} else if (isNameStartChar(c)) {
|
|
2441
|
-
this.unget();
|
|
2442
|
-
this.state = S_ATTRIB_NAME;
|
|
2443
|
-
} else {
|
|
2444
|
-
this.fail("disallowed character in attribute name.");
|
|
2445
|
-
this.state = S_ATTRIB;
|
|
2446
|
-
}
|
|
2447
|
-
}
|
|
2448
|
-
}
|
|
2449
|
-
sAttribValue() {
|
|
2450
|
-
const c = this.getCodeNorm();
|
|
2451
|
-
if (isQuote(c)) {
|
|
2452
|
-
this.q = c;
|
|
2453
|
-
this.state = S_ATTRIB_VALUE_QUOTED;
|
|
2454
|
-
} else if (!isS(c)) {
|
|
2455
|
-
this.fail("unquoted attribute value.");
|
|
2456
|
-
this.state = S_ATTRIB_VALUE_UNQUOTED;
|
|
2457
|
-
this.unget();
|
|
2458
|
-
}
|
|
2459
|
-
}
|
|
2460
|
-
sAttribValueQuoted() {
|
|
2461
|
-
const { q, chunk } = this;
|
|
2462
|
-
let { i: start } = this;
|
|
2463
|
-
while (true) {
|
|
2464
|
-
switch (this.getCode()) {
|
|
2465
|
-
case q:
|
|
2466
|
-
this.pushAttrib(this.name, this.text + chunk.slice(start, this.prevI));
|
|
2467
|
-
this.name = this.text = "";
|
|
2468
|
-
this.q = null;
|
|
2469
|
-
this.state = S_ATTRIB_VALUE_CLOSED;
|
|
2470
|
-
return;
|
|
2471
|
-
case AMP:
|
|
2472
|
-
this.text += chunk.slice(start, this.prevI);
|
|
2473
|
-
this.state = S_ENTITY;
|
|
2474
|
-
this.entityReturnState = S_ATTRIB_VALUE_QUOTED;
|
|
2475
|
-
return;
|
|
2476
|
-
case NL:
|
|
2477
|
-
case NL_LIKE:
|
|
2478
|
-
case TAB:
|
|
2479
|
-
this.text += `${chunk.slice(start, this.prevI)} `;
|
|
2480
|
-
start = this.i;
|
|
2481
|
-
break;
|
|
2482
|
-
case LESS:
|
|
2483
|
-
this.text += chunk.slice(start, this.prevI);
|
|
2484
|
-
this.fail("disallowed character.");
|
|
2485
|
-
return;
|
|
2486
|
-
case EOC:
|
|
2487
|
-
this.text += chunk.slice(start);
|
|
2488
|
-
return;
|
|
2489
|
-
default:
|
|
2490
|
-
}
|
|
2491
|
-
}
|
|
2492
|
-
}
|
|
2493
|
-
sAttribValueClosed() {
|
|
2494
|
-
const c = this.getCodeNorm();
|
|
2495
|
-
if (isS(c)) {
|
|
2496
|
-
this.state = S_ATTRIB;
|
|
2497
|
-
} else if (c === GREATER) {
|
|
2498
|
-
this.openTag();
|
|
2499
|
-
} else if (c === FORWARD_SLASH) {
|
|
2500
|
-
this.state = S_OPEN_TAG_SLASH;
|
|
2501
|
-
} else if (isNameStartChar(c)) {
|
|
2502
|
-
this.fail("no whitespace between attributes.");
|
|
2503
|
-
this.unget();
|
|
2504
|
-
this.state = S_ATTRIB_NAME;
|
|
2505
|
-
} else {
|
|
2506
|
-
this.fail("disallowed character in attribute name.");
|
|
2507
|
-
}
|
|
2508
|
-
}
|
|
2509
|
-
sAttribValueUnquoted() {
|
|
2510
|
-
const c = this.captureTo(ATTRIB_VALUE_UNQUOTED_TERMINATOR);
|
|
2511
|
-
switch (c) {
|
|
2512
|
-
case AMP:
|
|
2513
|
-
this.state = S_ENTITY;
|
|
2514
|
-
this.entityReturnState = S_ATTRIB_VALUE_UNQUOTED;
|
|
2515
|
-
break;
|
|
2516
|
-
case LESS:
|
|
2517
|
-
this.fail("disallowed character.");
|
|
2518
|
-
break;
|
|
2519
|
-
case EOC:
|
|
2520
|
-
break;
|
|
2521
|
-
default:
|
|
2522
|
-
if (this.text.includes("]]>")) {
|
|
2523
|
-
this.fail('the string "]]>" is disallowed in char data.');
|
|
2524
|
-
}
|
|
2525
|
-
this.pushAttrib(this.name, this.text);
|
|
2526
|
-
this.name = this.text = "";
|
|
2527
|
-
if (c === GREATER) {
|
|
2528
|
-
this.openTag();
|
|
2529
|
-
} else {
|
|
2530
|
-
this.state = S_ATTRIB;
|
|
2531
|
-
}
|
|
2532
|
-
}
|
|
2533
|
-
}
|
|
2534
|
-
sCloseTag() {
|
|
2535
|
-
const c = this.captureNameChars();
|
|
2536
|
-
if (c === GREATER) {
|
|
2537
|
-
this.closeTag();
|
|
2538
|
-
} else if (isS(c)) {
|
|
2539
|
-
this.state = S_CLOSE_TAG_SAW_WHITE;
|
|
2540
|
-
} else if (c !== EOC) {
|
|
2541
|
-
this.fail("disallowed character in closing tag.");
|
|
2542
|
-
}
|
|
2543
|
-
}
|
|
2544
|
-
sCloseTagSawWhite() {
|
|
2545
|
-
switch (this.skipSpaces()) {
|
|
2546
|
-
case GREATER:
|
|
2547
|
-
this.closeTag();
|
|
2548
|
-
break;
|
|
2549
|
-
case EOC:
|
|
2550
|
-
break;
|
|
2551
|
-
default:
|
|
2552
|
-
this.fail("disallowed character in closing tag.");
|
|
2553
|
-
}
|
|
2554
|
-
}
|
|
2555
|
-
// END OF STATE ENGINE METHODS
|
|
2556
|
-
handleTextInRoot() {
|
|
2557
|
-
let { i: start, forbiddenState } = this;
|
|
2558
|
-
const { chunk, textHandler: handler } = this;
|
|
2559
|
-
scanLoop:
|
|
2560
|
-
while (true) {
|
|
2561
|
-
switch (this.getCode()) {
|
|
2562
|
-
case LESS: {
|
|
2563
|
-
this.state = S_OPEN_WAKA;
|
|
2564
|
-
if (handler !== void 0) {
|
|
2565
|
-
const { text } = this;
|
|
2566
|
-
const slice = chunk.slice(start, this.prevI);
|
|
2567
|
-
if (text.length !== 0) {
|
|
2568
|
-
handler(text + slice);
|
|
2569
|
-
this.text = "";
|
|
2570
|
-
} else if (slice.length !== 0) {
|
|
2571
|
-
handler(slice);
|
|
2572
|
-
}
|
|
2573
|
-
}
|
|
2574
|
-
forbiddenState = FORBIDDEN_START;
|
|
2575
|
-
break scanLoop;
|
|
2576
|
-
}
|
|
2577
|
-
case AMP:
|
|
2578
|
-
this.state = S_ENTITY;
|
|
2579
|
-
this.entityReturnState = S_TEXT;
|
|
2580
|
-
if (handler !== void 0) {
|
|
2581
|
-
this.text += chunk.slice(start, this.prevI);
|
|
2582
|
-
}
|
|
2583
|
-
forbiddenState = FORBIDDEN_START;
|
|
2584
|
-
break scanLoop;
|
|
2585
|
-
case CLOSE_BRACKET:
|
|
2586
|
-
switch (forbiddenState) {
|
|
2587
|
-
case FORBIDDEN_START:
|
|
2588
|
-
forbiddenState = FORBIDDEN_BRACKET;
|
|
2589
|
-
break;
|
|
2590
|
-
case FORBIDDEN_BRACKET:
|
|
2591
|
-
forbiddenState = FORBIDDEN_BRACKET_BRACKET;
|
|
2592
|
-
break;
|
|
2593
|
-
case FORBIDDEN_BRACKET_BRACKET:
|
|
2594
|
-
break;
|
|
2595
|
-
default:
|
|
2596
|
-
throw new Error("impossible state");
|
|
2597
|
-
}
|
|
2598
|
-
break;
|
|
2599
|
-
case GREATER:
|
|
2600
|
-
if (forbiddenState === FORBIDDEN_BRACKET_BRACKET) {
|
|
2601
|
-
this.fail('the string "]]>" is disallowed in char data.');
|
|
2602
|
-
}
|
|
2603
|
-
forbiddenState = FORBIDDEN_START;
|
|
2604
|
-
break;
|
|
2605
|
-
case NL_LIKE:
|
|
2606
|
-
if (handler !== void 0) {
|
|
2607
|
-
this.text += `${chunk.slice(start, this.prevI)}
|
|
2608
|
-
`;
|
|
2609
|
-
}
|
|
2610
|
-
start = this.i;
|
|
2611
|
-
forbiddenState = FORBIDDEN_START;
|
|
2612
|
-
break;
|
|
2613
|
-
case EOC:
|
|
2614
|
-
if (handler !== void 0) {
|
|
2615
|
-
this.text += chunk.slice(start);
|
|
2616
|
-
}
|
|
2617
|
-
break scanLoop;
|
|
2618
|
-
default:
|
|
2619
|
-
forbiddenState = FORBIDDEN_START;
|
|
2620
|
-
}
|
|
2621
|
-
}
|
|
2622
|
-
this.forbiddenState = forbiddenState;
|
|
2623
|
-
}
|
|
2624
|
-
handleTextOutsideRoot() {
|
|
2625
|
-
let { i: start } = this;
|
|
2626
|
-
const { chunk, textHandler: handler } = this;
|
|
2627
|
-
let nonSpace = false;
|
|
2628
|
-
outRootLoop:
|
|
2629
|
-
while (true) {
|
|
2630
|
-
const code = this.getCode();
|
|
2631
|
-
switch (code) {
|
|
2632
|
-
case LESS: {
|
|
2633
|
-
this.state = S_OPEN_WAKA;
|
|
2634
|
-
if (handler !== void 0) {
|
|
2635
|
-
const { text } = this;
|
|
2636
|
-
const slice = chunk.slice(start, this.prevI);
|
|
2637
|
-
if (text.length !== 0) {
|
|
2638
|
-
handler(text + slice);
|
|
2639
|
-
this.text = "";
|
|
2640
|
-
} else if (slice.length !== 0) {
|
|
2641
|
-
handler(slice);
|
|
2642
|
-
}
|
|
2643
|
-
}
|
|
2644
|
-
break outRootLoop;
|
|
2645
|
-
}
|
|
2646
|
-
case AMP:
|
|
2647
|
-
this.state = S_ENTITY;
|
|
2648
|
-
this.entityReturnState = S_TEXT;
|
|
2649
|
-
if (handler !== void 0) {
|
|
2650
|
-
this.text += chunk.slice(start, this.prevI);
|
|
2651
|
-
}
|
|
2652
|
-
nonSpace = true;
|
|
2653
|
-
break outRootLoop;
|
|
2654
|
-
case NL_LIKE:
|
|
2655
|
-
if (handler !== void 0) {
|
|
2656
|
-
this.text += `${chunk.slice(start, this.prevI)}
|
|
2657
|
-
`;
|
|
2658
|
-
}
|
|
2659
|
-
start = this.i;
|
|
2660
|
-
break;
|
|
2661
|
-
case EOC:
|
|
2662
|
-
if (handler !== void 0) {
|
|
2663
|
-
this.text += chunk.slice(start);
|
|
2664
|
-
}
|
|
2665
|
-
break outRootLoop;
|
|
2666
|
-
default:
|
|
2667
|
-
if (!isS(code)) {
|
|
2668
|
-
nonSpace = true;
|
|
2669
|
-
}
|
|
2670
|
-
}
|
|
2671
|
-
}
|
|
2672
|
-
if (!nonSpace) {
|
|
2673
|
-
return;
|
|
2674
|
-
}
|
|
2675
|
-
if (!this.sawRoot && !this.reportedTextBeforeRoot) {
|
|
2676
|
-
this.fail("text data outside of root node.");
|
|
2677
|
-
this.reportedTextBeforeRoot = true;
|
|
2678
|
-
}
|
|
2679
|
-
if (this.closedRoot && !this.reportedTextAfterRoot) {
|
|
2680
|
-
this.fail("text data outside of root node.");
|
|
2681
|
-
this.reportedTextAfterRoot = true;
|
|
2682
|
-
}
|
|
2683
|
-
}
|
|
2684
|
-
pushAttribNS(name, value) {
|
|
2685
|
-
var _a;
|
|
2686
|
-
const { prefix, local } = this.qname(name);
|
|
2687
|
-
const attr = { name, prefix, local, value };
|
|
2688
|
-
this.attribList.push(attr);
|
|
2689
|
-
(_a = this.attributeHandler) === null || _a === void 0 ? void 0 : _a.call(this, attr);
|
|
2690
|
-
if (prefix === "xmlns") {
|
|
2691
|
-
const trimmed = value.trim();
|
|
2692
|
-
if (this.currentXMLVersion === "1.0" && trimmed === "") {
|
|
2693
|
-
this.fail("invalid attempt to undefine prefix in XML 1.0");
|
|
2694
|
-
}
|
|
2695
|
-
this.topNS[local] = trimmed;
|
|
2696
|
-
nsPairCheck(this, local, trimmed);
|
|
2697
|
-
} else if (name === "xmlns") {
|
|
2698
|
-
const trimmed = value.trim();
|
|
2699
|
-
this.topNS[""] = trimmed;
|
|
2700
|
-
nsPairCheck(this, "", trimmed);
|
|
2701
|
-
}
|
|
2702
|
-
}
|
|
2703
|
-
pushAttribPlain(name, value) {
|
|
2704
|
-
var _a;
|
|
2705
|
-
const attr = { name, value };
|
|
2706
|
-
this.attribList.push(attr);
|
|
2707
|
-
(_a = this.attributeHandler) === null || _a === void 0 ? void 0 : _a.call(this, attr);
|
|
2708
|
-
}
|
|
2709
|
-
/**
|
|
2710
|
-
* End parsing. This performs final well-formedness checks and resets the
|
|
2711
|
-
* parser to a clean state.
|
|
2712
|
-
*
|
|
2713
|
-
* @returns this
|
|
2714
|
-
*/
|
|
2715
|
-
end() {
|
|
2716
|
-
var _a, _b;
|
|
2717
|
-
if (!this.sawRoot) {
|
|
2718
|
-
this.fail("document must contain a root element.");
|
|
2719
|
-
}
|
|
2720
|
-
const { tags } = this;
|
|
2721
|
-
while (tags.length > 0) {
|
|
2722
|
-
const tag = tags.pop();
|
|
2723
|
-
this.fail(`unclosed tag: ${tag.name}`);
|
|
2724
|
-
}
|
|
2725
|
-
if (this.state !== S_BEGIN && this.state !== S_TEXT) {
|
|
2726
|
-
this.fail("unexpected end.");
|
|
2727
|
-
}
|
|
2728
|
-
const { text } = this;
|
|
2729
|
-
if (text.length !== 0) {
|
|
2730
|
-
(_a = this.textHandler) === null || _a === void 0 ? void 0 : _a.call(this, text);
|
|
2731
|
-
this.text = "";
|
|
2732
|
-
}
|
|
2733
|
-
this._closed = true;
|
|
2734
|
-
(_b = this.endHandler) === null || _b === void 0 ? void 0 : _b.call(this);
|
|
2735
|
-
this._init();
|
|
2736
|
-
return this;
|
|
2737
|
-
}
|
|
2738
|
-
/**
|
|
2739
|
-
* Resolve a namespace prefix.
|
|
2740
|
-
*
|
|
2741
|
-
* @param prefix The prefix to resolve.
|
|
2742
|
-
*
|
|
2743
|
-
* @returns The namespace URI or ``undefined`` if the prefix is not defined.
|
|
2744
|
-
*/
|
|
2745
|
-
resolve(prefix) {
|
|
2746
|
-
var _a, _b;
|
|
2747
|
-
let uri = this.topNS[prefix];
|
|
2748
|
-
if (uri !== void 0) {
|
|
2749
|
-
return uri;
|
|
2750
|
-
}
|
|
2751
|
-
const { tags } = this;
|
|
2752
|
-
for (let index = tags.length - 1; index >= 0; index--) {
|
|
2753
|
-
uri = tags[index].ns[prefix];
|
|
2754
|
-
if (uri !== void 0) {
|
|
2755
|
-
return uri;
|
|
2756
|
-
}
|
|
2757
|
-
}
|
|
2758
|
-
uri = this.ns[prefix];
|
|
2759
|
-
if (uri !== void 0) {
|
|
2760
|
-
return uri;
|
|
2761
|
-
}
|
|
2762
|
-
return (_b = (_a = this.opt).resolvePrefix) === null || _b === void 0 ? void 0 : _b.call(_a, prefix);
|
|
2763
|
-
}
|
|
2764
|
-
/**
|
|
2765
|
-
* Parse a qname into its prefix and local name parts.
|
|
2766
|
-
*
|
|
2767
|
-
* @param name The name to parse
|
|
2768
|
-
*
|
|
2769
|
-
* @returns
|
|
2770
|
-
*/
|
|
2771
|
-
qname(name) {
|
|
2772
|
-
const colon = name.indexOf(":");
|
|
2773
|
-
if (colon === -1) {
|
|
2774
|
-
return { prefix: "", local: name };
|
|
2775
|
-
}
|
|
2776
|
-
const local = name.slice(colon + 1);
|
|
2777
|
-
const prefix = name.slice(0, colon);
|
|
2778
|
-
if (prefix === "" || local === "" || local.includes(":")) {
|
|
2779
|
-
this.fail(`malformed name: ${name}.`);
|
|
2780
|
-
}
|
|
2781
|
-
return { prefix, local };
|
|
2782
|
-
}
|
|
2783
|
-
processAttribsNS() {
|
|
2784
|
-
var _a;
|
|
2785
|
-
const { attribList } = this;
|
|
2786
|
-
const tag = this.tag;
|
|
2787
|
-
{
|
|
2788
|
-
const { prefix, local } = this.qname(tag.name);
|
|
2789
|
-
tag.prefix = prefix;
|
|
2790
|
-
tag.local = local;
|
|
2791
|
-
const uri = tag.uri = (_a = this.resolve(prefix)) !== null && _a !== void 0 ? _a : "";
|
|
2792
|
-
if (prefix !== "") {
|
|
2793
|
-
if (prefix === "xmlns") {
|
|
2794
|
-
this.fail('tags may not have "xmlns" as prefix.');
|
|
2795
|
-
}
|
|
2796
|
-
if (uri === "") {
|
|
2797
|
-
this.fail(`unbound namespace prefix: ${JSON.stringify(prefix)}.`);
|
|
2798
|
-
tag.uri = prefix;
|
|
2799
|
-
}
|
|
2800
|
-
}
|
|
2801
|
-
}
|
|
2802
|
-
if (attribList.length === 0) {
|
|
2803
|
-
return;
|
|
2804
|
-
}
|
|
2805
|
-
const { attributes } = tag;
|
|
2806
|
-
const seen = /* @__PURE__ */ new Set();
|
|
2807
|
-
for (const attr of attribList) {
|
|
2808
|
-
const { name, prefix, local } = attr;
|
|
2809
|
-
let uri;
|
|
2810
|
-
let eqname;
|
|
2811
|
-
if (prefix === "") {
|
|
2812
|
-
uri = name === "xmlns" ? XMLNS_NAMESPACE : "";
|
|
2813
|
-
eqname = name;
|
|
2814
|
-
} else {
|
|
2815
|
-
uri = this.resolve(prefix);
|
|
2816
|
-
if (uri === void 0) {
|
|
2817
|
-
this.fail(`unbound namespace prefix: ${JSON.stringify(prefix)}.`);
|
|
2818
|
-
uri = prefix;
|
|
2819
|
-
}
|
|
2820
|
-
eqname = `{${uri}}${local}`;
|
|
2821
|
-
}
|
|
2822
|
-
if (seen.has(eqname)) {
|
|
2823
|
-
this.fail(`duplicate attribute: ${eqname}.`);
|
|
2824
|
-
}
|
|
2825
|
-
seen.add(eqname);
|
|
2826
|
-
attr.uri = uri;
|
|
2827
|
-
attributes[name] = attr;
|
|
2828
|
-
}
|
|
2829
|
-
this.attribList = [];
|
|
2830
|
-
}
|
|
2831
|
-
processAttribsPlain() {
|
|
2832
|
-
const { attribList } = this;
|
|
2833
|
-
const attributes = this.tag.attributes;
|
|
2834
|
-
for (const { name, value } of attribList) {
|
|
2835
|
-
if (attributes[name] !== void 0) {
|
|
2836
|
-
this.fail(`duplicate attribute: ${name}.`);
|
|
2837
|
-
}
|
|
2838
|
-
attributes[name] = value;
|
|
2839
|
-
}
|
|
2840
|
-
this.attribList = [];
|
|
2841
|
-
}
|
|
2842
|
-
/**
|
|
2843
|
-
* Handle a complete open tag. This parser code calls this once it has seen
|
|
2844
|
-
* the whole tag. This method checks for well-formeness and then emits
|
|
2845
|
-
* ``onopentag``.
|
|
2846
|
-
*/
|
|
2847
|
-
openTag() {
|
|
2848
|
-
var _a;
|
|
2849
|
-
this.processAttribs();
|
|
2850
|
-
const { tags } = this;
|
|
2851
|
-
const tag = this.tag;
|
|
2852
|
-
tag.isSelfClosing = false;
|
|
2853
|
-
(_a = this.openTagHandler) === null || _a === void 0 ? void 0 : _a.call(this, tag);
|
|
2854
|
-
tags.push(tag);
|
|
2855
|
-
this.state = S_TEXT;
|
|
2856
|
-
this.name = "";
|
|
2857
|
-
}
|
|
2858
|
-
/**
|
|
2859
|
-
* Handle a complete self-closing tag. This parser code calls this once it has
|
|
2860
|
-
* seen the whole tag. This method checks for well-formeness and then emits
|
|
2861
|
-
* ``onopentag`` and ``onclosetag``.
|
|
2862
|
-
*/
|
|
2863
|
-
openSelfClosingTag() {
|
|
2864
|
-
var _a, _b, _c;
|
|
2865
|
-
this.processAttribs();
|
|
2866
|
-
const { tags } = this;
|
|
2867
|
-
const tag = this.tag;
|
|
2868
|
-
tag.isSelfClosing = true;
|
|
2869
|
-
(_a = this.openTagHandler) === null || _a === void 0 ? void 0 : _a.call(this, tag);
|
|
2870
|
-
(_b = this.closeTagHandler) === null || _b === void 0 ? void 0 : _b.call(this, tag);
|
|
2871
|
-
const top = this.tag = (_c = tags[tags.length - 1]) !== null && _c !== void 0 ? _c : null;
|
|
2872
|
-
if (top === null) {
|
|
2873
|
-
this.closedRoot = true;
|
|
2874
|
-
}
|
|
2875
|
-
this.state = S_TEXT;
|
|
2876
|
-
this.name = "";
|
|
2877
|
-
}
|
|
2878
|
-
/**
|
|
2879
|
-
* Handle a complete close tag. This parser code calls this once it has seen
|
|
2880
|
-
* the whole tag. This method checks for well-formeness and then emits
|
|
2881
|
-
* ``onclosetag``.
|
|
2882
|
-
*/
|
|
2883
|
-
closeTag() {
|
|
2884
|
-
const { tags, name } = this;
|
|
2885
|
-
this.state = S_TEXT;
|
|
2886
|
-
this.name = "";
|
|
2887
|
-
if (name === "") {
|
|
2888
|
-
this.fail("weird empty close tag.");
|
|
2889
|
-
this.text += "</>";
|
|
2890
|
-
return;
|
|
2891
|
-
}
|
|
2892
|
-
const handler = this.closeTagHandler;
|
|
2893
|
-
let l = tags.length;
|
|
2894
|
-
while (l-- > 0) {
|
|
2895
|
-
const tag = this.tag = tags.pop();
|
|
2896
|
-
this.topNS = tag.ns;
|
|
2897
|
-
handler === null || handler === void 0 ? void 0 : handler(tag);
|
|
2898
|
-
if (tag.name === name) {
|
|
2899
|
-
break;
|
|
2900
|
-
}
|
|
2901
|
-
this.fail("unexpected close tag.");
|
|
2902
|
-
}
|
|
2903
|
-
if (l === 0) {
|
|
2904
|
-
this.closedRoot = true;
|
|
2905
|
-
} else if (l < 0) {
|
|
2906
|
-
this.fail(`unmatched closing tag: ${name}.`);
|
|
2907
|
-
this.text += `</${name}>`;
|
|
2908
|
-
}
|
|
2909
|
-
}
|
|
2910
|
-
/**
|
|
2911
|
-
* Resolves an entity. Makes any necessary well-formedness checks.
|
|
2912
|
-
*
|
|
2913
|
-
* @param entity The entity to resolve.
|
|
2914
|
-
*
|
|
2915
|
-
* @returns The parsed entity.
|
|
2916
|
-
*/
|
|
2917
|
-
parseEntity(entity) {
|
|
2918
|
-
if (entity[0] !== "#") {
|
|
2919
|
-
const defined = this.ENTITIES[entity];
|
|
2920
|
-
if (defined !== void 0) {
|
|
2921
|
-
return defined;
|
|
2922
|
-
}
|
|
2923
|
-
this.fail(this.isName(entity) ? "undefined entity." : "disallowed character in entity name.");
|
|
2924
|
-
return `&${entity};`;
|
|
2925
|
-
}
|
|
2926
|
-
let num = NaN;
|
|
2927
|
-
if (entity[1] === "x" && /^#x[0-9a-f]+$/i.test(entity)) {
|
|
2928
|
-
num = parseInt(entity.slice(2), 16);
|
|
2929
|
-
} else if (/^#[0-9]+$/.test(entity)) {
|
|
2930
|
-
num = parseInt(entity.slice(1), 10);
|
|
2931
|
-
}
|
|
2932
|
-
if (!this.isChar(num)) {
|
|
2933
|
-
this.fail("malformed character entity.");
|
|
2934
|
-
return `&${entity};`;
|
|
2935
|
-
}
|
|
2936
|
-
return String.fromCodePoint(num);
|
|
2937
|
-
}
|
|
2938
|
-
};
|
|
2939
|
-
exports.SaxesParser = SaxesParser;
|
|
2940
|
-
}
|
|
2941
|
-
});
|
|
2942
|
-
|
|
2943
|
-
// ../../node_modules/.pnpm/fd-slicer@1.1.0/node_modules/fd-slicer/index.js
|
|
2944
|
-
var require_fd_slicer = __commonJS({
|
|
2945
|
-
"../../node_modules/.pnpm/fd-slicer@1.1.0/node_modules/fd-slicer/index.js"(exports) {
|
|
2946
|
-
"use strict";
|
|
2947
|
-
var fs = __require("fs");
|
|
2948
|
-
var util = __require("util");
|
|
2949
|
-
var stream = __require("stream");
|
|
2950
|
-
var Readable = stream.Readable;
|
|
2951
|
-
var Writable = stream.Writable;
|
|
2952
|
-
var PassThrough = stream.PassThrough;
|
|
2953
|
-
var Pend = require_pend();
|
|
2954
|
-
var EventEmitter = __require("events").EventEmitter;
|
|
2955
|
-
exports.createFromBuffer = createFromBuffer;
|
|
2956
|
-
exports.createFromFd = createFromFd;
|
|
2957
|
-
exports.BufferSlicer = BufferSlicer;
|
|
2958
|
-
exports.FdSlicer = FdSlicer;
|
|
2959
|
-
util.inherits(FdSlicer, EventEmitter);
|
|
2960
|
-
/**
 * Event emitter wrapping a file descriptor.
 *
 * @param fd       File descriptor, stored as `this.fd`.
 * @param options  Optional settings; a truthy `autoClose` is stored as `true`.
 */
function FdSlicer(fd, options) {
  var settings = options || {};
  EventEmitter.call(this);
  // Queue used by read/write; `max = 1` is set on it right after creation.
  var queue = new Pend();
  queue.max = 1;
  this.fd = fd;
  this.pend = queue;
  this.refCount = 0;
  this.autoClose = Boolean(settings.autoClose);
}
|
|
2969
|
-
/**
 * Queue an `fs.read` on the wrapped descriptor through `this.pend`.
 * The queue slot is released before `callback` is invoked with the
 * `(err, bytesRead, buffer)` values reported by `fs.read`.
 */
FdSlicer.prototype.read = function(buffer, offset, length, position, callback) {
  const slicer = this;
  const job = function(release) {
    const onRead = function(err, bytesRead, filled) {
      release();
      callback(err, bytesRead, filled);
    };
    fs.read(slicer.fd, buffer, offset, length, position, onRead);
  };
  slicer.pend.go(job);
};
|
|
2978
|
-
/**
 * Queue an `fs.write` on the wrapped descriptor through `this.pend`.
 * The queue slot is released before `callback` is invoked with the
 * `(err, written, buffer)` values reported by `fs.write`.
 */
FdSlicer.prototype.write = function(buffer, offset, length, position, callback) {
  const slicer = this;
  const job = function(release) {
    const onWritten = function(err, written, source) {
      release();
      callback(err, written, source);
    };
    fs.write(slicer.fd, buffer, offset, length, position, onWritten);
  };
  slicer.pend.go(job);
};
|
|
2987
|
-
/** Create a `ReadStream` bound to this slicer, passing `options` through. */
FdSlicer.prototype.createReadStream = function(options) {
  var stream = new ReadStream(this, options);
  return stream;
};
|
|
2990
|
-
/** Create a `WriteStream` bound to this slicer, passing `options` through. */
FdSlicer.prototype.createWriteStream = function(options) {
  var stream = new WriteStream(this, options);
  return stream;
};
|
|
2993
|
-
/** Increment the reference count; `unref` is the counterpart. */
FdSlicer.prototype.ref = function() {
  this.refCount = this.refCount + 1;
};
|
|
2996
|
-
/**
 * Decrement the reference count. When it reaches zero and `autoClose` is
 * set, the descriptor is closed and "close" (or "error") is emitted.
 *
 * @throws Error("invalid unref") when the count drops below zero.
 */
FdSlicer.prototype.unref = function() {
  const slicer = this;
  slicer.refCount -= 1;
  if (slicer.refCount > 0) {
    return;
  }
  if (slicer.refCount < 0) {
    throw new Error("invalid unref");
  }
  if (!slicer.autoClose) {
    return;
  }
  fs.close(slicer.fd, function(err) {
    if (err) {
      slicer.emit("error", err);
      return;
    }
    slicer.emit("close");
  });
};
|
|
3012
|
-
util.inherits(ReadStream, Readable);
|
|
3013
|
-
/**
 * Readable stream over a slicer context.
 *
 * @param context  Object providing `ref()`; one reference is taken here.
 * @param options  Optional; `start` (default 0) and `end` delimit the range,
 *                 and the whole object is also passed to `Readable`.
 */
function ReadStream(context, options) {
  var settings = options || {};
  Readable.call(this, settings);
  this.context = context;
  this.context.ref();
  var firstByte = settings.start || 0;
  this.start = firstByte;
  this.endOffset = settings.end;
  // Reading begins at `start`; `pos` is the current read position.
  this.pos = firstByte;
  this.destroyed = false;
}
|
|
3023
|
-
/**
 * Readable `_read` implementation: reads the next slice of the file
 * through the context's queue and pushes it downstream.
 *
 * The stream is ended (and the context reference released) once `endOffset`
 * is reached or `fs.read` reports 0 bytes.
 *
 * @param n  Requested size; capped by the high-water mark and by the bytes
 *           left before `endOffset` when one is set.
 */
ReadStream.prototype._read = function(n) {
  var self = this;
  if (self.destroyed) return;
  var toRead = Math.min(self._readableState.highWaterMark, n);
  if (self.endOffset != null) {
    toRead = Math.min(toRead, self.endOffset - self.pos);
  }
  if (toRead <= 0) {
    self.destroyed = true;
    self.push(null);
    self.context.unref();
    return;
  }
  self.context.pend.go(function(cb) {
    // The stream may have been destroyed while the job was queued.
    if (self.destroyed) return cb();
    // `new Buffer(size)` is deprecated. An uninitialized buffer is fine
    // here because only the prefix filled by fs.read is pushed.
    var buffer = Buffer.allocUnsafe(toRead);
    fs.read(self.context.fd, buffer, 0, toRead, self.pos, function(err, bytesRead) {
      if (err) {
        self.destroy(err);
      } else if (bytesRead === 0) {
        self.destroyed = true;
        self.push(null);
        self.context.unref();
      } else {
        self.pos += bytesRead;
        self.push(buffer.slice(0, bytesRead));
      }
      cb();
    });
  });
};
|
|
3054
|
-
/**
 * Destroy the stream once: marks it destroyed, emits "error" with `err`
 * (or a generic "stream destroyed" error) and releases the context
 * reference. Later calls do nothing.
 */
ReadStream.prototype.destroy = function(err) {
  if (this.destroyed) {
    return;
  }
  var reason = err || new Error("stream destroyed");
  this.destroyed = true;
  this.emit("error", reason);
  this.context.unref();
};
|
|
3061
|
-
util.inherits(WriteStream, Writable);
|
|
3062
|
-
/**
 * Writable stream over a slicer context.
 *
 * @param context  Object providing `ref()`; one reference is taken here.
 * @param options  Optional; `start` (default 0) is the first write position
 *                 and `end` (default Infinity) the maximum end offset. The
 *                 whole object is also passed to `Writable`.
 */
function WriteStream(context, options) {
  var settings = options || {};
  Writable.call(this, settings);
  this.context = context;
  this.context.ref();
  var firstByte = settings.start || 0;
  this.start = firstByte;
  this.endOffset = settings.end == null ? Infinity : +settings.end;
  this.bytesWritten = 0;
  this.pos = firstByte;
  this.destroyed = false;
  // Destroy the stream once all writes are flushed.
  this.on("finish", this.destroy.bind(this));
}
|
|
3074
|
-
/**
 * Writable `_write` implementation: writes `buffer` at the current
 * position through the context's queue.
 *
 * A write that would pass `endOffset` destroys the stream and fails with
 * an error whose `code` is "ETOOBIG". Each successful write advances
 * `pos`/`bytesWritten` and emits "progress".
 */
WriteStream.prototype._write = function(buffer, encoding, callback) {
  const stream = this;
  if (stream.destroyed) {
    return;
  }
  if (stream.pos + buffer.length > stream.endOffset) {
    const tooBig = new Error("maximum file length exceeded");
    tooBig.code = "ETOOBIG";
    stream.destroy();
    callback(tooBig);
    return;
  }
  const job = function(release) {
    // The stream may have been destroyed while the job was queued.
    if (stream.destroyed) {
      return release();
    }
    const onWritten = function(writeErr, bytes) {
      if (writeErr) {
        stream.destroy();
        release();
        callback(writeErr);
        return;
      }
      stream.bytesWritten += bytes;
      stream.pos += bytes;
      stream.emit("progress");
      release();
      callback();
    };
    fs.write(stream.context.fd, buffer, 0, buffer.length, stream.pos, onWritten);
  };
  stream.context.pend.go(job);
};
|
|
3101
|
-
/**
 * Mark the stream destroyed and release the context reference.
 * Only the first call has an effect.
 */
WriteStream.prototype.destroy = function() {
  if (!this.destroyed) {
    this.destroyed = true;
    this.context.unref();
  }
};
|
|
3106
|
-
util.inherits(BufferSlicer, EventEmitter);
|
|
3107
|
-
/**
 * Event emitter wrapping an in-memory buffer.
 *
 * @param buffer   The buffer, stored as `this.buffer`.
 * @param options  Optional; `maxChunkSize` falls back to
 *                 `Number.MAX_SAFE_INTEGER` when falsy.
 */
function BufferSlicer(buffer, options) {
  EventEmitter.call(this);
  var settings = options || {};
  this.refCount = 0;
  this.buffer = buffer;
  this.maxChunkSize = settings.maxChunkSize || Number.MAX_SAFE_INTEGER;
}
|
|
3114
|
-
/**
 * Copy up to `length` bytes starting at `position` of the wrapped buffer
 * into `buffer` at `offset`, then report the result asynchronously.
 *
 * @param callback  Called on a later tick as `callback(null, bytesRead)`,
 *                  where `bytesRead` is the number of bytes actually copied
 *                  (fewer than `length` when the read runs past the end of
 *                  the wrapped buffer, 0 when `position` is at or beyond it).
 */
BufferSlicer.prototype.read = function(buffer, offset, length, position, callback) {
  var bytesRead = 0;
  // A position at or past the end is an empty read, not an error.
  if (position < this.buffer.length) {
    // Buffer#copy clamps the range and returns how many bytes it copied.
    bytesRead = this.buffer.copy(buffer, offset, position, position + length);
  }
  setImmediate(function() {
    callback(null, bytesRead);
  });
};
|
|
3123
|
-
/**
 * Copy `length` bytes of `buffer`, starting at `offset`, into the wrapped
 * buffer at `position`. `callback(null, length, buffer)` is invoked on a
 * later tick via `setImmediate`.
 */
BufferSlicer.prototype.write = function(buffer, offset, length, position, callback) {
  var sourceEnd = offset + length;
  buffer.copy(this.buffer, position, offset, sourceEnd);
  var notify = function() {
    callback(null, length, buffer);
  };
  setImmediate(notify);
};
|
|
3129
|
-
/**
 * Create a `PassThrough` stream pre-filled with a slice of the wrapped
 * buffer, written in pieces of at most `maxChunkSize` bytes and then ended.
 *
 * @param options  Optional; `start` (default 0) and `end` (default: buffer
 *                 length when falsy) select the slice. The object is also
 *                 passed to `PassThrough`.
 * @returns The stream; its `destroy` only sets the `destroyed` flag.
 */
BufferSlicer.prototype.createReadStream = function(options) {
  var settings = options || {};
  var stream = new PassThrough(settings);
  stream.destroyed = false;
  stream.start = settings.start || 0;
  stream.endOffset = settings.end;
  stream.pos = stream.endOffset || this.buffer.length;
  var payload = this.buffer.slice(stream.start, stream.pos);
  var total = payload.length;
  var cursor = 0;
  var next = cursor + this.maxChunkSize;
  // Full-size pieces first ...
  while (next < total) {
    stream.write(payload.slice(cursor, next));
    cursor = next;
    next = cursor + this.maxChunkSize;
  }
  // ... then whatever is left, if anything.
  if (cursor < total) {
    stream.write(payload.slice(cursor, total));
  }
  stream.end();
  stream.destroy = function() {
    stream.destroyed = true;
  };
  return stream;
};
|
|
3155
|
-
/**
 * Creates a Writable whose chunks are copied into the backing buffer.
 *
 * @param {Object} [options] - Also forwarded to the Writable constructor.
 * @param {number} [options.start=0] - Buffer index of the first write.
 * @param {number} [options.end] - Exclusive upper bound for writes; defaults
 *   to the buffer length when null/undefined, otherwise coerced to a number.
 * @returns {Writable} Stream exposing `start`, `endOffset`, `bytesWritten`
 *   and `pos`; it emits "progress" after each chunk it stores.
 */
BufferSlicer.prototype.createWriteStream = function(options) {
  var owner = this;
  var settings = options || {};
  var sink = new Writable(settings);
  sink.start = settings.start || 0;
  sink.endOffset = settings.end == null ? owner.buffer.length : +settings.end;
  sink.bytesWritten = 0;
  sink.pos = sink.start;
  sink.destroyed = false;

  function storeChunk(chunk, encoding, done) {
    // Once destroyed, chunks are dropped and `done` is never invoked.
    if (sink.destroyed) return;
    var nextPos = sink.pos + chunk.length;
    if (nextPos <= sink.endOffset) {
      chunk.copy(owner.buffer, sink.pos, 0, chunk.length);
      sink.bytesWritten += chunk.length;
      sink.pos = nextPos;
      sink.emit("progress");
      done();
      return;
    }
    // Chunk would run past the allowed range: flag the stream and fail the write.
    var tooBig = new Error("maximum file length exceeded");
    tooBig.code = "ETOOBIG";
    sink.destroyed = true;
    done(tooBig);
  }

  sink._write = storeChunk;
  sink.destroy = function() {
    sink.destroyed = true;
  };
  return sink;
};
|
|
3185
|
-
/** Adds one to the slicer's reference count. */
BufferSlicer.prototype.ref = function() {
  this.refCount = this.refCount + 1;
};
|
|
3188
|
-
/**
 * Subtracts one from the slicer's reference count.
 *
 * @throws {Error} "invalid unref" when the count drops below zero (the
 *   decremented value is still stored).
 */
BufferSlicer.prototype.unref = function() {
  var remaining = this.refCount - 1;
  this.refCount = remaining;
  if (remaining < 0) {
    throw new Error("invalid unref");
  }
};
|
|
3194
|
-
/**
 * Factory for a BufferSlicer.
 *
 * @param {Buffer} buffer - Passed through to the BufferSlicer constructor.
 * @param {Object} [options] - Passed through to the BufferSlicer constructor.
 * @returns {BufferSlicer}
 */
function createFromBuffer(buffer, options) {
  var slicer = new BufferSlicer(buffer, options);
  return slicer;
}
|
|
3197
|
-
/**
 * Factory for an FdSlicer.
 *
 * @param {number} fd - Passed through to the FdSlicer constructor.
 * @param {Object} [options] - Passed through to the FdSlicer constructor.
 * @returns {FdSlicer}
 */
function createFromFd(fd, options) {
  var slicer = new FdSlicer(fd, options);
  return slicer;
}
|
|
3200
|
-
}
|
|
3201
|
-
});
|
|
3202
|
-
|
|
3203
|
-
// ../../node_modules/.pnpm/yauzl@2.10.0/node_modules/yauzl/index.js
|
|
3204
|
-
var require_yauzl = __commonJS({
|
|
3205
|
-
"../../node_modules/.pnpm/yauzl@2.10.0/node_modules/yauzl/index.js"(exports) {
|
|
3206
|
-
"use strict";
|
|
3207
|
-
var fs = __require("fs");
|
|
3208
|
-
var zlib = __require("zlib");
|
|
3209
|
-
var fd_slicer = require_fd_slicer();
|
|
3210
|
-
var crc32 = require_buffer_crc32();
|
|
3211
|
-
var util = __require("util");
|
|
3212
|
-
var EventEmitter = __require("events").EventEmitter;
|
|
3213
|
-
var Transform = __require("stream").Transform;
|
|
3214
|
-
var PassThrough = __require("stream").PassThrough;
|
|
3215
|
-
var Writable = __require("stream").Writable;
|
|
3216
|
-
exports.open = open;
|
|
3217
|
-
exports.fromFd = fromFd;
|
|
3218
|
-
exports.fromBuffer = fromBuffer;
|
|
3219
|
-
exports.fromRandomAccessReader = fromRandomAccessReader;
|
|
3220
|
-
exports.dosDateTimeToDate = dosDateTimeToDate;
|
|
3221
|
-
exports.validateFileName = validateFileName;
|
|
3222
|
-
exports.ZipFile = ZipFile;
|
|
3223
|
-
exports.Entry = Entry;
|
|
3224
|
-
exports.RandomAccessReader = RandomAccessReader;
|
|
3225
|
-
/**
 * Opens the file at `path` for reading and hands the descriptor to fromFd().
 *
 * Missing options are filled in on the caller's object: autoClose=true,
 * lazyEntries=false, decodeStrings=true, validateEntrySizes=true,
 * strictFileNames=false.
 *
 * @param {string} path - File to open.
 * @param {Object|Function} [options] - Options, or the callback itself.
 * @param {function(?Error, ZipFile=)} [callback] - Falls back to
 *   `defaultCallback` when omitted.
 */
function open(path, options, callback) {
  if (typeof options === "function") {
    callback = options;
    options = null;
  }
  if (options == null) options = {};
  var defaults = {
    autoClose: true,
    lazyEntries: false,
    decodeStrings: true,
    validateEntrySizes: true,
    strictFileNames: false
  };
  Object.keys(defaults).forEach(function(key) {
    if (options[key] == null) options[key] = defaults[key];
  });
  if (callback == null) callback = defaultCallback;
  fs.open(path, "r", function(openError, fd) {
    if (openError) return callback(openError);
    fromFd(fd, options, function(zipError, zipfile) {
      // fromFd failed, so nothing owns the descriptor: close it here.
      if (zipError) fs.close(fd, defaultCallback);
      callback(zipError, zipfile);
    });
  });
}
|
|
3245
|
-
/**
 * Stats an open file descriptor, wraps it in an fd-slicer reader and passes
 * the reader and file size to fromRandomAccessReader().
 *
 * Missing options are filled in on the caller's object: autoClose=false,
 * lazyEntries=false, decodeStrings=true, validateEntrySizes=true,
 * strictFileNames=false.
 *
 * @param {number} fd - Open file descriptor.
 * @param {Object|Function} [options] - Options, or the callback itself.
 * @param {function(?Error, ZipFile=)} [callback] - Falls back to
 *   `defaultCallback` when omitted.
 */
function fromFd(fd, options, callback) {
  if (typeof options === "function") {
    callback = options;
    options = null;
  }
  if (options == null) options = {};
  var defaults = {
    autoClose: false,
    lazyEntries: false,
    decodeStrings: true,
    validateEntrySizes: true,
    strictFileNames: false
  };
  Object.keys(defaults).forEach(function(key) {
    if (options[key] == null) options[key] = defaults[key];
  });
  if (callback == null) callback = defaultCallback;
  fs.fstat(fd, function(statError, stats) {
    if (statError) return callback(statError);
    // The slicer is always created with autoClose: true.
    var reader = fd_slicer.createFromFd(fd, { autoClose: true });
    fromRandomAccessReader(reader, stats.size, options, callback);
  });
}
|
|
3263
|
-
/**
 * Reads a zip archive held entirely in memory.
 *
 * `options.autoClose` is always overwritten with false; the other missing
 * options get the defaults lazyEntries=false, decodeStrings=true,
 * validateEntrySizes=true, strictFileNames=false.
 *
 * @param {Buffer} buffer - The archive bytes.
 * @param {Object|Function} [options] - Options, or the callback itself.
 * @param {function(?Error, ZipFile=)} [callback] - Forwarded unchanged to
 *   fromRandomAccessReader().
 */
function fromBuffer(buffer, options, callback) {
  if (typeof options === "function") {
    callback = options;
    options = null;
  }
  if (options == null) options = {};
  options.autoClose = false;
  var defaults = {
    lazyEntries: false,
    decodeStrings: true,
    validateEntrySizes: true,
    strictFileNames: false
  };
  Object.keys(defaults).forEach(function(key) {
    if (options[key] == null) options[key] = defaults[key];
  });
  // The buffer-backed reader streams data in chunks of at most 64 KiB.
  var reader = fd_slicer.createFromBuffer(buffer, { maxChunkSize: 65536 });
  fromRandomAccessReader(reader, buffer.length, options, callback);
}
|
|
3277
|
-
/**
 * Locates the end-of-central-directory record (EOCDR) at the tail of the
 * archive and, once it is parsed, calls back with a new ZipFile.
 *
 * @param {Object} reader - Object providing ref() and read(); it is passed on
 *   to the ZipFile constructor.
 * @param {number} totalSize - Size of the archive in bytes.
 * @param {Object|Function} [options] - Options, or the callback itself.
 *   Missing autoClose / lazyEntries / decodeStrings / validateEntrySizes /
 *   strictFileNames are filled in on this object.
 * @param {function(?Error, ZipFile=)} [callback] - Falls back to
 *   `defaultCallback` when omitted.
 * @throws {Error} Synchronously, when totalSize is not a number or exceeds
 *   Number.MAX_SAFE_INTEGER. All other failures go to `callback`.
 */
function fromRandomAccessReader(reader, totalSize, options, callback) {
  if (typeof options === "function") {
    callback = options;
    options = null;
  }
  if (options == null) options = {};
  if (options.autoClose == null) options.autoClose = true;
  if (options.lazyEntries == null) options.lazyEntries = false;
  if (options.decodeStrings == null) options.decodeStrings = true;
  var decodeStrings = !!options.decodeStrings;
  if (options.validateEntrySizes == null) options.validateEntrySizes = true;
  if (options.strictFileNames == null) options.strictFileNames = false;
  if (callback == null) callback = defaultCallback;
  if (typeof totalSize !== "number") throw new Error("expected totalSize parameter to be a number");
  if (totalSize > Number.MAX_SAFE_INTEGER) {
    throw new Error("zip file too large. only file sizes up to 2^52 are supported due to JavaScript's Number type being an IEEE 754 double.");
  }
  // NOTE(review): no matching unref() is visible on the error paths below —
  // confirm whether callers are expected to release the reader themselves.
  reader.ref();
  // The EOCDR is 22 bytes plus a comment of at most 65535 bytes, so only the
  // last (22 + 65535) bytes of the file (or the whole file, if smaller) can
  // contain it.
  var eocdrWithoutCommentSize = 22;
  var maxCommentSize = 65535;
  var bufferSize = Math.min(eocdrWithoutCommentSize + maxCommentSize, totalSize);
  var buffer = newBuffer(bufferSize);
  var bufferReadStart = totalSize - buffer.length;
  readAndAssertNoEof(reader, buffer, 0, bufferSize, bufferReadStart, function(err) {
    if (err) return callback(err);
    // Scan backwards from the last position where a comment-less EOCDR fits.
    for (var i = bufferSize - eocdrWithoutCommentSize; i >= 0; i -= 1) {
      // 101010256 === 0x06054b50, the EOCDR signature.
      if (buffer.readUInt32LE(i) !== 101010256) continue;
      var eocdrBuffer = buffer.slice(i);
      var diskNumber = eocdrBuffer.readUInt16LE(4);
      if (diskNumber !== 0) {
        return callback(new Error("multi-disk zip files are not supported: found disk number: " + diskNumber));
      }
      var entryCount = eocdrBuffer.readUInt16LE(10);
      var centralDirectoryOffset = eocdrBuffer.readUInt32LE(16);
      var commentLength = eocdrBuffer.readUInt16LE(20);
      // The comment must run exactly to the end of the file.
      var expectedCommentLength = eocdrBuffer.length - eocdrWithoutCommentSize;
      if (commentLength !== expectedCommentLength) {
        return callback(new Error("invalid comment length. expected: " + expectedCommentLength + ". found: " + commentLength));
      }
      var comment = decodeStrings ? decodeBuffer(eocdrBuffer, 22, eocdrBuffer.length, false) : eocdrBuffer.slice(22);
      // 65535 / 4294967295 (all bits set) in these fields mean the real values
      // live in the zip64 records; otherwise the archive is ready to use.
      if (!(entryCount === 65535 || centralDirectoryOffset === 4294967295)) {
        return callback(null, new ZipFile(reader, centralDirectoryOffset, totalSize, entryCount, comment, options.autoClose, options.lazyEntries, decodeStrings, options.validateEntrySizes, options.strictFileNames));
      }
      // The 20-byte zip64 EOCD locator sits immediately before the EOCDR.
      var zip64EocdlBuffer = newBuffer(20);
      var zip64EocdlOffset = bufferReadStart + i - zip64EocdlBuffer.length;
      readAndAssertNoEof(reader, zip64EocdlBuffer, 0, zip64EocdlBuffer.length, zip64EocdlOffset, function(err2) {
        if (err2) return callback(err2);
        // 117853008 === 0x07064b50, the zip64 EOCD locator signature.
        if (zip64EocdlBuffer.readUInt32LE(0) !== 117853008) {
          return callback(new Error("invalid zip64 end of central directory locator signature"));
        }
        var zip64EocdrOffset = readUInt64LE(zip64EocdlBuffer, 8);
        var zip64EocdrBuffer = newBuffer(56);
        readAndAssertNoEof(reader, zip64EocdrBuffer, 0, zip64EocdrBuffer.length, zip64EocdrOffset, function(err3) {
          if (err3) return callback(err3);
          // 101075792 === 0x06064b50, the zip64 EOCD record signature.
          if (zip64EocdrBuffer.readUInt32LE(0) !== 101075792) {
            return callback(new Error("invalid zip64 end of central directory record signature"));
          }
          // Replace the placeholder values with the 64-bit ones.
          entryCount = readUInt64LE(zip64EocdrBuffer, 32);
          centralDirectoryOffset = readUInt64LE(zip64EocdrBuffer, 48);
          return callback(null, new ZipFile(reader, centralDirectoryOffset, totalSize, entryCount, comment, options.autoClose, options.lazyEntries, decodeStrings, options.validateEntrySizes, options.strictFileNames));
        });
      });
      // The first signature match decides the outcome; stop scanning.
      return;
    }
    callback(new Error("end of central directory record signature not found"));
  });
}
|
|
3344
|
-
util.inherits(ZipFile, EventEmitter);
/**
 * Event-emitting handle for an opened zip archive.
 *
 * Reader "error" events are routed through emitError(); the reader's first
 * "close" is re-emitted as this object's "close". Unless `lazyEntries` is
 * truthy, entry reading starts immediately.
 *
 * @param {Object} reader - Source of archive bytes (needs on/once).
 * @param {number} centralDirectoryOffset - Initial value of readEntryCursor.
 * @param {number} fileSize - Total archive size.
 * @param {number} entryCount - Number of entries to read.
 * @param {string|Buffer} comment - Archive comment.
 * @param {*} autoClose - Coerced to boolean.
 * @param {*} lazyEntries - Coerced to boolean.
 * @param {*} decodeStrings - Coerced to boolean.
 * @param {*} validateEntrySizes - Coerced to boolean.
 * @param {*} strictFileNames - Coerced to boolean.
 */
function ZipFile(reader, centralDirectoryOffset, fileSize, entryCount, comment, autoClose, lazyEntries, decodeStrings, validateEntrySizes, strictFileNames) {
  var zipfile = this;
  EventEmitter.call(zipfile);

  zipfile.reader = reader;
  zipfile.reader.on("error", function onReaderError(readerError) {
    emitError(zipfile, readerError);
  });
  zipfile.reader.once("close", function onReaderClose() {
    zipfile.emit("close");
  });

  zipfile.readEntryCursor = centralDirectoryOffset;
  zipfile.fileSize = fileSize;
  zipfile.entryCount = entryCount;
  zipfile.comment = comment;
  zipfile.entriesRead = 0;

  zipfile.autoClose = Boolean(autoClose);
  zipfile.lazyEntries = Boolean(lazyEntries);
  zipfile.decodeStrings = Boolean(decodeStrings);
  zipfile.validateEntrySizes = Boolean(validateEntrySizes);
  zipfile.strictFileNames = Boolean(strictFileNames);

  zipfile.isOpen = true;
  zipfile.emittedError = false;

  if (zipfile.lazyEntries) return;
  zipfile._readEntry();
}
|
|
3369
|
-
/**
 * Marks the archive as closed and releases its reference on the reader.
 * Does nothing if the archive is already closed.
 */
ZipFile.prototype.close = function() {
  if (this.isOpen) {
    this.isOpen = false;
    this.reader.unref();
  }
};
|
|
3374
|
-
/**
 * Closes `self` first when its autoClose flag is set, then reports `err`
 * through emitError().
 *
 * @param {ZipFile} self - Archive the error belongs to.
 * @param {Error} err - Error to report.
 */
function emitErrorAndAutoClose(self, err) {
  var shouldClose = self.autoClose;
  if (shouldClose) {
    self.close();
  }
  emitError(self, err);
}
|
|
3378
|
-
/**
 * Emits "error" on `self` at most once; later calls are ignored because the
 * `emittedError` flag is already set.
 *
 * @param {ZipFile} self - Emitter to report on.
 * @param {Error} err - Error to emit.
 */
function emitError(self, err) {
  if (!self.emittedError) {
    self.emittedError = true;
    self.emit("error", err);
  }
}
|
|
3383
|
-
/**
 * Requests the next entry. Only valid when the archive was opened with
 * lazyEntries enabled.
 *
 * @throws {Error} When lazyEntries is off.
 */
ZipFile.prototype.readEntry = function() {
  if (this.lazyEntries) {
    this._readEntry();
    return;
  }
  throw new Error("readEntry() called without lazyEntries:true");
};
|
|
3387
|
-
/**
 * Reads the next central directory file header at `readEntryCursor`, builds an
 * Entry from it and emits "entry". Emits "end" (asynchronously) once
 * `entriesRead` reaches `entryCount`. Failures are reported through
 * emitErrorAndAutoClose(). When lazyEntries is off, it calls itself again
 * after each entry.
 */
ZipFile.prototype._readEntry = function() {
  var self = this;
  if (self.entryCount === self.entriesRead) {
    // Every entry has been read: finish on a later tick.
    setImmediate(function() {
      if (self.autoClose) self.close();
      if (self.emittedError) return;
      self.emit("end");
    });
    return;
  }
  if (self.emittedError) return;
  // The fixed-size part of a central directory file header is 46 bytes.
  var buffer = newBuffer(46);
  readAndAssertNoEof(self.reader, buffer, 0, buffer.length, self.readEntryCursor, function(err) {
    if (err) return emitErrorAndAutoClose(self, err);
    if (self.emittedError) return;
    var entry = new Entry();
    var signature = buffer.readUInt32LE(0);
    // 33639248 === 0x02014b50, the central directory file header signature.
    if (signature !== 33639248) return emitErrorAndAutoClose(self, new Error("invalid central directory file header signature: 0x" + signature.toString(16)));
    entry.versionMadeBy = buffer.readUInt16LE(4);
    entry.versionNeededToExtract = buffer.readUInt16LE(6);
    entry.generalPurposeBitFlag = buffer.readUInt16LE(8);
    entry.compressionMethod = buffer.readUInt16LE(10);
    entry.lastModFileTime = buffer.readUInt16LE(12);
    entry.lastModFileDate = buffer.readUInt16LE(14);
    entry.crc32 = buffer.readUInt32LE(16);
    entry.compressedSize = buffer.readUInt32LE(20);
    entry.uncompressedSize = buffer.readUInt32LE(24);
    entry.fileNameLength = buffer.readUInt16LE(28);
    entry.extraFieldLength = buffer.readUInt16LE(30);
    entry.fileCommentLength = buffer.readUInt16LE(32);
    // Bytes 34-35 of the header are not read.
    entry.internalFileAttributes = buffer.readUInt16LE(36);
    entry.externalFileAttributes = buffer.readUInt32LE(38);
    entry.relativeOffsetOfLocalHeader = buffer.readUInt32LE(42);
    // Flag bit 6 (value 64) marks strong encryption, which is rejected.
    if (entry.generalPurposeBitFlag & 64) return emitErrorAndAutoClose(self, new Error("strong encryption is not supported"));
    self.readEntryCursor += 46;
    // Variable-length tail: file name, extra fields, file comment.
    buffer = newBuffer(entry.fileNameLength + entry.extraFieldLength + entry.fileCommentLength);
    readAndAssertNoEof(self.reader, buffer, 0, buffer.length, self.readEntryCursor, function(err2) {
      if (err2) return emitErrorAndAutoClose(self, err2);
      if (self.emittedError) return;
      // Flag bit 11 (value 2048) selects UTF-8 for the name and comment.
      var isUtf8 = (entry.generalPurposeBitFlag & 2048) !== 0;
      entry.fileName = self.decodeStrings ? decodeBuffer(buffer, 0, entry.fileNameLength, isUtf8) : buffer.slice(0, entry.fileNameLength);
      var fileCommentStart = entry.fileNameLength + entry.extraFieldLength;
      var extraFieldBuffer = buffer.slice(entry.fileNameLength, fileCommentStart);
      entry.extraFields = [];
      var i = 0;
      // Each extra field is a 2-byte id, a 2-byte size, then `size` data
      // bytes. Fewer than 4 trailing bytes are ignored.
      while (i < extraFieldBuffer.length - 3) {
        var headerId = extraFieldBuffer.readUInt16LE(i + 0);
        var dataSize = extraFieldBuffer.readUInt16LE(i + 2);
        var dataStart = i + 4;
        var dataEnd = dataStart + dataSize;
        if (dataEnd > extraFieldBuffer.length) return emitErrorAndAutoClose(self, new Error("extra field length exceeds extra field buffer size"));
        // Copy the data so the entry does not keep a view into `buffer`.
        var dataBuffer = newBuffer(dataSize);
        extraFieldBuffer.copy(dataBuffer, 0, dataStart, dataEnd);
        entry.extraFields.push({
          id: headerId,
          data: dataBuffer
        });
        i = dataEnd;
      }
      entry.fileComment = self.decodeStrings ? decodeBuffer(buffer, fileCommentStart, fileCommentStart + entry.fileCommentLength, isUtf8) : buffer.slice(fileCommentStart, fileCommentStart + entry.fileCommentLength);
      // `comment` is an alias for `fileComment`.
      entry.comment = entry.fileComment;
      self.readEntryCursor += buffer.length;
      self.entriesRead += 1;
      // 4294967295 (0xFFFFFFFF) in a 32-bit field means the real value is in
      // the zip64 extended information extra field (id 1).
      if (entry.uncompressedSize === 4294967295 || entry.compressedSize === 4294967295 || entry.relativeOffsetOfLocalHeader === 4294967295) {
        var zip64EiefBuffer = null;
        for (var i = 0; i < entry.extraFields.length; i++) {
          var extraField = entry.extraFields[i];
          if (extraField.id === 1) {
            zip64EiefBuffer = extraField.data;
            break;
          }
        }
        if (zip64EiefBuffer == null) {
          return emitErrorAndAutoClose(self, new Error("expected zip64 extended information extra field"));
        }
        // The zip64 field holds only the values that were placeholders, in
        // this fixed order, 8 bytes each.
        var index = 0;
        if (entry.uncompressedSize === 4294967295) {
          if (index + 8 > zip64EiefBuffer.length) {
            return emitErrorAndAutoClose(self, new Error("zip64 extended information extra field does not include uncompressed size"));
          }
          entry.uncompressedSize = readUInt64LE(zip64EiefBuffer, index);
          index += 8;
        }
        if (entry.compressedSize === 4294967295) {
          if (index + 8 > zip64EiefBuffer.length) {
            return emitErrorAndAutoClose(self, new Error("zip64 extended information extra field does not include compressed size"));
          }
          entry.compressedSize = readUInt64LE(zip64EiefBuffer, index);
          index += 8;
        }
        if (entry.relativeOffsetOfLocalHeader === 4294967295) {
          if (index + 8 > zip64EiefBuffer.length) {
            return emitErrorAndAutoClose(self, new Error("zip64 extended information extra field does not include relative header offset"));
          }
          entry.relativeOffsetOfLocalHeader = readUInt64LE(zip64EiefBuffer, index);
          index += 8;
        }
      }
      if (self.decodeStrings) {
        // Extra field 28789 (0x7075) may carry a replacement file name. It is
        // used only when it has at least 6 bytes, its first byte is 1, and its
        // stored CRC32 matches the raw file name bytes from the header.
        for (var i = 0; i < entry.extraFields.length; i++) {
          var extraField = entry.extraFields[i];
          if (extraField.id === 28789) {
            if (extraField.data.length < 6) {
              continue;
            }
            if (extraField.data.readUInt8(0) !== 1) {
              continue;
            }
            var oldNameCrc32 = extraField.data.readUInt32LE(1);
            if (crc32.unsigned(buffer.slice(0, entry.fileNameLength)) !== oldNameCrc32) {
              continue;
            }
            // NOTE(review): the trailing `true` presumably selects UTF-8
            // decoding, by analogy with the isUtf8 argument above — confirm
            // against decodeBuffer.
            entry.fileName = decodeBuffer(extraField.data, 5, extraField.data.length, true);
            break;
          }
        }
      }
      // Stored (method 0) entries must have matching sizes; encrypted ones
      // are expected to be 12 bytes larger.
      if (self.validateEntrySizes && entry.compressionMethod === 0) {
        var expectedCompressedSize = entry.uncompressedSize;
        if (entry.isEncrypted()) {
          expectedCompressedSize += 12;
        }
        if (entry.compressedSize !== expectedCompressedSize) {
          var msg = "compressed/uncompressed size mismatch for stored file: " + entry.compressedSize + " != " + entry.uncompressedSize;
          return emitErrorAndAutoClose(self, new Error(msg));
        }
      }
      if (self.decodeStrings) {
        if (!self.strictFileNames) {
          // Lenient mode: backslashes become forward slashes before validation.
          entry.fileName = entry.fileName.replace(/\\/g, "/");
        }
        // validateFileName as defined in this file takes one parameter, so the
        // second argument is ignored.
        var errorMessage = validateFileName(entry.fileName, self.validateFileNameOptions);
        if (errorMessage != null) return emitErrorAndAutoClose(self, new Error(errorMessage));
      }
      self.emit("entry", entry);
      if (!self.lazyEntries) self._readEntry();
    });
  });
};
|
|
3526
|
-
/**
 * Opens a readable stream over the data of `entry`.
 *
 * @param {Entry} entry - Entry whose data should be streamed.
 * @param {Object|Function} [options] - Options, or the callback itself.
 * @param {boolean} [options.decrypt] - Only `false` is accepted, and only for
 *   encrypted entries.
 * @param {boolean} [options.decompress] - Only for compressed entries; `false`
 *   yields the raw compressed bytes.
 * @param {number} [options.start] - Start offset within the stored data.
 * @param {number} [options.end] - End offset within the stored data.
 * @param {function(?Error, stream.Readable=)} callback - Receives the stream.
 * @throws {Error} Synchronously, for invalid option combinations or ranges.
 *   A closed archive, an encrypted entry without `decrypt: false`, and header
 *   problems are reported through `callback` instead.
 */
ZipFile.prototype.openReadStream = function(entry, options, callback) {
  var self = this;
  // Offsets within the entry's stored data; default is the whole entry.
  var relativeStart = 0;
  var relativeEnd = entry.compressedSize;
  if (callback == null) {
    // Two-argument form: (entry, callback).
    callback = options;
    options = {};
  } else {
    if (options.decrypt != null) {
      if (!entry.isEncrypted()) {
        throw new Error("options.decrypt can only be specified for encrypted entries");
      }
      if (options.decrypt !== false) throw new Error("invalid options.decrypt value: " + options.decrypt);
      if (entry.isCompressed()) {
        if (options.decompress !== false) throw new Error("entry is encrypted and compressed, and options.decompress !== false");
      }
    }
    if (options.decompress != null) {
      if (!entry.isCompressed()) {
        throw new Error("options.decompress can only be specified for compressed entries");
      }
      if (!(options.decompress === false || options.decompress === true)) {
        throw new Error("invalid options.decompress value: " + options.decompress);
      }
    }
    // A byte range is allowed only when the stored bytes are returned as-is.
    if (options.start != null || options.end != null) {
      if (entry.isCompressed() && options.decompress !== false) {
        throw new Error("start/end range not allowed for compressed entry without options.decompress === false");
      }
      if (entry.isEncrypted() && options.decrypt !== false) {
        throw new Error("start/end range not allowed for encrypted entry without options.decrypt === false");
      }
    }
    if (options.start != null) {
      relativeStart = options.start;
      if (relativeStart < 0) throw new Error("options.start < 0");
      if (relativeStart > entry.compressedSize) throw new Error("options.start > entry.compressedSize");
    }
    if (options.end != null) {
      relativeEnd = options.end;
      if (relativeEnd < 0) throw new Error("options.end < 0");
      if (relativeEnd > entry.compressedSize) throw new Error("options.end > entry.compressedSize");
      if (relativeEnd < relativeStart) throw new Error("options.end < options.start");
    }
  }
  if (!self.isOpen) return callback(new Error("closed"));
  if (entry.isEncrypted()) {
    if (options.decrypt !== false) return callback(new Error("entry is encrypted, and options.decrypt !== false"));
  }
  // Hold a reference on the reader while the local header is read; it is
  // released in the `finally` below.
  self.reader.ref();
  // The fixed-size part of a local file header is 30 bytes.
  var buffer = newBuffer(30);
  readAndAssertNoEof(self.reader, buffer, 0, buffer.length, entry.relativeOffsetOfLocalHeader, function(err) {
    try {
      if (err) return callback(err);
      var signature = buffer.readUInt32LE(0);
      // 67324752 === 0x04034b50, the local file header signature.
      if (signature !== 67324752) {
        return callback(new Error("invalid local file header signature: 0x" + signature.toString(16)));
      }
      // Only the two length fields are taken from the local header; they
      // determine where the file data begins.
      var fileNameLength = buffer.readUInt16LE(26);
      var extraFieldLength = buffer.readUInt16LE(28);
      var localFileHeaderEnd = entry.relativeOffsetOfLocalHeader + buffer.length + fileNameLength + extraFieldLength;
      var decompress;
      if (entry.compressionMethod === 0) {
        // Method 0: stored, nothing to inflate.
        decompress = false;
      } else if (entry.compressionMethod === 8) {
        // Method 8: inflate unless the caller opted out.
        decompress = options.decompress != null ? options.decompress : true;
      } else {
        return callback(new Error("unsupported compression method: " + entry.compressionMethod));
      }
      var fileDataStart = localFileHeaderEnd;
      var fileDataEnd = fileDataStart + entry.compressedSize;
      if (entry.compressedSize !== 0) {
        if (fileDataEnd > self.fileSize) {
          return callback(new Error("file data overflows file bounds: " + fileDataStart + " + " + entry.compressedSize + " > " + self.fileSize));
        }
      }
      var readStream = self.reader.createReadStream({
        start: fileDataStart + relativeStart,
        end: fileDataStart + relativeEnd
      });
      var endpointStream = readStream;
      if (decompress) {
        var destroyed = false;
        var inflateFilter = zlib.createInflateRaw();
        // Forward errors down the pipeline on a later tick, unless the
        // endpoint has been destroyed in the meantime.
        readStream.on("error", function(err2) {
          setImmediate(function() {
            if (!destroyed) inflateFilter.emit("error", err2);
          });
        });
        readStream.pipe(inflateFilter);
        if (self.validateEntrySizes) {
          // Check that the inflated size equals entry.uncompressedSize.
          endpointStream = new AssertByteCountStream(entry.uncompressedSize);
          inflateFilter.on("error", function(err2) {
            setImmediate(function() {
              if (!destroyed) endpointStream.emit("error", err2);
            });
          });
          inflateFilter.pipe(endpointStream);
        } else {
          endpointStream = inflateFilter;
        }
        // Destroying the endpoint unpipes the chain and destroys the source.
        endpointStream.destroy = function() {
          destroyed = true;
          if (inflateFilter !== endpointStream) inflateFilter.unpipe(endpointStream);
          readStream.unpipe(inflateFilter);
          readStream.destroy();
        };
      }
      callback(null, endpointStream);
    } finally {
      self.reader.unref();
    }
  });
};
|
|
3640
|
-
/**
 * Record for one archive entry. The constructor sets nothing; the fields are
 * assigned by ZipFile.prototype._readEntry as it parses a central directory
 * header.
 */
function Entry() {
}
|
|
3642
|
-
/**
 * Converts the entry's DOS date and time fields into a Date.
 *
 * @returns {Date}
 */
Entry.prototype.getLastModDate = function() {
  var dosDate = this.lastModFileDate;
  var dosTime = this.lastModFileTime;
  return dosDateTimeToDate(dosDate, dosTime);
};
|
|
3645
|
-
/**
 * Reports whether bit 0 of the general purpose bit flag is set.
 *
 * @returns {boolean}
 */
Entry.prototype.isEncrypted = function() {
  var encryptionBit = this.generalPurposeBitFlag & 1;
  return Boolean(encryptionBit);
};
|
|
3648
|
-
/**
 * Reports whether the entry's compression method is 8.
 *
 * @returns {boolean}
 */
Entry.prototype.isCompressed = function() {
  var method = this.compressionMethod;
  return method === 8;
};
|
|
3651
|
-
/**
 * Unpacks bit-packed date and time values into a local-time Date.
 *
 * date: bits 0-4 day, bits 5-8 month (1-based), bits 9-15 years since 1980.
 * time: bits 0-4 seconds / 2, bits 5-10 minutes, bits 11-15 hours.
 *
 * @param {number} date - Packed date.
 * @param {number} time - Packed time.
 * @returns {Date} Local-time Date with zero milliseconds.
 */
function dosDateTimeToDate(date, time) {
  return new Date(
    (date >> 9 & 0x7f) + 1980,
    (date >> 5 & 0xf) - 1, // Date months are 0-based
    date & 0x1f,
    time >> 11 & 0x1f,
    time >> 5 & 0x3f,
    (time & 0x1f) * 2,
    0
  );
}
|
|
3661
|
-
/**
 * Checks an entry name for backslashes, absolute paths and ".." segments.
 *
 * @param {string} fileName - Entry name to check.
 * @returns {?string} An error message, or null when the name is acceptable.
 */
function validateFileName(fileName) {
  var hasBackslash = fileName.includes("\\");
  if (hasBackslash) {
    return "invalid characters in fileName: " + fileName;
  }
  // Drive-letter prefix or a leading slash counts as an absolute path.
  var isAbsolute = /^[a-zA-Z]:/.test(fileName) || /^\//.test(fileName);
  if (isAbsolute) {
    return "absolute path: " + fileName;
  }
  var segments = fileName.split("/");
  return segments.includes("..") ? "invalid relative path: " + fileName : null;
}
|
|
3673
|
-
/**
 * Reads exactly `length` bytes through `reader.read`; a short read is
 * reported as an error.
 *
 * @param {Object} reader - Object with read(buffer, offset, length, position, cb).
 * @param {Buffer} buffer - Destination buffer.
 * @param {number} offset - Write offset within `buffer`.
 * @param {number} length - Number of bytes required.
 * @param {number} position - Read position in the source.
 * @param {function(?Error=, Buffer=)} callback - Called with no arguments on
 *   success, with `(null, emptyBuffer)` for a zero-length read, or with an
 *   Error (read failure or "unexpected EOF").
 */
function readAndAssertNoEof(reader, buffer, offset, length, position, callback) {
  if (length !== 0) {
    reader.read(buffer, offset, length, position, function onRead(readError, bytesRead) {
      if (readError) return callback(readError);
      if (bytesRead < length) {
        return callback(new Error("unexpected EOF"));
      }
      callback();
    });
    return;
  }
  // Nothing to read: skip the reader but still answer asynchronously.
  return setImmediate(function() {
    callback(null, newBuffer(0));
  });
}
|
|
3687
|
-
util.inherits(AssertByteCountStream, Transform);
/**
 * Transform stream that counts the bytes passing through it and compares the
 * total with an expected size.
 *
 * @param {number} byteCount - Expected total number of bytes.
 */
function AssertByteCountStream(byteCount) {
  Transform.call(this);
  var counter = this;
  counter.actualByteCount = 0;
  counter.expectedByteCount = byteCount;
}
|
|
3693
|
-
/**
 * Passes `chunk` through unchanged while adding its length to the running
 * total; fails as soon as the total exceeds the expected byte count.
 *
 * @param {Buffer} chunk - Incoming data.
 * @param {string} encoding - Unused.
 * @param {function(?Error, Buffer=)} cb - Transform callback.
 */
AssertByteCountStream.prototype._transform = function(chunk, encoding, cb) {
  var total = this.actualByteCount + chunk.length;
  this.actualByteCount = total;
  var overflow = total > this.expectedByteCount;
  if (!overflow) {
    cb(null, chunk);
    return;
  }
  return cb(new Error("too many bytes in the stream. expected " + this.expectedByteCount + ". got at least " + total));
};
|
|
3701
|
-
/**
 * End-of-stream check: fails when fewer bytes were seen than expected.
 *
 * @param {function(?Error=)} cb - Flush callback.
 */
AssertByteCountStream.prototype._flush = function(cb) {
  var seen = this.actualByteCount;
  var wanted = this.expectedByteCount;
  var shortfall = seen < wanted;
  if (!shortfall) {
    cb();
    return;
  }
  return cb(new Error("not enough bytes in the stream. expected " + wanted + ". got only " + seen));
};
|
|
3708
|
-
util.inherits(RandomAccessReader, EventEmitter);
/**
 * Base class for readers that provide random access to archive bytes.
 * Instances start with a reference count of zero.
 */
function RandomAccessReader() {
  EventEmitter.call(this);
  var reader = this;
  reader.refCount = 0;
}
|
|
3713
|
-
/** Adds one to the reader's reference count. */
RandomAccessReader.prototype.ref = function() {
  this.refCount = this.refCount + 1;
};
|
|
3716
|
-
/**
 * Subtracts one from the reference count. When it reaches zero the reader is
 * closed via `this.close()`, after which "close" is emitted, or "error" if
 * closing failed.
 *
 * @throws {Error} "invalid unref" when the count drops below zero.
 */
RandomAccessReader.prototype.unref = function() {
  var reader = this;
  reader.refCount -= 1;
  if (reader.refCount > 0) return;
  if (reader.refCount < 0) throw new Error("invalid unref");
  reader.close(function(closeError) {
    if (closeError) return reader.emit("error", closeError);
    reader.emit("close");
  });
};
|
|
3727
|
-
/**
 * Builds a stream over the byte range [options.start, options.end) by piping
 * `_readStreamForRange(start, end)` through a RefUnrefFilter and then an
 * AssertByteCountStream sized to the range.
 *
 * @param {Object} options
 * @param {number} options.start - Range start.
 * @param {number} options.end - Range end.
 * @returns {stream.Readable} The byte-counting stream, or an empty
 *   PassThrough when start === end.
 */
RandomAccessReader.prototype.createReadStream = function(options) {
  var start = options.start;
  var end = options.end;
  if (start === end) {
    // Empty range: _readStreamForRange is not called; the stream is ended
    // on a later tick.
    var emptyStream = new PassThrough();
    setImmediate(function() {
      emptyStream.end();
    });
    return emptyStream;
  }
  var stream = this._readStreamForRange(start, end);
  var destroyed = false;
  // The filter takes a reference on this reader when it is constructed.
  var refUnrefFilter = new RefUnrefFilter(this);
  // Forward source errors to the next stage on a later tick, unless the
  // pipeline has been destroyed in the meantime.
  stream.on("error", function(err) {
    setImmediate(function() {
      if (!destroyed) refUnrefFilter.emit("error", err);
    });
  });
  refUnrefFilter.destroy = function() {
    stream.unpipe(refUnrefFilter);
    refUnrefFilter.unref();
    stream.destroy();
  };
  var byteCounter = new AssertByteCountStream(end - start);
  refUnrefFilter.on("error", function(err) {
    setImmediate(function() {
      if (!destroyed) byteCounter.emit("error", err);
    });
  });
  // Destroying the returned stream tears the pipeline down from the end.
  byteCounter.destroy = function() {
    destroyed = true;
    refUnrefFilter.unpipe(byteCounter);
    refUnrefFilter.destroy();
  };
  return stream.pipe(refUnrefFilter).pipe(byteCounter);
};
|
|
3763
|
-
/**
 * Placeholder that always throws; a working reader has to replace it with a
 * method that returns a stream for the requested byte range.
 *
 * @param {number} start - Range start.
 * @param {number} end - Range end.
 * @throws {Error} Always ("not implemented").
 */
RandomAccessReader.prototype._readStreamForRange = function(start, end) {
  var notImplemented = new Error("not implemented");
  throw notImplemented;
};
|
|
3766
|
-
/**
 * Reads `length` bytes starting at `position` into `buffer` at `offset` by
 * draining a stream from createReadStream() into the buffer.
 *
 * @param {Buffer} buffer - Destination buffer.
 * @param {number} offset - Write offset within `buffer`.
 * @param {number} length - Number of bytes requested.
 * @param {number} position - Start position in the source.
 * @param {function(?Error=)} callback - Registered as the sink's "finish"
 *   listener, and called with the error if the source stream emits one.
 */
RandomAccessReader.prototype.read = function(buffer, offset, length, position, callback) {
  var source = this.createReadStream({ start: position, end: position + length });
  var sink = new Writable();
  var copied = 0;
  sink._write = function(chunk, encoding, done) {
    // Place each chunk directly after the bytes copied so far.
    chunk.copy(buffer, offset + copied, 0, chunk.length);
    copied += chunk.length;
    done();
  };
  sink.on("finish", callback);
  source.on("error", function(sourceError) {
    callback(sourceError);
  });
  source.pipe(sink);
};
|
|
3781
|
-
/**
 * Default close implementation: there is nothing to release here, so the
 * callback is simply scheduled with `setImmediate` and invoked without
 * arguments.
 *
 * @param {Function} callback - invoked asynchronously
 */
RandomAccessReader.prototype.close = function(callback) {
  setImmediate(callback);
};
|
|
3784
|
-
/**
 * PassThrough stream that keeps `context` referenced for as long as the
 * stream is alive: `context.ref()` is called on construction and
 * `context.unref()` exactly once, either on flush or on an explicit `unref()`.
 *
 * @param {{ref: Function, unref: Function}} context - object to ref/unref
 */
function RefUnrefFilter(context) {
  PassThrough.call(this);
  this.context = context;
  this.context.ref();
  this.unreffedYet = false;
}
util.inherits(RefUnrefFilter, PassThrough);

/** Releases the reference once all data has passed through. */
RefUnrefFilter.prototype._flush = function(cb) {
  this.unref();
  cb();
};

/** Releases the reference on `context`; later calls are no-ops. */
RefUnrefFilter.prototype.unref = function(cb) {
  if (!this.unreffedYet) {
    this.unreffedYet = true;
    this.context.unref();
  }
};
|
|
3800
|
-
// Lookup string indexed by byte value: character i is the decoded form of byte i.
var cp437 = "\0\u263A\u263B\u2665\u2666\u2663\u2660\u2022\u25D8\u25CB\u25D9\u2642\u2640\u266A\u266B\u263C\u25BA\u25C4\u2195\u203C\xB6\xA7\u25AC\u21A8\u2191\u2193\u2192\u2190\u221F\u2194\u25B2\u25BC !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u2302\xC7\xFC\xE9\xE2\xE4\xE0\xE5\xE7\xEA\xEB\xE8\xEF\xEE\xEC\xC4\xC5\xC9\xE6\xC6\xF4\xF6\xF2\xFB\xF9\xFF\xD6\xDC\xA2\xA3\xA5\u20A7\u0192\xE1\xED\xF3\xFA\xF1\xD1\xAA\xBA\xBF\u2310\xAC\xBD\xBC\xA1\xAB\xBB\u2591\u2592\u2593\u2502\u2524\u2561\u2562\u2556\u2555\u2563\u2551\u2557\u255D\u255C\u255B\u2510\u2514\u2534\u252C\u251C\u2500\u253C\u255E\u255F\u255A\u2554\u2569\u2566\u2560\u2550\u256C\u2567\u2568\u2564\u2565\u2559\u2558\u2552\u2553\u256B\u256A\u2518\u250C\u2588\u2584\u258C\u2590\u2580\u03B1\xDF\u0393\u03C0\u03A3\u03C3\xB5\u03C4\u03A6\u0398\u03A9\u03B4\u221E\u03C6\u03B5\u2229\u2261\xB1\u2265\u2264\u2320\u2321\xF7\u2248\xB0\u2219\xB7\u221A\u207F\xB2\u25A0\xA0";

/**
 * Decodes bytes `start` (inclusive) to `end` (exclusive) of `buffer` as text.
 *
 * @param {Buffer} buffer - bytes to decode
 * @param {number} start - index of the first byte
 * @param {number} end - index one past the last byte
 * @param {boolean} isUtf8 - decode as UTF-8 when truthy; otherwise map each
 *   byte through the `cp437` table
 * @returns {string} the decoded text
 */
function decodeBuffer(buffer, start, end, isUtf8) {
  if (isUtf8) {
    return buffer.toString("utf8", start, end);
  }
  var decoded = "";
  var pos = start;
  while (pos < end) {
    decoded += cp437[buffer[pos]];
    pos += 1;
  }
  return decoded;
}
|
|
3812
|
-
/**
 * Reads an unsigned 64-bit little-endian integer as a JavaScript number by
 * combining two 32-bit reads.
 *
 * @param {Buffer} buffer - buffer to read from
 * @param {number} offset - index of the least significant byte
 * @returns {number} high word * 2^32 + low word
 */
function readUInt64LE(buffer, offset) {
  var low = buffer.readUInt32LE(offset);
  var high = buffer.readUInt32LE(offset + 4);
  return high * 0x100000000 + low;
}
|
|
3817
|
-
// Allocates an uninitialised Buffer of `len` bytes. `Buffer.allocUnsafe` is
// used when the runtime provides it; otherwise the legacy constructor is used.
var newBuffer;
if (typeof Buffer.allocUnsafe !== "function") {
  newBuffer = function(len) {
    return new Buffer(len);
  };
} else {
  newBuffer = function(len) {
    return Buffer.allocUnsafe(len);
  };
}
|
|
3827
|
-
/**
 * Fallback completion callback: rethrows whatever truthy error it is given
 * and does nothing otherwise.
 *
 * @param {*} err - error passed to the callback, if any
 * @throws {*} `err` itself when it is truthy
 */
function defaultCallback(err) {
  if (!err) return;
  throw err;
}
|
|
3830
|
-
}
|
|
3831
|
-
});
|
|
3832
|
-
|
|
3833
|
-
// ../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/buffer-reader.js
|
|
3834
|
-
var require_buffer_reader = __commonJS({
|
|
3835
|
-
"../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/buffer-reader.js"(exports, module) {
|
|
3836
|
-
"use strict";
|
|
3837
|
-
/**
 * Reader over an in-memory Buffer. Every operation completes immediately
 * but still returns a promise.
 */
var BufferReader = class _BufferReader {
  /**
   * @param {Buffer} buffer - the complete content to read from
   */
  constructor(buffer) {
    this._buffer = buffer;
  }

  /** Nothing to open; resolves immediately. */
  open() {
    return Promise.resolve();
  }

  /** Nothing to release; resolves immediately. */
  close() {
    return Promise.resolve();
  }

  /**
   * Copies `length` bytes, starting at `position` in the wrapped buffer,
   * into `buffer` at `offset`.
   *
   * @param {Buffer} buffer - destination buffer
   * @param {number} offset - write index in the destination
   * @param {number} length - number of bytes to copy
   * @param {number} position - read index in the wrapped buffer
   * @returns {Promise<Buffer>} resolves to the destination buffer
   */
  read(buffer, offset, length, position) {
    const sourceEnd = position + length;
    this._buffer.copy(buffer, offset, position, sourceEnd);
    return Promise.resolve(buffer);
  }

  /** @returns {Buffer} the wrapped buffer itself (not a copy) */
  buffer() {
    return this._buffer;
  }

  /**
   * @param {*} instance - value to test
   * @returns {boolean} true when `instance` was created by this class
   */
  static isBufferReader(instance) {
    return instance instanceof _BufferReader;
  }
};
|
|
3858
|
-
module.exports = BufferReader;
|
|
3859
|
-
}
|
|
3860
|
-
});
|
|
3861
|
-
|
|
3862
|
-
// ../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/file-reader.js
|
|
3863
|
-
var require_file_reader = __commonJS({
|
|
3864
|
-
"../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/file-reader.js"(exports, module) {
|
|
3865
|
-
"use strict";
|
|
3866
|
-
var fs = __require("fs");
|
|
3867
|
-
/**
 * Promise-based reader over a file on disk, backed by a file descriptor that
 * is acquired by {@link FileReader#open} and released by
 * {@link FileReader#close}.
 */
var FileReader = class _FileReader {
  /**
   * Creates a new file reader instance, using the given filename.
   * @param {string} filename - path of the file to read
   */
  constructor(filename) {
    this._filename = filename;
  }

  /**
   * Opens the file read-only and remembers its descriptor. After this,
   * {@link FileReader#read} can be called to read file content into a buffer.
   * @returns {Promise<void>} resolves once the file is open; rejects with the
   *   error reported by `fs.open`
   */
  open() {
    return new Promise((resolve, reject) => {
      // 0o666 is the same mode value the original passed as decimal 438.
      fs.open(this._filename, "r", 0o666, (err, fd) => {
        if (err) {
          return reject(err);
        }
        this._fd = fd;
        resolve();
      });
    });
  }

  /**
   * Closes the file descriptor associated with an open document, if there
   * is one. Calling it when nothing is open is a no-op.
   * @returns {Promise<void>} resolves when the descriptor has been closed
   */
  close() {
    return new Promise((resolve, reject) => {
      // Compare against null/undefined rather than truthiness: descriptor 0
      // is a valid open file and must still be closed.
      if (this._fd == null) {
        resolve();
        return;
      }
      fs.close(this._fd, (err) => {
        if (err) {
          return reject(err);
        }
        delete this._fd;
        resolve();
      });
    });
  }

  /**
   * Reads `length` bytes into `buffer`. The new data is written to the
   * buffer at offset `offset`, and is read from the file starting at
   * position `position`.
   * @param {Buffer} buffer - destination buffer
   * @param {number} offset - write index in the destination
   * @param {number} length - number of bytes to read
   * @param {number} position - read position in the file
   * @returns {Promise<Buffer>} resolves to the buffer when the data is
   *   present; rejects with "file not open" before {@link FileReader#open}
   */
  read(buffer, offset, length, position) {
    return new Promise((resolve, reject) => {
      // Same null check as close(): descriptor 0 counts as open.
      if (this._fd == null) {
        return reject(new Error("file not open"));
      }
      fs.read(this._fd, buffer, offset, length, position, (err, bytesRead, filled) => {
        if (err) {
          return reject(err);
        }
        resolve(filled);
      });
    });
  }

  /**
   * Returns the open file descriptor.
   * @returns {number|undefined} the descriptor, or undefined when not open
   */
  fd() {
    return this._fd;
  }

  /**
   * Returns true if the passed instance is an instance of this class.
   * @param {*} instance - value to test
   * @returns {boolean} true if `instance` is an instance of {@link FileReader}
   */
  static isFileReader(instance) {
    return instance instanceof _FileReader;
  }
};
|
|
3951
|
-
module.exports = FileReader;
|
|
3952
|
-
}
|
|
3953
|
-
});
|
|
3954
|
-
|
|
3955
|
-
// ../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/open-office-extractor.js
|
|
3956
|
-
var require_open_office_extractor = __commonJS({
|
|
3957
|
-
"../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/open-office-extractor.js"(exports, module) {
|
|
3958
|
-
"use strict";
|
|
3959
|
-
var path = __require("path");
|
|
3960
|
-
var SAXES = require_saxes();
|
|
3961
|
-
var yauzl = require_yauzl();
|
|
3962
|
-
var BufferReader = require_buffer_reader();
|
|
3963
|
-
var FileReader = require_file_reader();
|
|
3964
|
-
var Document = require_document();
|
|
3965
|
-
/**
 * Runs `callback` on the items of `array` one after another, starting at
 * `index`, waiting for each returned value/promise before moving on.
 *
 * @param {Function} callback - invoked with a single item
 * @param {Array} array - items to process
 * @param {number} index - position of the first item to process
 * @returns {Promise<void>} resolves after the last item; rejects as soon as
 *   a callback's promise rejects
 */
function each(callback, array, index) {
  if (index === array.length) {
    return Promise.resolve();
  }
  const item = array[index];
  const nextIndex = index + 1;
  return Promise.resolve(callback(item)).then(() => each(callback, array, nextIndex));
}
|
|
3972
|
-
/**
 * Extracts text from an Office Open XML (.docx) archive.
 *
 * The archive's entries are listed, `[Content_Types].xml` is parsed first to
 * learn which parts are worth reading, and every selected part is streamed
 * through a SAX parser whose open/close/text events are turned into the
 * fields of a `Document` (`_body`, `_footnotes`, `_endnotes`, `_annotations`,
 * `_headers`, `_footers`, `_textboxes`, `_headerTextboxes`).
 */
var OpenOfficeExtractor = class {
  constructor() {
    // All four tables below are indexed with strings taken from the archive
    // (content types, part names, extensions). They are null-prototype
    // objects so that names such as "constructor" or "toString" cannot
    // resolve to inherited Object.prototype members.
    this._streamTypes = Object.assign(Object.create(null), {
      "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml": true,
      "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml": true,
      "application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml": true,
      "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml": true,
      "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml": true,
      "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml": true,
      "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml": true,
      "application/vnd.openxmlformats-package.relationships+xml": true
    });
    this._headerTypes = Object.assign(Object.create(null), {
      "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header": true,
      "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer": true
    });
    // Part name -> action record, filled from <Override> elements.
    this._actions = Object.create(null);
    // File extension -> content type, filled from <Default> elements.
    this._defaults = Object.create(null);
  }

  /**
   * Decides whether an archive entry should be parsed.
   * @param {string} filename - entry name inside the archive
   * @returns {boolean} true when the entry has a registered action, or when
   *   its extension defaults to one of the known stream content types
   */
  shouldProcess(filename) {
    if (this._actions[filename]) {
      return true;
    }
    const extension = path.posix.extname(filename).replace(/^\./, "");
    if (!extension) {
      return false;
    }
    const defaultType = this._defaults[extension];
    return Boolean(defaultType && this._streamTypes[defaultType]);
  }

  /**
   * Opens the zip archive behind `reader` with lazy entry reading.
   * @param {BufferReader|FileReader} reader - an opened reader
   * @returns {Promise<object>} resolves to the yauzl zipfile
   * @throws {Error} synchronously, when `reader` is neither reader type
   */
  openArchive(reader) {
    if (BufferReader.isBufferReader(reader)) {
      return new Promise((resolve, reject) => {
        yauzl.fromBuffer(reader.buffer(), { lazyEntries: true }, (err, zipfile) => {
          if (err) {
            return reject(err);
          }
          resolve(zipfile);
        });
      });
    }
    if (FileReader.isFileReader(reader)) {
      return new Promise((resolve, reject) => {
        // autoClose is off: the descriptor belongs to the FileReader.
        yauzl.fromFd(reader.fd(), { lazyEntries: true, autoClose: false }, (err, zipfile) => {
          if (err) {
            return reject(err);
          }
          resolve(zipfile);
        });
      });
    }
    // Build the message without dereferencing a null/undefined reader.
    const readerType = reader == null ? String(reader) : reader.constructor?.name ?? typeof reader;
    throw new Error("Unexpected reader type: " + readerType);
  }

  /**
   * Lists every entry of the archive, then parses the relevant ones in
   * order, with `[Content_Types].xml` moved to the front.
   * @param {object} zipfile - zipfile obtained from {@link openArchive}
   * @returns {Promise<void>} resolves when all selected entries are parsed;
   *   rejects when the content-types part is missing or an entry fails
   */
  processEntries(zipfile) {
    const entryTable = {};
    const entryNames = [];
    return new Promise((resolve, reject) => {
      zipfile.on("error", reject);
      zipfile.on("entry", (entry) => {
        const filename = entry.fileName;
        entryTable[filename] = entry;
        entryNames.push(filename);
        zipfile.readEntry();
      });
      zipfile.on("end", () => resolve(this._document));
      zipfile.readEntry();
    }).then(() => {
      const index = entryNames.indexOf("[Content_Types].xml");
      if (index === -1) {
        throw new Error("Invalid Open Office XML: missing content types");
      }
      // The content types must be known before shouldProcess() is asked
      // about any other entry, so that part is handled first.
      entryNames.splice(index, 1);
      entryNames.unshift("[Content_Types].xml");
      this._actions["[Content_Types].xml"] = true;
      return each((name) => {
        if (this.shouldProcess(name)) {
          return this.handleEntry(zipfile, entryTable[name]);
        }
      }, entryNames, 0);
    });
  }

  /**
   * Extracts the document text from the archive behind `reader`.
   * @param {BufferReader|FileReader} reader - an opened reader
   * @returns {Promise<Document>} resolves to the populated document
   */
  extract(reader) {
    const archive = this.openArchive(reader);
    this._document = new Document();
    this._relationships = {};
    this._entryTable = {};
    this._entries = [];
    return archive.then((zipfile) => this.processEntries(zipfile)).then(() => {
      const document = this._document;
      // Non-empty textbox text gets a trailing newline.
      if (document._textboxes && document._textboxes.length > 0) {
        document._textboxes = document._textboxes + "\n";
      }
      if (document._headerTextboxes && document._headerTextboxes.length > 0) {
        document._headerTextboxes = document._headerTextboxes + "\n";
      }
      return document;
    });
  }

  /**
   * True when the innermost context collects text. Reads `this._context[0]`
   * and therefore throws when no context is active.
   * @returns {boolean}
   */
  _collectsText() {
    const current = this._context[0];
    return current === "content" || current === "cell" || current === "textbox";
  }

  /**
   * Handles an element start: records content-type and relationship
   * metadata, and maintains the context stack / text pieces for
   * wordprocessing elements.
   * @param {{name: string, attributes: object}} node - SAX open-tag node
   */
  handleOpenTag(node) {
    const name = node.name;
    const attributes = node.attributes;
    if (name === "Override") {
      const actionFunction = this._streamTypes[attributes["ContentType"]];
      if (actionFunction) {
        const partName = attributes["PartName"].replace(/^[/]+/, "");
        this._actions[partName] = { action: actionFunction, type: attributes["ContentType"] };
      }
    } else if (name === "Default") {
      this._defaults[attributes["Extension"]] = attributes["ContentType"];
    } else if (name === "Relationship") {
      this._relationships[attributes["Id"]] = {
        type: attributes["Type"],
        target: attributes["Target"]
      };
    } else if (name === "w:document" || name === "w:footnotes" || name === "w:endnotes" || name === "w:comments") {
      this._context = ["content", "body"];
      this._pieces = [];
    } else if (name === "w:hdr" || name === "w:ftr") {
      this._context = ["content", "header"];
      this._pieces = [];
    } else if (name === "w:endnote" || name === "w:footnote") {
      // A note with an explicit w:type pushes that type as its context, so
      // its text is not collected; otherwise the current context is repeated.
      const type = attributes["w:type"] || this._context[0];
      this._context.unshift(type);
    } else if (name === "w:tab" && this._context[0] === "content") {
      this._pieces.push("\t");
    } else if (name === "w:br" && this._context[0] === "content") {
      // Page breaks and ordinary breaks both become a newline.
      this._pieces.push("\n");
    } else if (name === "w:del" || name === "w:instrText") {
      this._context.unshift("deleted");
    } else if (name === "w:tabs") {
      this._context.unshift("tabs");
    } else if (name === "w:tc") {
      this._context.unshift("cell");
    } else if (name === "w:drawing") {
      this._context.unshift("drawing");
    } else if (name === "w:txbxContent") {
      // Park the surrounding pieces on the context stack and collect the
      // textbox text separately; handleCloseTag restores them.
      this._context.unshift(this._pieces);
      this._context.unshift("textbox");
      this._pieces = [];
    }
  }

  /**
   * Handles an element end: pops contexts, emits separators and stores the
   * collected text on the document when a root element closes.
   * @param {{name: string}} node - SAX close-tag node
   * @throws {Error} "Invalid textbox context" when a textbox closes while
   *   another context is innermost
   */
  handleCloseTag(node) {
    const name = node.name;
    if (name === "w:document") {
      this._context = null;
      this._document._body = this._pieces.join("");
    } else if (name === "w:footnote" || name === "w:endnote") {
      this._context.shift();
    } else if (name === "w:footnotes") {
      this._context = null;
      this._document._footnotes = this._pieces.join("");
    } else if (name === "w:endnotes") {
      this._context = null;
      this._document._endnotes = this._pieces.join("");
    } else if (name === "w:comments") {
      this._context = null;
      this._document._annotations = this._pieces.join("");
    } else if (name === "w:hdr") {
      this._context = null;
      this._document._headers = this._document._headers + this._pieces.join("");
    } else if (name === "w:ftr") {
      this._context = null;
      this._document._footers = this._document._footers + this._pieces.join("");
    } else if (name === "w:p") {
      if (this._collectsText()) {
        this._pieces.push("\n");
      }
    } else if (name === "w:del" || name === "w:instrText" || name === "w:tabs" || name === "w:drawing") {
      this._context.shift();
    } else if (name === "w:tc") {
      // Replace the last piece of the cell with a tab separator.
      this._pieces.pop();
      this._pieces.push("\t");
      this._context.shift();
    } else if (name === "w:tr") {
      this._pieces.push("\n");
    } else if (name === "w:txbxContent") {
      const textBox = this._pieces.join("");
      const context = this._context.shift();
      if (context !== "textbox") {
        throw new Error("Invalid textbox context");
      }
      // Restore the pieces parked by handleOpenTag.
      this._pieces = this._context.shift();
      if (this._context[0] === "drawing") {
        return;
      }
      if (textBox.length === 0) {
        return;
      }
      const inHeader = this._context.includes("header");
      const documentField = inHeader ? "_headerTextboxes" : "_textboxes";
      if (this._document[documentField]) {
        this._document[documentField] = this._document[documentField] + "\n" + textBox;
      } else {
        this._document[documentField] = textBox;
      }
    }
  }

  /**
   * Creates a SAX parser wired to this extractor. Exceptions thrown by the
   * handlers are reported through `parser.fail` with the exception message.
   * @returns {object} the configured parser
   */
  createXmlParser() {
    const parser = new SAXES.SaxesParser();
    const guarded = (handler) => (argument) => {
      try {
        handler(argument);
      } catch (e) {
        parser.fail(e.message);
      }
    };
    parser.on("opentag", guarded((node) => this.handleOpenTag(node)));
    parser.on("closetag", guarded((node) => this.handleCloseTag(node)));
    parser.on("text", guarded((string) => {
      // Text outside any known root element is ignored.
      if (!this._context) {
        return;
      }
      if (this._collectsText()) {
        this._pieces.push(string);
      }
    }));
    return parser;
  }

  /**
   * Streams one archive entry through a fresh XML parser.
   * @param {object} zipfile - the open zipfile
   * @param {object} entry - the entry to read
   * @returns {Promise<void>} resolves when the parser ends; rejects on a
   *   stream or parse error
   */
  handleEntry(zipfile, entry) {
    return new Promise((resolve, reject) => {
      zipfile.openReadStream(entry, (err, readStream) => {
        if (err) {
          return reject(err);
        }
        this._source = entry.fileName;
        const parser = this.createXmlParser();
        parser.on("error", (e) => {
          readStream.destroy(e);
          reject(e);
        });
        parser.on("end", () => resolve());
        readStream.on("end", () => parser.close());
        readStream.on("error", (e) => reject(e));
        readStream.on("readable", () => {
          // Drain whatever is currently buffered, 4096 bytes at a time.
          let chunk = readStream.read(4096);
          while (chunk !== null) {
            parser.write(chunk);
            chunk = readStream.read(4096);
          }
        });
      });
    });
  }
};
|
|
4235
|
-
module.exports = OpenOfficeExtractor;
|
|
4236
|
-
}
|
|
4237
|
-
});
|
|
4238
|
-
|
|
4239
|
-
// ../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/word.js
|
|
4240
|
-
var require_word = __commonJS({
|
|
4241
|
-
"../../node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/word.js"(exports, module) {
|
|
4242
|
-
"use strict";
|
|
4243
|
-
var { Buffer: Buffer2 } = __require("buffer");
|
|
4244
|
-
var WordOleExtractor = require_word_ole_extractor();
|
|
4245
|
-
var OpenOfficeExtractor = require_open_office_extractor();
|
|
4246
|
-
var BufferReader = require_buffer_reader();
|
|
4247
|
-
var FileReader = require_file_reader();
|
|
4248
|
-
/**
 * Entry point of the word extractor: sniffs the file signature and delegates
 * to the OLE (.doc) or Open Office XML (.docx) extractor.
 */
var WordExtractor2 = class {
  constructor() {
  }

  /**
   * Extracts the main contents of the file. If a Buffer is passed, that
   * is used instead. Opens the source, reads the first 512 bytes, uses them
   * to detect whether this is a .doc file or a .docx file, and then runs the
   * matching extractor. The reader is closed afterwards in every case.
   *
   * @param {string|Buffer} source - either a string filename, or a Buffer containing the file content
   * @returns {Promise<Document>} a {@link Document} providing accessors onto
   *   the text; rejects with a TypeError when `source` is neither a string
   *   nor a Buffer, and with an Error when the signature is not recognised
   */
  extract(source) {
    // First two bytes of an OLE compound file (D0 CF).
    const OLE_SIGNATURE = 0xd0cf;
    // First two bytes of a zip archive ("PK").
    const ZIP_SIGNATURE = 0x504b;
    // Accepted third/fourth bytes after "PK": 03 04, 05 06 and 07 08.
    const ZIP_RECORD_MARKERS = [0x0304, 0x0506, 0x0708];
    const HEADER_SIZE = 512;

    let reader;
    if (Buffer2.isBuffer(source)) {
      reader = new BufferReader(source);
    } else if (typeof source === "string") {
      reader = new FileReader(source);
    } else {
      // Previously `reader` stayed null here and the call below crashed with
      // an unrelated "cannot read properties of null" TypeError.
      return Promise.reject(new TypeError("WordExtractor#extract expects a string filename or a Buffer"));
    }

    const header = Buffer2.alloc(HEADER_SIZE);
    return reader.open().then(() => reader.read(header, 0, HEADER_SIZE, 0)).then((filled) => {
      let extractor = null;
      const signature = filled.readUInt16BE(0);
      if (signature === OLE_SIGNATURE) {
        extractor = WordOleExtractor;
      } else if (signature === ZIP_SIGNATURE && ZIP_RECORD_MARKERS.includes(filled.readUInt16BE(2))) {
        extractor = OpenOfficeExtractor;
      }
      if (!extractor) {
        throw new Error("Unable to read this type of file");
      }
      return new extractor().extract(reader);
    }).finally(() => reader.close());
  }
};
|
|
4286
|
-
module.exports = WordExtractor2;
|
|
4287
|
-
}
|
|
4288
|
-
});
|
|
4289
|
-
|
|
4290
|
-
// ../../packages/file-loaders/src/loaders/doc/index.ts
|
|
4291
|
-
var import_word_extractor = __toESM(require_word());
|
|
4292
|
-
import debug from "debug";
|
|
4293
|
-
var log = debug("file-loaders:doc");
|
|
4294
|
-
/**
 * Loader for legacy Word documents. The whole document is exposed as a
 * single page.
 */
var DocLoader = class {
  /**
   * Extracts the text of the document at `filePath`.
   *
   * Failures are not propagated: they are logged and reported as a single
   * empty page whose `metadata.error` carries the failure message.
   *
   * @param {string} filePath - path handed to the word extractor
   * @returns {Promise<Array<object>>} always a one-element array holding
   *   either the content page or the error page
   */
  async loadPages(filePath) {
    log("Loading DOC file:", filePath);
    try {
      const extractor = new import_word_extractor.default();
      const extracted = await extractor.extract(filePath);
      // Prefer getBody() when the result offers it; otherwise fall back to a
      // `text` property, and finally to the empty string.
      const pageContent = extracted && typeof extracted.getBody === "function" ? extracted.getBody() : extracted?.text ?? "";
      const lineCount = pageContent.split("\n").length;
      const page = {
        charCount: pageContent.length,
        lineCount,
        metadata: { pageNumber: 1 },
        pageContent
      };
      log("DOC loading completed");
      return [page];
    } catch (e) {
      // Anything can be thrown, not only Error instances; derive the message
      // without assuming `e.message` exists (a thrown null used to crash here).
      const message = e instanceof Error ? e.message : String(e);
      log("Error encountered while loading DOC file");
      console.error(`Error loading DOC file ${filePath}: ${message}`);
      const errorPage = {
        charCount: 0,
        lineCount: 0,
        metadata: { error: `Failed to load DOC file: ${message}` },
        pageContent: ""
      };
      return [errorPage];
    }
  }

  /**
   * Joins the content of all pages, separated by a blank line.
   * @param {Array<{pageContent: string}>} pages - pages from {@link loadPages}
   * @returns {Promise<string>} the combined text
   */
  async aggregateContent(pages) {
    log("Aggregating content from", pages.length, "DOC pages");
    return pages.map((p) => p.pageContent).join("\n\n");
  }
};
|
|
4330
|
-
export {
|
|
4331
|
-
DocLoader
|
|
4332
|
-
};
|
|
4333
|
-
/*! Bundled license information:
|
|
4334
|
-
|
|
4335
|
-
xmlchars/xml/1.0/ed5.js:
|
|
4336
|
-
(**
|
|
4337
|
-
* Character classes and associated utilities for the 5th edition of XML 1.0.
|
|
4338
|
-
*
|
|
4339
|
-
* @author Louis-Dominique Dubeau
|
|
4340
|
-
* @license MIT
|
|
4341
|
-
* @copyright Louis-Dominique Dubeau
|
|
4342
|
-
*)
|
|
4343
|
-
|
|
4344
|
-
xmlchars/xml/1.1/ed2.js:
|
|
4345
|
-
(**
|
|
4346
|
-
* Character classes and associated utilities for the 2nd edition of XML 1.1.
|
|
4347
|
-
*
|
|
4348
|
-
* @author Louis-Dominique Dubeau
|
|
4349
|
-
* @license MIT
|
|
4350
|
-
* @copyright Louis-Dominique Dubeau
|
|
4351
|
-
*)
|
|
4352
|
-
|
|
4353
|
-
xmlchars/xmlns/1.0/ed3.js:
|
|
4354
|
-
(**
|
|
4355
|
-
* Character class utilities for XML NS 1.0 edition 3.
|
|
4356
|
-
*
|
|
4357
|
-
* @author Louis-Dominique Dubeau
|
|
4358
|
-
* @license MIT
|
|
4359
|
-
* @copyright Louis-Dominique Dubeau
|
|
4360
|
-
*)
|
|
4361
|
-
*/
|