@lobehub/cli 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4144 +0,0 @@
1
- #!/usr/bin/env node
2
- import { a as __require, n as __commonJSMin, s as __toESM, t as require_src } from "./index.js";
3
- import { n as require_pend, t as require_buffer_crc32 } from "./buffer-crc32-BlUV1nEz.js";
4
- //#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-header.js
5
- var require_ole_header = /* @__PURE__ */ __commonJSMin(((exports, module) => {
6
/** Signature bytes that must match the first bytes of the buffer passed to `load`. */
const HEADER_DATA = Buffer.from("D0CF11E0A1B11AE1", "hex");

/**
 * Holds the fixed-position fields decoded from the first sector of a
 * compound document.
 */
class Header {
  constructor() {}

  /**
   * Decodes the header fields from `buffer` onto this instance.
   *
   * @param {Buffer} buffer - bytes starting at the header; fields are read up
   *   to byte offset 512
   * @returns {boolean} `false` when the leading bytes do not match the
   *   expected signature (no field is assigned), `true` once every field has
   *   been read
   */
  load(buffer) {
    const signatureMismatch = HEADER_DATA.some((expected, position) => expected != buffer[position]);
    if (signatureMismatch) return false;

    const int16At = (position) => buffer.readInt16LE(position);
    const int32At = (position) => buffer.readInt32LE(position);

    // Sector sizes are stored as powers of two.
    this.secSize = 1 << int16At(30);
    this.shortSecSize = 1 << int16At(32);
    this.SATSize = int32At(44);
    this.dirSecId = int32At(48);
    this.shortStreamMax = int32At(56);
    this.SSATSecId = int32At(60);
    this.SSATSize = int32At(64);
    this.MSATSecId = int32At(68);
    this.MSATSize = int32At(72);

    // The first 109 master-table entries are stored inline, starting at byte 76.
    const inlineEntries = new Array(109);
    this.partialMSAT = inlineEntries;
    for (const slot of inlineEntries.keys()) inlineEntries[slot] = int32At(76 + slot * 4);
    return true;
  }
}
25
- module.exports = Header;
26
- }));
27
- //#endregion
28
- //#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-allocation-table.js
29
- var require_ole_allocation_table = /* @__PURE__ */ __commonJSMin(((exports, module) => {
30
/** Chains stop at any sector id that is not greater than this value. */
const ALLOCATION_TABLE_SEC_ID_FREE = -1;

/**
 * A sector allocation table: for each sector id, the id of the sector that
 * follows it in its chain.
 */
class AllocationTable {
  /**
   * @param {object} doc - the owning document; `load` uses its `_header`
   *   and `_readSectors`
   */
  constructor(doc) {
    this._doc = doc;
  }

  /**
   * Reads the given sectors and decodes them as little-endian 32-bit table
   * entries.
   *
   * @param {number[]} secIds - ids of the sectors holding the table
   * @returns {Promise<void>} resolves once `_table` has been filled
   */
  load(secIds) {
    const doc = this._doc;
    const header = doc._header;
    this._table = new Array(secIds.length * (header.secSize / 4));
    return doc._readSectors(secIds).then((buffer) => {
      for (let i = 0; i < buffer.length / 4; i++) this._table[i] = buffer.readInt32LE(i * 4);
    });
  }

  /**
   * Follows the chain that starts at `startSecId`.
   *
   * The walk ends at the first id that is not greater than
   * `ALLOCATION_TABLE_SEC_ID_FREE`, at an id with no table entry, or as soon
   * as an id repeats. The repeat check matters for malformed input: a table
   * whose entries form a cycle would otherwise never terminate.
   *
   * @param {number} startSecId - first sector id of the chain
   * @returns {number[]} the sector ids in chain order, each at most once
   */
  getSecIdChain(startSecId) {
    const secIds = [];
    const visited = new Set();
    let secId = startSecId;
    while (secId > ALLOCATION_TABLE_SEC_ID_FREE && !visited.has(secId)) {
      visited.add(secId);
      secIds.push(secId);
      secId = this._table[secId];
    }
    return secIds;
  }
}
55
- module.exports = AllocationTable;
56
- }));
57
- //#endregion
58
- //#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-directory-tree.js
59
- var require_ole_directory_tree = /* @__PURE__ */ __commonJSMin(((exports, module) => {
60
const DIRECTORY_TREE_ENTRY_TYPE_STORAGE = 1;
const DIRECTORY_TREE_ENTRY_TYPE_STREAM = 2;
const DIRECTORY_TREE_ENTRY_TYPE_ROOT = 5;
/** Value of `left` / `right` when an entry has no sibling on that side. */
const DIRECTORY_TREE_LEAF = -1;

/**
 * Decodes the 128-byte directory entries of a compound document and links
 * them into a hierarchy of storages and streams.
 */
class DirectoryTree {
  /**
   * @param {object} doc - the owning document; `load` uses its `_readSectors`
   */
  constructor(doc) {
    this._doc = doc;
  }

  /**
   * Reads the directory sectors, decodes one entry per 128 bytes and builds
   * the hierarchy below the root entry.
   *
   * @param {number[]} secIds - ids of the sectors holding the directory
   * @returns {Promise<void>} resolves once `root` and `_entries` are set;
   *   rejects when no entry of the root type exists or an entry references a
   *   directory id that does not exist
   */
  load(secIds) {
    return this._doc._readSectors(secIds).then((buffer) => {
      const count = buffer.length / 128;
      this._entries = new Array(count);
      for (let i = 0; i < count; i++) {
        const offset = i * 128;
        // The stored length is reduced by one and clamped at zero before
        // being used as the end of the UTF-16LE name.
        const nameLength = Math.max(buffer.readInt16LE(64 + offset) - 1, 0);
        this._entries[i] = {
          name: buffer.toString("utf16le", 0 + offset, nameLength + offset),
          type: buffer.readInt8(66 + offset),
          nodeColor: buffer.readInt8(67 + offset),
          left: buffer.readInt32LE(68 + offset),
          right: buffer.readInt32LE(72 + offset),
          storageDirId: buffer.readInt32LE(76 + offset),
          secId: buffer.readInt32LE(116 + offset),
          size: buffer.readInt32LE(120 + offset),
        };
      }
      this.root = this._entries.find((entry) => entry.type === DIRECTORY_TREE_ENTRY_TYPE_ROOT);
      if (this.root === undefined) throw new Error("Compound document has no root directory entry");
      this._buildHierarchy(this.root);
    });
  }

  /**
   * Attaches `storages` and `streams` maps (keyed by entry name) to
   * `storageEntry`, then does the same for every nested storage.
   *
   * @param {object} storageEntry - a root or storage entry
   * @param {Set<object>} [visited] - storages already expanded; guards
   *   against a malformed file in which a storage contains itself
   */
  _buildHierarchy(storageEntry, visited = new Set()) {
    visited.add(storageEntry);
    storageEntry.storages = {};
    storageEntry.streams = {};
    for (const childId of this._getChildIds(storageEntry)) {
      const childEntry = this._entries[childId];
      const name = childEntry.name;
      if (childEntry.type === DIRECTORY_TREE_ENTRY_TYPE_STORAGE) storageEntry.storages[name] = childEntry;
      if (childEntry.type === DIRECTORY_TREE_ENTRY_TYPE_STREAM) storageEntry.streams[name] = childEntry;
    }
    for (const childStorage of Object.values(storageEntry.storages)) {
      if (!visited.has(childStorage)) this._buildHierarchy(childStorage, visited);
    }
  }

  /**
   * Collects the directory ids of all children of `storageEntry` by walking
   * the sibling tree rooted at `storageEntry.storageDirId` in pre-order
   * (entry, left subtree, right subtree).
   *
   * The walk uses an explicit stack and a visited set, so cyclic sibling
   * links in a malformed file terminate instead of overflowing the call
   * stack; every id is reported at most once.
   *
   * @param {object} storageEntry - a root or storage entry
   * @returns {number[]} child directory ids
   * @throws {Error} when a link refers to an id with no directory entry
   */
  _getChildIds(storageEntry) {
    const childIds = [];
    if (!(storageEntry.storageDirId > -1)) return childIds;

    const visited = new Set();
    const pending = [storageEntry.storageDirId];
    while (pending.length > 0) {
      const id = pending.pop();
      if (id === DIRECTORY_TREE_LEAF || visited.has(id)) continue;
      const entry = this._entries[id];
      if (entry === undefined) throw new Error(`Invalid directory entry reference: ${id}`);
      visited.add(id);
      childIds.push(id);
      // Push right first so the left subtree is popped (and reported) first.
      pending.push(entry.right, entry.left);
    }
    return childIds;
  }
}
122
- module.exports = DirectoryTree;
123
- }));
124
- //#endregion
125
- //#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-storage-stream.js
126
- var require_ole_storage_stream = /* @__PURE__ */ __commonJSMin(((exports, module) => {
127
- const { Readable } = __require("stream");
128
/**
 * A readable stream over the contents of one stream entry of a compound
 * document. Data is pushed one sector at a time, following the entry's
 * sector chain.
 */
class StorageStream extends Readable {
  /**
   * @param {object} doc - the owning document; provides `_SAT`, `_SSAT`,
   *   `_header`, `_readSector` and `_readShortSector`
   * @param {object} [streamEntry] - directory entry with `size` and `secId`;
   *   when missing, the stream ends immediately without data
   */
  constructor(doc, streamEntry) {
    super();
    this._doc = doc;
    this._streamEntry = streamEntry;
    this.initialize();
  }

  /**
   * Resets the read position and resolves the sector chain for the entry.
   * Entries smaller than the header's `shortStreamMax` are read through the
   * short-sector table; all others use the regular table.
   */
  initialize() {
    this._index = 0;
    this._done = true;
    if (!this._streamEntry) return;
    const doc = this._doc;
    this._bytes = this._streamEntry.size;
    this._allocationTable = doc._SAT;
    this._shortStream = false;
    if (this._bytes < doc._header.shortStreamMax) {
      this._shortStream = true;
      this._allocationTable = doc._SSAT;
    }
    this._secIds = this._allocationTable.getSecIdChain(this._streamEntry.secId);
    this._done = false;
  }

  /**
   * Reads a single sector with the reader matching this stream's kind.
   *
   * @param {number} sector - sector id
   * @returns {Promise<Buffer>} the sector contents
   */
  _readSector(sector) {
    return this._shortStream ? this._doc._readShortSector(sector) : this._doc._readSector(sector);
  }

  /**
   * Readable implementation: pushes the next sector, trimmed to the bytes
   * still owed by the entry, or ends the stream once the chain is exhausted.
   *
   * A failed sector read destroys the stream with that error, so consumers
   * receive an `error` event instead of the process seeing an unhandled
   * promise rejection while the stream stalls.
   */
  _read() {
    if (this._done) return this.push(null);
    if (this._index >= this._secIds.length) {
      this._done = true;
      return this.push(null);
    }
    return this._readSector(this._secIds[this._index])
      .then((buffer) => {
        // The final sector usually extends past the end of the stream data.
        if (this._bytes - buffer.length < 0) buffer = buffer.subarray(0, this._bytes);
        this._bytes -= buffer.length;
        this._index++;
        this.push(buffer);
      })
      .catch((error) => {
        this.destroy(error);
      });
  }
}
168
- module.exports = StorageStream;
169
- }));
170
- //#endregion
171
- //#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-storage.js
172
- var require_ole_storage = /* @__PURE__ */ __commonJSMin(((exports, module) => {
173
- const StorageStream = require_ole_storage_stream();
174
- module.exports = class Storage {
175
- constructor(doc, dirEntry) {
176
- this._doc = doc;
177
- this._dirEntry = dirEntry;
178
- }
179
- storage(storageName) {
180
- return new Storage(this._doc, this._dirEntry.storages[storageName]);
181
- }
182
- stream(streamName) {
183
- return new StorageStream(this._doc, this._dirEntry.streams[streamName]);
184
- }
185
- };
186
- }));
187
- //#endregion
188
- //#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/ole-compound-doc.js
189
- var require_ole_compound_doc = /* @__PURE__ */ __commonJSMin(((exports, module) => {
190
- /**
191
- * @module ole-compound-doc
192
- */
193
- const Header = require_ole_header();
194
- const AllocationTable = require_ole_allocation_table();
195
- const DirectoryTree = require_ole_directory_tree();
196
- const Storage = require_ole_storage();
197
/**
 * Implements the main interface used to read from an OLE compound file.
 */
class OleCompoundDoc {
  /**
   * @param {object} reader - object whose `read(buffer, offset, length,
   *   position)` returns a promise
   */
  constructor(reader) {
    this._reader = reader;
    this._skipBytes = 0;
  }

  /**
   * Loads the header, the allocation tables and the directory tree, in that
   * order, then the custom header when `_skipBytes` is non-zero.
   *
   * @returns {Promise<OleCompoundDoc>} resolves to this document
   */
  read() {
    const stages = [
      () => this._readHeader(),
      () => this._readMSAT(),
      () => this._readSAT(),
      () => this._readSSAT(),
      () => this._readDirectoryTree(),
      () => {
        if (this._skipBytes != 0) return this._readCustomHeader();
      },
    ];
    return stages
      .reduce((pending, stage) => pending.then(stage), Promise.resolve())
      .then(() => this);
  }

  /**
   * Reads the first `_skipBytes` bytes of the file and hands the value the
   * reader resolves with to `_customHeaderCallback`.
   *
   * @returns {Promise<void>}
   */
  _readCustomHeader() {
    const scratch = Buffer.alloc(this._skipBytes);
    return this._reader.read(scratch, 0, this._skipBytes, 0).then((data) => {
      // The callback's result is not used.
      this._customHeaderCallback(data);
    });
  }

  /**
   * Reads 512 bytes at `_skipBytes` and decodes them into `_header`.
   *
   * @returns {Promise<void>} rejects with "Not a valid compound document"
   *   when the header does not load
   */
  _readHeader() {
    const scratch = Buffer.alloc(512);
    return this._reader.read(scratch, 0, 512, 0 + this._skipBytes).then((data) => {
      this._header = new Header();
      if (!this._header.load(data)) throw new Error("Not a valid compound document");
    });
  }

  /**
   * Builds `_MSAT`: the header's inline entries, resized to `SATSize`, then
   * extended from the chain of extra sectors starting at `MSATSecId` when
   * the inline part is not enough.
   *
   * @returns {Promise<void>}
   */
  async _readMSAT() {
    const header = this._header;
    this._MSAT = header.partialMSAT.slice(0);
    this._MSAT.length = header.SATSize;
    if (header.SATSize <= 109 || header.MSATSize == 0) return;

    let nextIndex = 109;
    let secId = header.MSATSecId;
    for (let round = 0; round < header.MSATSize; round++) {
      const sectorBuffer = await this._readSector(secId);
      // All but the last four bytes of the sector hold table entries.
      for (let pos = 0; pos < header.secSize - 4 && nextIndex < header.SATSize; pos += 4) {
        this._MSAT[nextIndex] = sectorBuffer.readInt32LE(pos);
        nextIndex++;
      }
      // The last four bytes hold the id of the next sector in the chain.
      secId = sectorBuffer.readInt32LE(header.secSize - 4);
    }
  }

  /**
   * Reads `secIds.length` units of `unitSize` bytes, one after another, into
   * a single buffer.
   *
   * @param {number[]} secIds - ids to read, in output order
   * @param {number} unitSize - bytes per unit
   * @param {function(number): number} locate - maps an id to a file offset
   * @returns {Promise<Buffer>} the concatenated contents
   */
  _readFixedSizeUnits(secIds, unitSize, locate) {
    const combined = Buffer.alloc(secIds.length * unitSize);
    const readFrom = (slot) => {
      if (slot >= secIds.length) return Promise.resolve(combined);
      return this._reader
        .read(combined, slot * unitSize, unitSize, locate(secIds[slot]))
        .then(() => readFrom(slot + 1));
    };
    return readFrom(0);
  }

  /**
   * @param {number} secId - sector id
   * @returns {Promise<Buffer>} the contents of that sector
   */
  _readSector(secId) {
    return this._readSectors([secId]);
  }

  /**
   * @param {number[]} secIds - sector ids
   * @returns {Promise<Buffer>} the sectors' contents, concatenated in order
   */
  _readSectors(secIds) {
    return this._readFixedSizeUnits(secIds, this._header.secSize, (secId) => this._getFileOffsetForSec(secId));
  }

  /**
   * @param {number} secId - short-sector id
   * @returns {Promise<Buffer>} the contents of that short sector
   */
  _readShortSector(secId) {
    return this._readShortSectors([secId]);
  }

  /**
   * @param {number[]} secIds - short-sector ids
   * @returns {Promise<Buffer>} the short sectors' contents, concatenated in
   *   order
   */
  _readShortSectors(secIds) {
    return this._readFixedSizeUnits(secIds, this._header.shortSecSize, (secId) =>
      this._getFileOffsetForShortSec(secId));
  }

  /**
   * Loads the sector allocation table from the sectors listed in `_MSAT`.
   *
   * @returns {Promise<void>}
   */
  _readSAT() {
    this._SAT = new AllocationTable(this);
    return this._SAT.load(this._MSAT);
  }

  /**
   * Loads the short-sector allocation table from the chain starting at
   * `SSATSecId`.
   *
   * @returns {Promise<void>} rejects when the chain length differs from the
   *   header's `SSATSize`
   */
  _readSSAT() {
    const header = this._header;
    const chain = this._SAT.getSecIdChain(header.SSATSecId);
    if (chain.length != header.SSATSize) {
      return Promise.reject(new Error("Invalid Short Sector Allocation Table"));
    }
    this._SSAT = new AllocationTable(this);
    return this._SSAT.load(chain);
  }

  /**
   * Loads the directory tree, wraps its root in a `Storage`, and records the
   * sector chain of the root entry for locating short sectors.
   *
   * @returns {Promise<void>}
   */
  _readDirectoryTree() {
    const header = this._header;
    this._directoryTree = new DirectoryTree(this);
    const chain = this._SAT.getSecIdChain(header.dirSecId);
    return this._directoryTree.load(chain).then(() => {
      const rootEntry = this._directoryTree.root;
      this._rootStorage = new Storage(this, rootEntry);
      this._shortStreamSecIds = this._SAT.getSecIdChain(rootEntry.secId);
    });
  }

  /**
   * @param {number} secId - sector id
   * @returns {number} file offset of the sector; sector 0 starts one sector
   *   size after `_skipBytes`
   */
  _getFileOffsetForSec(secId) {
    return this._skipBytes + (secId + 1) * this._header.secSize;
  }

  /**
   * @param {number} shortSecId - short-sector id
   * @returns {number} file offset of the short sector, located inside the
   *   sector chain recorded in `_shortStreamSecIds`
   */
  _getFileOffsetForShortSec(shortSecId) {
    const streamOffset = shortSecId * this._header.shortSecSize;
    const secSize = this._header.secSize;
    const hostSecId = this._shortStreamSecIds[Math.floor(streamOffset / secSize)];
    return this._getFileOffsetForSec(hostSecId) + (streamOffset % secSize);
  }

  /**
   * @param {string} storageName - name of a storage below the root
   * @returns {object} the storage view returned by the root storage
   */
  storage(storageName) {
    return this._rootStorage.storage(storageName);
  }

  /**
   * @param {string} streamName - name of a stream below the root
   * @returns {object} the stream returned by the root storage
   */
  stream(streamName) {
    return this._rootStorage.stream(streamName);
  }
}
312
- module.exports = OleCompoundDoc;
313
- }));
314
- //#endregion
315
- //#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/filters.js
316
- var require_filters = /* @__PURE__ */ __commonJSMin(((exports, module) => {
317
- /**
318
- * @module filters
319
- *
320
- * @description
321
- * Exports several functions that implement various methods for translating
322
- * characters into Unicode, and cleaning up some of the remaining residues from
323
- * Word's odd internal marker character usage.
324
- */
325
/**
 * A replacement table, indexed by character code, that maps Word control
 * characters either to NULL (for later deletion) or to a character that is
 * more acceptable in a Unicode world, such as a newline.
 *
 * NOTE(review): the replacement for code 7 appears as a single whitespace
 * character in this view of the file; confirm whether it is a space or a tab.
 */
const replaceTable = [];
for (const [code, replacement] of [
  [2, "\0"],
  [5, "\0"],
  [7, " "],
  [8, "\0"],
  [10, "\n"],
  [11, "\n"],
  [12, "\n"],
  [13, "\n"],
  [30, "‑"],
]) {
  replaceTable[code] = replacement;
}
340
/**
 * @constant
 * Maps Windows character codes between 0x80 and 0x9f to their official
 * Unicode code points. This smooths over the differences between UCS-2 and
 * 8-bit code runs in Word, by allowing us to work entirely within Unicode
 * later on.
 *
 * The entries follow the Windows-1252 code page, including 0x80 (the euro
 * sign). Codes that Windows-1252 leaves undefined (0x81, 0x8d, 0x8f, 0x90,
 * 0x9d) have no entry.
 */
const binaryToUnicodeTable = [];
for (const [code, character] of [
  [0x80, "\u20ac"],
  [0x82, "\u201a"],
  [0x83, "\u0192"],
  [0x84, "\u201e"],
  [0x85, "\u2026"],
  [0x86, "\u2020"],
  [0x87, "\u2021"],
  [0x88, "\u02c6"],
  [0x89, "\u2030"],
  [0x8a, "\u0160"],
  [0x8b, "\u2039"],
  [0x8c, "\u0152"],
  [0x8e, "\u017d"],
  [0x91, "\u2018"],
  [0x92, "\u2019"],
  [0x93, "\u201c"],
  [0x94, "\u201d"],
  [0x95, "\u2022"],
  [0x96, "\u2013"],
  [0x97, "\u2014"],
  [0x98, "\u02dc"],
  [0x99, "\u2122"],
  [0x9a, "\u0161"],
  [0x9b, "\u203a"],
  [0x9c, "\u0153"],
  [0x9e, "\u017e"],
  [0x9f, "\u0178"],
]) {
  binaryToUnicodeTable[code] = character;
}
374
/**
 * Converts character codes from 0x80 to 0x9f to their Unicode equivalents
 * within a string.
 *
 * A code in that range with no entry in `binaryToUnicodeTable` is left
 * unchanged. Without that fallback the lookup yields `undefined`, which
 * `String.prototype.replace` would insert as the literal text "undefined".
 *
 * @param {string} string - the input string
 * @returns {string} a converted string
 */
module.exports.binaryToUnicode = (string) => {
  return string.replace(/([\x80-\x9f])/g, (match) => {
    const mapped = binaryToUnicodeTable[match.charCodeAt(0)];
    return mapped === undefined ? match : mapped;
  });
};
383
/**
 * The main function for cleaning OLE-based text. It runs a few standard
 * replacements on characters that are reserved for special purposes, also
 * removes fields, and finally strips out any remaining characters in the
 * range 0x00-0x07.
 *
 * The first pass matches 0x1e, the code that `replaceTable` has an entry
 * for. Matching 0x1f instead (which has no entry) inserted the literal text
 * "undefined" and left 0x1e untranslated. A matched code with no table entry
 * is kept as it is.
 *
 * @param {string} string - an input string
 * @returns {string} a cleaned up string
 */
module.exports.clean = (string) => {
  string = string.replace(/([\x02\x05\x07\x08\x0a\x0b\x0c\x0d\x1e])/g, (match) => {
    const replacement = replaceTable[match.charCodeAt(0)];
    return replacement === undefined ? match : replacement;
  });

  // Fields are delimited by 0x13 ... 0x14 ... 0x15; keep only the part after
  // the optional 0x14. Each pass unwraps the innermost fields, so repeat
  // until a pass finds nothing left to unwrap.
  let called = true;
  while (called) {
    called = false;
    string = string.replace(/(?:\x13[^\x13\x14\x15]*\x14?([^\x13\x14\x15]*)\x15)/g, (match, p1) => {
      called = true;
      return p1;
    });
  }
  return string.replace(/[\x00-\x07]/g, "");
};
403
/**
 * Replacement table, indexed by character code, used by `filter` to turn a
 * few Unicode punctuation and spacing characters into plain equivalents.
 */
const filterTable = [];
for (const [code, replacement] of [
  [0x2002, " "],
  [0x2003, " "],
  [0x2012, "-"],
  [0x2013, "-"],
  [0x2014, "-"],
  [0x2018, "'"],
  [0x2019, "'"],
  [0x201c, "\""],
  [0x201d, "\""],
]) {
  filterTable[code] = replacement;
}
413
/**
 * Filters a string, replacing a few common Unicode characters (U+2002,
 * U+2003, the dashes U+2012-U+2014, and the curly single and double quotes)
 * with the entries registered for them in `filterTable`.
 *
 * @param {string} string - the input string
 * @returns {string} a filtered string
 */
module.exports.filter = (string) => {
  const replaceable = /[\u2002\u2003\u2012\u2013\u2014\u2018\u2019\u201c\u201d]/g;
  const lookUp = (match) => filterTable[match.charCodeAt(0)];
  return string.replace(replaceable, lookUp);
};
422
- }));
423
- //#endregion
424
- //#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/document.js
425
- var require_document = /* @__PURE__ */ __commonJSMin(((exports, module) => {
426
- /**
427
- * @module document
428
- *
429
- * @description
430
- * Implements the main document returned when a Word file has been extracted. This exposes
431
- * methods that allow the body, annotations, headers, footnotes, and endnotes, to be
432
- * read and used.
433
- *
434
- * @author
435
- * Stuart Watt <stuart@morungos.com>
436
- */
437
- const { filter } = require_filters();
438
/**
 * Applies the Unicode filter to `value` unless the caller disabled it.
 *
 * @param {string} value - the text to return
 * @param {Object} options - accessor options; filtering is skipped only when
 *   `options.filterUnicode == false`
 * @returns {string} the (possibly filtered) text
 */
function filterIfRequested(value, options) {
  return options.filterUnicode == false ? value : filter(value);
}

/**
 * @class
 * Returned from all extractors, this class provides accessors to read the
 * different parts of a Word document. Each accessor takes an optional
 * options object; `options.filterUnicode` (default true) controls whether
 * the text is passed through the Unicode filter before it is returned.
 */
class Document {
  constructor() {
    this._body = "";
    this._footnotes = "";
    this._endnotes = "";
    this._headers = "";
    this._footers = "";
    this._annotations = "";
    this._textboxes = "";
    this._headerTextboxes = "";
  }

  /**
   * Accessor to read the main body part of a Word file
   * @param {Object} [options] - options for body data
   * @param {boolean} [options.filterUnicode] - if true (the default), applies the Unicode filter
   * @returns {string} the Word file body
   */
  getBody(options) {
    return filterIfRequested(this._body, options || {});
  }

  /**
   * Accessor to read the footnotes part of a Word file
   * @param {Object} [options] - options for footnote data
   * @param {boolean} [options.filterUnicode] - if true (the default), applies the Unicode filter
   * @returns {string} the Word file footnotes
   */
  getFootnotes(options) {
    return filterIfRequested(this._footnotes, options || {});
  }

  /**
   * Accessor to read the endnotes part of a Word file
   * @param {Object} [options] - options for endnote data
   * @param {boolean} [options.filterUnicode] - if true (the default), applies the Unicode filter
   * @returns {string} the Word file endnotes
   */
  getEndnotes(options) {
    return filterIfRequested(this._endnotes, options || {});
  }

  /**
   * Accessor to read the headers part of a Word file
   * @param {Object} [options] - options for header data
   * @param {boolean} [options.filterUnicode] - if true (the default), applies the Unicode filter
   * @param {boolean} [options.includeFooters] - if true (the default), returns headers and footers
   * as a single string
   * @returns {string} the Word file headers
   */
  getHeaders(options) {
    const settings = options || {};
    const footers = settings.includeFooters == false ? "" : this._footers;
    return filterIfRequested(this._headers + footers, settings);
  }

  /**
   * Accessor to read the footers part of a Word file
   * @param {Object} [options] - options for footer data
   * @param {boolean} [options.filterUnicode] - if true (the default), applies the Unicode filter
   * @returns {string} the Word file footers
   */
  getFooters(options) {
    return filterIfRequested(this._footers, options || {});
  }

  /**
   * Accessor to read the annotations part of a Word file
   * @param {Object} [options] - options for annotation data
   * @param {boolean} [options.filterUnicode] - if true (the default), applies the Unicode filter
   * @returns {string} the Word file annotations
   */
  getAnnotations(options) {
    return filterIfRequested(this._annotations, options || {});
  }

  /**
   * Accessor to read the textboxes from a Word file. The text box content is aggregated as a
   * single long string. Only when both the body and the header content are non-empty are they
   * separated by a newline; an empty part contributes nothing, so no stray newline is
   * produced.
   * @param {Object} [options] - options for text box data
   * @param {boolean} [options.filterUnicode] - if true (the default), applies the Unicode filter
   * @param {boolean} [options.includeHeadersAndFooters] - if true (the default), includes text box
   * content in headers and footers
   * @param {boolean} [options.includeBody] - if true (the default), includes text box
   * content in the document body
   * @returns {string} the Word file text box content
   */
  getTextboxes(options) {
    const settings = options || {};
    const segments = [];
    if (settings.includeBody != false) segments.push(this._textboxes);
    if (settings.includeHeadersAndFooters != false) segments.push(this._headerTextboxes);
    const value = segments.filter((segment) => segment.length > 0).join("\n");
    return filterIfRequested(value, settings);
  }
}
552
- module.exports = Document;
553
- }));
554
- //#endregion
555
- //#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/word-ole-extractor.js
556
- var require_word_ole_extractor = /* @__PURE__ */ __commonJSMin(((exports, module) => {
557
- /**
558
- * @module word-ole-extractor
559
- *
560
- * @description
561
- * Implements the main logic of extracting text from "classic" OLE-based Word files.
562
- * Depends on [OleCompoundDoc]{@link module:ole-compound-doc~OleCompoundDoc}
563
- * for most of the underlying OLE logic. Note that
564
- * [OpenOfficeExtractor]{@link module:open-office-extractor~OpenOfficeExtractor} is
565
- * used for newer, Open Office-style, files.
566
- */
567
- const OleCompoundDoc = require_ole_compound_doc();
568
- const Document = require_document();
569
- const { binaryToUnicode, clean } = require_filters();
570
/**
 * Constant for the deletion character SPRM.
 */
const sprmCFRMarkDel = 0;

/**
 * Given a cp-style character position, finds the index of the first piece
 * whose `endCp` is at or beyond that position.
 * @param {Array} pieces the piece table
 * @param {number} position the character position
 * @returns the piece index, or `undefined` when no piece qualifies
 *
 * @todo
 * Might be better using a binary search
 */
const getPieceIndexByCP = (pieces, position) => {
  const found = pieces.findIndex((piece) => position <= piece.endCp);
  return found === -1 ? undefined : found;
};
585
/**
 * Given a file-style position, finds the index of the first piece whose
 * `endFilePos` is at or beyond that position.
 * @param {Array} pieces the piece table
 * @param {number} position the file position
 * @returns the piece index, or `undefined` when no piece qualifies
 *
 * @todo
 * Might be better using a binary search
 */
const getPieceIndexByFilePos = (pieces, position) => {
  const found = pieces.findIndex((piece) => position <= piece.endFilePos);
  return found === -1 ? undefined : found;
};
596
/**
 * Reads and extracts a character range from the pieces. This returns the
 * plain text within the pieces in the given range; when no piece is found
 * for `start` or `end`, the result is the empty string.
 * @param {Array} pieces the piece table
 * @param {number} start the start offset
 * @param {number} end the end offset
 * @returns a character string
 */
function getTextRangeByCP(pieces, start, end) {
  const firstIndex = getPieceIndexByCP(pieces, start);
  const lastIndex = getPieceIndexByCP(pieces, end);
  let text = "";
  for (let index = firstIndex; index <= lastIndex; index++) {
    const piece = pieces[index];
    // Only the first and last pieces are cut; pieces in between contribute
    // from their beginning, with an end bound that is never short of the
    // piece text.
    const from = index === firstIndex ? start - piece.startCp : 0;
    const to = index === lastIndex ? end - piece.startCp : piece.endCp;
    text += piece.text.substring(from, to);
  }
  return text;
}
615
/**
 * Given a piece, a starting and ending cp-style offset, and a replacement
 * character, updates the piece text so that the part between start and end
 * is overwritten with the given character. Offsets outside the piece are
 * clamped to the piece boundaries.
 * @param {*} piece the piece
 * @param {*} start the starting character offset
 * @param {*} end the ending character offset
 * @param {*} character the replacement character
 */
function fillPieceRange(piece, start, end, character) {
  const pieceStart = piece.startCp;
  const pieceEnd = pieceStart + piece.length;
  const original = piece.text;

  const from = start < pieceStart ? pieceStart : start;
  const to = end > pieceEnd ? pieceEnd : end;

  const head = from == pieceStart ? "" : original.slice(0, from - pieceStart);
  const filler = "".padStart(to - from, character);
  // `to - pieceEnd` is negative here, so the slice keeps the trailing
  // characters that lie after the filled range.
  const tail = to == pieceEnd ? "" : original.slice(to - pieceEnd);

  piece.text = head + filler + tail;
}
632
/**
 * Given a piece, a starting and ending filePos-style offset, and a
 * replacement character, updates the piece text so that the part between
 * start and end is overwritten with the given character. This is used when
 * applying character styles, which use filePos values rather than cp
 * values. File offsets are converted to character offsets by dividing by
 * the piece's `bpc` value.
 *
 * @param {*} piece the piece
 * @param {*} start the starting file offset
 * @param {*} end the ending file offset
 * @param {*} character the replacement character
 */
function fillPieceRangeByFilePos(piece, start, end, character) {
  const pieceStart = piece.startFilePos;
  const pieceEnd = pieceStart + piece.size;
  const original = piece.text;

  const from = start < pieceStart ? pieceStart : start;
  const to = end > pieceEnd ? pieceEnd : end;

  const head = from == pieceStart ? "" : original.slice(0, (from - pieceStart) / piece.bpc);
  const filler = "".padStart((to - from) / piece.bpc, character);
  // A negative slice index keeps the characters after the filled range.
  const tail = to == pieceEnd ? "" : original.slice((to - pieceEnd) / piece.bpc);

  piece.text = head + filler + tail;
}
652
/**
 * Replaces a selected range in the piece table, overwriting the selection
 * with the given character. The length of segments in the piece table must
 * never be changed. Nothing happens when no piece is found for `start` or
 * `end`.
 * @param {*} pieces the piece table
 * @param {*} start the starting cp-style offset
 * @param {*} end the ending cp-style offset
 * @param {*} character the replacement character
 */
function replaceSelectedRange(pieces, start, end, character) {
  const firstIndex = getPieceIndexByCP(pieces, start);
  const lastIndex = getPieceIndexByCP(pieces, end);
  let index = firstIndex;
  while (index <= lastIndex) {
    fillPieceRange(pieces[index], start, end, character);
    index++;
  }
}
669
/**
 * Replaces a selected range in the piece table, overwriting the selection
 * with the given character. The length of segments in the piece table must
 * never be changed. The start and end values are found by file position;
 * nothing happens when no piece is found for either of them.
 * @param {*} pieces the piece table
 * @param {*} start the starting file position
 * @param {*} end the ending file position
 * @param {*} character the replacement character
 */
function replaceSelectedRangeByFilePos(pieces, start, end, character) {
  const firstIndex = getPieceIndexByFilePos(pieces, start);
  const lastIndex = getPieceIndexByFilePos(pieces, end);
  let index = firstIndex;
  while (index <= lastIndex) {
    fillPieceRangeByFilePos(pieces[index], start, end, character);
    index++;
  }
}
686
/**
 * Marks a range as deleted. It does this by overwriting it with null
 * characters, which then get removed during the later cleaning process.
 * @param {*} pieces the piece table
 * @param {*} start the starting file position
 * @param {*} end the ending file position
 */
function markDeletedRange(pieces, start, end) {
  const deletionFiller = "\0";
  replaceSelectedRangeByFilePos(pieces, start, end, deletionFiller);
}
696
/**
 * Called to iterate over a set of SPRMs in a buffer, starting at
 * a given offset. The handler is called with the arguments:
 * buffer, offset, sprm, ispmd, fspec, sgc, spra, where offset points just
 * past the two-byte SPRM code, at the start of its operand.
 * @param {*} buffer the buffer
 * @param {*} offset the starting offset
 * @param {*} handler the function to call for each SPRM
 */
const processSprms = (buffer, offset, handler) => {
  // Operand size in bytes for each `spra` value. Value 6 is variable-length:
  // a length byte followed by that many bytes.
  const fixedOperandSizes = [1, 1, 2, 4, 2, 2, undefined, 3];

  let cursor = offset;
  while (cursor < buffer.length - 1) {
    const sprm = buffer.readUInt16LE(cursor);
    const ispmd = sprm & 31;
    const fspec = (sprm >> 9) & 1;
    const sgc = (sprm >> 10) & 7;
    const spra = (sprm >> 13) & 7;
    cursor += 2;

    handler(buffer, cursor, sprm, ispmd, fspec, sgc, spra);

    cursor += spra === 6 ? buffer.readUInt8(cursor) + 1 : fixedOperandSizes[spra];
  }
};
737
- /**
738
- * @class
739
- * The main class implementing extraction from OLE-based Word files.
740
- * This handles all the extraction and conversion logic.
741
- */
742
- var WordOleExtractor = class {
743
- constructor() {
744
- this._pieces = [];
745
- this._bookmarks = {};
746
- this._boundaries = {};
747
- this._taggedHeaders = [];
748
- }
749
- /**
750
- * The main extraction method. This creates an OLE compound document
751
- * interface, then opens up a stream and extracts out the main
752
- * stream.
753
- * @param {*} reader
754
- */
755
- extract(reader) {
756
- const document = new OleCompoundDoc(reader);
757
- return document.read().then(() => this.documentStream(document, "WordDocument").then((stream) => this.streamBuffer(stream)).then((buffer) => this.extractWordDocument(document, buffer)));
758
- }
759
- /**
760
- * Builds and returns a {@link Document} object corresponding to the text
761
- * in the original document. This involves reading and retrieving the text
762
- * ranges corresponding to the primary document parts. The text segments are
763
- * read from the extracted table of text pieces.
764
- * @returns a {@link Document} object
765
- */
766
- buildDocument() {
767
- const document = new Document();
768
- const pieces = this._pieces;
769
- let start = 0;
770
- document._body = clean(getTextRangeByCP(pieces, start, start + this._boundaries.ccpText));
771
- start += this._boundaries.ccpText;
772
- if (this._boundaries.ccpFtn) {
773
- document._footnotes = clean(getTextRangeByCP(pieces, start, start + this._boundaries.ccpFtn - 1));
774
- start += this._boundaries.ccpFtn;
775
- }
776
- if (this._boundaries.ccpHdd) {
777
- document._headers = clean(this._taggedHeaders.filter((s) => s.type === "headers").map((s) => s.text).join(""));
778
- document._footers = clean(this._taggedHeaders.filter((s) => s.type === "footers").map((s) => s.text).join(""));
779
- start += this._boundaries.ccpHdd;
780
- }
781
- if (this._boundaries.ccpAtn) {
782
- document._annotations = clean(getTextRangeByCP(pieces, start, start + this._boundaries.ccpAtn - 1));
783
- start += this._boundaries.ccpAtn;
784
- }
785
- if (this._boundaries.ccpEdn) {
786
- document._endnotes = clean(getTextRangeByCP(pieces, start, start + this._boundaries.ccpEdn - 1));
787
- start += this._boundaries.ccpEdn;
788
- }
789
- if (this._boundaries.ccpTxbx) {
790
- document._textboxes = clean(getTextRangeByCP(pieces, start, start + this._boundaries.ccpTxbx - 1));
791
- start += this._boundaries.ccpTxbx;
792
- }
793
- if (this._boundaries.ccpHdrTxbx) {
794
- document._headerTextboxes = clean(getTextRangeByCP(pieces, start, start + this._boundaries.ccpHdrTxbx - 1));
795
- start += this._boundaries.ccpHdrTxbx;
796
- }
797
- return document;
798
- }
799
- /**
800
- * Main logic top level function for unpacking a Word document
801
- * @param {*} document the OLE document
802
- * @param {*} buffer a buffer
803
- * @returns a Promise which resolves to a {@link Document}
804
- */
805
- extractWordDocument(document, buffer) {
806
- const magic = buffer.readUInt16LE(0);
807
- if (magic !== 42476) return Promise.reject(/* @__PURE__ */ new Error(`This does not seem to be a Word document: Invalid magic number: ${magic.toString(16)}`));
808
- const streamName = (buffer.readUInt16LE(10) & 512) !== 0 ? "1Table" : "0Table";
809
- return this.documentStream(document, streamName).then((stream) => this.streamBuffer(stream)).then((streamBuffer) => {
810
- this._boundaries.fcMin = buffer.readUInt32LE(24);
811
- this._boundaries.ccpText = buffer.readUInt32LE(76);
812
- this._boundaries.ccpFtn = buffer.readUInt32LE(80);
813
- this._boundaries.ccpHdd = buffer.readUInt32LE(84);
814
- this._boundaries.ccpAtn = buffer.readUInt32LE(92);
815
- this._boundaries.ccpEdn = buffer.readUInt32LE(96);
816
- this._boundaries.ccpTxbx = buffer.readUInt32LE(100);
817
- this._boundaries.ccpHdrTxbx = buffer.readUInt32LE(104);
818
- this.writeBookmarks(buffer, streamBuffer);
819
- this.writePieces(buffer, streamBuffer);
820
- this.writeCharacterProperties(buffer, streamBuffer);
821
- this.writeParagraphProperties(buffer, streamBuffer);
822
- this.normalizeHeaders(buffer, streamBuffer);
823
- return this.buildDocument();
824
- });
825
- }
826
- /**
827
- * Returns a promise that resolves to the named stream.
828
- * @param {*} document
829
- * @param {*} streamName
830
- * @returns a promise that resolves to the named stream
831
- */
832
- documentStream(document, streamName) {
833
- return Promise.resolve(document.stream(streamName));
834
- }
835
- /**
836
- * Returns a promise that resolves to a Buffer containing the contents of
837
- * the given stream.
838
- * @param {*} stream
839
- * @returns a promise that resolves to the sream contents
840
- */
841
- streamBuffer(stream) {
842
- return new Promise((resolve, reject) => {
843
- const chunks = [];
844
- stream.on("data", (chunk) => chunks.push(chunk));
845
- stream.on("error", (error) => reject(error));
846
- stream.on("end", () => resolve(Buffer.concat(chunks)));
847
- return stream;
848
- });
849
- }
850
- writeFields(buffer, tableBuffer, result) {
851
- const fcPlcffldMom = buffer.readInt32LE(282);
852
- const lcbPlcffldMom = buffer.readUInt32LE(286);
853
- if (lcbPlcffldMom == 0) return;
854
- const fieldCount = (lcbPlcffldMom - 4) / 6;
855
- const dataOffset = (fieldCount + 1) * 4;
856
- const plcffldMom = tableBuffer.slice(fcPlcffldMom, fcPlcffldMom + lcbPlcffldMom);
857
- for (let i = 0; i < fieldCount; i++) {
858
- plcffldMom.readUInt32LE(i * 4);
859
- const fld = plcffldMom.readUInt16LE(dataOffset + i * 2);
860
- const byte1 = fld & 255;
861
- fld >> 8;
862
- if ((byte1 & 31) == 19) {}
863
- }
864
- }
865
- /**
866
- * Extracts and stores the document bookmarks into a local field.
867
- * @param {*} buffer
868
- * @param {*} tableBuffer
869
- */
870
- writeBookmarks(buffer, tableBuffer) {
871
- const fcSttbfBkmk = buffer.readUInt32LE(322);
872
- const lcbSttbfBkmk = buffer.readUInt32LE(326);
873
- const fcPlcfBkf = buffer.readUInt32LE(330);
874
- const lcbPlcfBkf = buffer.readUInt32LE(334);
875
- const fcPlcfBkl = buffer.readUInt32LE(338);
876
- const lcbPlcfBkl = buffer.readUInt32LE(342);
877
- if (lcbSttbfBkmk === 0) return;
878
- const sttbfBkmk = tableBuffer.slice(fcSttbfBkmk, fcSttbfBkmk + lcbSttbfBkmk);
879
- const plcfBkf = tableBuffer.slice(fcPlcfBkf, fcPlcfBkf + lcbPlcfBkf);
880
- const plcfBkl = tableBuffer.slice(fcPlcfBkl, fcPlcfBkl + lcbPlcfBkl);
881
- const fcExtend = sttbfBkmk.readUInt16LE(0);
882
- sttbfBkmk.readUInt16LE(2);
883
- sttbfBkmk.readUInt16LE(4);
884
- if (fcExtend !== 65535) throw new Error("Internal error: unexpected single-byte bookmark data");
885
- let offset = 6;
886
- const index = 0;
887
- while (offset < lcbSttbfBkmk) {
888
- let length = sttbfBkmk.readUInt16LE(offset);
889
- length = length * 2;
890
- const segment = sttbfBkmk.slice(offset + 2, offset + 2 + length);
891
- const cpStart = plcfBkf.readUInt32LE(index * 4);
892
- const cpEnd = plcfBkl.readUInt32LE(index * 4);
893
- this._bookmarks[segment] = {
894
- start: cpStart,
895
- end: cpEnd
896
- };
897
- offset = offset + length + 2;
898
- }
899
- }
900
- /**
901
- * Extracts and stores the document text pieces into a local field. This is
902
- * probably the most crucial part of text extraction, as it is where we
903
- * get text corresponding to character positions. These may be stored in a
904
- * different order in the file compared to the order we want them.
905
- *
906
- * @param {*} buffer
907
- * @param {*} tableBuffer
908
- */
909
- writePieces(buffer, tableBuffer) {
910
- let flag;
911
- let pos = buffer.readUInt32LE(418);
912
- while (true) {
913
- flag = tableBuffer.readUInt8(pos);
914
- if (flag !== 1) break;
915
- pos = pos + 1;
916
- const skip = tableBuffer.readUInt16LE(pos);
917
- pos = pos + 2 + skip;
918
- }
919
- flag = tableBuffer.readUInt8(pos);
920
- pos = pos + 1;
921
- if (flag !== 2) throw new Error("Internal error: ccorrupted Word file");
922
- const pieceTableSize = tableBuffer.readUInt32LE(pos);
923
- pos = pos + 4;
924
- const pieces = (pieceTableSize - 4) / 12;
925
- let startCp = 0;
926
- let startStream = 0;
927
- for (let x = 0, end = pieces - 1; x <= end; x++) {
928
- const offset = pos + (pieces + 1) * 4 + x * 8 + 2;
929
- let startFilePos = tableBuffer.readUInt32LE(offset);
930
- let unicode = false;
931
- if ((startFilePos & 1073741824) === 0) unicode = true;
932
- else {
933
- startFilePos = startFilePos & -1073741825;
934
- startFilePos = Math.floor(startFilePos / 2);
935
- }
936
- const lStart = tableBuffer.readUInt32LE(pos + x * 4);
937
- const lEnd = tableBuffer.readUInt32LE(pos + (x + 1) * 4);
938
- const totLength = lEnd - lStart;
939
- const piece = {
940
- startCp,
941
- startStream,
942
- totLength,
943
- startFilePos,
944
- unicode,
945
- bpc: unicode ? 2 : 1
946
- };
947
- piece.size = piece.bpc * (lEnd - lStart);
948
- const textBuffer = buffer.slice(startFilePos, startFilePos + piece.size);
949
- if (unicode) piece.text = textBuffer.toString("ucs2");
950
- else piece.text = binaryToUnicode(textBuffer.toString("binary"));
951
- piece.length = piece.text.length;
952
- piece.endCp = piece.startCp + piece.length;
953
- piece.endStream = piece.startStream + piece.size;
954
- piece.endFilePos = piece.startFilePos + piece.size;
955
- startCp = piece.endCp;
956
- startStream = piece.endStream;
957
- this._pieces.push(piece);
958
- }
959
- }
960
- /**
961
- * Processes the headers and footers. The main logic here is that we might have a mix
962
- * of "real" and "pseudo" headers. For example, a footnote generates some footnote
963
- * separator footer elements, which, unless they contain something interesting, we
964
- * can dispense with. In fact, we want to dispense with anything which is made up of
965
- * whitespace and control characters, in general. This means locating the segments of
966
- * text in the extracted pieces, and conditionally replacing them with nulls.
967
- *
968
- * @param {*} buffer
969
- * @param {*} tableBuffer
970
- */
971
- normalizeHeaders(buffer, tableBuffer) {
972
- const pieces = this._pieces;
973
- const fcPlcfhdd = buffer.readUInt32LE(242);
974
- const lcbPlcfhdd = buffer.readUInt32LE(246);
975
- if (lcbPlcfhdd < 8) return;
976
- const offset = this._boundaries.ccpText + this._boundaries.ccpFtn;
977
- const ccpHdd = this._boundaries.ccpHdd;
978
- const plcHdd = tableBuffer.slice(fcPlcfhdd, fcPlcfhdd + lcbPlcfhdd);
979
- const plcHddCount = lcbPlcfhdd / 4;
980
- let start = offset + plcHdd.readUInt32LE(0);
981
- for (let i = 1; i < plcHddCount; i++) {
982
- let end = offset + plcHdd.readUInt32LE(i * 4);
983
- if (end > offset + ccpHdd) end = offset + ccpHdd;
984
- const string = getTextRangeByCP(pieces, start, end);
985
- const story = i - 1;
986
- if ([
987
- 0,
988
- 1,
989
- 2
990
- ].includes(story)) this._taggedHeaders.push({
991
- type: "footnoteSeparators",
992
- text: string
993
- });
994
- else if ([
995
- 3,
996
- 4,
997
- 5
998
- ].includes(story)) this._taggedHeaders.push({
999
- type: "endSeparators",
1000
- text: string
1001
- });
1002
- else if ([
1003
- 0,
1004
- 1,
1005
- 4
1006
- ].includes(story % 6)) this._taggedHeaders.push({
1007
- type: "headers",
1008
- text: string
1009
- });
1010
- else if ([
1011
- 2,
1012
- 3,
1013
- 5
1014
- ].includes(story % 6)) this._taggedHeaders.push({
1015
- type: "footers",
1016
- text: string
1017
- });
1018
- if (!/[^\r\n\u0002-\u0008]/.test(string)) replaceSelectedRange(pieces, start, end, "\0");
1019
- else replaceSelectedRange(pieces, end - 1, end, "\0");
1020
- start = end;
1021
- }
1022
- }
1023
- writeParagraphProperties(buffer, tableBuffer) {
1024
- const pieces = this._pieces;
1025
- const fcPlcfbtePapx = buffer.readUInt32LE(258);
1026
- const lcbPlcfbtePapx = buffer.readUInt32LE(262);
1027
- const plcBtePapxCount = (lcbPlcfbtePapx - 4) / 8;
1028
- const dataOffset = (plcBtePapxCount + 1) * 4;
1029
- const plcBtePapx = tableBuffer.slice(fcPlcfbtePapx, fcPlcfbtePapx + lcbPlcfbtePapx);
1030
- for (let i = 0; i < plcBtePapxCount; i++) {
1031
- plcBtePapx.readUInt32LE(i * 4);
1032
- const papxFkpBlock = plcBtePapx.readUInt32LE(dataOffset + i * 4);
1033
- const papxFkpBlockBuffer = buffer.slice(papxFkpBlock * 512, (papxFkpBlock + 1) * 512);
1034
- const crun = papxFkpBlockBuffer.readUInt8(511);
1035
- for (let j = 0; j < crun; j++) {
1036
- const rgfc = papxFkpBlockBuffer.readUInt32LE(j * 4);
1037
- const rgfcNext = papxFkpBlockBuffer.readUInt32LE((j + 1) * 4);
1038
- const cbLocation = (crun + 1) * 4 + j * 13;
1039
- const cbIndex = papxFkpBlockBuffer.readUInt8(cbLocation) * 2;
1040
- const cb = papxFkpBlockBuffer.readUInt8(cbIndex);
1041
- let grpPrlAndIstd = null;
1042
- if (cb !== 0) grpPrlAndIstd = papxFkpBlockBuffer.slice(cbIndex + 1, cbIndex + 1 + 2 * cb - 1);
1043
- else {
1044
- const cb2 = papxFkpBlockBuffer.readUInt8(cbIndex + 1);
1045
- grpPrlAndIstd = papxFkpBlockBuffer.slice(cbIndex + 2, cbIndex + 2 + 2 * cb2);
1046
- }
1047
- grpPrlAndIstd.readUInt16LE(0);
1048
- processSprms(grpPrlAndIstd, 2, (buffer, offset, sprm, ispmd, fspec, sgc, spra) => {
1049
- if (sprm === 9239) replaceSelectedRangeByFilePos(pieces, rgfc, rgfcNext, "\n");
1050
- });
1051
- }
1052
- }
1053
- }
1054
- writeCharacterProperties(buffer, tableBuffer) {
1055
- const pieces = this._pieces;
1056
- const fcPlcfbteChpx = buffer.readUInt32LE(250);
1057
- const lcbPlcfbteChpx = buffer.readUInt32LE(254);
1058
- const plcBteChpxCount = (lcbPlcfbteChpx - 4) / 8;
1059
- const dataOffset = (plcBteChpxCount + 1) * 4;
1060
- const plcBteChpx = tableBuffer.slice(fcPlcfbteChpx, fcPlcfbteChpx + lcbPlcfbteChpx);
1061
- let lastDeletionEnd = null;
1062
- for (let i = 0; i < plcBteChpxCount; i++) {
1063
- plcBteChpx.readUInt32LE(i * 4);
1064
- const chpxFkpBlock = plcBteChpx.readUInt32LE(dataOffset + i * 4);
1065
- const chpxFkpBlockBuffer = buffer.slice(chpxFkpBlock * 512, (chpxFkpBlock + 1) * 512);
1066
- const crun = chpxFkpBlockBuffer.readUInt8(511);
1067
- for (let j = 0; j < crun; j++) {
1068
- const rgfc = chpxFkpBlockBuffer.readUInt32LE(j * 4);
1069
- const rgfcNext = chpxFkpBlockBuffer.readUInt32LE((j + 1) * 4);
1070
- const rgb = chpxFkpBlockBuffer.readUInt8((crun + 1) * 4 + j);
1071
- if (rgb == 0) continue;
1072
- const chpxOffset = rgb * 2;
1073
- const cb = chpxFkpBlockBuffer.readUInt8(chpxOffset);
1074
- processSprms(chpxFkpBlockBuffer.slice(chpxOffset + 1, chpxOffset + 1 + cb), 0, (buffer, offset, sprm, ispmd) => {
1075
- if (ispmd === sprmCFRMarkDel) {
1076
- if ((buffer[offset] & 1) != 1) return;
1077
- if (lastDeletionEnd === rgfc) markDeletedRange(pieces, lastDeletionEnd, rgfcNext);
1078
- else markDeletedRange(pieces, rgfc, rgfcNext);
1079
- lastDeletionEnd = rgfcNext;
1080
- }
1081
- });
1082
- }
1083
- }
1084
- }
1085
- };
1086
- module.exports = WordOleExtractor;
1087
- }));
1088
- //#endregion
1089
- //#region node_modules/.pnpm/xmlchars@2.2.0/node_modules/xmlchars/xml/1.0/ed5.js
1090
- var require_ed5 = /* @__PURE__ */ __commonJSMin(((exports) => {
1091
- /**
1092
- * Character classes and associated utilities for the 5th edition of XML 1.0.
1093
- *
1094
- * @author Louis-Dominique Dubeau
1095
- * @license MIT
1096
- * @copyright Louis-Dominique Dubeau
1097
- */
1098
- Object.defineProperty(exports, "__esModule", { value: true });
1099
- exports.CHAR = " \n\r -퟿-�𐀀-􏿿";
1100
- exports.S = " \r\n";
1101
- exports.NAME_START_CHAR = ":A-Z_a-zÀ-ÖØ-öø-˿Ͱ-ͽͿ-῿‌‍⁰-↏Ⰰ-⿯、-퟿豈-﷏ﷰ-�𐀀-󯿿";
1102
- exports.NAME_CHAR = "-" + exports.NAME_START_CHAR + ".0-9·̀-ͯ‿-⁀";
1103
- exports.CHAR_RE = new RegExp("^[" + exports.CHAR + "]$", "u");
1104
- exports.S_RE = new RegExp("^[" + exports.S + "]+$", "u");
1105
- exports.NAME_START_CHAR_RE = new RegExp("^[" + exports.NAME_START_CHAR + "]$", "u");
1106
- exports.NAME_CHAR_RE = new RegExp("^[" + exports.NAME_CHAR + "]$", "u");
1107
- exports.NAME_RE = new RegExp("^[" + exports.NAME_START_CHAR + "][" + exports.NAME_CHAR + "]*$", "u");
1108
- exports.NMTOKEN_RE = new RegExp("^[" + exports.NAME_CHAR + "]+$", "u");
1109
- var TAB = 9;
1110
- var NL = 10;
1111
- var CR = 13;
1112
- var SPACE = 32;
1113
- /** All characters in the ``S`` production. */
1114
- exports.S_LIST = [
1115
- SPACE,
1116
- NL,
1117
- CR,
1118
- TAB
1119
- ];
1120
- /**
1121
- * Determines whether a codepoint matches the ``CHAR`` production.
1122
- *
1123
- * @param c The code point.
1124
- *
1125
- * @returns ``true`` if the codepoint matches ``CHAR``.
1126
- */
1127
- function isChar(c) {
1128
- return c >= SPACE && c <= 55295 || c === NL || c === CR || c === TAB || c >= 57344 && c <= 65533 || c >= 65536 && c <= 1114111;
1129
- }
1130
- exports.isChar = isChar;
1131
- /**
1132
- * Determines whether a codepoint matches the ``S`` (space) production.
1133
- *
1134
- * @param c The code point.
1135
- *
1136
- * @returns ``true`` if the codepoint matches ``S``.
1137
- */
1138
- function isS(c) {
1139
- return c === SPACE || c === NL || c === CR || c === TAB;
1140
- }
1141
- exports.isS = isS;
1142
- /**
1143
- * Determines whether a codepoint matches the ``NAME_START_CHAR`` production.
1144
- *
1145
- * @param c The code point.
1146
- *
1147
- * @returns ``true`` if the codepoint matches ``NAME_START_CHAR``.
1148
- */
1149
- function isNameStartChar(c) {
1150
- return c >= 65 && c <= 90 || c >= 97 && c <= 122 || c === 58 || c === 95 || c === 8204 || c === 8205 || c >= 192 && c <= 214 || c >= 216 && c <= 246 || c >= 248 && c <= 767 || c >= 880 && c <= 893 || c >= 895 && c <= 8191 || c >= 8304 && c <= 8591 || c >= 11264 && c <= 12271 || c >= 12289 && c <= 55295 || c >= 63744 && c <= 64975 || c >= 65008 && c <= 65533 || c >= 65536 && c <= 983039;
1151
- }
1152
- exports.isNameStartChar = isNameStartChar;
1153
- /**
1154
- * Determines whether a codepoint matches the ``NAME_CHAR`` production.
1155
- *
1156
- * @param c The code point.
1157
- *
1158
- * @returns ``true`` if the codepoint matches ``NAME_CHAR``.
1159
- */
1160
- function isNameChar(c) {
1161
- return isNameStartChar(c) || c >= 48 && c <= 57 || c === 45 || c === 46 || c === 183 || c >= 768 && c <= 879 || c >= 8255 && c <= 8256;
1162
- }
1163
- exports.isNameChar = isNameChar;
1164
- }));
1165
- //#endregion
1166
- //#region node_modules/.pnpm/xmlchars@2.2.0/node_modules/xmlchars/xml/1.1/ed2.js
1167
- var require_ed2 = /* @__PURE__ */ __commonJSMin(((exports) => {
1168
- /**
1169
- * Character classes and associated utilities for the 2nd edition of XML 1.1.
1170
- *
1171
- * @author Louis-Dominique Dubeau
1172
- * @license MIT
1173
- * @copyright Louis-Dominique Dubeau
1174
- */
1175
- Object.defineProperty(exports, "__esModule", { value: true });
1176
- exports.CHAR = "-퟿-�𐀀-􏿿";
1177
- exports.RESTRICTED_CHAR = "-\b\v\f--„†-Ÿ";
1178
- exports.S = " \r\n";
1179
- exports.NAME_START_CHAR = ":A-Z_a-zÀ-ÖØ-öø-˿Ͱ-ͽͿ-῿‌‍⁰-↏Ⰰ-⿯、-퟿豈-﷏ﷰ-�𐀀-󯿿";
1180
- exports.NAME_CHAR = "-" + exports.NAME_START_CHAR + ".0-9·̀-ͯ‿-⁀";
1181
- exports.CHAR_RE = new RegExp("^[" + exports.CHAR + "]$", "u");
1182
- exports.RESTRICTED_CHAR_RE = new RegExp("^[" + exports.RESTRICTED_CHAR + "]$", "u");
1183
- exports.S_RE = new RegExp("^[" + exports.S + "]+$", "u");
1184
- exports.NAME_START_CHAR_RE = new RegExp("^[" + exports.NAME_START_CHAR + "]$", "u");
1185
- exports.NAME_CHAR_RE = new RegExp("^[" + exports.NAME_CHAR + "]$", "u");
1186
- exports.NAME_RE = new RegExp("^[" + exports.NAME_START_CHAR + "][" + exports.NAME_CHAR + "]*$", "u");
1187
- exports.NMTOKEN_RE = new RegExp("^[" + exports.NAME_CHAR + "]+$", "u");
1188
- var TAB = 9;
1189
- var NL = 10;
1190
- var CR = 13;
1191
- var SPACE = 32;
1192
- /** All characters in the ``S`` production. */
1193
- exports.S_LIST = [
1194
- SPACE,
1195
- NL,
1196
- CR,
1197
- TAB
1198
- ];
1199
- /**
1200
- * Determines whether a codepoint matches the ``CHAR`` production.
1201
- *
1202
- * @param c The code point.
1203
- *
1204
- * @returns ``true`` if the codepoint matches ``CHAR``.
1205
- */
1206
- function isChar(c) {
1207
- return c >= 1 && c <= 55295 || c >= 57344 && c <= 65533 || c >= 65536 && c <= 1114111;
1208
- }
1209
- exports.isChar = isChar;
1210
- /**
1211
- * Determines whether a codepoint matches the ``RESTRICTED_CHAR`` production.
1212
- *
1213
- * @param c The code point.
1214
- *
1215
- * @returns ``true`` if the codepoint matches ``RESTRICTED_CHAR``.
1216
- */
1217
- function isRestrictedChar(c) {
1218
- return c >= 1 && c <= 8 || c === 11 || c === 12 || c >= 14 && c <= 31 || c >= 127 && c <= 132 || c >= 134 && c <= 159;
1219
- }
1220
- exports.isRestrictedChar = isRestrictedChar;
1221
- /**
1222
- * Determines whether a codepoint matches the ``CHAR`` production and does not
1223
- * match the ``RESTRICTED_CHAR`` production. ``isCharAndNotRestricted(x)`` is
1224
- * equivalent to ``isChar(x) && !isRestrictedChar(x)``. This function is faster
1225
- * than running the two-call equivalent.
1226
- *
1227
- * @param c The code point.
1228
- *
1229
- * @returns ``true`` if the codepoint matches ``CHAR`` and does not match
1230
- * ``RESTRICTED_CHAR``.
1231
- */
1232
- function isCharAndNotRestricted(c) {
1233
- return c === 9 || c === 10 || c === 13 || c > 31 && c < 127 || c === 133 || c > 159 && c <= 55295 || c >= 57344 && c <= 65533 || c >= 65536 && c <= 1114111;
1234
- }
1235
- exports.isCharAndNotRestricted = isCharAndNotRestricted;
1236
- /**
1237
- * Determines whether a codepoint matches the ``S`` (space) production.
1238
- *
1239
- * @param c The code point.
1240
- *
1241
- * @returns ``true`` if the codepoint matches ``S``.
1242
- */
1243
- function isS(c) {
1244
- return c === SPACE || c === NL || c === CR || c === TAB;
1245
- }
1246
- exports.isS = isS;
1247
- /**
1248
- * Determines whether a codepoint matches the ``NAME_START_CHAR`` production.
1249
- *
1250
- * @param c The code point.
1251
- *
1252
- * @returns ``true`` if the codepoint matches ``NAME_START_CHAR``.
1253
- */
1254
- function isNameStartChar(c) {
1255
- return c >= 65 && c <= 90 || c >= 97 && c <= 122 || c === 58 || c === 95 || c === 8204 || c === 8205 || c >= 192 && c <= 214 || c >= 216 && c <= 246 || c >= 248 && c <= 767 || c >= 880 && c <= 893 || c >= 895 && c <= 8191 || c >= 8304 && c <= 8591 || c >= 11264 && c <= 12271 || c >= 12289 && c <= 55295 || c >= 63744 && c <= 64975 || c >= 65008 && c <= 65533 || c >= 65536 && c <= 983039;
1256
- }
1257
- exports.isNameStartChar = isNameStartChar;
1258
- /**
1259
- * Determines whether a codepoint matches the ``NAME_CHAR`` production.
1260
- *
1261
- * @param c The code point.
1262
- *
1263
- * @returns ``true`` if the codepoint matches ``NAME_CHAR``.
1264
- */
1265
- function isNameChar(c) {
1266
- return isNameStartChar(c) || c >= 48 && c <= 57 || c === 45 || c === 46 || c === 183 || c >= 768 && c <= 879 || c >= 8255 && c <= 8256;
1267
- }
1268
- exports.isNameChar = isNameChar;
1269
- }));
1270
- //#endregion
1271
- //#region node_modules/.pnpm/xmlchars@2.2.0/node_modules/xmlchars/xmlns/1.0/ed3.js
1272
- var require_ed3 = /* @__PURE__ */ __commonJSMin(((exports) => {
1273
- /**
1274
- * Character class utilities for XML NS 1.0 edition 3.
1275
- *
1276
- * @author Louis-Dominique Dubeau
1277
- * @license MIT
1278
- * @copyright Louis-Dominique Dubeau
1279
- */
1280
- Object.defineProperty(exports, "__esModule", { value: true });
1281
- exports.NC_NAME_START_CHAR = "A-Z_a-zÀ-ÖØ-öø-˿Ͱ-ͽͿ-῿‌-‍⁰-↏Ⰰ-⿯、-퟿豈-﷏ﷰ-�𐀀-󯿿";
1282
- exports.NC_NAME_CHAR = "-" + exports.NC_NAME_START_CHAR + ".0-9·̀-ͯ‿-⁀";
1283
- exports.NC_NAME_START_CHAR_RE = new RegExp("^[" + exports.NC_NAME_START_CHAR + "]$", "u");
1284
- exports.NC_NAME_CHAR_RE = new RegExp("^[" + exports.NC_NAME_CHAR + "]$", "u");
1285
- exports.NC_NAME_RE = new RegExp("^[" + exports.NC_NAME_START_CHAR + "][" + exports.NC_NAME_CHAR + "]*$", "u");
1286
- /**
1287
- * Determines whether a codepoint matches [[NC_NAME_START_CHAR]].
1288
- *
1289
- * @param c The code point.
1290
- *
1291
- * @returns ``true`` if the codepoint matches.
1292
- */
1293
- function isNCNameStartChar(c) {
1294
- return c >= 65 && c <= 90 || c === 95 || c >= 97 && c <= 122 || c >= 192 && c <= 214 || c >= 216 && c <= 246 || c >= 248 && c <= 767 || c >= 880 && c <= 893 || c >= 895 && c <= 8191 || c >= 8204 && c <= 8205 || c >= 8304 && c <= 8591 || c >= 11264 && c <= 12271 || c >= 12289 && c <= 55295 || c >= 63744 && c <= 64975 || c >= 65008 && c <= 65533 || c >= 65536 && c <= 983039;
1295
- }
1296
- exports.isNCNameStartChar = isNCNameStartChar;
1297
- /**
1298
- * Determines whether a codepoint matches [[NC_NAME_CHAR]].
1299
- *
1300
- * @param c The code point.
1301
- *
1302
- * @returns ``true`` if the codepoint matches.
1303
- */
1304
- function isNCNameChar(c) {
1305
- return isNCNameStartChar(c) || c === 45 || c === 46 || c >= 48 && c <= 57 || c === 183 || c >= 768 && c <= 879 || c >= 8255 && c <= 8256;
1306
- }
1307
- exports.isNCNameChar = isNCNameChar;
1308
- }));
1309
- //#endregion
1310
- //#region node_modules/.pnpm/saxes@5.0.1/node_modules/saxes/saxes.js
1311
- var require_saxes = /* @__PURE__ */ __commonJSMin(((exports) => {
1312
- Object.defineProperty(exports, "__esModule", { value: true });
1313
- const ed5 = require_ed5();
1314
- const ed2 = require_ed2();
1315
- const NSed3 = require_ed3();
1316
- var isS = ed5.isS;
1317
- var isChar10 = ed5.isChar;
1318
- var isNameStartChar = ed5.isNameStartChar;
1319
- var isNameChar = ed5.isNameChar;
1320
- var S_LIST = ed5.S_LIST;
1321
- var NAME_RE = ed5.NAME_RE;
1322
- var isChar11 = ed2.isChar;
1323
- var isNCNameStartChar = NSed3.isNCNameStartChar;
1324
- var isNCNameChar = NSed3.isNCNameChar;
1325
- var NC_NAME_RE = NSed3.NC_NAME_RE;
1326
- const XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace";
1327
- const XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/";
1328
- const rootNS = {
1329
- __proto__: null,
1330
- xml: XML_NAMESPACE,
1331
- xmlns: XMLNS_NAMESPACE
1332
- };
1333
- const XML_ENTITIES = {
1334
- __proto__: null,
1335
- amp: "&",
1336
- gt: ">",
1337
- lt: "<",
1338
- quot: "\"",
1339
- apos: "'"
1340
- };
1341
- const EOC = -1;
1342
- const NL_LIKE = -2;
1343
- const S_BEGIN = 0;
1344
- const S_BEGIN_WHITESPACE = 1;
1345
- const S_DOCTYPE = 2;
1346
- const S_DOCTYPE_QUOTE = 3;
1347
- const S_DTD = 4;
1348
- const S_DTD_QUOTED = 5;
1349
- const S_DTD_OPEN_WAKA = 6;
1350
- const S_DTD_OPEN_WAKA_BANG = 7;
1351
- const S_DTD_COMMENT = 8;
1352
- const S_DTD_COMMENT_ENDING = 9;
1353
- const S_DTD_COMMENT_ENDED = 10;
1354
- const S_DTD_PI = 11;
1355
- const S_DTD_PI_ENDING = 12;
1356
- const S_TEXT = 13;
1357
- const S_ENTITY = 14;
1358
- const S_OPEN_WAKA = 15;
1359
- const S_OPEN_WAKA_BANG = 16;
1360
- const S_COMMENT = 17;
1361
- const S_COMMENT_ENDING = 18;
1362
- const S_COMMENT_ENDED = 19;
1363
- const S_CDATA = 20;
1364
- const S_CDATA_ENDING = 21;
1365
- const S_CDATA_ENDING_2 = 22;
1366
- const S_PI_FIRST_CHAR = 23;
1367
- const S_PI_REST = 24;
1368
- const S_PI_BODY = 25;
1369
- const S_PI_ENDING = 26;
1370
- const S_XML_DECL_NAME_START = 27;
1371
- const S_XML_DECL_NAME = 28;
1372
- const S_XML_DECL_EQ = 29;
1373
- const S_XML_DECL_VALUE_START = 30;
1374
- const S_XML_DECL_VALUE = 31;
1375
- const S_XML_DECL_SEPARATOR = 32;
1376
- const S_XML_DECL_ENDING = 33;
1377
- const S_OPEN_TAG = 34;
1378
- const S_OPEN_TAG_SLASH = 35;
1379
- const S_ATTRIB = 36;
1380
- const S_ATTRIB_NAME = 37;
1381
- const S_ATTRIB_NAME_SAW_WHITE = 38;
1382
- const S_ATTRIB_VALUE = 39;
1383
- const S_ATTRIB_VALUE_QUOTED = 40;
1384
- const S_ATTRIB_VALUE_CLOSED = 41;
1385
- const S_ATTRIB_VALUE_UNQUOTED = 42;
1386
- const S_CLOSE_TAG = 43;
1387
- const S_CLOSE_TAG_SAW_WHITE = 44;
1388
- const TAB = 9;
1389
- const NL = 10;
1390
- const CR = 13;
1391
- const SPACE = 32;
1392
- const BANG = 33;
1393
- const DQUOTE = 34;
1394
- const AMP = 38;
1395
- const SQUOTE = 39;
1396
- const MINUS = 45;
1397
- const FORWARD_SLASH = 47;
1398
- const SEMICOLON = 59;
1399
- const LESS = 60;
1400
- const EQUAL = 61;
1401
- const GREATER = 62;
1402
- const QUESTION = 63;
1403
- const OPEN_BRACKET = 91;
1404
- const CLOSE_BRACKET = 93;
1405
- const NEL = 133;
1406
- const LS = 8232;
1407
- const isQuote = (c) => c === DQUOTE || c === SQUOTE;
1408
- const QUOTES = [DQUOTE, SQUOTE];
1409
- const DOCTYPE_TERMINATOR = [
1410
- ...QUOTES,
1411
- OPEN_BRACKET,
1412
- GREATER
1413
- ];
1414
- const DTD_TERMINATOR = [
1415
- ...QUOTES,
1416
- LESS,
1417
- CLOSE_BRACKET
1418
- ];
1419
- const XML_DECL_NAME_TERMINATOR = [
1420
- EQUAL,
1421
- QUESTION,
1422
- ...S_LIST
1423
- ];
1424
- const ATTRIB_VALUE_UNQUOTED_TERMINATOR = [
1425
- ...S_LIST,
1426
- GREATER,
1427
- AMP,
1428
- LESS
1429
- ];
1430
- function nsPairCheck(parser, prefix, uri) {
1431
- switch (prefix) {
1432
- case "xml":
1433
- if (uri !== XML_NAMESPACE) parser.fail(`xml prefix must be bound to ${XML_NAMESPACE}.`);
1434
- break;
1435
- case "xmlns":
1436
- if (uri !== XMLNS_NAMESPACE) parser.fail(`xmlns prefix must be bound to ${XMLNS_NAMESPACE}.`);
1437
- break;
1438
- default:
1439
- }
1440
- switch (uri) {
1441
- case XMLNS_NAMESPACE:
1442
- parser.fail(prefix === "" ? `the default namespace may not be set to ${uri}.` : `may not assign a prefix (even "xmlns") to the URI \
1443
- ${XMLNS_NAMESPACE}.`);
1444
- break;
1445
- case XML_NAMESPACE:
1446
- switch (prefix) {
1447
- case "xml": break;
1448
- case "":
1449
- parser.fail(`the default namespace may not be set to ${uri}.`);
1450
- break;
1451
- default: parser.fail("may not assign the xml namespace to another prefix.");
1452
- }
1453
- break;
1454
- default:
1455
- }
1456
- }
1457
- function nsMappingCheck(parser, mapping) {
1458
- for (const local of Object.keys(mapping)) nsPairCheck(parser, local, mapping[local]);
1459
- }
1460
- const isNCName = (name) => NC_NAME_RE.test(name);
1461
- const isName = (name) => NAME_RE.test(name);
1462
- const FORBIDDEN_START = 0;
1463
- const FORBIDDEN_BRACKET = 1;
1464
- const FORBIDDEN_BRACKET_BRACKET = 2;
1465
- /**
1466
- * The list of supported events.
1467
- */
1468
- exports.EVENTS = [
1469
- "xmldecl",
1470
- "text",
1471
- "processinginstruction",
1472
- "doctype",
1473
- "comment",
1474
- "opentagstart",
1475
- "attribute",
1476
- "opentag",
1477
- "closetag",
1478
- "cdata",
1479
- "error",
1480
- "end",
1481
- "ready"
1482
- ];
1483
- const EVENT_NAME_TO_HANDLER_NAME = {
1484
- xmldecl: "xmldeclHandler",
1485
- text: "textHandler",
1486
- processinginstruction: "piHandler",
1487
- doctype: "doctypeHandler",
1488
- comment: "commentHandler",
1489
- opentagstart: "openTagStartHandler",
1490
- attribute: "attributeHandler",
1491
- opentag: "openTagHandler",
1492
- closetag: "closeTagHandler",
1493
- cdata: "cdataHandler",
1494
- error: "errorHandler",
1495
- end: "endHandler",
1496
- ready: "readyHandler"
1497
- };
1498
- var SaxesParser = class {
1499
- /**
1500
- * @param opt The parser options.
1501
- */
1502
- constructor(opt) {
1503
- this.opt = opt !== null && opt !== void 0 ? opt : {};
1504
- this.fragmentOpt = !!this.opt.fragment;
1505
- const xmlnsOpt = this.xmlnsOpt = !!this.opt.xmlns;
1506
- this.trackPosition = this.opt.position !== false;
1507
- this.fileName = this.opt.fileName;
1508
- if (xmlnsOpt) {
1509
- this.nameStartCheck = isNCNameStartChar;
1510
- this.nameCheck = isNCNameChar;
1511
- this.isName = isNCName;
1512
- this.processAttribs = this.processAttribsNS;
1513
- this.pushAttrib = this.pushAttribNS;
1514
- this.ns = Object.assign({ __proto__: null }, rootNS);
1515
- const additional = this.opt.additionalNamespaces;
1516
- if (additional != null) {
1517
- nsMappingCheck(this, additional);
1518
- Object.assign(this.ns, additional);
1519
- }
1520
- } else {
1521
- this.nameStartCheck = isNameStartChar;
1522
- this.nameCheck = isNameChar;
1523
- this.isName = isName;
1524
- this.processAttribs = this.processAttribsPlain;
1525
- this.pushAttrib = this.pushAttribPlain;
1526
- }
1527
- this.stateTable = [
1528
- this.sBegin,
1529
- this.sBeginWhitespace,
1530
- this.sDoctype,
1531
- this.sDoctypeQuote,
1532
- this.sDTD,
1533
- this.sDTDQuoted,
1534
- this.sDTDOpenWaka,
1535
- this.sDTDOpenWakaBang,
1536
- this.sDTDComment,
1537
- this.sDTDCommentEnding,
1538
- this.sDTDCommentEnded,
1539
- this.sDTDPI,
1540
- this.sDTDPIEnding,
1541
- this.sText,
1542
- this.sEntity,
1543
- this.sOpenWaka,
1544
- this.sOpenWakaBang,
1545
- this.sComment,
1546
- this.sCommentEnding,
1547
- this.sCommentEnded,
1548
- this.sCData,
1549
- this.sCDataEnding,
1550
- this.sCDataEnding2,
1551
- this.sPIFirstChar,
1552
- this.sPIRest,
1553
- this.sPIBody,
1554
- this.sPIEnding,
1555
- this.sXMLDeclNameStart,
1556
- this.sXMLDeclName,
1557
- this.sXMLDeclEq,
1558
- this.sXMLDeclValueStart,
1559
- this.sXMLDeclValue,
1560
- this.sXMLDeclSeparator,
1561
- this.sXMLDeclEnding,
1562
- this.sOpenTag,
1563
- this.sOpenTagSlash,
1564
- this.sAttrib,
1565
- this.sAttribName,
1566
- this.sAttribNameSawWhite,
1567
- this.sAttribValue,
1568
- this.sAttribValueQuoted,
1569
- this.sAttribValueClosed,
1570
- this.sAttribValueUnquoted,
1571
- this.sCloseTag,
1572
- this.sCloseTagSawWhite
1573
- ];
1574
- this._init();
1575
- }
1576
- /**
1577
- * Indicates whether or not the parser is closed. If ``true``, wait for
1578
- * the ``ready`` event to write again.
1579
- */
1580
- get closed() {
1581
- return this._closed;
1582
- }
1583
- _init() {
1584
- var _a;
1585
- this.openWakaBang = "";
1586
- this.text = "";
1587
- this.name = "";
1588
- this.piTarget = "";
1589
- this.entity = "";
1590
- this.q = null;
1591
- this.tags = [];
1592
- this.tag = null;
1593
- this.topNS = null;
1594
- this.chunk = "";
1595
- this.chunkPosition = 0;
1596
- this.i = 0;
1597
- this.prevI = 0;
1598
- this.carriedFromPrevious = void 0;
1599
- this.forbiddenState = FORBIDDEN_START;
1600
- this.attribList = [];
1601
- const { fragmentOpt } = this;
1602
- this.state = fragmentOpt ? S_TEXT : S_BEGIN;
1603
- this.reportedTextBeforeRoot = this.reportedTextAfterRoot = this.closedRoot = this.sawRoot = fragmentOpt;
1604
- this.xmlDeclPossible = !fragmentOpt;
1605
- this.xmlDeclExpects = ["version"];
1606
- this.entityReturnState = void 0;
1607
- let { defaultXMLVersion } = this.opt;
1608
- if (defaultXMLVersion === void 0) {
1609
- if (this.opt.forceXMLVersion === true) throw new Error("forceXMLVersion set but defaultXMLVersion is not set");
1610
- defaultXMLVersion = "1.0";
1611
- }
1612
- this.setXMLVersion(defaultXMLVersion);
1613
- this.positionAtNewLine = 0;
1614
- this.doctype = false;
1615
- this._closed = false;
1616
- this.xmlDecl = {
1617
- version: void 0,
1618
- encoding: void 0,
1619
- standalone: void 0
1620
- };
1621
- this.line = 1;
1622
- this.column = 0;
1623
- this.ENTITIES = Object.create(XML_ENTITIES);
1624
- (_a = this.readyHandler) === null || _a === void 0 || _a.call(this);
1625
- }
1626
- /**
1627
- * The stream position the parser is currently looking at. This field is
1628
- * zero-based.
1629
- *
1630
- * This field is not based on counting Unicode characters but is to be
1631
- * interpreted as a plain index into a JavaScript string.
1632
- */
1633
- get position() {
1634
- return this.chunkPosition + this.i;
1635
- }
1636
- /**
1637
- * The column number of the next character to be read by the parser.
1638
- * This field is zero-based. (The first column in a line is 0.)
1639
- *
1640
- * This field reports the index at which the next character would be in the
1641
- * line if the line were represented as a JavaScript string. Note that this
1642
- * *can* be different to a count based on the number of *Unicode characters*
1643
- * due to how JavaScript handles astral plane characters.
1644
- *
1645
- * See [[column]] for a number that corresponds to a count of Unicode
1646
- * characters.
1647
- */
1648
- get columnIndex() {
1649
- return this.position - this.positionAtNewLine;
1650
- }
1651
- /**
1652
- * Set an event listener on an event. The parser supports one handler per
1653
- * event type. If you try to set an event handler over an existing handler,
1654
- * the old handler is silently overwritten.
1655
- *
1656
- * @param name The event to listen to.
1657
- *
1658
- * @param handler The handler to set.
1659
- */
1660
- on(name, handler) {
1661
- this[EVENT_NAME_TO_HANDLER_NAME[name]] = handler;
1662
- }
1663
- /**
1664
- * Unset an event handler.
1665
- *
1666
- * @param name The event to stop listening to.
1667
- */
1668
- off(name) {
1669
- this[EVENT_NAME_TO_HANDLER_NAME[name]] = void 0;
1670
- }
1671
- /**
1672
- * Make an error object. The error object will have a message that contains
1673
- * the ``fileName`` option passed at the creation of the parser. If position
1674
- * tracking was turned on, it will also have line and column number
1675
- * information.
1676
- *
1677
- * @param message The message describing the error to report.
1678
- *
1679
- * @returns An error object with a properly formatted message.
1680
- */
1681
- makeError(message) {
1682
- var _a;
1683
- let msg = (_a = this.fileName) !== null && _a !== void 0 ? _a : "";
1684
- if (this.trackPosition) {
1685
- if (msg.length > 0) msg += ":";
1686
- msg += `${this.line}:${this.column}`;
1687
- }
1688
- if (msg.length > 0) msg += ": ";
1689
- return new Error(msg + message);
1690
- }
1691
- /**
1692
- * Report a parsing error. This method is made public so that client code may
1693
- * check for issues that are outside the scope of this project and can report
1694
- * errors.
1695
- *
1696
- * @param message The error to report.
1697
- *
1698
- * @returns this
1699
- */
1700
- fail(message) {
1701
- const err = this.makeError(message);
1702
- const handler = this.errorHandler;
1703
- if (handler === void 0) throw err;
1704
- else handler(err);
1705
- return this;
1706
- }
1707
- /**
1708
- * Write XML data to the parser.
1709
- *
1710
- * @param chunk The XML data to write.
1711
- *
1712
- * @returns this
1713
- */
1714
- write(chunk) {
1715
- if (this.closed) return this.fail("cannot write after close; assign an onready handler.");
1716
- let end = false;
1717
- if (chunk === null) {
1718
- end = true;
1719
- chunk = "";
1720
- } else if (typeof chunk === "object") chunk = chunk.toString();
1721
- if (this.carriedFromPrevious !== void 0) {
1722
- chunk = `${this.carriedFromPrevious}${chunk}`;
1723
- this.carriedFromPrevious = void 0;
1724
- }
1725
- let limit = chunk.length;
1726
- const lastCode = chunk.charCodeAt(limit - 1);
1727
- if (!end && (lastCode === CR || lastCode >= 55296 && lastCode <= 56319)) {
1728
- this.carriedFromPrevious = chunk[limit - 1];
1729
- limit--;
1730
- chunk = chunk.slice(0, limit);
1731
- }
1732
- const { stateTable } = this;
1733
- this.chunk = chunk;
1734
- this.i = 0;
1735
- while (this.i < limit) stateTable[this.state].call(this);
1736
- this.chunkPosition += limit;
1737
- return end ? this.end() : this;
1738
- }
1739
- /**
1740
- * Close the current stream. Perform final well-formedness checks and reset
1741
- * the parser state.
1742
- *
1743
- * @returns this
1744
- */
1745
- close() {
1746
- return this.write(null);
1747
- }
1748
- /**
1749
- * Get a single code point out of the current chunk. This updates the current
1750
- * position if we do position tracking.
1751
- *
1752
- * This is the algorithm to use for XML 1.0.
1753
- *
1754
- * @returns The character read.
1755
- */
1756
- getCode10() {
1757
- const { chunk, i } = this;
1758
- this.prevI = i;
1759
- this.i = i + 1;
1760
- if (i >= chunk.length) return EOC;
1761
- const code = chunk.charCodeAt(i);
1762
- this.column++;
1763
- if (code < 55296) {
1764
- if (code >= SPACE || code === TAB) return code;
1765
- switch (code) {
1766
- case NL:
1767
- this.line++;
1768
- this.column = 0;
1769
- this.positionAtNewLine = this.position;
1770
- return NL;
1771
- case CR:
1772
- if (chunk.charCodeAt(i + 1) === NL) this.i = i + 2;
1773
- this.line++;
1774
- this.column = 0;
1775
- this.positionAtNewLine = this.position;
1776
- return NL_LIKE;
1777
- default:
1778
- this.fail("disallowed character.");
1779
- return code;
1780
- }
1781
- }
1782
- if (code > 56319) {
1783
- if (!(code >= 57344 && code <= 65533)) this.fail("disallowed character.");
1784
- return code;
1785
- }
1786
- const final = 65536 + (code - 55296) * 1024 + (chunk.charCodeAt(i + 1) - 56320);
1787
- this.i = i + 2;
1788
- if (final > 1114111) this.fail("disallowed character.");
1789
- return final;
1790
- }
1791
- /**
1792
- * Get a single code point out of the current chunk. This updates the current
1793
- * position if we do position tracking.
1794
- *
1795
- * This is the algorithm to use for XML 1.1.
1796
- *
1797
- * @returns {number} The character read.
1798
- */
1799
- getCode11() {
1800
- const { chunk, i } = this;
1801
- this.prevI = i;
1802
- this.i = i + 1;
1803
- if (i >= chunk.length) return EOC;
1804
- const code = chunk.charCodeAt(i);
1805
- this.column++;
1806
- if (code < 55296) {
1807
- if (code > 31 && code < 127 || code > 159 && code !== LS || code === TAB) return code;
1808
- switch (code) {
1809
- case NL:
1810
- this.line++;
1811
- this.column = 0;
1812
- this.positionAtNewLine = this.position;
1813
- return NL;
1814
- case CR: {
1815
- const next = chunk.charCodeAt(i + 1);
1816
- if (next === NL || next === NEL) this.i = i + 2;
1817
- }
1818
- case NEL:
1819
- case LS:
1820
- this.line++;
1821
- this.column = 0;
1822
- this.positionAtNewLine = this.position;
1823
- return NL_LIKE;
1824
- default:
1825
- this.fail("disallowed character.");
1826
- return code;
1827
- }
1828
- }
1829
- if (code > 56319) {
1830
- if (!(code >= 57344 && code <= 65533)) this.fail("disallowed character.");
1831
- return code;
1832
- }
1833
- const final = 65536 + (code - 55296) * 1024 + (chunk.charCodeAt(i + 1) - 56320);
1834
- this.i = i + 2;
1835
- if (final > 1114111) this.fail("disallowed character.");
1836
- return final;
1837
- }
1838
- /**
1839
- * Like ``getCode`` but with the return value normalized so that ``NL`` is
1840
- * returned for ``NL_LIKE``.
1841
- */
1842
- getCodeNorm() {
1843
- const c = this.getCode();
1844
- return c === NL_LIKE ? NL : c;
1845
- }
1846
- unget() {
1847
- this.i = this.prevI;
1848
- this.column--;
1849
- }
1850
- /**
1851
- * Capture characters into a buffer until encountering one of a set of
1852
- * characters.
1853
- *
1854
- * @param chars An array of codepoints. Encountering a character in the array
1855
- * ends the capture. (``chars`` may safely contain ``NL``.)
1856
- *
1857
- * @return The character code that made the capture end, or ``EOC`` if we hit
1858
- * the end of the chunk. The return value cannot be NL_LIKE: NL is returned
1859
- * instead.
1860
- */
1861
- captureTo(chars) {
1862
- let { i: start } = this;
1863
- const { chunk } = this;
1864
- while (true) {
1865
- const c = this.getCode();
1866
- const isNLLike = c === NL_LIKE;
1867
- const final = isNLLike ? NL : c;
1868
- if (final === EOC || chars.includes(final)) {
1869
- this.text += chunk.slice(start, this.prevI);
1870
- return final;
1871
- }
1872
- if (isNLLike) {
1873
- this.text += `${chunk.slice(start, this.prevI)}\n`;
1874
- start = this.i;
1875
- }
1876
- }
1877
- }
1878
- /**
1879
- * Capture characters into a buffer until encountering a character.
1880
- *
1881
- * @param char The codepoint that ends the capture. **NOTE ``char`` MAY NOT
1882
- * CONTAIN ``NL``.** Passing ``NL`` will result in buggy behavior.
1883
- *
1884
- * @return ``true`` if we ran into the character. Otherwise, we ran into the
1885
- * end of the current chunk.
1886
- */
1887
- captureToChar(char) {
1888
- let { i: start } = this;
1889
- const { chunk } = this;
1890
- while (true) {
1891
- let c = this.getCode();
1892
- switch (c) {
1893
- case NL_LIKE:
1894
- this.text += `${chunk.slice(start, this.prevI)}\n`;
1895
- start = this.i;
1896
- c = NL;
1897
- break;
1898
- case EOC:
1899
- this.text += chunk.slice(start);
1900
- return false;
1901
- default:
1902
- }
1903
- if (c === char) {
1904
- this.text += chunk.slice(start, this.prevI);
1905
- return true;
1906
- }
1907
- }
1908
- }
1909
- /**
1910
- * Capture characters that satisfy ``isNameChar`` into the ``name`` field of
1911
- * this parser.
1912
- *
1913
- * @return The character code that made the test fail, or ``EOC`` if we hit
1914
- * the end of the chunk. The return value cannot be NL_LIKE: NL is returned
1915
- * instead.
1916
- */
1917
- captureNameChars() {
1918
- const { chunk, i: start } = this;
1919
- while (true) {
1920
- const c = this.getCode();
1921
- if (c === EOC) {
1922
- this.name += chunk.slice(start);
1923
- return EOC;
1924
- }
1925
- if (!isNameChar(c)) {
1926
- this.name += chunk.slice(start, this.prevI);
1927
- return c === NL_LIKE ? NL : c;
1928
- }
1929
- }
1930
- }
1931
- /**
1932
- * Skip white spaces.
1933
- *
1934
- * @return The character that ended the skip, or ``EOC`` if we hit
1935
- * the end of the chunk. The return value cannot be NL_LIKE: NL is returned
1936
- * instead.
1937
- */
1938
- skipSpaces() {
1939
- while (true) {
1940
- const c = this.getCodeNorm();
1941
- if (c === EOC || !isS(c)) return c;
1942
- }
1943
- }
1944
- setXMLVersion(version) {
1945
- this.currentXMLVersion = version;
1946
- if (version === "1.0") {
1947
- this.isChar = isChar10;
1948
- this.getCode = this.getCode10;
1949
- } else {
1950
- this.isChar = isChar11;
1951
- this.getCode = this.getCode11;
1952
- }
1953
- }
1954
- sBegin() {
1955
- if (this.chunk.charCodeAt(0) === 65279) {
1956
- this.i++;
1957
- this.column++;
1958
- }
1959
- this.state = S_BEGIN_WHITESPACE;
1960
- }
1961
- sBeginWhitespace() {
1962
- const iBefore = this.i;
1963
- const c = this.skipSpaces();
1964
- if (this.prevI !== iBefore) this.xmlDeclPossible = false;
1965
- switch (c) {
1966
- case LESS:
1967
- this.state = S_OPEN_WAKA;
1968
- if (this.text.length !== 0) throw new Error("no-empty text at start");
1969
- break;
1970
- case EOC: break;
1971
- default:
1972
- this.unget();
1973
- this.state = S_TEXT;
1974
- this.xmlDeclPossible = false;
1975
- }
1976
- }
1977
- sDoctype() {
1978
- var _a;
1979
- const c = this.captureTo(DOCTYPE_TERMINATOR);
1980
- switch (c) {
1981
- case GREATER:
1982
- (_a = this.doctypeHandler) === null || _a === void 0 || _a.call(this, this.text);
1983
- this.text = "";
1984
- this.state = S_TEXT;
1985
- this.doctype = true;
1986
- break;
1987
- case EOC: break;
1988
- default:
1989
- this.text += String.fromCodePoint(c);
1990
- if (c === OPEN_BRACKET) this.state = S_DTD;
1991
- else if (isQuote(c)) {
1992
- this.state = S_DOCTYPE_QUOTE;
1993
- this.q = c;
1994
- }
1995
- }
1996
- }
1997
- sDoctypeQuote() {
1998
- const q = this.q;
1999
- if (this.captureToChar(q)) {
2000
- this.text += String.fromCodePoint(q);
2001
- this.q = null;
2002
- this.state = S_DOCTYPE;
2003
- }
2004
- }
2005
- sDTD() {
2006
- const c = this.captureTo(DTD_TERMINATOR);
2007
- if (c === EOC) return;
2008
- this.text += String.fromCodePoint(c);
2009
- if (c === CLOSE_BRACKET) this.state = S_DOCTYPE;
2010
- else if (c === LESS) this.state = S_DTD_OPEN_WAKA;
2011
- else if (isQuote(c)) {
2012
- this.state = S_DTD_QUOTED;
2013
- this.q = c;
2014
- }
2015
- }
2016
- sDTDQuoted() {
2017
- const q = this.q;
2018
- if (this.captureToChar(q)) {
2019
- this.text += String.fromCodePoint(q);
2020
- this.state = S_DTD;
2021
- this.q = null;
2022
- }
2023
- }
2024
- sDTDOpenWaka() {
2025
- const c = this.getCodeNorm();
2026
- this.text += String.fromCodePoint(c);
2027
- switch (c) {
2028
- case BANG:
2029
- this.state = S_DTD_OPEN_WAKA_BANG;
2030
- this.openWakaBang = "";
2031
- break;
2032
- case QUESTION:
2033
- this.state = S_DTD_PI;
2034
- break;
2035
- default: this.state = S_DTD;
2036
- }
2037
- }
2038
- sDTDOpenWakaBang() {
2039
- const char = String.fromCodePoint(this.getCodeNorm());
2040
- const owb = this.openWakaBang += char;
2041
- this.text += char;
2042
- if (owb !== "-") {
2043
- this.state = owb === "--" ? S_DTD_COMMENT : S_DTD;
2044
- this.openWakaBang = "";
2045
- }
2046
- }
2047
- sDTDComment() {
2048
- if (this.captureToChar(MINUS)) {
2049
- this.text += "-";
2050
- this.state = S_DTD_COMMENT_ENDING;
2051
- }
2052
- }
2053
- sDTDCommentEnding() {
2054
- const c = this.getCodeNorm();
2055
- this.text += String.fromCodePoint(c);
2056
- this.state = c === MINUS ? S_DTD_COMMENT_ENDED : S_DTD_COMMENT;
2057
- }
2058
- sDTDCommentEnded() {
2059
- const c = this.getCodeNorm();
2060
- this.text += String.fromCodePoint(c);
2061
- if (c === GREATER) this.state = S_DTD;
2062
- else {
2063
- this.fail("malformed comment.");
2064
- this.state = S_DTD_COMMENT;
2065
- }
2066
- }
2067
- sDTDPI() {
2068
- if (this.captureToChar(QUESTION)) {
2069
- this.text += "?";
2070
- this.state = S_DTD_PI_ENDING;
2071
- }
2072
- }
2073
- sDTDPIEnding() {
2074
- const c = this.getCodeNorm();
2075
- this.text += String.fromCodePoint(c);
2076
- if (c === GREATER) this.state = S_DTD;
2077
- }
2078
- sText() {
2079
- if (this.tags.length !== 0) this.handleTextInRoot();
2080
- else this.handleTextOutsideRoot();
2081
- }
2082
- sEntity() {
2083
- let { i: start } = this;
2084
- const { chunk } = this;
2085
- loop: while (true) switch (this.getCode()) {
2086
- case NL_LIKE:
2087
- this.entity += `${chunk.slice(start, this.prevI)}\n`;
2088
- start = this.i;
2089
- break;
2090
- case SEMICOLON: {
2091
- const { entityReturnState } = this;
2092
- const entity = this.entity + chunk.slice(start, this.prevI);
2093
- this.state = entityReturnState;
2094
- let parsed;
2095
- if (entity === "") {
2096
- this.fail("empty entity name.");
2097
- parsed = "&;";
2098
- } else {
2099
- parsed = this.parseEntity(entity);
2100
- this.entity = "";
2101
- }
2102
- if (entityReturnState !== S_TEXT || this.textHandler !== void 0) this.text += parsed;
2103
- break loop;
2104
- }
2105
- case EOC:
2106
- this.entity += chunk.slice(start);
2107
- break loop;
2108
- default:
2109
- }
2110
- }
2111
- sOpenWaka() {
2112
- const c = this.getCode();
2113
- if (isNameStartChar(c)) {
2114
- this.state = S_OPEN_TAG;
2115
- this.unget();
2116
- this.xmlDeclPossible = false;
2117
- } else switch (c) {
2118
- case FORWARD_SLASH:
2119
- this.state = S_CLOSE_TAG;
2120
- this.xmlDeclPossible = false;
2121
- break;
2122
- case BANG:
2123
- this.state = S_OPEN_WAKA_BANG;
2124
- this.openWakaBang = "";
2125
- this.xmlDeclPossible = false;
2126
- break;
2127
- case QUESTION:
2128
- this.state = S_PI_FIRST_CHAR;
2129
- break;
2130
- default:
2131
- this.fail("disallowed character in tag name");
2132
- this.state = S_TEXT;
2133
- this.xmlDeclPossible = false;
2134
- }
2135
- }
2136
- sOpenWakaBang() {
2137
- this.openWakaBang += String.fromCodePoint(this.getCodeNorm());
2138
- switch (this.openWakaBang) {
2139
- case "[CDATA[":
2140
- if (!this.sawRoot && !this.reportedTextBeforeRoot) {
2141
- this.fail("text data outside of root node.");
2142
- this.reportedTextBeforeRoot = true;
2143
- }
2144
- if (this.closedRoot && !this.reportedTextAfterRoot) {
2145
- this.fail("text data outside of root node.");
2146
- this.reportedTextAfterRoot = true;
2147
- }
2148
- this.state = S_CDATA;
2149
- this.openWakaBang = "";
2150
- break;
2151
- case "--":
2152
- this.state = S_COMMENT;
2153
- this.openWakaBang = "";
2154
- break;
2155
- case "DOCTYPE":
2156
- this.state = S_DOCTYPE;
2157
- if (this.doctype || this.sawRoot) this.fail("inappropriately located doctype declaration.");
2158
- this.openWakaBang = "";
2159
- break;
2160
- default: if (this.openWakaBang.length >= 7) this.fail("incorrect syntax.");
2161
- }
2162
- }
2163
- sComment() {
2164
- if (this.captureToChar(MINUS)) this.state = S_COMMENT_ENDING;
2165
- }
2166
- sCommentEnding() {
2167
- var _a;
2168
- const c = this.getCodeNorm();
2169
- if (c === MINUS) {
2170
- this.state = S_COMMENT_ENDED;
2171
- (_a = this.commentHandler) === null || _a === void 0 || _a.call(this, this.text);
2172
- this.text = "";
2173
- } else {
2174
- this.text += `-${String.fromCodePoint(c)}`;
2175
- this.state = S_COMMENT;
2176
- }
2177
- }
2178
- sCommentEnded() {
2179
- const c = this.getCodeNorm();
2180
- if (c !== GREATER) {
2181
- this.fail("malformed comment.");
2182
- this.text += `--${String.fromCodePoint(c)}`;
2183
- this.state = S_COMMENT;
2184
- } else this.state = S_TEXT;
2185
- }
2186
- sCData() {
2187
- if (this.captureToChar(CLOSE_BRACKET)) this.state = S_CDATA_ENDING;
2188
- }
2189
- sCDataEnding() {
2190
- const c = this.getCodeNorm();
2191
- if (c === CLOSE_BRACKET) this.state = S_CDATA_ENDING_2;
2192
- else {
2193
- this.text += `]${String.fromCodePoint(c)}`;
2194
- this.state = S_CDATA;
2195
- }
2196
- }
2197
- sCDataEnding2() {
2198
- var _a;
2199
- const c = this.getCodeNorm();
2200
- switch (c) {
2201
- case GREATER:
2202
- (_a = this.cdataHandler) === null || _a === void 0 || _a.call(this, this.text);
2203
- this.text = "";
2204
- this.state = S_TEXT;
2205
- break;
2206
- case CLOSE_BRACKET:
2207
- this.text += "]";
2208
- break;
2209
- default:
2210
- this.text += `]]${String.fromCodePoint(c)}`;
2211
- this.state = S_CDATA;
2212
- }
2213
- }
2214
- sPIFirstChar() {
2215
- const c = this.getCodeNorm();
2216
- if (this.nameStartCheck(c)) {
2217
- this.piTarget += String.fromCodePoint(c);
2218
- this.state = S_PI_REST;
2219
- } else if (c === QUESTION || isS(c)) {
2220
- this.fail("processing instruction without a target.");
2221
- this.state = c === QUESTION ? S_PI_ENDING : S_PI_BODY;
2222
- } else {
2223
- this.fail("disallowed character in processing instruction name.");
2224
- this.piTarget += String.fromCodePoint(c);
2225
- this.state = S_PI_REST;
2226
- }
2227
- }
2228
- sPIRest() {
2229
- const { chunk, i: start } = this;
2230
- while (true) {
2231
- const c = this.getCodeNorm();
2232
- if (c === EOC) {
2233
- this.piTarget += chunk.slice(start);
2234
- return;
2235
- }
2236
- if (!this.nameCheck(c)) {
2237
- this.piTarget += chunk.slice(start, this.prevI);
2238
- const isQuestion = c === QUESTION;
2239
- if (isQuestion || isS(c)) if (this.piTarget === "xml") {
2240
- if (!this.xmlDeclPossible) this.fail("an XML declaration must be at the start of the document.");
2241
- this.state = isQuestion ? S_XML_DECL_ENDING : S_XML_DECL_NAME_START;
2242
- } else this.state = isQuestion ? S_PI_ENDING : S_PI_BODY;
2243
- else {
2244
- this.fail("disallowed character in processing instruction name.");
2245
- this.piTarget += String.fromCodePoint(c);
2246
- }
2247
- break;
2248
- }
2249
- }
2250
- }
2251
- sPIBody() {
2252
- if (this.text.length === 0) {
2253
- const c = this.getCodeNorm();
2254
- if (c === QUESTION) this.state = S_PI_ENDING;
2255
- else if (!isS(c)) this.text = String.fromCodePoint(c);
2256
- } else if (this.captureToChar(QUESTION)) this.state = S_PI_ENDING;
2257
- }
2258
- sPIEnding() {
2259
- var _a;
2260
- const c = this.getCodeNorm();
2261
- if (c === GREATER) {
2262
- const { piTarget } = this;
2263
- if (piTarget.toLowerCase() === "xml") this.fail("the XML declaration must appear at the start of the document.");
2264
- (_a = this.piHandler) === null || _a === void 0 || _a.call(this, {
2265
- target: piTarget,
2266
- body: this.text
2267
- });
2268
- this.piTarget = this.text = "";
2269
- this.state = S_TEXT;
2270
- } else if (c === QUESTION) this.text += "?";
2271
- else {
2272
- this.text += `?${String.fromCodePoint(c)}`;
2273
- this.state = S_PI_BODY;
2274
- }
2275
- this.xmlDeclPossible = false;
2276
- }
2277
- sXMLDeclNameStart() {
2278
- const c = this.skipSpaces();
2279
- if (c === QUESTION) {
2280
- this.state = S_XML_DECL_ENDING;
2281
- return;
2282
- }
2283
- if (c !== EOC) {
2284
- this.state = S_XML_DECL_NAME;
2285
- this.name = String.fromCodePoint(c);
2286
- }
2287
- }
2288
- sXMLDeclName() {
2289
- const c = this.captureTo(XML_DECL_NAME_TERMINATOR);
2290
- if (c === QUESTION) {
2291
- this.state = S_XML_DECL_ENDING;
2292
- this.name += this.text;
2293
- this.text = "";
2294
- this.fail("XML declaration is incomplete.");
2295
- return;
2296
- }
2297
- if (!(isS(c) || c === EQUAL)) return;
2298
- this.name += this.text;
2299
- this.text = "";
2300
- if (!this.xmlDeclExpects.includes(this.name)) switch (this.name.length) {
2301
- case 0:
2302
- this.fail("did not expect any more name/value pairs.");
2303
- break;
2304
- case 1:
2305
- this.fail(`expected the name ${this.xmlDeclExpects[0]}.`);
2306
- break;
2307
- default: this.fail(`expected one of ${this.xmlDeclExpects.join(", ")}`);
2308
- }
2309
- this.state = c === EQUAL ? S_XML_DECL_VALUE_START : S_XML_DECL_EQ;
2310
- }
2311
- sXMLDeclEq() {
2312
- const c = this.getCodeNorm();
2313
- if (c === QUESTION) {
2314
- this.state = S_XML_DECL_ENDING;
2315
- this.fail("XML declaration is incomplete.");
2316
- return;
2317
- }
2318
- if (isS(c)) return;
2319
- if (c !== EQUAL) this.fail("value required.");
2320
- this.state = S_XML_DECL_VALUE_START;
2321
- }
2322
- sXMLDeclValueStart() {
2323
- const c = this.getCodeNorm();
2324
- if (c === QUESTION) {
2325
- this.state = S_XML_DECL_ENDING;
2326
- this.fail("XML declaration is incomplete.");
2327
- return;
2328
- }
2329
- if (isS(c)) return;
2330
- if (!isQuote(c)) {
2331
- this.fail("value must be quoted.");
2332
- this.q = SPACE;
2333
- } else this.q = c;
2334
- this.state = S_XML_DECL_VALUE;
2335
- }
2336
- sXMLDeclValue() {
2337
- const c = this.captureTo([this.q, QUESTION]);
2338
- if (c === QUESTION) {
2339
- this.state = S_XML_DECL_ENDING;
2340
- this.text = "";
2341
- this.fail("XML declaration is incomplete.");
2342
- return;
2343
- }
2344
- if (c === EOC) return;
2345
- const value = this.text;
2346
- this.text = "";
2347
- switch (this.name) {
2348
- case "version": {
2349
- this.xmlDeclExpects = ["encoding", "standalone"];
2350
- const version = value;
2351
- this.xmlDecl.version = version;
2352
- if (!/^1\.[0-9]+$/.test(version)) this.fail("version number must match /^1\\.[0-9]+$/.");
2353
- else if (!this.opt.forceXMLVersion) this.setXMLVersion(version);
2354
- break;
2355
- }
2356
- case "encoding":
2357
- if (!/^[A-Za-z][A-Za-z0-9._-]*$/.test(value)) this.fail("encoding value must match /^[A-Za-z0-9][A-Za-z0-9._-]*$/.");
2358
- this.xmlDeclExpects = ["standalone"];
2359
- this.xmlDecl.encoding = value;
2360
- break;
2361
- case "standalone":
2362
- if (value !== "yes" && value !== "no") this.fail("standalone value must match \"yes\" or \"no\".");
2363
- this.xmlDeclExpects = [];
2364
- this.xmlDecl.standalone = value;
2365
- break;
2366
- default:
2367
- }
2368
- this.name = "";
2369
- this.state = S_XML_DECL_SEPARATOR;
2370
- }
2371
- sXMLDeclSeparator() {
2372
- const c = this.getCodeNorm();
2373
- if (c === QUESTION) {
2374
- this.state = S_XML_DECL_ENDING;
2375
- return;
2376
- }
2377
- if (!isS(c)) {
2378
- this.fail("whitespace required.");
2379
- this.unget();
2380
- }
2381
- this.state = S_XML_DECL_NAME_START;
2382
- }
2383
- sXMLDeclEnding() {
2384
- var _a;
2385
- if (this.getCodeNorm() === GREATER) {
2386
- if (this.piTarget !== "xml") this.fail("processing instructions are not allowed before root.");
2387
- else if (this.name !== "version" && this.xmlDeclExpects.includes("version")) this.fail("XML declaration must contain a version.");
2388
- (_a = this.xmldeclHandler) === null || _a === void 0 || _a.call(this, this.xmlDecl);
2389
- this.name = "";
2390
- this.piTarget = this.text = "";
2391
- this.state = S_TEXT;
2392
- } else this.fail("The character ? is disallowed anywhere in XML declarations.");
2393
- this.xmlDeclPossible = false;
2394
- }
2395
- sOpenTag() {
2396
- var _a;
2397
- const c = this.captureNameChars();
2398
- if (c === EOC) return;
2399
- const tag = this.tag = {
2400
- name: this.name,
2401
- attributes: Object.create(null)
2402
- };
2403
- this.name = "";
2404
- if (this.xmlnsOpt) this.topNS = tag.ns = Object.create(null);
2405
- (_a = this.openTagStartHandler) === null || _a === void 0 || _a.call(this, tag);
2406
- this.sawRoot = true;
2407
- if (!this.fragmentOpt && this.closedRoot) this.fail("documents may contain only one root.");
2408
- switch (c) {
2409
- case GREATER:
2410
- this.openTag();
2411
- break;
2412
- case FORWARD_SLASH:
2413
- this.state = S_OPEN_TAG_SLASH;
2414
- break;
2415
- default:
2416
- if (!isS(c)) this.fail("disallowed character in tag name.");
2417
- this.state = S_ATTRIB;
2418
- }
2419
- }
2420
- sOpenTagSlash() {
2421
- if (this.getCode() === GREATER) this.openSelfClosingTag();
2422
- else {
2423
- this.fail("forward-slash in opening tag not followed by >.");
2424
- this.state = S_ATTRIB;
2425
- }
2426
- }
2427
- sAttrib() {
2428
- const c = this.skipSpaces();
2429
- if (c === EOC) return;
2430
- if (isNameStartChar(c)) {
2431
- this.unget();
2432
- this.state = S_ATTRIB_NAME;
2433
- } else if (c === GREATER) this.openTag();
2434
- else if (c === FORWARD_SLASH) this.state = S_OPEN_TAG_SLASH;
2435
- else this.fail("disallowed character in attribute name.");
2436
- }
2437
- sAttribName() {
2438
- const c = this.captureNameChars();
2439
- if (c === EQUAL) this.state = S_ATTRIB_VALUE;
2440
- else if (isS(c)) this.state = S_ATTRIB_NAME_SAW_WHITE;
2441
- else if (c === GREATER) {
2442
- this.fail("attribute without value.");
2443
- this.pushAttrib(this.name, this.name);
2444
- this.name = this.text = "";
2445
- this.openTag();
2446
- } else if (c !== EOC) this.fail("disallowed character in attribute name.");
2447
- }
2448
- sAttribNameSawWhite() {
2449
- const c = this.skipSpaces();
2450
- switch (c) {
2451
- case EOC: return;
2452
- case EQUAL:
2453
- this.state = S_ATTRIB_VALUE;
2454
- break;
2455
- default:
2456
- this.fail("attribute without value.");
2457
- this.text = "";
2458
- this.name = "";
2459
- if (c === GREATER) this.openTag();
2460
- else if (isNameStartChar(c)) {
2461
- this.unget();
2462
- this.state = S_ATTRIB_NAME;
2463
- } else {
2464
- this.fail("disallowed character in attribute name.");
2465
- this.state = S_ATTRIB;
2466
- }
2467
- }
2468
- }
2469
- sAttribValue() {
2470
- const c = this.getCodeNorm();
2471
- if (isQuote(c)) {
2472
- this.q = c;
2473
- this.state = S_ATTRIB_VALUE_QUOTED;
2474
- } else if (!isS(c)) {
2475
- this.fail("unquoted attribute value.");
2476
- this.state = S_ATTRIB_VALUE_UNQUOTED;
2477
- this.unget();
2478
- }
2479
- }
2480
- sAttribValueQuoted() {
2481
- const { q, chunk } = this;
2482
- let { i: start } = this;
2483
- while (true) switch (this.getCode()) {
2484
- case q:
2485
- this.pushAttrib(this.name, this.text + chunk.slice(start, this.prevI));
2486
- this.name = this.text = "";
2487
- this.q = null;
2488
- this.state = S_ATTRIB_VALUE_CLOSED;
2489
- return;
2490
- case AMP:
2491
- this.text += chunk.slice(start, this.prevI);
2492
- this.state = S_ENTITY;
2493
- this.entityReturnState = S_ATTRIB_VALUE_QUOTED;
2494
- return;
2495
- case NL:
2496
- case NL_LIKE:
2497
- case TAB:
2498
- this.text += `${chunk.slice(start, this.prevI)} `;
2499
- start = this.i;
2500
- break;
2501
- case LESS:
2502
- this.text += chunk.slice(start, this.prevI);
2503
- this.fail("disallowed character.");
2504
- return;
2505
- case EOC:
2506
- this.text += chunk.slice(start);
2507
- return;
2508
- default:
2509
- }
2510
- }
2511
- sAttribValueClosed() {
2512
- const c = this.getCodeNorm();
2513
- if (isS(c)) this.state = S_ATTRIB;
2514
- else if (c === GREATER) this.openTag();
2515
- else if (c === FORWARD_SLASH) this.state = S_OPEN_TAG_SLASH;
2516
- else if (isNameStartChar(c)) {
2517
- this.fail("no whitespace between attributes.");
2518
- this.unget();
2519
- this.state = S_ATTRIB_NAME;
2520
- } else this.fail("disallowed character in attribute name.");
2521
- }
2522
- sAttribValueUnquoted() {
2523
- const c = this.captureTo(ATTRIB_VALUE_UNQUOTED_TERMINATOR);
2524
- switch (c) {
2525
- case AMP:
2526
- this.state = S_ENTITY;
2527
- this.entityReturnState = S_ATTRIB_VALUE_UNQUOTED;
2528
- break;
2529
- case LESS:
2530
- this.fail("disallowed character.");
2531
- break;
2532
- case EOC: break;
2533
- default:
2534
- if (this.text.includes("]]>")) this.fail("the string \"]]>\" is disallowed in char data.");
2535
- this.pushAttrib(this.name, this.text);
2536
- this.name = this.text = "";
2537
- if (c === GREATER) this.openTag();
2538
- else this.state = S_ATTRIB;
2539
- }
2540
- }
2541
- sCloseTag() {
2542
- const c = this.captureNameChars();
2543
- if (c === GREATER) this.closeTag();
2544
- else if (isS(c)) this.state = S_CLOSE_TAG_SAW_WHITE;
2545
- else if (c !== EOC) this.fail("disallowed character in closing tag.");
2546
- }
2547
- sCloseTagSawWhite() {
2548
- switch (this.skipSpaces()) {
2549
- case GREATER:
2550
- this.closeTag();
2551
- break;
2552
- case EOC: break;
2553
- default: this.fail("disallowed character in closing tag.");
2554
- }
2555
- }
2556
- handleTextInRoot() {
2557
- let { i: start, forbiddenState } = this;
2558
- const { chunk, textHandler: handler } = this;
2559
- scanLoop: while (true) switch (this.getCode()) {
2560
- case LESS:
2561
- this.state = S_OPEN_WAKA;
2562
- if (handler !== void 0) {
2563
- const { text } = this;
2564
- const slice = chunk.slice(start, this.prevI);
2565
- if (text.length !== 0) {
2566
- handler(text + slice);
2567
- this.text = "";
2568
- } else if (slice.length !== 0) handler(slice);
2569
- }
2570
- forbiddenState = FORBIDDEN_START;
2571
- break scanLoop;
2572
- case AMP:
2573
- this.state = S_ENTITY;
2574
- this.entityReturnState = S_TEXT;
2575
- if (handler !== void 0) this.text += chunk.slice(start, this.prevI);
2576
- forbiddenState = FORBIDDEN_START;
2577
- break scanLoop;
2578
- case CLOSE_BRACKET:
2579
- switch (forbiddenState) {
2580
- case FORBIDDEN_START:
2581
- forbiddenState = FORBIDDEN_BRACKET;
2582
- break;
2583
- case FORBIDDEN_BRACKET:
2584
- forbiddenState = FORBIDDEN_BRACKET_BRACKET;
2585
- break;
2586
- case FORBIDDEN_BRACKET_BRACKET: break;
2587
- default: throw new Error("impossible state");
2588
- }
2589
- break;
2590
- case GREATER:
2591
- if (forbiddenState === FORBIDDEN_BRACKET_BRACKET) this.fail("the string \"]]>\" is disallowed in char data.");
2592
- forbiddenState = FORBIDDEN_START;
2593
- break;
2594
- case NL_LIKE:
2595
- if (handler !== void 0) this.text += `${chunk.slice(start, this.prevI)}\n`;
2596
- start = this.i;
2597
- forbiddenState = FORBIDDEN_START;
2598
- break;
2599
- case EOC:
2600
- if (handler !== void 0) this.text += chunk.slice(start);
2601
- break scanLoop;
2602
- default: forbiddenState = FORBIDDEN_START;
2603
- }
2604
- this.forbiddenState = forbiddenState;
2605
- }
2606
- handleTextOutsideRoot() {
2607
- let { i: start } = this;
2608
- const { chunk, textHandler: handler } = this;
2609
- let nonSpace = false;
2610
- outRootLoop: while (true) {
2611
- const code = this.getCode();
2612
- switch (code) {
2613
- case LESS:
2614
- this.state = S_OPEN_WAKA;
2615
- if (handler !== void 0) {
2616
- const { text } = this;
2617
- const slice = chunk.slice(start, this.prevI);
2618
- if (text.length !== 0) {
2619
- handler(text + slice);
2620
- this.text = "";
2621
- } else if (slice.length !== 0) handler(slice);
2622
- }
2623
- break outRootLoop;
2624
- case AMP:
2625
- this.state = S_ENTITY;
2626
- this.entityReturnState = S_TEXT;
2627
- if (handler !== void 0) this.text += chunk.slice(start, this.prevI);
2628
- nonSpace = true;
2629
- break outRootLoop;
2630
- case NL_LIKE:
2631
- if (handler !== void 0) this.text += `${chunk.slice(start, this.prevI)}\n`;
2632
- start = this.i;
2633
- break;
2634
- case EOC:
2635
- if (handler !== void 0) this.text += chunk.slice(start);
2636
- break outRootLoop;
2637
- default: if (!isS(code)) nonSpace = true;
2638
- }
2639
- }
2640
- if (!nonSpace) return;
2641
- if (!this.sawRoot && !this.reportedTextBeforeRoot) {
2642
- this.fail("text data outside of root node.");
2643
- this.reportedTextBeforeRoot = true;
2644
- }
2645
- if (this.closedRoot && !this.reportedTextAfterRoot) {
2646
- this.fail("text data outside of root node.");
2647
- this.reportedTextAfterRoot = true;
2648
- }
2649
- }
2650
- pushAttribNS(name, value) {
2651
- var _a;
2652
- const { prefix, local } = this.qname(name);
2653
- const attr = {
2654
- name,
2655
- prefix,
2656
- local,
2657
- value
2658
- };
2659
- this.attribList.push(attr);
2660
- (_a = this.attributeHandler) === null || _a === void 0 || _a.call(this, attr);
2661
- if (prefix === "xmlns") {
2662
- const trimmed = value.trim();
2663
- if (this.currentXMLVersion === "1.0" && trimmed === "") this.fail("invalid attempt to undefine prefix in XML 1.0");
2664
- this.topNS[local] = trimmed;
2665
- nsPairCheck(this, local, trimmed);
2666
- } else if (name === "xmlns") {
2667
- const trimmed = value.trim();
2668
- this.topNS[""] = trimmed;
2669
- nsPairCheck(this, "", trimmed);
2670
- }
2671
- }
2672
- pushAttribPlain(name, value) {
2673
- var _a;
2674
- const attr = {
2675
- name,
2676
- value
2677
- };
2678
- this.attribList.push(attr);
2679
- (_a = this.attributeHandler) === null || _a === void 0 || _a.call(this, attr);
2680
- }
2681
- /**
2682
- * End parsing. This performs final well-formedness checks and resets the
2683
- * parser to a clean state.
2684
- *
2685
- * @returns this
2686
- */
2687
- end() {
2688
- var _a, _b;
2689
- if (!this.sawRoot) this.fail("document must contain a root element.");
2690
- const { tags } = this;
2691
- while (tags.length > 0) {
2692
- const tag = tags.pop();
2693
- this.fail(`unclosed tag: ${tag.name}`);
2694
- }
2695
- if (this.state !== S_BEGIN && this.state !== S_TEXT) this.fail("unexpected end.");
2696
- const { text } = this;
2697
- if (text.length !== 0) {
2698
- (_a = this.textHandler) === null || _a === void 0 || _a.call(this, text);
2699
- this.text = "";
2700
- }
2701
- this._closed = true;
2702
- (_b = this.endHandler) === null || _b === void 0 || _b.call(this);
2703
- this._init();
2704
- return this;
2705
- }
2706
- /**
2707
- * Resolve a namespace prefix.
2708
- *
2709
- * @param prefix The prefix to resolve.
2710
- *
2711
- * @returns The namespace URI or ``undefined`` if the prefix is not defined.
2712
- */
2713
- resolve(prefix) {
2714
- var _a, _b;
2715
- let uri = this.topNS[prefix];
2716
- if (uri !== void 0) return uri;
2717
- const { tags } = this;
2718
- for (let index = tags.length - 1; index >= 0; index--) {
2719
- uri = tags[index].ns[prefix];
2720
- if (uri !== void 0) return uri;
2721
- }
2722
- uri = this.ns[prefix];
2723
- if (uri !== void 0) return uri;
2724
- return (_b = (_a = this.opt).resolvePrefix) === null || _b === void 0 ? void 0 : _b.call(_a, prefix);
2725
- }
2726
- /**
2727
- * Parse a qname into its prefix and local name parts.
2728
- *
2729
- * @param name The name to parse
2730
- *
2731
- * @returns
2732
- */
2733
- qname(name) {
2734
- const colon = name.indexOf(":");
2735
- if (colon === -1) return {
2736
- prefix: "",
2737
- local: name
2738
- };
2739
- const local = name.slice(colon + 1);
2740
- const prefix = name.slice(0, colon);
2741
- if (prefix === "" || local === "" || local.includes(":")) this.fail(`malformed name: ${name}.`);
2742
- return {
2743
- prefix,
2744
- local
2745
- };
2746
- }
2747
- processAttribsNS() {
2748
- var _a;
2749
- const { attribList } = this;
2750
- const tag = this.tag;
2751
- {
2752
- const { prefix, local } = this.qname(tag.name);
2753
- tag.prefix = prefix;
2754
- tag.local = local;
2755
- const uri = tag.uri = (_a = this.resolve(prefix)) !== null && _a !== void 0 ? _a : "";
2756
- if (prefix !== "") {
2757
- if (prefix === "xmlns") this.fail("tags may not have \"xmlns\" as prefix.");
2758
- if (uri === "") {
2759
- this.fail(`unbound namespace prefix: ${JSON.stringify(prefix)}.`);
2760
- tag.uri = prefix;
2761
- }
2762
- }
2763
- }
2764
- if (attribList.length === 0) return;
2765
- const { attributes } = tag;
2766
- const seen = /* @__PURE__ */ new Set();
2767
- for (const attr of attribList) {
2768
- const { name, prefix, local } = attr;
2769
- let uri;
2770
- let eqname;
2771
- if (prefix === "") {
2772
- uri = name === "xmlns" ? XMLNS_NAMESPACE : "";
2773
- eqname = name;
2774
- } else {
2775
- uri = this.resolve(prefix);
2776
- if (uri === void 0) {
2777
- this.fail(`unbound namespace prefix: ${JSON.stringify(prefix)}.`);
2778
- uri = prefix;
2779
- }
2780
- eqname = `{${uri}}${local}`;
2781
- }
2782
- if (seen.has(eqname)) this.fail(`duplicate attribute: ${eqname}.`);
2783
- seen.add(eqname);
2784
- attr.uri = uri;
2785
- attributes[name] = attr;
2786
- }
2787
- this.attribList = [];
2788
- }
2789
- processAttribsPlain() {
2790
- const { attribList } = this;
2791
- const attributes = this.tag.attributes;
2792
- for (const { name, value } of attribList) {
2793
- if (attributes[name] !== void 0) this.fail(`duplicate attribute: ${name}.`);
2794
- attributes[name] = value;
2795
- }
2796
- this.attribList = [];
2797
- }
2798
- /**
2799
- * Handle a complete open tag. This parser code calls this once it has seen
2800
- * the whole tag. This method checks for well-formeness and then emits
2801
- * ``onopentag``.
2802
- */
2803
- openTag() {
2804
- var _a;
2805
- this.processAttribs();
2806
- const { tags } = this;
2807
- const tag = this.tag;
2808
- tag.isSelfClosing = false;
2809
- (_a = this.openTagHandler) === null || _a === void 0 || _a.call(this, tag);
2810
- tags.push(tag);
2811
- this.state = S_TEXT;
2812
- this.name = "";
2813
- }
2814
- /**
2815
- * Handle a complete self-closing tag. This parser code calls this once it has
2816
- * seen the whole tag. This method checks for well-formeness and then emits
2817
- * ``onopentag`` and ``onclosetag``.
2818
- */
2819
- openSelfClosingTag() {
2820
- var _a, _b, _c;
2821
- this.processAttribs();
2822
- const { tags } = this;
2823
- const tag = this.tag;
2824
- tag.isSelfClosing = true;
2825
- (_a = this.openTagHandler) === null || _a === void 0 || _a.call(this, tag);
2826
- (_b = this.closeTagHandler) === null || _b === void 0 || _b.call(this, tag);
2827
- if ((this.tag = (_c = tags[tags.length - 1]) !== null && _c !== void 0 ? _c : null) === null) this.closedRoot = true;
2828
- this.state = S_TEXT;
2829
- this.name = "";
2830
- }
2831
- /**
2832
- * Handle a complete close tag. This parser code calls this once it has seen
2833
- * the whole tag. This method checks for well-formeness and then emits
2834
- * ``onclosetag``.
2835
- */
2836
- closeTag() {
2837
- const { tags, name } = this;
2838
- this.state = S_TEXT;
2839
- this.name = "";
2840
- if (name === "") {
2841
- this.fail("weird empty close tag.");
2842
- this.text += "</>";
2843
- return;
2844
- }
2845
- const handler = this.closeTagHandler;
2846
- let l = tags.length;
2847
- while (l-- > 0) {
2848
- const tag = this.tag = tags.pop();
2849
- this.topNS = tag.ns;
2850
- handler === null || handler === void 0 || handler(tag);
2851
- if (tag.name === name) break;
2852
- this.fail("unexpected close tag.");
2853
- }
2854
- if (l === 0) this.closedRoot = true;
2855
- else if (l < 0) {
2856
- this.fail(`unmatched closing tag: ${name}.`);
2857
- this.text += `</${name}>`;
2858
- }
2859
- }
2860
- /**
2861
- * Resolves an entity. Makes any necessary well-formedness checks.
2862
- *
2863
- * @param entity The entity to resolve.
2864
- *
2865
- * @returns The parsed entity.
2866
- */
2867
- parseEntity(entity) {
2868
- if (entity[0] !== "#") {
2869
- const defined = this.ENTITIES[entity];
2870
- if (defined !== void 0) return defined;
2871
- this.fail(this.isName(entity) ? "undefined entity." : "disallowed character in entity name.");
2872
- return `&${entity};`;
2873
- }
2874
- let num = NaN;
2875
- if (entity[1] === "x" && /^#x[0-9a-f]+$/i.test(entity)) num = parseInt(entity.slice(2), 16);
2876
- else if (/^#[0-9]+$/.test(entity)) num = parseInt(entity.slice(1), 10);
2877
- if (!this.isChar(num)) {
2878
- this.fail("malformed character entity.");
2879
- return `&${entity};`;
2880
- }
2881
- return String.fromCodePoint(num);
2882
- }
2883
- };
2884
- exports.SaxesParser = SaxesParser;
2885
- }));
2886
- //#endregion
2887
- //#region node_modules/.pnpm/fd-slicer@1.1.0/node_modules/fd-slicer/index.js
2888
- var require_fd_slicer = /* @__PURE__ */ __commonJSMin(((exports) => {
2889
- var fs$2 = __require("fs");
2890
- var util$1 = __require("util");
2891
- var stream = __require("stream");
2892
- var Readable = stream.Readable;
2893
- var Writable = stream.Writable;
2894
- var PassThrough = stream.PassThrough;
2895
- var Pend = require_pend();
2896
- var EventEmitter$1 = __require("events").EventEmitter;
2897
- exports.createFromBuffer = createFromBuffer;
2898
- exports.createFromFd = createFromFd;
2899
- exports.BufferSlicer = BufferSlicer;
2900
- exports.FdSlicer = FdSlicer;
2901
- util$1.inherits(FdSlicer, EventEmitter$1);
2902
- function FdSlicer(fd, options) {
2903
- options = options || {};
2904
- EventEmitter$1.call(this);
2905
- this.fd = fd;
2906
- this.pend = new Pend();
2907
- this.pend.max = 1;
2908
- this.refCount = 0;
2909
- this.autoClose = !!options.autoClose;
2910
- }
2911
- FdSlicer.prototype.read = function(buffer, offset, length, position, callback) {
2912
- var self = this;
2913
- self.pend.go(function(cb) {
2914
- fs$2.read(self.fd, buffer, offset, length, position, function(err, bytesRead, buffer) {
2915
- cb();
2916
- callback(err, bytesRead, buffer);
2917
- });
2918
- });
2919
- };
2920
- FdSlicer.prototype.write = function(buffer, offset, length, position, callback) {
2921
- var self = this;
2922
- self.pend.go(function(cb) {
2923
- fs$2.write(self.fd, buffer, offset, length, position, function(err, written, buffer) {
2924
- cb();
2925
- callback(err, written, buffer);
2926
- });
2927
- });
2928
- };
2929
- FdSlicer.prototype.createReadStream = function(options) {
2930
- return new ReadStream(this, options);
2931
- };
2932
- FdSlicer.prototype.createWriteStream = function(options) {
2933
- return new WriteStream(this, options);
2934
- };
2935
- FdSlicer.prototype.ref = function() {
2936
- this.refCount += 1;
2937
- };
2938
- FdSlicer.prototype.unref = function() {
2939
- var self = this;
2940
- self.refCount -= 1;
2941
- if (self.refCount > 0) return;
2942
- if (self.refCount < 0) throw new Error("invalid unref");
2943
- if (self.autoClose) fs$2.close(self.fd, onCloseDone);
2944
- function onCloseDone(err) {
2945
- if (err) self.emit("error", err);
2946
- else self.emit("close");
2947
- }
2948
- };
2949
- util$1.inherits(ReadStream, Readable);
2950
- function ReadStream(context, options) {
2951
- options = options || {};
2952
- Readable.call(this, options);
2953
- this.context = context;
2954
- this.context.ref();
2955
- this.start = options.start || 0;
2956
- this.endOffset = options.end;
2957
- this.pos = this.start;
2958
- this.destroyed = false;
2959
- }
2960
- ReadStream.prototype._read = function(n) {
2961
- var self = this;
2962
- if (self.destroyed) return;
2963
- var toRead = Math.min(self._readableState.highWaterMark, n);
2964
- if (self.endOffset != null) toRead = Math.min(toRead, self.endOffset - self.pos);
2965
- if (toRead <= 0) {
2966
- self.destroyed = true;
2967
- self.push(null);
2968
- self.context.unref();
2969
- return;
2970
- }
2971
- self.context.pend.go(function(cb) {
2972
- if (self.destroyed) return cb();
2973
- var buffer = new Buffer(toRead);
2974
- fs$2.read(self.context.fd, buffer, 0, toRead, self.pos, function(err, bytesRead) {
2975
- if (err) self.destroy(err);
2976
- else if (bytesRead === 0) {
2977
- self.destroyed = true;
2978
- self.push(null);
2979
- self.context.unref();
2980
- } else {
2981
- self.pos += bytesRead;
2982
- self.push(buffer.slice(0, bytesRead));
2983
- }
2984
- cb();
2985
- });
2986
- });
2987
- };
2988
- ReadStream.prototype.destroy = function(err) {
2989
- if (this.destroyed) return;
2990
- err = err || /* @__PURE__ */ new Error("stream destroyed");
2991
- this.destroyed = true;
2992
- this.emit("error", err);
2993
- this.context.unref();
2994
- };
2995
- util$1.inherits(WriteStream, Writable);
2996
- function WriteStream(context, options) {
2997
- options = options || {};
2998
- Writable.call(this, options);
2999
- this.context = context;
3000
- this.context.ref();
3001
- this.start = options.start || 0;
3002
- this.endOffset = options.end == null ? Infinity : +options.end;
3003
- this.bytesWritten = 0;
3004
- this.pos = this.start;
3005
- this.destroyed = false;
3006
- this.on("finish", this.destroy.bind(this));
3007
- }
3008
- WriteStream.prototype._write = function(buffer, encoding, callback) {
3009
- var self = this;
3010
- if (self.destroyed) return;
3011
- if (self.pos + buffer.length > self.endOffset) {
3012
- var err = /* @__PURE__ */ new Error("maximum file length exceeded");
3013
- err.code = "ETOOBIG";
3014
- self.destroy();
3015
- callback(err);
3016
- return;
3017
- }
3018
- self.context.pend.go(function(cb) {
3019
- if (self.destroyed) return cb();
3020
- fs$2.write(self.context.fd, buffer, 0, buffer.length, self.pos, function(err, bytes) {
3021
- if (err) {
3022
- self.destroy();
3023
- cb();
3024
- callback(err);
3025
- } else {
3026
- self.bytesWritten += bytes;
3027
- self.pos += bytes;
3028
- self.emit("progress");
3029
- cb();
3030
- callback();
3031
- }
3032
- });
3033
- });
3034
- };
3035
- WriteStream.prototype.destroy = function() {
3036
- if (this.destroyed) return;
3037
- this.destroyed = true;
3038
- this.context.unref();
3039
- };
3040
- util$1.inherits(BufferSlicer, EventEmitter$1);
3041
- function BufferSlicer(buffer, options) {
3042
- EventEmitter$1.call(this);
3043
- options = options || {};
3044
- this.refCount = 0;
3045
- this.buffer = buffer;
3046
- this.maxChunkSize = options.maxChunkSize || Number.MAX_SAFE_INTEGER;
3047
- }
3048
- BufferSlicer.prototype.read = function(buffer, offset, length, position, callback) {
3049
- var end = position + length;
3050
- var delta = end - this.buffer.length;
3051
- var written = delta > 0 ? delta : length;
3052
- this.buffer.copy(buffer, offset, position, end);
3053
- setImmediate(function() {
3054
- callback(null, written);
3055
- });
3056
- };
3057
- BufferSlicer.prototype.write = function(buffer, offset, length, position, callback) {
3058
- buffer.copy(this.buffer, position, offset, offset + length);
3059
- setImmediate(function() {
3060
- callback(null, length, buffer);
3061
- });
3062
- };
3063
- BufferSlicer.prototype.createReadStream = function(options) {
3064
- options = options || {};
3065
- var readStream = new PassThrough(options);
3066
- readStream.destroyed = false;
3067
- readStream.start = options.start || 0;
3068
- readStream.endOffset = options.end;
3069
- readStream.pos = readStream.endOffset || this.buffer.length;
3070
- var entireSlice = this.buffer.slice(readStream.start, readStream.pos);
3071
- var offset = 0;
3072
- while (true) {
3073
- var nextOffset = offset + this.maxChunkSize;
3074
- if (nextOffset >= entireSlice.length) {
3075
- if (offset < entireSlice.length) readStream.write(entireSlice.slice(offset, entireSlice.length));
3076
- break;
3077
- }
3078
- readStream.write(entireSlice.slice(offset, nextOffset));
3079
- offset = nextOffset;
3080
- }
3081
- readStream.end();
3082
- readStream.destroy = function() {
3083
- readStream.destroyed = true;
3084
- };
3085
- return readStream;
3086
- };
3087
- BufferSlicer.prototype.createWriteStream = function(options) {
3088
- var bufferSlicer = this;
3089
- options = options || {};
3090
- var writeStream = new Writable(options);
3091
- writeStream.start = options.start || 0;
3092
- writeStream.endOffset = options.end == null ? this.buffer.length : +options.end;
3093
- writeStream.bytesWritten = 0;
3094
- writeStream.pos = writeStream.start;
3095
- writeStream.destroyed = false;
3096
- writeStream._write = function(buffer, encoding, callback) {
3097
- if (writeStream.destroyed) return;
3098
- var end = writeStream.pos + buffer.length;
3099
- if (end > writeStream.endOffset) {
3100
- var err = /* @__PURE__ */ new Error("maximum file length exceeded");
3101
- err.code = "ETOOBIG";
3102
- writeStream.destroyed = true;
3103
- callback(err);
3104
- return;
3105
- }
3106
- buffer.copy(bufferSlicer.buffer, writeStream.pos, 0, buffer.length);
3107
- writeStream.bytesWritten += buffer.length;
3108
- writeStream.pos = end;
3109
- writeStream.emit("progress");
3110
- callback();
3111
- };
3112
- writeStream.destroy = function() {
3113
- writeStream.destroyed = true;
3114
- };
3115
- return writeStream;
3116
- };
3117
- BufferSlicer.prototype.ref = function() {
3118
- this.refCount += 1;
3119
- };
3120
- BufferSlicer.prototype.unref = function() {
3121
- this.refCount -= 1;
3122
- if (this.refCount < 0) throw new Error("invalid unref");
3123
- };
3124
- function createFromBuffer(buffer, options) {
3125
- return new BufferSlicer(buffer, options);
3126
- }
3127
- function createFromFd(fd, options) {
3128
- return new FdSlicer(fd, options);
3129
- }
3130
- }));
3131
- //#endregion
3132
- //#region node_modules/.pnpm/yauzl@2.10.0/node_modules/yauzl/index.js
3133
- var require_yauzl = /* @__PURE__ */ __commonJSMin(((exports) => {
3134
- var fs$1 = __require("fs");
3135
- var zlib = __require("zlib");
3136
- var fd_slicer = require_fd_slicer();
3137
- var crc32 = require_buffer_crc32();
3138
- var util = __require("util");
3139
- var EventEmitter = __require("events").EventEmitter;
3140
- var Transform = __require("stream").Transform;
3141
- var PassThrough = __require("stream").PassThrough;
3142
- var Writable = __require("stream").Writable;
3143
- exports.open = open;
3144
- exports.fromFd = fromFd;
3145
- exports.fromBuffer = fromBuffer;
3146
- exports.fromRandomAccessReader = fromRandomAccessReader;
3147
- exports.dosDateTimeToDate = dosDateTimeToDate;
3148
- exports.validateFileName = validateFileName;
3149
- exports.ZipFile = ZipFile;
3150
- exports.Entry = Entry;
3151
- exports.RandomAccessReader = RandomAccessReader;
3152
- function open(path, options, callback) {
3153
- if (typeof options === "function") {
3154
- callback = options;
3155
- options = null;
3156
- }
3157
- if (options == null) options = {};
3158
- if (options.autoClose == null) options.autoClose = true;
3159
- if (options.lazyEntries == null) options.lazyEntries = false;
3160
- if (options.decodeStrings == null) options.decodeStrings = true;
3161
- if (options.validateEntrySizes == null) options.validateEntrySizes = true;
3162
- if (options.strictFileNames == null) options.strictFileNames = false;
3163
- if (callback == null) callback = defaultCallback;
3164
- fs$1.open(path, "r", function(err, fd) {
3165
- if (err) return callback(err);
3166
- fromFd(fd, options, function(err, zipfile) {
3167
- if (err) fs$1.close(fd, defaultCallback);
3168
- callback(err, zipfile);
3169
- });
3170
- });
3171
- }
3172
- function fromFd(fd, options, callback) {
3173
- if (typeof options === "function") {
3174
- callback = options;
3175
- options = null;
3176
- }
3177
- if (options == null) options = {};
3178
- if (options.autoClose == null) options.autoClose = false;
3179
- if (options.lazyEntries == null) options.lazyEntries = false;
3180
- if (options.decodeStrings == null) options.decodeStrings = true;
3181
- if (options.validateEntrySizes == null) options.validateEntrySizes = true;
3182
- if (options.strictFileNames == null) options.strictFileNames = false;
3183
- if (callback == null) callback = defaultCallback;
3184
- fs$1.fstat(fd, function(err, stats) {
3185
- if (err) return callback(err);
3186
- fromRandomAccessReader(fd_slicer.createFromFd(fd, { autoClose: true }), stats.size, options, callback);
3187
- });
3188
- }
3189
- function fromBuffer(buffer, options, callback) {
3190
- if (typeof options === "function") {
3191
- callback = options;
3192
- options = null;
3193
- }
3194
- if (options == null) options = {};
3195
- options.autoClose = false;
3196
- if (options.lazyEntries == null) options.lazyEntries = false;
3197
- if (options.decodeStrings == null) options.decodeStrings = true;
3198
- if (options.validateEntrySizes == null) options.validateEntrySizes = true;
3199
- if (options.strictFileNames == null) options.strictFileNames = false;
3200
- fromRandomAccessReader(fd_slicer.createFromBuffer(buffer, { maxChunkSize: 65536 }), buffer.length, options, callback);
3201
- }
3202
- function fromRandomAccessReader(reader, totalSize, options, callback) {
3203
- if (typeof options === "function") {
3204
- callback = options;
3205
- options = null;
3206
- }
3207
- if (options == null) options = {};
3208
- if (options.autoClose == null) options.autoClose = true;
3209
- if (options.lazyEntries == null) options.lazyEntries = false;
3210
- if (options.decodeStrings == null) options.decodeStrings = true;
3211
- var decodeStrings = !!options.decodeStrings;
3212
- if (options.validateEntrySizes == null) options.validateEntrySizes = true;
3213
- if (options.strictFileNames == null) options.strictFileNames = false;
3214
- if (callback == null) callback = defaultCallback;
3215
- if (typeof totalSize !== "number") throw new Error("expected totalSize parameter to be a number");
3216
- if (totalSize > Number.MAX_SAFE_INTEGER) throw new Error("zip file too large. only file sizes up to 2^52 are supported due to JavaScript's Number type being an IEEE 754 double.");
3217
- reader.ref();
3218
- var eocdrWithoutCommentSize = 22;
3219
- var bufferSize = Math.min(eocdrWithoutCommentSize + 65535, totalSize);
3220
- var buffer = newBuffer(bufferSize);
3221
- var bufferReadStart = totalSize - buffer.length;
3222
- readAndAssertNoEof(reader, buffer, 0, bufferSize, bufferReadStart, function(err) {
3223
- if (err) return callback(err);
3224
- for (var i = bufferSize - eocdrWithoutCommentSize; i >= 0; i -= 1) {
3225
- if (buffer.readUInt32LE(i) !== 101010256) continue;
3226
- var eocdrBuffer = buffer.slice(i);
3227
- var diskNumber = eocdrBuffer.readUInt16LE(4);
3228
- if (diskNumber !== 0) return callback(/* @__PURE__ */ new Error("multi-disk zip files are not supported: found disk number: " + diskNumber));
3229
- var entryCount = eocdrBuffer.readUInt16LE(10);
3230
- var centralDirectoryOffset = eocdrBuffer.readUInt32LE(16);
3231
- var commentLength = eocdrBuffer.readUInt16LE(20);
3232
- var expectedCommentLength = eocdrBuffer.length - eocdrWithoutCommentSize;
3233
- if (commentLength !== expectedCommentLength) return callback(/* @__PURE__ */ new Error("invalid comment length. expected: " + expectedCommentLength + ". found: " + commentLength));
3234
- var comment = decodeStrings ? decodeBuffer(eocdrBuffer, 22, eocdrBuffer.length, false) : eocdrBuffer.slice(22);
3235
- if (!(entryCount === 65535 || centralDirectoryOffset === 4294967295)) return callback(null, new ZipFile(reader, centralDirectoryOffset, totalSize, entryCount, comment, options.autoClose, options.lazyEntries, decodeStrings, options.validateEntrySizes, options.strictFileNames));
3236
- var zip64EocdlBuffer = newBuffer(20);
3237
- var zip64EocdlOffset = bufferReadStart + i - zip64EocdlBuffer.length;
3238
- readAndAssertNoEof(reader, zip64EocdlBuffer, 0, zip64EocdlBuffer.length, zip64EocdlOffset, function(err) {
3239
- if (err) return callback(err);
3240
- if (zip64EocdlBuffer.readUInt32LE(0) !== 117853008) return callback(/* @__PURE__ */ new Error("invalid zip64 end of central directory locator signature"));
3241
- var zip64EocdrOffset = readUInt64LE(zip64EocdlBuffer, 8);
3242
- var zip64EocdrBuffer = newBuffer(56);
3243
- readAndAssertNoEof(reader, zip64EocdrBuffer, 0, zip64EocdrBuffer.length, zip64EocdrOffset, function(err) {
3244
- if (err) return callback(err);
3245
- if (zip64EocdrBuffer.readUInt32LE(0) !== 101075792) return callback(/* @__PURE__ */ new Error("invalid zip64 end of central directory record signature"));
3246
- entryCount = readUInt64LE(zip64EocdrBuffer, 32);
3247
- centralDirectoryOffset = readUInt64LE(zip64EocdrBuffer, 48);
3248
- return callback(null, new ZipFile(reader, centralDirectoryOffset, totalSize, entryCount, comment, options.autoClose, options.lazyEntries, decodeStrings, options.validateEntrySizes, options.strictFileNames));
3249
- });
3250
- });
3251
- return;
3252
- }
3253
- callback(/* @__PURE__ */ new Error("end of central directory record signature not found"));
3254
- });
3255
- }
3256
- util.inherits(ZipFile, EventEmitter);
3257
- function ZipFile(reader, centralDirectoryOffset, fileSize, entryCount, comment, autoClose, lazyEntries, decodeStrings, validateEntrySizes, strictFileNames) {
3258
- var self = this;
3259
- EventEmitter.call(self);
3260
- self.reader = reader;
3261
- self.reader.on("error", function(err) {
3262
- emitError(self, err);
3263
- });
3264
- self.reader.once("close", function() {
3265
- self.emit("close");
3266
- });
3267
- self.readEntryCursor = centralDirectoryOffset;
3268
- self.fileSize = fileSize;
3269
- self.entryCount = entryCount;
3270
- self.comment = comment;
3271
- self.entriesRead = 0;
3272
- self.autoClose = !!autoClose;
3273
- self.lazyEntries = !!lazyEntries;
3274
- self.decodeStrings = !!decodeStrings;
3275
- self.validateEntrySizes = !!validateEntrySizes;
3276
- self.strictFileNames = !!strictFileNames;
3277
- self.isOpen = true;
3278
- self.emittedError = false;
3279
- if (!self.lazyEntries) self._readEntry();
3280
- }
3281
- ZipFile.prototype.close = function() {
3282
- if (!this.isOpen) return;
3283
- this.isOpen = false;
3284
- this.reader.unref();
3285
- };
3286
- function emitErrorAndAutoClose(self, err) {
3287
- if (self.autoClose) self.close();
3288
- emitError(self, err);
3289
- }
3290
- function emitError(self, err) {
3291
- if (self.emittedError) return;
3292
- self.emittedError = true;
3293
- self.emit("error", err);
3294
- }
3295
- ZipFile.prototype.readEntry = function() {
3296
- if (!this.lazyEntries) throw new Error("readEntry() called without lazyEntries:true");
3297
- this._readEntry();
3298
- };
3299
- ZipFile.prototype._readEntry = function() {
3300
- var self = this;
3301
- if (self.entryCount === self.entriesRead) {
3302
- setImmediate(function() {
3303
- if (self.autoClose) self.close();
3304
- if (self.emittedError) return;
3305
- self.emit("end");
3306
- });
3307
- return;
3308
- }
3309
- if (self.emittedError) return;
3310
- var buffer = newBuffer(46);
3311
- readAndAssertNoEof(self.reader, buffer, 0, buffer.length, self.readEntryCursor, function(err) {
3312
- if (err) return emitErrorAndAutoClose(self, err);
3313
- if (self.emittedError) return;
3314
- var entry = new Entry();
3315
- var signature = buffer.readUInt32LE(0);
3316
- if (signature !== 33639248) return emitErrorAndAutoClose(self, /* @__PURE__ */ new Error("invalid central directory file header signature: 0x" + signature.toString(16)));
3317
- entry.versionMadeBy = buffer.readUInt16LE(4);
3318
- entry.versionNeededToExtract = buffer.readUInt16LE(6);
3319
- entry.generalPurposeBitFlag = buffer.readUInt16LE(8);
3320
- entry.compressionMethod = buffer.readUInt16LE(10);
3321
- entry.lastModFileTime = buffer.readUInt16LE(12);
3322
- entry.lastModFileDate = buffer.readUInt16LE(14);
3323
- entry.crc32 = buffer.readUInt32LE(16);
3324
- entry.compressedSize = buffer.readUInt32LE(20);
3325
- entry.uncompressedSize = buffer.readUInt32LE(24);
3326
- entry.fileNameLength = buffer.readUInt16LE(28);
3327
- entry.extraFieldLength = buffer.readUInt16LE(30);
3328
- entry.fileCommentLength = buffer.readUInt16LE(32);
3329
- entry.internalFileAttributes = buffer.readUInt16LE(36);
3330
- entry.externalFileAttributes = buffer.readUInt32LE(38);
3331
- entry.relativeOffsetOfLocalHeader = buffer.readUInt32LE(42);
3332
- if (entry.generalPurposeBitFlag & 64) return emitErrorAndAutoClose(self, /* @__PURE__ */ new Error("strong encryption is not supported"));
3333
- self.readEntryCursor += 46;
3334
- buffer = newBuffer(entry.fileNameLength + entry.extraFieldLength + entry.fileCommentLength);
3335
- readAndAssertNoEof(self.reader, buffer, 0, buffer.length, self.readEntryCursor, function(err) {
3336
- if (err) return emitErrorAndAutoClose(self, err);
3337
- if (self.emittedError) return;
3338
- var isUtf8 = (entry.generalPurposeBitFlag & 2048) !== 0;
3339
- entry.fileName = self.decodeStrings ? decodeBuffer(buffer, 0, entry.fileNameLength, isUtf8) : buffer.slice(0, entry.fileNameLength);
3340
- var fileCommentStart = entry.fileNameLength + entry.extraFieldLength;
3341
- var extraFieldBuffer = buffer.slice(entry.fileNameLength, fileCommentStart);
3342
- entry.extraFields = [];
3343
- var i = 0;
3344
- while (i < extraFieldBuffer.length - 3) {
3345
- var headerId = extraFieldBuffer.readUInt16LE(i + 0);
3346
- var dataSize = extraFieldBuffer.readUInt16LE(i + 2);
3347
- var dataStart = i + 4;
3348
- var dataEnd = dataStart + dataSize;
3349
- if (dataEnd > extraFieldBuffer.length) return emitErrorAndAutoClose(self, /* @__PURE__ */ new Error("extra field length exceeds extra field buffer size"));
3350
- var dataBuffer = newBuffer(dataSize);
3351
- extraFieldBuffer.copy(dataBuffer, 0, dataStart, dataEnd);
3352
- entry.extraFields.push({
3353
- id: headerId,
3354
- data: dataBuffer
3355
- });
3356
- i = dataEnd;
3357
- }
3358
- entry.fileComment = self.decodeStrings ? decodeBuffer(buffer, fileCommentStart, fileCommentStart + entry.fileCommentLength, isUtf8) : buffer.slice(fileCommentStart, fileCommentStart + entry.fileCommentLength);
3359
- entry.comment = entry.fileComment;
3360
- self.readEntryCursor += buffer.length;
3361
- self.entriesRead += 1;
3362
- if (entry.uncompressedSize === 4294967295 || entry.compressedSize === 4294967295 || entry.relativeOffsetOfLocalHeader === 4294967295) {
3363
- var zip64EiefBuffer = null;
3364
- for (var i = 0; i < entry.extraFields.length; i++) {
3365
- var extraField = entry.extraFields[i];
3366
- if (extraField.id === 1) {
3367
- zip64EiefBuffer = extraField.data;
3368
- break;
3369
- }
3370
- }
3371
- if (zip64EiefBuffer == null) return emitErrorAndAutoClose(self, /* @__PURE__ */ new Error("expected zip64 extended information extra field"));
3372
- var index = 0;
3373
- if (entry.uncompressedSize === 4294967295) {
3374
- if (index + 8 > zip64EiefBuffer.length) return emitErrorAndAutoClose(self, /* @__PURE__ */ new Error("zip64 extended information extra field does not include uncompressed size"));
3375
- entry.uncompressedSize = readUInt64LE(zip64EiefBuffer, index);
3376
- index += 8;
3377
- }
3378
- if (entry.compressedSize === 4294967295) {
3379
- if (index + 8 > zip64EiefBuffer.length) return emitErrorAndAutoClose(self, /* @__PURE__ */ new Error("zip64 extended information extra field does not include compressed size"));
3380
- entry.compressedSize = readUInt64LE(zip64EiefBuffer, index);
3381
- index += 8;
3382
- }
3383
- if (entry.relativeOffsetOfLocalHeader === 4294967295) {
3384
- if (index + 8 > zip64EiefBuffer.length) return emitErrorAndAutoClose(self, /* @__PURE__ */ new Error("zip64 extended information extra field does not include relative header offset"));
3385
- entry.relativeOffsetOfLocalHeader = readUInt64LE(zip64EiefBuffer, index);
3386
- index += 8;
3387
- }
3388
- }
3389
- if (self.decodeStrings) for (var i = 0; i < entry.extraFields.length; i++) {
3390
- var extraField = entry.extraFields[i];
3391
- if (extraField.id === 28789) {
3392
- if (extraField.data.length < 6) continue;
3393
- if (extraField.data.readUInt8(0) !== 1) continue;
3394
- var oldNameCrc32 = extraField.data.readUInt32LE(1);
3395
- if (crc32.unsigned(buffer.slice(0, entry.fileNameLength)) !== oldNameCrc32) continue;
3396
- entry.fileName = decodeBuffer(extraField.data, 5, extraField.data.length, true);
3397
- break;
3398
- }
3399
- }
3400
- if (self.validateEntrySizes && entry.compressionMethod === 0) {
3401
- var expectedCompressedSize = entry.uncompressedSize;
3402
- if (entry.isEncrypted()) expectedCompressedSize += 12;
3403
- if (entry.compressedSize !== expectedCompressedSize) {
3404
- var msg = "compressed/uncompressed size mismatch for stored file: " + entry.compressedSize + " != " + entry.uncompressedSize;
3405
- return emitErrorAndAutoClose(self, new Error(msg));
3406
- }
3407
- }
3408
- if (self.decodeStrings) {
3409
- if (!self.strictFileNames) entry.fileName = entry.fileName.replace(/\\/g, "/");
3410
- var errorMessage = validateFileName(entry.fileName, self.validateFileNameOptions);
3411
- if (errorMessage != null) return emitErrorAndAutoClose(self, new Error(errorMessage));
3412
- }
3413
- self.emit("entry", entry);
3414
- if (!self.lazyEntries) self._readEntry();
3415
- });
3416
- });
3417
- };
3418
/**
 * Opens a readable stream over the stored data of `entry`.
 *
 * Invalid argument combinations are thrown synchronously. Problems that
 * depend on the archive itself (closed file, bad local file header,
 * unsupported compression method, data running past the end of the file)
 * are reported through `callback`.
 *
 * @param {Entry} entry - the entry whose data should be read.
 * @param {object|null} [options] - may be omitted (pass `callback` as the
 *   second argument) or passed as null/undefined; both mean "no options".
 * @param {boolean} [options.decrypt] - only `false` is accepted, and only
 *   for encrypted entries; the raw encrypted bytes are then streamed.
 * @param {boolean} [options.decompress] - only allowed for deflated
 *   entries; `false` streams the raw deflate data.
 * @param {number} [options.start] - start offset into the stored data,
 *   between 0 and `entry.compressedSize`.
 * @param {number} [options.end] - end offset into the stored data, between
 *   `options.start` and `entry.compressedSize`.
 * @param {function(Error=, stream.Readable=)} callback - receives the stream.
 * @throws {Error} when the options are inconsistent with the entry.
 */
ZipFile.prototype.openReadStream = function(entry, options, callback) {
	const self = this;
	let relativeStart = 0;
	let relativeEnd = entry.compressedSize;
	if (callback == null) {
		callback = options;
		options = null;
	}
	if (options == null) {
		// Covers both the two-argument form and an explicit null/undefined
		// options argument, which previously crashed on `options.decrypt`.
		options = {};
	} else {
		if (options.decrypt != null) {
			if (!entry.isEncrypted()) throw new Error("options.decrypt can only be specified for encrypted entries");
			if (options.decrypt !== false) throw new Error("invalid options.decrypt value: " + options.decrypt);
			if (entry.isCompressed()) {
				if (options.decompress !== false) throw new Error("entry is encrypted and compressed, and options.decompress !== false");
			}
		}
		if (options.decompress != null) {
			if (!entry.isCompressed()) throw new Error("options.decompress can only be specified for compressed entries");
			if (!(options.decompress === false || options.decompress === true)) throw new Error("invalid options.decompress value: " + options.decompress);
		}
		if (options.start != null || options.end != null) {
			// A byte range only makes sense on the raw stored bytes.
			if (entry.isCompressed() && options.decompress !== false) throw new Error("start/end range not allowed for compressed entry without options.decompress === false");
			if (entry.isEncrypted() && options.decrypt !== false) throw new Error("start/end range not allowed for encrypted entry without options.decrypt === false");
		}
		if (options.start != null) {
			relativeStart = options.start;
			if (relativeStart < 0) throw new Error("options.start < 0");
			if (relativeStart > entry.compressedSize) throw new Error("options.start > entry.compressedSize");
		}
		if (options.end != null) {
			relativeEnd = options.end;
			if (relativeEnd < 0) throw new Error("options.end < 0");
			if (relativeEnd > entry.compressedSize) throw new Error("options.end > entry.compressedSize");
			if (relativeEnd < relativeStart) throw new Error("options.end < options.start");
		}
	}
	if (!self.isOpen) return callback(new Error("closed"));
	if (entry.isEncrypted()) {
		if (options.decrypt !== false) return callback(new Error("entry is encrypted, and options.decrypt !== false"));
	}
	// Hold a reference on the reader while the local file header is read;
	// it is released in the `finally` below.
	self.reader.ref();
	const buffer = newBuffer(30);
	readAndAssertNoEof(self.reader, buffer, 0, buffer.length, entry.relativeOffsetOfLocalHeader, function(err) {
		try {
			if (err) return callback(err);
			const signature = buffer.readUInt32LE(0);
			if (signature !== 0x04034b50) return callback(new Error("invalid local file header signature: 0x" + signature.toString(16)));
			const fileNameLength = buffer.readUInt16LE(26);
			const extraFieldLength = buffer.readUInt16LE(28);
			const localFileHeaderEnd = entry.relativeOffsetOfLocalHeader + buffer.length + fileNameLength + extraFieldLength;
			let decompress;
			if (entry.compressionMethod === 0) decompress = false;
			else if (entry.compressionMethod === 8) decompress = options.decompress != null ? options.decompress : true;
			else return callback(new Error("unsupported compression method: " + entry.compressionMethod));
			const fileDataStart = localFileHeaderEnd;
			const fileDataEnd = fileDataStart + entry.compressedSize;
			if (entry.compressedSize !== 0) {
				if (fileDataEnd > self.fileSize) return callback(new Error("file data overflows file bounds: " + fileDataStart + " + " + entry.compressedSize + " > " + self.fileSize));
			}
			const readStream = self.reader.createReadStream({
				start: fileDataStart + relativeStart,
				end: fileDataStart + relativeEnd
			});
			let endpointStream = readStream;
			if (decompress) {
				let destroyed = false;
				const inflateFilter = zlib.createInflateRaw();
				// Errors are forwarded down the pipeline on a later tick and
				// dropped once the consumer has destroyed the stream.
				readStream.on("error", function(err) {
					setImmediate(function() {
						if (!destroyed) inflateFilter.emit("error", err);
					});
				});
				readStream.pipe(inflateFilter);
				if (self.validateEntrySizes) {
					endpointStream = new AssertByteCountStream(entry.uncompressedSize);
					inflateFilter.on("error", function(err) {
						setImmediate(function() {
							if (!destroyed) endpointStream.emit("error", err);
						});
					});
					inflateFilter.pipe(endpointStream);
				} else endpointStream = inflateFilter;
				endpointStream.destroy = function() {
					destroyed = true;
					if (inflateFilter !== endpointStream) inflateFilter.unpipe(endpointStream);
					readStream.unpipe(inflateFilter);
					readStream.destroy();
				};
			}
			callback(null, endpointStream);
		} finally {
			self.reader.unref();
		}
	});
};
3512
/**
 * Record for one archive entry. Fields are assigned by the code that
 * parses the central directory; the constructor itself sets nothing.
 */
function Entry() {}

/**
 * Converts the entry's DOS-format modification date/time fields.
 * @returns {Date} result of `dosDateTimeToDate` for this entry's fields.
 */
Entry.prototype.getLastModDate = function() {
	const { lastModFileDate, lastModFileTime } = this;
	return dosDateTimeToDate(lastModFileDate, lastModFileTime);
};

/**
 * @returns {boolean} true when bit 0 of the general purpose bit flag is set.
 */
Entry.prototype.isEncrypted = function() {
	const encryptionBit = this.generalPurposeBitFlag & 1;
	return encryptionBit !== 0;
};

/**
 * @returns {boolean} true when the compression method is 8.
 */
Entry.prototype.isCompressed = function() {
	const method = this.compressionMethod;
	return method === 8;
};
3522
/**
 * Builds a local-time `Date` from packed DOS date and time words.
 *
 * Date word: bits 0-4 day, bits 5-8 month (1-based), bits 9-15 years since 1980.
 * Time word: bits 0-4 seconds / 2, bits 5-10 minutes, bits 11-15 hours.
 *
 * @param {number} date - packed DOS date.
 * @param {number} time - packed DOS time.
 * @returns {Date}
 */
function dosDateTimeToDate(date, time) {
	const field = (packed, shift, mask) => (packed >> shift) & mask;
	return new Date(
		field(date, 9, 127) + 1980,
		field(date, 5, 15) - 1, // Date months are 0-based
		field(date, 0, 31),
		field(time, 11, 31),
		field(time, 5, 63),
		field(time, 0, 31) * 2, // stored with 2-second resolution
		0
	);
}
3532
/**
 * Checks an entry name for characters and path shapes that are rejected.
 *
 * @param {string} fileName - entry name to check.
 * @returns {string|null} an error message, or null when the name is accepted.
 */
function validateFileName(fileName) {
	if (fileName.includes("\\")) return "invalid characters in fileName: " + fileName;
	// Drive-letter prefixes and leading slashes are both treated as absolute.
	const looksAbsolute = /^(?:[a-zA-Z]:|\/)/.test(fileName);
	if (looksAbsolute) return "absolute path: " + fileName;
	const climbsOut = fileName.split("/").some((segment) => segment === "..");
	return climbsOut ? "invalid relative path: " + fileName : null;
}
3538
/**
 * Reads `length` bytes through `reader.read` and reports a short read as
 * an "unexpected EOF" error.
 *
 * @param {object} reader - object exposing `read(buffer, offset, length, position, cb)`.
 * @param {Buffer} buffer - destination buffer.
 * @param {number} offset - offset into `buffer` to start writing at.
 * @param {number} length - number of bytes required.
 * @param {number} position - position in the source to read from.
 * @param {function(Error=)} callback - called with an error, or with no
 *   arguments after a full read; a zero-length request completes on a later
 *   tick with `(null, <empty buffer>)`.
 */
function readAndAssertNoEof(reader, buffer, offset, length, position, callback) {
	if (length !== 0) {
		reader.read(buffer, offset, length, position, function(readError, bytesRead) {
			if (readError) return callback(readError);
			if (bytesRead < length) return callback(new Error("unexpected EOF"));
			callback();
		});
		return;
	}
	// Nothing to read: skip the reader entirely but stay asynchronous.
	return setImmediate(function() {
		callback(null, newBuffer(0));
	});
}
3548
/**
 * Transform stream that passes data through unchanged while checking
 * that exactly `byteCount` bytes flow through it.
 *
 * @param {number} byteCount - number of bytes the stream must carry.
 */
function AssertByteCountStream(byteCount) {
	Transform.call(this);
	this.actualByteCount = 0;
	this.expectedByteCount = byteCount;
}
util.inherits(AssertByteCountStream, Transform);

/**
 * Counts the chunk and fails as soon as the running total exceeds the
 * expected byte count; otherwise forwards the chunk untouched.
 */
AssertByteCountStream.prototype._transform = function(chunk, encoding, cb) {
	this.actualByteCount += chunk.length;
	if (this.actualByteCount <= this.expectedByteCount) {
		cb(null, chunk);
		return;
	}
	const tooMany = `too many bytes in the stream. expected ${this.expectedByteCount}. got at least ${this.actualByteCount}`;
	return cb(new Error(tooMany));
};

/**
 * Fails at end-of-stream when fewer bytes than expected were seen.
 */
AssertByteCountStream.prototype._flush = function(cb) {
	if (this.actualByteCount >= this.expectedByteCount) {
		cb();
		return;
	}
	const tooFew = `not enough bytes in the stream. expected ${this.expectedByteCount}. got only ${this.actualByteCount}`;
	return cb(new Error(tooFew));
};
3569
/**
 * Reference-counted base class for random-access data sources.
 * Subclasses supply `_readStreamForRange`; when the reference count drops
 * to zero the reader is closed and emits "close" (or "error").
 */
function RandomAccessReader() {
	EventEmitter.call(this);
	this.refCount = 0;
}
util.inherits(RandomAccessReader, EventEmitter);

/** Takes one reference on the reader. */
RandomAccessReader.prototype.ref = function() {
	this.refCount++;
};

/**
 * Releases one reference; the last release closes the reader.
 * @throws {Error} "invalid unref" when released more often than referenced.
 */
RandomAccessReader.prototype.unref = function() {
	const reader = this;
	reader.refCount--;
	if (reader.refCount > 0) return;
	if (reader.refCount < 0) throw new Error("invalid unref");
	reader.close(function(closeError) {
		if (closeError) return reader.emit("error", closeError);
		reader.emit("close");
	});
};

/**
 * Creates a stream over the range `options.start` .. `options.end`.
 * The stream holds a reference on the reader until it is flushed or
 * destroyed, and errors unless exactly `end - start` bytes pass through.
 *
 * @param {{start: number, end: number}} options
 * @returns {stream.Readable}
 */
RandomAccessReader.prototype.createReadStream = function(options) {
	const rangeStart = options.start;
	const rangeEnd = options.end;
	if (rangeStart === rangeEnd) {
		// Empty range: hand back a stream that simply ends on a later tick.
		const nothing = new PassThrough();
		setImmediate(function() {
			nothing.end();
		});
		return nothing;
	}
	const source = this._readStreamForRange(rangeStart, rangeEnd);
	let tornDown = false;
	// Re-emits errors one stage downstream on a later tick, unless the
	// consumer has already destroyed the outermost stream.
	const relayErrors = function(from, to) {
		from.on("error", function(err) {
			setImmediate(function() {
				if (!tornDown) to.emit("error", err);
			});
		});
	};
	const holder = new RefUnrefFilter(this);
	relayErrors(source, holder);
	holder.destroy = function() {
		source.unpipe(holder);
		holder.unref();
		source.destroy();
	};
	const counter = new AssertByteCountStream(rangeEnd - rangeStart);
	relayErrors(holder, counter);
	counter.destroy = function() {
		tornDown = true;
		holder.unpipe(counter);
		holder.destroy();
	};
	return source.pipe(holder).pipe(counter);
};

/**
 * Hook for subclasses: return a readable stream for the given range.
 * The base implementation always throws.
 */
RandomAccessReader.prototype._readStreamForRange = function(start, end) {
	throw new Error("not implemented");
};

/**
 * Reads `length` bytes starting at `position` into `buffer` at `offset`
 * by draining a range stream. `callback` is invoked from the sink's
 * "finish" event, or with the error emitted by the range stream.
 */
RandomAccessReader.prototype.read = function(buffer, offset, length, position, callback) {
	const rangeStream = this.createReadStream({
		start: position,
		end: position + length
	});
	const sink = new Writable();
	let copied = 0;
	sink._write = function(chunk, encoding, done) {
		chunk.copy(buffer, offset + copied, 0, chunk.length);
		copied += chunk.length;
		done();
	};
	sink.on("finish", callback);
	rangeStream.on("error", function(error) {
		callback(error);
	});
	rangeStream.pipe(sink);
};

/** Default close: nothing to release, so just call back on a later tick. */
RandomAccessReader.prototype.close = function(callback) {
	setImmediate(callback);
};
3648
/**
 * Pass-through stream that holds one reference on `context` for as long
 * as it is alive, releasing it exactly once.
 *
 * @param {{ref: function(), unref: function()}} context - the object to
 *   reference on construction and unreference on flush/unref.
 */
function RefUnrefFilter(context) {
	PassThrough.call(this);
	this.context = context;
	this.context.ref();
	this.unreffedYet = false;
}
util.inherits(RefUnrefFilter, PassThrough);

/** Releases the reference once all data has passed through. */
RefUnrefFilter.prototype._flush = function(cb) {
	this.unref();
	cb();
};

/** Releases the reference; repeated calls are no-ops. */
RefUnrefFilter.prototype.unref = function(cb) {
	if (!this.unreffedYet) {
		this.unreffedYet = true;
		this.context.unref();
	}
};
3664
/**
 * Code page 437 lookup table: the character at index `b` is the decoding
 * of byte value `b`, so the string must be exactly 256 characters long.
 * The entries for 0xA5 and 0xA6 are written as escapes (capital N with
 * tilde, feminine ordinal indicator) because they had been fused into a
 * single character, which shifted every later entry by one position.
 */
var cp437 = "\0☺☻♥♦♣♠•◘○◙♂♀♪♫☼►◄↕‼¶§▬↨↑↓→←∟↔▲▼ !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~⌂ÇüéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜ¢£¥₧ƒáíóúñ\xD1\xAAº¿⌐¬½¼¡«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■\xA0";

/**
 * Decodes bytes `start` (inclusive) to `end` (exclusive) of `buffer`.
 *
 * @param {Buffer} buffer - source bytes.
 * @param {number} start - first byte index.
 * @param {number} end - index one past the last byte.
 * @param {boolean} isUtf8 - decode as UTF-8 when truthy, otherwise as CP437.
 * @returns {string}
 */
function decodeBuffer(buffer, start, end, isUtf8) {
	if (isUtf8) return buffer.toString("utf8", start, end);
	let result = "";
	for (let i = start; i < end; i++) result += cp437[buffer[i]];
	return result;
}
3673
/**
 * Reads an unsigned 64-bit little-endian integer as a JavaScript number,
 * combining the two 32-bit halves arithmetically.
 *
 * @param {Buffer} buffer - source bytes.
 * @param {number} offset - index of the least significant byte.
 * @returns {number} high half * 2^32 + low half.
 */
function readUInt64LE(buffer, offset) {
	const low = buffer.readUInt32LE(offset);
	const high = buffer.readUInt32LE(offset + 4);
	return high * 4294967296 + low;
}
3677
/**
 * Allocates a buffer of `len` bytes whose contents are not initialized.
 * Uses `Buffer.allocUnsafe` where it exists and falls back to the
 * `Buffer` constructor otherwise.
 */
var newBuffer;
if (typeof Buffer.allocUnsafe !== "function") newBuffer = function(len) {
	return new Buffer(len);
};
else newBuffer = function(len) {
	return Buffer.allocUnsafe(len);
};
3684
/**
 * Callback that ignores success and rethrows any error passed to it.
 *
 * @param {*} err - error value, or a falsy value for success.
 * @throws whatever truthy value is passed as `err`.
 */
function defaultCallback(err) {
	if (!err) return;
	throw err;
}
3687
- }));
3688
- //#endregion
3689
- //#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/buffer-reader.js
3690
- var require_buffer_reader = /* @__PURE__ */ __commonJSMin(((exports, module) => {
3691
- module.exports = class BufferReader {
3692
- constructor(buffer) {
3693
- this._buffer = buffer;
3694
- }
3695
- open() {
3696
- return Promise.resolve();
3697
- }
3698
- close() {
3699
- return Promise.resolve();
3700
- }
3701
- read(buffer, offset, length, position) {
3702
- this._buffer.copy(buffer, offset, position, position + length);
3703
- return Promise.resolve(buffer);
3704
- }
3705
- buffer() {
3706
- return this._buffer;
3707
- }
3708
- static isBufferReader(instance) {
3709
- return instance instanceof BufferReader;
3710
- }
3711
- };
3712
- }));
3713
- //#endregion
3714
- //#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/file-reader.js
3715
- var require_file_reader = /* @__PURE__ */ __commonJSMin(((exports, module) => {
3716
- /**
3717
- * @module file-reader
3718
- *
3719
- * @description
3720
- * Exports a class {@link FileReader}, used internally to handle
3721
- * access when a string filename is passed. This provides a consistent
3722
- * interface between reading from files and buffers, so that in-memory
3723
- * files can be handled efficiently.
3724
- */
3725
- const fs = __require("fs");
3726
- module.exports = class FileReader {
3727
- /**
3728
- * Creates a new file reader instance, using the given filename.
3729
- * @param {*} filename
3730
- */
3731
- constructor(filename) {
3732
- this._filename = filename;
3733
- }
3734
- /**
3735
- * Opens the file descriptor for a file, and returns a promise that resolves
3736
- * when the file is open. After this, {@link FileReader#read} can be called
3737
- * to read file content into a buffer.
3738
- * @returns a promise
3739
- */
3740
- open() {
3741
- return new Promise((resolve, reject) => {
3742
- fs.open(this._filename, "r", 438, (err, fd) => {
3743
- if (err) return reject(err);
3744
- this._fd = fd;
3745
- resolve();
3746
- });
3747
- });
3748
- }
3749
- /**
3750
- * Closes the file descriptor associated with an open document, if there
3751
- * is one, and returns a promise that resolves when the file handle is closed.
3752
- * @returns a promise
3753
- */
3754
- close() {
3755
- return new Promise((resolve, reject) => {
3756
- if (this._fd) fs.close(this._fd, (err) => {
3757
- if (err) return reject(err);
3758
- delete this._fd;
3759
- resolve();
3760
- });
3761
- else resolve();
3762
- });
3763
- }
3764
- /**
3765
- * Reads a buffer of `length` bytes into the `buffer`. The new data will
3766
- * be added to the buffer at offset `offset`, and will be read from the
3767
- * file starting at position `position`
3768
- * @param {*} buffer
3769
- * @param {*} offset
3770
- * @param {*} length
3771
- * @param {*} position
3772
- * @returns a promise that resolves to the buffer when the data is present
3773
- */
3774
- read(buffer, offset, length, position) {
3775
- return new Promise((resolve, reject) => {
3776
- if (!this._fd) return reject(/* @__PURE__ */ new Error("file not open"));
3777
- fs.read(this._fd, buffer, offset, length, position, (err, bytesRead, buffer) => {
3778
- if (err) return reject(err);
3779
- resolve(buffer);
3780
- });
3781
- });
3782
- }
3783
- /**
3784
- * Returns the open file descriptor
3785
- * @returns the file descriptor
3786
- */
3787
- fd() {
3788
- return this._fd;
3789
- }
3790
- /**
3791
- * Returns true if the passed instance is an instance of this class.
3792
- * @param {*} instance
3793
- * @returns true if `instance` is an instance of {@link FileReader}.
3794
- */
3795
- static isFileReader(instance) {
3796
- return instance instanceof FileReader;
3797
- }
3798
- };
3799
- }));
3800
- //#endregion
3801
- //#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/open-office-extractor.js
3802
- var require_open_office_extractor = /* @__PURE__ */ __commonJSMin(((exports, module) => {
3803
- /**
3804
- * @module open-office-extractor
3805
- *
3806
- * @description
3807
- * Implements the main Open Office format extractor. Open Office .docx files
3808
- * are essentially zip files containing streams, and each of these streams contains
3809
- * XML content in one form or another. So we need to use {@link zlib} to extract
3810
- * the streams, and something like `sax-js` to parse the XML that we find
3811
- * there.
3812
- *
3813
- * We probably don't need the whole of the Open Office data, we're only likely
3814
- * to need a few streams. Sadly, the documentation for the file format is literally
3815
- * 5000 pages.
3816
- * Note that [WordOleExtractor]{@link module:word-ole-extractor~WordOleExtractor} is
3817
- * used for older, OLE-style, compound document files.
3818
- */
3819
- const path = __require("path");
3820
- const SAXES = require_saxes();
3821
- const yauzl = require_yauzl();
3822
- const BufferReader = require_buffer_reader();
3823
- const FileReader = require_file_reader();
3824
- const Document = require_document();
3825
/**
 * Runs `callback` over `array` one element at a time, starting at
 * `index`, waiting for each result to settle before moving on.
 *
 * @param {function(*): *} callback - may return a value or a promise.
 * @param {Array} array - items to visit.
 * @param {number} index - position to start from.
 * @returns {Promise<void>} resolves once the end of the array is reached.
 */
function each(callback, array, index) {
	if (index !== array.length) {
		const current = array[index];
		return Promise.resolve(callback(current)).then(() => each(callback, array, index + 1));
	}
	return Promise.resolve();
}
3829
- /**
3830
- * @class
3831
- * The main class implementing extraction from Open Office Word files.
3832
- */
3833
/**
 * @class
 * Extracts text from an Office Open XML archive. The archive entries are
 * listed first, `[Content_Types].xml` is parsed to learn which parts are
 * of interest, and those parts are then streamed through a SAX parser
 * whose tag events are handled by `handleOpenTag` / `handleCloseTag`.
 *
 * All lookup tables keyed by strings that come from the archive are
 * null-prototype objects (or a Map), so names such as "constructor" or
 * "__proto__" behave like any other key.
 */
var OpenOfficeExtractor = class {
	constructor() {
		// Content types whose parts are parsed.
		this._streamTypes = {
			__proto__: null,
			"application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml": true,
			"application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml": true,
			"application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml": true,
			"application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml": true,
			"application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml": true,
			"application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml": true,
			"application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml": true,
			"application/vnd.openxmlformats-package.relationships+xml": true
		};
		this._headerTypes = {
			__proto__: null,
			"http://schemas.openxmlformats.org/officeDocument/2006/relationships/header": true,
			"http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer": true
		};
		// Part name -> action, filled from <Override> elements.
		this._actions = Object.create(null);
		// File extension -> content type, filled from <Default> elements.
		this._defaults = Object.create(null);
	}

	/**
	 * Decides whether the archive entry `filename` should be parsed:
	 * either it was registered explicitly, or its extension maps to one
	 * of the wanted content types.
	 * @param {string} filename - entry name inside the archive.
	 * @returns {boolean}
	 */
	shouldProcess(filename) {
		if (this._actions[filename]) return true;
		const extension = path.posix.extname(filename).replace(/^\./, "");
		if (!extension) return false;
		const defaultType = this._defaults[extension];
		if (defaultType && this._streamTypes[defaultType]) return true;
		return false;
	}

	/**
	 * Opens the archive behind `reader` with lazy entry reading.
	 * @param {BufferReader|FileReader} reader
	 * @returns {Promise} resolves to the opened zip file.
	 * @throws {Error} synchronously, for any other kind of reader.
	 */
	openArchive(reader) {
		if (BufferReader.isBufferReader(reader)) return new Promise((resolve, reject) => {
			yauzl.fromBuffer(reader.buffer(), { lazyEntries: true }, function(err, zipfile) {
				if (err) return reject(err);
				resolve(zipfile);
			});
		});
		else if (FileReader.isFileReader(reader)) return new Promise((resolve, reject) => {
			yauzl.fromFd(reader.fd(), {
				lazyEntries: true,
				// The descriptor belongs to the reader, which closes it itself.
				autoClose: false
			}, function(err, zipfile) {
				if (err) return reject(err);
				resolve(zipfile);
			});
		});
		else throw new Error("Unexpected reader type: " + reader.constructor.name);
	}

	/**
	 * Lists every entry, then parses the wanted ones sequentially with
	 * `[Content_Types].xml` first, because it determines what is wanted.
	 * @param {*} zipfile - the opened archive.
	 * @returns {Promise} resolves when all wanted entries have been handled.
	 */
	processEntries(zipfile) {
		const entryTable = new Map();
		const entryNames = [];
		return new Promise((resolve, reject) => {
			zipfile.readEntry();
			zipfile.on("error", reject);
			zipfile.on("entry", (entry) => {
				const filename = entry.fileName;
				entryTable.set(filename, entry);
				entryNames.push(filename);
				zipfile.readEntry();
			});
			zipfile.on("end", () => resolve(this._document));
		}).then(() => {
			const index = entryNames.indexOf("[Content_Types].xml");
			if (index === -1) throw new Error("Invalid Open Office XML: missing content types");
			entryNames.splice(index, 1);
			entryNames.unshift("[Content_Types].xml");
			this._actions["[Content_Types].xml"] = true;
			return each((name) => {
				if (this.shouldProcess(name)) return this.handleEntry(zipfile, entryTable.get(name));
			}, entryNames, 0);
		});
	}

	/**
	 * Extracts the document behind `reader`.
	 * @param {BufferReader|FileReader} reader
	 * @returns {Promise} resolves to the populated document object.
	 */
	extract(reader) {
		const archive = this.openArchive(reader);
		this._document = new Document();
		this._relationships = {};
		this._entryTable = {};
		this._entries = [];
		return archive.then((zipfile) => this.processEntries(zipfile)).then(() => {
			const document = this._document;
			if (document._textboxes && document._textboxes.length > 0) document._textboxes = document._textboxes + "\n";
			if (document._headerTextboxes && document._headerTextboxes.length > 0) document._headerTextboxes = document._headerTextboxes + "\n";
			return document;
		});
	}

	/**
	 * Handles an opening tag. Package-level elements register parts,
	 * defaults and relationships; wordprocessing elements maintain
	 * `_context` (a stack whose head says where text currently belongs)
	 * and `_pieces` (the text fragments collected so far).
	 * @param {{name: string, attributes: object}} node
	 */
	handleOpenTag(node) {
		const attributes = node.attributes;
		switch (node.name) {
			case "Override": {
				const contentType = attributes["ContentType"];
				const actionFunction = this._streamTypes[contentType];
				if (actionFunction) {
					// Part names are absolute; archive entry names are not.
					const partName = attributes["PartName"].replace(/^[/]+/, "");
					this._actions[partName] = {
						action: actionFunction,
						type: contentType
					};
				}
				break;
			}
			case "Default":
				this._defaults[attributes["Extension"]] = attributes["ContentType"];
				break;
			case "Relationship":
				this._relationships[attributes["Id"]] = {
					type: attributes["Type"],
					target: attributes["Target"]
				};
				break;
			case "w:document":
			case "w:footnotes":
			case "w:endnotes":
			case "w:comments":
				this._context = ["content", "body"];
				this._pieces = [];
				break;
			case "w:hdr":
			case "w:ftr":
				this._context = ["content", "header"];
				this._pieces = [];
				break;
			case "w:endnote":
			case "w:footnote":
				// A typed note pushes its type, which is not one of the
				// collecting contexts; untyped notes repeat the current one.
				this._context.unshift(attributes["w:type"] || this._context[0]);
				break;
			case "w:tab":
				if (this._context[0] === "content") this._pieces.push(" ");
				break;
			case "w:br":
				// Every break type, page breaks included, becomes a newline.
				if (this._context[0] === "content") this._pieces.push("\n");
				break;
			case "w:del":
			case "w:instrText":
				this._context.unshift("deleted");
				break;
			case "w:tabs":
				this._context.unshift("tabs");
				break;
			case "w:tc":
				this._context.unshift("cell");
				break;
			case "w:drawing":
				this._context.unshift("drawing");
				break;
			case "w:txbxContent":
				// Park the outer pieces on the context stack while the text
				// box collects its own.
				this._context.unshift(this._pieces);
				this._context.unshift("textbox");
				this._pieces = [];
				break;
		}
	}

	/**
	 * Handles a closing tag: pops contexts pushed by `handleOpenTag`,
	 * inserts paragraph/cell/row separators, and stores the collected
	 * text on the document when a root element closes.
	 * @param {{name: string}} node
	 * @throws {Error} "Invalid textbox context" when a text box closes
	 *   while the context stack head is not "textbox".
	 */
	handleCloseTag(node) {
		switch (node.name) {
			case "w:document":
				this._context = null;
				this._document._body = this._pieces.join("");
				break;
			case "w:footnote":
			case "w:endnote":
			case "w:del":
			case "w:instrText":
			case "w:tabs":
			case "w:drawing":
				this._context.shift();
				break;
			case "w:footnotes":
				this._context = null;
				this._document._footnotes = this._pieces.join("");
				break;
			case "w:endnotes":
				this._context = null;
				this._document._endnotes = this._pieces.join("");
				break;
			case "w:comments":
				this._context = null;
				this._document._annotations = this._pieces.join("");
				break;
			case "w:hdr":
				this._context = null;
				this._document._headers = this._document._headers + this._pieces.join("");
				break;
			case "w:ftr":
				this._context = null;
				this._document._footers = this._document._footers + this._pieces.join("");
				break;
			case "w:p": {
				const where = this._context[0];
				if (where === "content" || where === "cell" || where === "textbox") this._pieces.push("\n");
				break;
			}
			case "w:tc":
				// Replace the last collected piece with the cell separator.
				this._pieces.pop();
				this._pieces.push(" ");
				this._context.shift();
				break;
			case "w:tr":
				this._pieces.push("\n");
				break;
			case "w:txbxContent": {
				const textBox = this._pieces.join("");
				if (this._context.shift() !== "textbox") throw new Error("Invalid textbox context");
				this._pieces = this._context.shift();
				if (this._context[0] === "drawing") return;
				if (textBox.length === 0) return;
				const documentField = this._context.includes("header") ? "_headerTextboxes" : "_textboxes";
				if (this._document[documentField]) this._document[documentField] = this._document[documentField] + "\n" + textBox;
				else this._document[documentField] = textBox;
				break;
			}
		}
	}

	/**
	 * Creates a SAX parser wired to this extractor. Exceptions thrown by
	 * the handlers are turned into parser failures.
	 * @returns the configured parser.
	 */
	createXmlParser() {
		const parser = new SAXES.SaxesParser();
		const guarded = (handler) => (payload) => {
			try {
				handler(payload);
			} catch (e) {
				parser.fail(e.message);
			}
		};
		parser.on("opentag", guarded((node) => this.handleOpenTag(node)));
		parser.on("closetag", guarded((node) => this.handleCloseTag(node)));
		parser.on("text", guarded((string) => {
			if (!this._context) return;
			const where = this._context[0];
			if (where === "content" || where === "cell" || where === "textbox") this._pieces.push(string);
		}));
		return parser;
	}

	/**
	 * Streams one archive entry through a fresh XML parser.
	 * @param {*} zipfile - the opened archive.
	 * @param {*} entry - the entry to parse.
	 * @returns {Promise<void>} resolves when the parser ends, rejects on a
	 *   stream or parse error.
	 */
	handleEntry(zipfile, entry) {
		return new Promise((resolve, reject) => {
			zipfile.openReadStream(entry, (err, readStream) => {
				if (err) return reject(err);
				this._source = entry.fileName;
				const parser = this.createXmlParser();
				parser.on("error", (e) => {
					readStream.destroy(e);
					reject(e);
				});
				parser.on("end", () => resolve());
				readStream.on("end", () => parser.close());
				readStream.on("error", (e) => reject(e));
				readStream.on("readable", () => {
					while (true) {
						const chunk = readStream.read(4096);
						if (chunk === null) return;
						parser.write(chunk);
					}
				});
			});
		});
	}
};
4047
- module.exports = OpenOfficeExtractor;
4048
- }));
4049
- //#endregion
4050
- //#region node_modules/.pnpm/word-extractor@1.0.4/node_modules/word-extractor/lib/word.js
4051
- var require_word = /* @__PURE__ */ __commonJSMin(((exports, module) => {
4052
- /**
4053
- * @module word
4054
- *
4055
- * @description
4056
- * The main module for the package. This exports an extractor class, which
4057
- * provides a single `extract` method that can be called with either a
4058
- * string (filename) or a buffer.
4059
- */
4060
- const { Buffer: Buffer$1 } = __require("buffer");
4061
- const WordOleExtractor = require_word_ole_extractor();
4062
- const OpenOfficeExtractor = require_open_office_extractor();
4063
- const BufferReader = require_buffer_reader();
4064
- const FileReader = require_file_reader();
4065
- /**
4066
- * The main class for the word extraction package. Typically, people will make
4067
- * an instance of this class, and call the {@link #extract} method to transform
4068
- * a Word file into a {@link Document} instance, which provides the accessors
4069
- * needed to read its body, and so on.
4070
- */
4071
/**
 * The main class for the word extraction package. Typically, people will
 * make an instance of this class and call {@link #extract} to transform a
 * Word file into a document object that exposes its text.
 */
var WordExtractor = class {
	constructor() {}

	/**
	 * Extracts the contents of a Word file. The first 512 bytes are read
	 * to sniff the container format, and the matching extractor is then
	 * run on the same reader. The reader is closed once extraction has
	 * settled, whether it succeeded or not.
	 *
	 * @param {string|Buffer} source - either a string filename, or a Buffer containing the file content
	 * @returns a promise for the extracted document; it rejects with
	 *   "Unable to read this type of file" when the signature is not recognized
	 * @throws {TypeError} synchronously, when `source` is neither a string nor a Buffer
	 */
	extract(source) {
		let reader;
		if (Buffer$1.isBuffer(source)) reader = new BufferReader(source);
		else if (typeof source === "string") reader = new FileReader(source);
		// Still a synchronous TypeError, as before, but one that names the
		// actual problem instead of failing on a null reader.
		else throw new TypeError("Unsupported source: expected a string filename or a Buffer, got " + (source === null ? "null" : typeof source));
		const header = Buffer$1.alloc(512);
		return reader.open().then(() => reader.read(header, 0, 512, 0)).then((buffer) => {
			let extractor = null;
			const magic = buffer.readUInt16BE(0);
			if (magic === 0xD0CF) extractor = WordOleExtractor; // OLE compound file: D0 CF ...
			else if (magic === 0x504B) {
				// "PK": accept the 03 04, 05 06 and 07 08 record signatures.
				const next = buffer.readUInt16BE(2);
				if (next === 0x0304 || next === 0x0506 || next === 0x0708) extractor = OpenOfficeExtractor;
			}
			if (!extractor) throw new Error("Unable to read this type of file");
			return new extractor().extract(reader);
		}).finally(() => reader.close());
	}
};
4100
- module.exports = WordExtractor;
4101
- }));
4102
- //#endregion
4103
- //#region ../../packages/file-loaders/src/loaders/doc/index.ts
4104
- var import_src = /* @__PURE__ */ __toESM(require_src());
4105
- var import_word = /* @__PURE__ */ __toESM(require_word());
4106
- const log = (0, import_src.default)("file-loaders:doc");
4107
- /**
4108
- * Loads legacy Word documents (.doc) using word-extractor.
4109
- * Extracts plain text content and basic metadata from DOC files.
4110
- */
4111
/**
 * Loads legacy Word documents (.doc) using word-extractor.
 * The whole document is returned as a single page of plain text.
 */
var DocLoader = class {
	/**
	 * Extracts the text of the file at `filePath`.
	 *
	 * Failures are logged and reported in-band: the result is then a
	 * single empty page whose metadata carries the error message.
	 *
	 * @param {string} filePath - path of the document to load.
	 * @returns {Promise<Array<object>>} a one-element array of page objects.
	 */
	async loadPages(filePath) {
		log("Loading DOC file:", filePath);
		try {
			const extractor = new import_word.default();
			const extracted = await extractor.extract(filePath);
			// Prefer the body accessor; otherwise fall back to a `text`
			// property, or to the empty string.
			let pageContent;
			if (extracted && typeof extracted.getBody === "function") pageContent = extracted.getBody();
			else pageContent = extracted?.text ?? "";
			const lineCount = pageContent.split("\n").length;
			const charCount = pageContent.length;
			log("DOC loading completed");
			return [{
				charCount,
				lineCount,
				metadata: { pageNumber: 1 },
				pageContent
			}];
		} catch (e) {
			const reason = e.message;
			log("Error encountered while loading DOC file");
			console.error(`Error loading DOC file ${filePath}: ${reason}`);
			const failedPage = {
				charCount: 0,
				lineCount: 0,
				metadata: { error: `Failed to load DOC file: ${reason}` },
				pageContent: ""
			};
			return [failedPage];
		}
	}

	/**
	 * Joins the text of all pages, separated by a blank line.
	 * @param {Array<{pageContent: string}>} pages
	 * @returns {Promise<string>}
	 */
	async aggregateContent(pages) {
		log("Aggregating content from", pages.length, "DOC pages");
		const texts = [];
		for (const page of pages) texts.push(page.pageContent);
		return texts.join("\n\n");
	}
};
4143
- //#endregion
4144
- export { DocLoader };