read-excel-file 9.1.0 → 9.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ 9.2.0 / 11.06.2026
2
+ ==================
3
+
4
+ * Reverted the shift from `unzipper` to `fflate` in Node.js code.
5
+ * The reason is that `unzipper` turns out to be 2x faster than `fflate` because `fflate` reimplements decompression in pure javascript while `unzipper` uses Node.js's "native" module `zlib` which is written in C.
6
+
7
+ * Replaced `unzipper` with `unzipper-esm`.
8
+ * This fixes the long-standing [bug](https://github.com/ZJONSSON/node-unzipper/issues/330) where bundlers can't bundle an app that uses the `unzipper` package because of a dynamic `require()` statement in it.
9
+
1
10
  9.1.0 / 07.06.2026
2
11
  ==================
3
12
 
@@ -5,9 +5,9 @@ Object.defineProperty(exports, "__esModule", {
5
5
  value: true
6
6
  });
7
7
  exports["default"] = convertInputToNodeStream;
8
- var _fs = _interopRequireDefault(require("fs"));
9
- var _buffer = require("buffer");
10
- var _stream = _interopRequireWildcard(require("stream"));
8
+ var _nodeFs = _interopRequireDefault(require("node:fs"));
9
+ var _nodeBuffer = require("node:buffer");
10
+ var _nodeStream = _interopRequireWildcard(require("node:stream"));
11
11
  function _getRequireWildcardCache(nodeInterop) { if (typeof WeakMap !== "function") return null; var cacheBabelInterop = new WeakMap(); var cacheNodeInterop = new WeakMap(); return (_getRequireWildcardCache = function _getRequireWildcardCache(nodeInterop) { return nodeInterop ? cacheNodeInterop : cacheBabelInterop; })(nodeInterop); }
12
12
  function _interopRequireWildcard(obj, nodeInterop) { if (!nodeInterop && obj && obj.__esModule) { return obj; } if (obj === null || _typeof(obj) !== "object" && typeof obj !== "function") { return { "default": obj }; } var cache = _getRequireWildcardCache(nodeInterop); if (cache && cache.has(obj)) { return cache.get(obj); } var newObj = {}; var hasPropertyDescriptor = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var key in obj) { if (key !== "default" && Object.prototype.hasOwnProperty.call(obj, key)) { var desc = hasPropertyDescriptor ? Object.getOwnPropertyDescriptor(obj, key) : null; if (desc && (desc.get || desc.set)) { Object.defineProperty(newObj, key, desc); } else { newObj[key] = obj[key]; } } } newObj["default"] = obj; if (cache) { cache.set(obj, newObj); } return newObj; }
13
13
  function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { "default": obj }; }
@@ -17,7 +17,7 @@ function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { "d
17
17
  * @returns {Stream}
18
18
  */
19
19
  function convertInputToNodeStream(input) {
20
- return input instanceof _stream["default"] ? input : input instanceof Buffer ? createReadableStreamFromBuffer(input) : input instanceof _buffer.Blob ? createReadableStreamFromBlob(input) : _fs["default"].createReadStream(input);
20
+ return input instanceof _nodeStream["default"] ? input : input instanceof Buffer ? createReadableStreamFromBuffer(input) : input instanceof _nodeBuffer.Blob ? createReadableStreamFromBlob(input) : _nodeFs["default"].createReadStream(input);
21
21
  }
22
22
 
23
23
  // Creates a readable stream from a `Buffer`.
@@ -28,7 +28,7 @@ function createReadableStreamFromBuffer(buffer) {
28
28
  if (buffer.length === 0) {
29
29
  throw new Error('No data');
30
30
  }
31
- return _stream.Readable.from(buffer);
31
+ return _nodeStream.Readable.from(buffer);
32
32
  }
33
33
 
34
34
  // Creates a readable stream from a `Blob`.
@@ -40,6 +40,6 @@ function createReadableStreamFromBlob(blob) {
40
40
  throw new Error('No data');
41
41
  }
42
42
  // Convert a web `ReadableStream` to a Node.js `Readable` `Stream`.
43
- return _stream.Readable.fromWeb(blob.stream());
43
+ return _nodeStream.Readable.fromWeb(blob.stream());
44
44
  }
45
45
  //# sourceMappingURL=convertInputToNodeStream.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"convertInputToNodeStream.js","names":["_fs","_interopRequireDefault","require","_buffer","_stream","_interopRequireWildcard","_getRequireWildcardCache","nodeInterop","WeakMap","cacheBabelInterop","cacheNodeInterop","obj","__esModule","_typeof","cache","has","get","newObj","hasPropertyDescriptor","Object","defineProperty","getOwnPropertyDescriptor","key","prototype","hasOwnProperty","call","desc","set","convertInputToNodeStream","input","Stream","Buffer","createReadableStreamFromBuffer","Blob","createReadableStreamFromBlob","fs","createReadStream","buffer","length","Error","Readable","from","blob","size","fromWeb","stream"],"sources":["../../source/export/convertInputToNodeStream.js"],"sourcesContent":["import fs from 'fs'\r\nimport { Blob } from 'buffer'\r\nimport Stream, { Readable } from 'stream'\r\n\r\n/**\r\n * Converts Node.js input argument to a stream.\r\n * @param {(string|Stream|Buffer|Blob)} input - A Node.js readable stream or a `Buffer` or a `Blob` or a path to a file.\r\n * @returns {Stream}\r\n */\r\nexport default function convertInputToNodeStream(input) {\r\n return input instanceof Stream\r\n ? input\r\n : (\r\n input instanceof Buffer\r\n ? createReadableStreamFromBuffer(input)\r\n : (\r\n input instanceof Blob\r\n ? createReadableStreamFromBlob(input)\r\n : fs.createReadStream(input)\r\n )\r\n )\r\n}\r\n\r\n// Creates a readable stream from a `Buffer`.\r\nfunction createReadableStreamFromBuffer(buffer) {\r\n // Node.js seems to have a bug in `Readable.from()` function:\r\n // it doesn't correctly handle empty buffers, i.e. 
it doesn't return a correct stream.\r\n // https://gitlab.com/catamphetamine/read-excel-file/-/issues/106\r\n if (buffer.length === 0) {\r\n throw new Error('No data')\r\n }\r\n return Readable.from(buffer)\r\n}\r\n\r\n// Creates a readable stream from a `Blob`.\r\nfunction createReadableStreamFromBlob(blob) {\r\n // I didn't test but I'd presume that Node.js would throw on an empty `Blob`\r\n // same way it does on an empty `Buffer`.\r\n // https://gitlab.com/catamphetamine/read-excel-file/-/issues/106\r\n if (blob.size === 0) {\r\n throw new Error('No data')\r\n }\r\n // Convert a web `ReadableStream` to a Node.js `Readable` `Stream`.\r\n return Readable.fromWeb(blob.stream())\r\n}"],"mappings":";;;;;;;AAAA,IAAAA,GAAA,GAAAC,sBAAA,CAAAC,OAAA;AACA,IAAAC,OAAA,GAAAD,OAAA;AACA,IAAAE,OAAA,GAAAC,uBAAA,CAAAH,OAAA;AAAyC,SAAAI,yBAAAC,WAAA,eAAAC,OAAA,kCAAAC,iBAAA,OAAAD,OAAA,QAAAE,gBAAA,OAAAF,OAAA,YAAAF,wBAAA,YAAAA,yBAAAC,WAAA,WAAAA,WAAA,GAAAG,gBAAA,GAAAD,iBAAA,KAAAF,WAAA;AAAA,SAAAF,wBAAAM,GAAA,EAAAJ,WAAA,SAAAA,WAAA,IAAAI,GAAA,IAAAA,GAAA,CAAAC,UAAA,WAAAD,GAAA,QAAAA,GAAA,aAAAE,OAAA,CAAAF,GAAA,yBAAAA,GAAA,uCAAAA,GAAA,UAAAG,KAAA,GAAAR,wBAAA,CAAAC,WAAA,OAAAO,KAAA,IAAAA,KAAA,CAAAC,GAAA,CAAAJ,GAAA,YAAAG,KAAA,CAAAE,GAAA,CAAAL,GAAA,SAAAM,MAAA,WAAAC,qBAAA,GAAAC,MAAA,CAAAC,cAAA,IAAAD,MAAA,CAAAE,wBAAA,WAAAC,GAAA,IAAAX,GAAA,QAAAW,GAAA,kBAAAH,MAAA,CAAAI,SAAA,CAAAC,cAAA,CAAAC,IAAA,CAAAd,GAAA,EAAAW,GAAA,SAAAI,IAAA,GAAAR,qBAAA,GAAAC,MAAA,CAAAE,wBAAA,CAAAV,GAAA,EAAAW,GAAA,cAAAI,IAAA,KAAAA,IAAA,CAAAV,GAAA,IAAAU,IAAA,CAAAC,GAAA,KAAAR,MAAA,CAAAC,cAAA,CAAAH,MAAA,EAAAK,GAAA,EAAAI,IAAA,YAAAT,MAAA,CAAAK,GAAA,IAAAX,GAAA,CAAAW,GAAA,SAAAL,MAAA,cAAAN,GAAA,MAAAG,KAAA,IAAAA,KAAA,CAAAa,GAAA,CAAAhB,GAAA,EAAAM,MAAA,YAAAA,MAAA;AAAA,SAAAhB,uBAAAU,GAAA,WAAAA,GAAA,IAAAA,GAAA,CAAAC,UAAA,GAAAD,GAAA,gBAAAA,GAAA;AAEzC;AACA;AACA;AACA;AACA;AACe,SAASiB,wBAAwBA,CAACC,KAAK,EAAE;EACtD,OAAOA,KAAK,YAAYC,kBAAM,GAC1BD,KAAK,GAELA,KAAK,YAAYE,MAAM,GACnBC,8BAA8B,CAACH,KAAK,CAAC,GAErCA,KAAK,YAAYI,YAAI,GACjBC,4BAA4B,CAACL,KAAK,CAAC
,GACnCM,cAAE,CAACC,gBAAgB,CAACP,KAAK,CAElC;AACL;;AAEA;AACA,SAASG,8BAA8BA,CAACK,MAAM,EAAE;EAC9C;EACA;EACA;EACA,IAAIA,MAAM,CAACC,MAAM,KAAK,CAAC,EAAE;IACvB,MAAM,IAAIC,KAAK,CAAC,SAAS,CAAC;EAC5B;EACA,OAAOC,gBAAQ,CAACC,IAAI,CAACJ,MAAM,CAAC;AAC9B;;AAEA;AACA,SAASH,4BAA4BA,CAACQ,IAAI,EAAE;EAC1C;EACA;EACA;EACA,IAAIA,IAAI,CAACC,IAAI,KAAK,CAAC,EAAE;IACnB,MAAM,IAAIJ,KAAK,CAAC,SAAS,CAAC;EAC5B;EACA;EACA,OAAOC,gBAAQ,CAACI,OAAO,CAACF,IAAI,CAACG,MAAM,CAAC,CAAC,CAAC;AACxC"}
1
+ {"version":3,"file":"convertInputToNodeStream.js","names":["_nodeFs","_interopRequireDefault","require","_nodeBuffer","_nodeStream","_interopRequireWildcard","_getRequireWildcardCache","nodeInterop","WeakMap","cacheBabelInterop","cacheNodeInterop","obj","__esModule","_typeof","cache","has","get","newObj","hasPropertyDescriptor","Object","defineProperty","getOwnPropertyDescriptor","key","prototype","hasOwnProperty","call","desc","set","convertInputToNodeStream","input","Stream","Buffer","createReadableStreamFromBuffer","Blob","createReadableStreamFromBlob","fs","createReadStream","buffer","length","Error","Readable","from","blob","size","fromWeb","stream"],"sources":["../../source/export/convertInputToNodeStream.js"],"sourcesContent":["import fs from 'node:fs'\r\nimport { Blob } from 'node:buffer'\r\nimport Stream, { Readable } from 'node:stream'\r\n\r\n/**\r\n * Converts Node.js input argument to a stream.\r\n * @param {(string|Stream|Buffer|Blob)} input - A Node.js readable stream or a `Buffer` or a `Blob` or a path to a file.\r\n * @returns {Stream}\r\n */\r\nexport default function convertInputToNodeStream(input) {\r\n return input instanceof Stream\r\n ? input\r\n : (\r\n input instanceof Buffer\r\n ? createReadableStreamFromBuffer(input)\r\n : (\r\n input instanceof Blob\r\n ? createReadableStreamFromBlob(input)\r\n : fs.createReadStream(input)\r\n )\r\n )\r\n}\r\n\r\n// Creates a readable stream from a `Buffer`.\r\nfunction createReadableStreamFromBuffer(buffer) {\r\n // Node.js seems to have a bug in `Readable.from()` function:\r\n // it doesn't correctly handle empty buffers, i.e. 
it doesn't return a correct stream.\r\n // https://gitlab.com/catamphetamine/read-excel-file/-/issues/106\r\n if (buffer.length === 0) {\r\n throw new Error('No data')\r\n }\r\n return Readable.from(buffer)\r\n}\r\n\r\n// Creates a readable stream from a `Blob`.\r\nfunction createReadableStreamFromBlob(blob) {\r\n // I didn't test but I'd presume that Node.js would throw on an empty `Blob`\r\n // same way it does on an empty `Buffer`.\r\n // https://gitlab.com/catamphetamine/read-excel-file/-/issues/106\r\n if (blob.size === 0) {\r\n throw new Error('No data')\r\n }\r\n // Convert a web `ReadableStream` to a Node.js `Readable` `Stream`.\r\n return Readable.fromWeb(blob.stream())\r\n}"],"mappings":";;;;;;;AAAA,IAAAA,OAAA,GAAAC,sBAAA,CAAAC,OAAA;AACA,IAAAC,WAAA,GAAAD,OAAA;AACA,IAAAE,WAAA,GAAAC,uBAAA,CAAAH,OAAA;AAA8C,SAAAI,yBAAAC,WAAA,eAAAC,OAAA,kCAAAC,iBAAA,OAAAD,OAAA,QAAAE,gBAAA,OAAAF,OAAA,YAAAF,wBAAA,YAAAA,yBAAAC,WAAA,WAAAA,WAAA,GAAAG,gBAAA,GAAAD,iBAAA,KAAAF,WAAA;AAAA,SAAAF,wBAAAM,GAAA,EAAAJ,WAAA,SAAAA,WAAA,IAAAI,GAAA,IAAAA,GAAA,CAAAC,UAAA,WAAAD,GAAA,QAAAA,GAAA,aAAAE,OAAA,CAAAF,GAAA,yBAAAA,GAAA,uCAAAA,GAAA,UAAAG,KAAA,GAAAR,wBAAA,CAAAC,WAAA,OAAAO,KAAA,IAAAA,KAAA,CAAAC,GAAA,CAAAJ,GAAA,YAAAG,KAAA,CAAAE,GAAA,CAAAL,GAAA,SAAAM,MAAA,WAAAC,qBAAA,GAAAC,MAAA,CAAAC,cAAA,IAAAD,MAAA,CAAAE,wBAAA,WAAAC,GAAA,IAAAX,GAAA,QAAAW,GAAA,kBAAAH,MAAA,CAAAI,SAAA,CAAAC,cAAA,CAAAC,IAAA,CAAAd,GAAA,EAAAW,GAAA,SAAAI,IAAA,GAAAR,qBAAA,GAAAC,MAAA,CAAAE,wBAAA,CAAAV,GAAA,EAAAW,GAAA,cAAAI,IAAA,KAAAA,IAAA,CAAAV,GAAA,IAAAU,IAAA,CAAAC,GAAA,KAAAR,MAAA,CAAAC,cAAA,CAAAH,MAAA,EAAAK,GAAA,EAAAI,IAAA,YAAAT,MAAA,CAAAK,GAAA,IAAAX,GAAA,CAAAW,GAAA,SAAAL,MAAA,cAAAN,GAAA,MAAAG,KAAA,IAAAA,KAAA,CAAAa,GAAA,CAAAhB,GAAA,EAAAM,MAAA,YAAAA,MAAA;AAAA,SAAAhB,uBAAAU,GAAA,WAAAA,GAAA,IAAAA,GAAA,CAAAC,UAAA,GAAAD,GAAA,gBAAAA,GAAA;AAE9C;AACA;AACA;AACA;AACA;AACe,SAASiB,wBAAwBA,CAACC,KAAK,EAAE;EACtD,OAAOA,KAAK,YAAYC,sBAAM,GAC1BD,KAAK,GAELA,KAAK,YAAYE,MAAM,GACnBC,8BAA8B,CAACH,KAAK,CAAC,GAErCA,KAAK,YAAYI,gBAAI,GACjBC,4BAA4B,CAACL,KAAK,CAA
C,GACnCM,kBAAE,CAACC,gBAAgB,CAACP,KAAK,CAElC;AACL;;AAEA;AACA,SAASG,8BAA8BA,CAACK,MAAM,EAAE;EAC9C;EACA;EACA;EACA,IAAIA,MAAM,CAACC,MAAM,KAAK,CAAC,EAAE;IACvB,MAAM,IAAIC,KAAK,CAAC,SAAS,CAAC;EAC5B;EACA,OAAOC,oBAAQ,CAACC,IAAI,CAACJ,MAAM,CAAC;AAC9B;;AAEA;AACA,SAASH,4BAA4BA,CAACQ,IAAI,EAAE;EAC1C;EACA;EACA;EACA,IAAIA,IAAI,CAACC,IAAI,KAAK,CAAC,EAAE;IACnB,MAAM,IAAIJ,KAAK,CAAC,SAAS,CAAC;EAC5B;EACA;EACA,OAAOC,oBAAQ,CAACI,OAAO,CAACF,IAAI,CAACG,MAAM,CAAC,CAAC,CAAC;AACxC"}
@@ -0,0 +1,209 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports["default"] = unzipFromStream;
7
+ var _fflate = require("fflate");
8
+ var _nodeBuffer = require("node:buffer");
9
+ // `fflate` uses a javascript-only implementation of `.zip` compression/decompression.
10
+ // This means that it could likely be less performant than Node.js's "native" `zlib` module.
11
+
12
+ // This code was originally submitted by Stian Jensen.
13
+ // https://github.com/catamphetamine/read-excel-file/pull/122
14
+
15
+ // A `*.zip` file consists of individual file entries with the "total" summary section
16
+ // placed at the end of the file rather than at the start of it, which was originally done
17
+ // to allow for easy append of data to a given `.zip` file.
18
+ // https://en.wikipedia.org/wiki/ZIP_(file_format)
19
+ //
20
+ // But this also means that reading a `*.zip` file from a stream can't really be done
21
+ // using the "officially recommended" way of first reading the "total" summary section
22
+ // and only then reading the individual file entries specified in that summary section.
23
+ //
24
+ // So in order to be able to read a `*.zip` file from a stream, some corners have to be cut.
25
+ // For example, the "total" summary section is completely ignored and instead the reader
26
+ // should adopt "data recovery" software approach — it should proactively "scan" the input stream
27
+ // for individual file entries and handle them one-by-one as they come.
28
+ //
29
+ // Such an approach doesn't seem to contradict the XLSX specification
30
+ // because an `*.xlsx` file is supposed to be a normal `.zip` archive
31
+ // without any "trickery" such as "deleted" files or "garbage" data
32
+ // hiding under the hood.
33
+ //
34
+ // So when handling `*.xlsx` file, we assume that each such file must start
35
+ // with an individual file entry followed by another individual file entry, etc.
36
+ //
37
+ // When the "summary" section is reached, we assume that the archive has ended.
38
+ //
39
+ // To read a `.zip` archive, the code uses `fflate`'s `Unzip` class
40
+ // with `UnzipInflate` decompression implementation to decompress the data
41
+ // that was previously compressed using `DEFLATE` compressing algorithm,
42
+ // which is what `*.xlsx` files use.
43
+ //
44
+ // The `Unzip` class doesn't speak the Node.js stream interface, and `fflate`'s readme
45
+ // doesn't include a clear "reading a `.zip` file from a Node.js stream" section.
46
+ // https://github.com/101arrowz/fflate/issues/251
47
+ // Instead, the `Unzip` class has its own `push(chunk)` / `onfile` / `entry.ondata` protocol.
48
+ // This code reads the binary input stream and forwards each chunk of it to `unzip.push()`,
49
+ // and then collects the decompressed file entries.
50
+ //
51
+ // P.S. In the comments to `UnzipInflate` in `fflate` package, it says:
52
+ // "Streaming DEFLATE decompression for ZIP archives. Prefer AsyncZipInflate for better performance."
53
+ // But there seems to be no `AsyncZipInflate` class in the `fflate` package.
54
+ // https://github.com/101arrowz/fflate/issues/277
55
+ // So just the regular `UnzipInflate` is used here.
56
+ //
57
+
58
/**
 * Unpacks a `*.zip` archive that is being read from a Node.js stream.
 *
 * The stream's chunks are forwarded to `fflate`'s `Unzip` instance as they arrive,
 * and every decompressed file entry is collected into a single result object.
 *
 * @param {Stream} stream - The input stream. It is subscribed to via `error`, `data` and `end` events.
 * @param {object} [options]
 * @param {function} [options.filter] - Called with `{ path }` for each file entry. When it returns a falsy value, the entry is skipped.
 * @return {Promise<Record<string,Buffer>>} Resolves to an object that maps entry names to their decompressed contents. Rejects with the first error encountered.
 */
function unzipFromStream(stream) {
  // The options are read synchronously, before the `Promise` is created.
  var options = arguments[1] === undefined ? {} : arguments[1];
  var shouldInclude = options.filter;

  // Decompressed entries, keyed by entry name.
  var entriesByPath = {};

  return new Promise(function (resolve, reject) {
    // Becomes `true` after the first failure. Everything after that is ignored.
    var failed = false;

    // Rejects the promise, but only for the first reported error.
    function fail(reason) {
      if (failed) {
        return;
      }
      failed = true;
      reject(reason);
    }

    // The validator reports whether the input starts like a `.zip` archive.
    var validator = createZipFileValidator(function (looksLikeZip) {
      if (looksLikeZip) {
        return;
      }
      fail(new Error('Invalid `.zip` archive'));
    });
    var validateChunk = validator.validateChunk;

    // Called by `Unzip` for each entry it discovers in the input data.
    function collectEntry(entry) {
      // Nothing is collected after a failure.
      if (failed) {
        return;
      }

      var path = entry.name;

      // A name ending with a slash is a directory entry. Only files are collected.
      if (path.endsWith('/')) {
        return;
      }

      // An entry rejected by the filter is never started, so its data is not decompressed here.
      var skipped = Boolean(shouldInclude) && !shouldInclude({
        path: path
      });
      if (skipped) {
        return;
      }

      var pieces = [];

      // Receives the decompressed pieces of this entry one by one.
      // The last call has `isFinal === true`, at which point the pieces are joined.
      entry.ondata = function (error, piece, isFinal) {
        if (error) {
          fail(error);
          return;
        }
        pieces.push(piece);
        if (isFinal) {
          entriesByPath[path] = _nodeBuffer.Buffer.concat(pieces);
        }
      };

      // Begin decompressing the entry.
      entry.start();
    }

    var unzip = new _fflate.Unzip(collectEntry);

    // `UnzipInflate` is the decompressor for `DEFLATE`-compressed entries,
    // which is the compression that `.xlsx` files use.
    unzip.register(_fflate.UnzipInflate);

    // Handles the next chunk of the input stream.
    function onChunk(chunk) {
      if (failed) {
        return;
      }
      // The validator may report the archive as invalid, which sets `failed`.
      validateChunk(chunk);
      if (failed) {
        return;
      }
      // `.push()` is synchronous: when it returns, the entries completed
      // by this chunk are already present in the result object.
      try {
        unzip.push(chunk, false);
      } catch (error) {
        fail(error);
      }
    }

    // Handles the end of the input stream.
    function onEnd() {
      // After a failure the promise has already been rejected.
      if (failed) {
        return;
      }
      try {
        // An empty final chunk tells `Unzip` that the archive has ended.
        unzip.push(new Uint8Array(0), true);
        resolve(entriesByPath);
      } catch (error) {
        fail(error);
      }
    }

    // Input stream errors (for example, a file read error) reject the promise as well.
    stream.on('error', fail).on('data', onChunk).on('end', onEnd);
  });
}
175
+
176
+ // Every section in a `.zip` archive is marked with 4 bytes, the first two of which
177
+ // are `0x50` and `0x4B`, which reads "PK", referencing the initials of the inventor Phil Katz.
178
+ //
179
+ // It looks like `fflate`'s `Unzip` doesn't ever complain about whatever data is thrown at it.
180
+ // Due to how `.zip` file format is defined, "garbage" data could be placed at various
181
+ // places in it and it'd still be a valid `.zip` archive. It's likely that for this reason
182
+ // `fflate` doesn't ever complain and simply emits no entries when fed any kind of invalid data.
183
+ //
184
+ // In order to introduce some basic validation, here we specifically demand
185
+ // that a `.zip` archive must at least start with an individual file entry
186
+ // because `.xlsx` file creator software really shouldn't attempt doing
187
+ // anything "funny" when writing a file, hence this adherence requirement.
188
+ //
189
/**
 * Creates a validator that checks that the input data starts with the two bytes "PK".
 *
 * The first two bytes are collected across however many chunks it takes to receive them.
 * For example, a 1-byte first chunk followed by a larger second chunk is handled correctly.
 *
 * @param {function} onValidationResult - Called exactly once, with `true` or `false`, as soon as the first two bytes of the input are known. Never called if fewer than two bytes are ever passed.
 * @return {{ validateChunk: function }} An object with a `validateChunk(chunk)` function that should be called with each chunk of the input, in order.
 */
function createZipFileValidator(onValidationResult) {
  // 0x50 0x4B reads "PK".
  var signature = [0x50, 0x4B];
  var firstBytes = [];
  return {
    validateChunk: function validateChunk(chunk) {
      // The result has already been reported: nothing left to do.
      if (firstBytes.length >= signature.length) {
        return;
      }
      // Take only as many bytes as are still missing. Bounding the loop by
      // `firstBytes.length` rather than by the index into the chunk is what makes
      // this work when the signature is split between chunks.
      var i = 0;
      while (i < chunk.length && firstBytes.length < signature.length) {
        firstBytes.push(chunk[i]);
        i++;
      }
      if (firstBytes.length === signature.length) {
        var isValid = firstBytes[0] === signature[0] && firstBytes[1] === signature[1];
        onValidationResult(isValid);
      }
    }
  };
}
209
+ //# sourceMappingURL=unzipFromStream.fflate.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"unzipFromStream.fflate.js","names":["_fflate","require","_nodeBuffer","unzipFromStream","stream","_ref","arguments","length","undefined","filter","files","Promise","resolve","reject","errored","onError","error","_createZipFileValidat","createZipFileValidator","isValid","Error","validateChunk","unzip","Unzip","entry","name","endsWith","path","chunks","ondata","chunk","isLast","push","Buffer","concat","start","register","UnzipInflate","on","Uint8Array","onValidationResult","firstBytesCount","firstBytes","firstBytesCheckResult","i"],"sources":["../../source/zip/unzipFromStream.fflate.js"],"sourcesContent":["// `fflate` uses a javascript-only implementation of `.zip` compression/decompression.\r\n// This means that it could likely be less performant than Node.js's \"native\" `zlib` module`.\r\n\r\n// This code was originally submitted by Stian Jensen.\r\n// https://github.com/catamphetamine/read-excel-file/pull/122\r\n\r\n// A `*.zip` file consists of individual file entries with the \"total\" summary section\r\n// placed at the end of the file rather than at the start of it, which was originally done\r\n// to allow for easy append of data to a given `.zip` file.\r\n// https://en.wikipedia.org/wiki/ZIP_(file_format)\r\n//\r\n// But this also means that reading a `*.zip` file from a stream can't really be done\r\n// using the \"officially recommended\" way of first reading the \"total\" summary section\r\n// and only then reading the individual file entries specified in that summary section.\r\n//\r\n// So in order to be able to read a `*.zip` file from a stream, some corners have to be cut.\r\n// For example, the \"total\" summary section is completely ignored and instead the reader\r\n// should adopt \"data recovery\" software approach — it should proactively \"scan\" the input stream\r\n// for individual file entries and handle them one-by-one as they come.\r\n//\r\n// Such approach doesn't seem to contradict with the XLSX specification\r\n// 
because an `*.xlsx` files is supposed to be a normal `.zip` archive\r\n// without any \"trickery\" such as \"deleted\" files or \"garbage\" data\r\n// hiding under the hood.\r\n//\r\n// So when handling `*.xlsx` file, we assume that each such file must start\r\n// with an individual file entry followed by another individual file entry, etc.\r\n//\r\n// When the \"summary\" section is reached, we assume that the archive has ended.\r\n//\r\n// To read a `.zip` archive, the code uses `fflate`'s `Unzip` class\r\n// with `UnzipInflate` decompression implementation to decompress the data\r\n// that was previously compressed using `DEFLATE` compressing algorithm,\r\n// which is what `*.xlsx` files use.\r\n//\r\n// The `Unzip` class doesn't speak the Node.js stream interface, and `fflate`'s readme\r\n// doesn't include a clear \"reading a `.zip` file from a Node.js stream\" section.\r\n// https://github.com/101arrowz/fflate/issues/251\r\n// Instead, the `Unzip` class has its own `push(chunk)` / `onfile` / `entry.ondata` protocol.\r\n// This code reads the binary input stream and forwards each chunk of it to `unzip.push()`,\r\n// and then collects the decompressed file entries.\r\n//\r\n// P.S. In the comments to `UnzipInflate` in `fflate` package, it says:\r\n// \"Streaming DEFLATE decompression for ZIP archives. Prefer AsyncZipInflate for better performance.\"\r\n// But there seems to be no `AsyncZipInflate` class in the `fflate` package.\r\n// https://github.com/101arrowz/fflate/issues/277\r\n// So just the regular `UnzipInflate` is used here.\r\n//\r\nimport { Unzip, UnzipInflate } from 'fflate'\r\n\r\nimport { Buffer } from 'node:buffer'\r\n\r\n/**\r\n * Reads `*.zip` file contents.\r\n * @param {Stream} stream\r\n * @return {Promise<Record<string,Buffer>>} Resolves to an object holding `*.zip` file entries. P.S. 
`Buffer` is a `Uint8Array`.\r\n */\r\nexport default function unzipFromStream(stream, { filter } = {}) {\r\n\t// The `files` object stores the files and their contents.\r\n\tconst files = {}\r\n\r\n\treturn new Promise((resolve, reject) => {\r\n\t\tlet errored = false\r\n\r\n\t\tconst onError = (error) => {\r\n\t\t\tif (!errored) {\r\n\t\t\t\terrored = true\r\n\t\t\t\treject(error)\r\n\t\t\t}\r\n\t\t}\r\n\r\n\t\tconst { validateChunk } = createZipFileValidator((isValid) => {\r\n\t\t\tif (!isValid) {\r\n\t\t\t\tonError(new Error('Invalid `.zip` archive'))\r\n\t\t\t}\r\n\t\t})\r\n\r\n\t\t// `Unzip` discovers each individual file entry in the input data stream\r\n\t\t// and then calls the callback function for each such entry.\r\n\t\tconst unzip = new Unzip((entry) => {\r\n\t\t\t// If there already was an error while reading this `.zip` file,\r\n\t\t\t// ignore any follow-up entries.\r\n\t\t\tif (errored) {\r\n\t\t\t\treturn\r\n\t\t\t}\r\n\r\n\t\t\t// Skip directory entries (their names end with a slash).\r\n\t\t\t// Only files are of any interest.\r\n\t\t\tif (entry.name.endsWith('/')) {\r\n\t\t\t\treturn\r\n\t\t\t}\r\n\r\n\t\t\t// See if this file should be ignored.\r\n\t\t\t// If it should, this entry won't be processed, i.e. 
`Unzip` will not try\r\n\t\t\t// to decompress its data, and will just discard it.\r\n\t\t\tif (filter && !filter({ path: entry.name })) {\r\n\t\t\t\treturn\r\n\t\t\t}\r\n\r\n\t\t\tconst chunks = []\r\n\r\n\t\t\t// `entry.ondata` is called with each decompressed chunk of the entry,\r\n\t\t\t// and a final time with `isLast === true` once the entry is complete.\r\n\t\t\tentry.ondata = (error, chunk, isLast) => {\r\n\t\t\t\tif (error) {\r\n\t\t\t\t\treturn onError(error)\r\n\t\t\t\t}\r\n\t\t\t\tchunks.push(chunk)\r\n\t\t\t\tif (isLast) {\r\n\t\t\t\t\tfiles[entry.name] = Buffer.concat(chunks)\r\n\t\t\t\t}\r\n\t\t\t}\r\n\r\n\t\t\t// Start decompressing this entry.\r\n\t\t\tentry.start()\r\n\t\t})\r\n\r\n\t\t// Register the decompressor for the data that was compressed using\r\n\t\t// `DEFLATE` compression algorithm (compression method `8`),\r\n\t\t// which is what `.xlsx` files use.\r\n\t\tunzip.register(UnzipInflate)\r\n\r\n\t\tstream\r\n\t\t\t// Catch errors emitted from the input stream (for example, a file read error).\r\n\t\t\t.on('error', onError)\r\n\t\t\t// When another chunk of data is read from the input stream.\r\n\t\t\t.on('data', (chunk) => {\r\n\t\t\t\t// If there already was an error while reading this `.zip` file,\r\n\t\t\t\t// ignore any follow-up data chunks.\r\n\t\t\t\tif (errored) {\r\n\t\t\t\t\treturn\r\n\t\t\t\t}\r\n\t\t\t\t// Validate the `.zip` archive as its data comes through.\r\n\t\t\t\tvalidateChunk(chunk)\r\n\t\t\t\t// If the `.zip` archive is found to be invalid, stop any further\r\n\t\t\t\t// processing of it.\r\n\t\t\t\tif (errored) {\r\n\t\t\t\t\treturn\r\n\t\t\t\t}\r\n\t\t\t\t// Push the next data chunk to `fflate`'s `Unzip` class instance.\r\n\t\t\t\t// The `.push()` function is synchronous, meaning that by the time it returns,\r\n\t\t\t\t// any complete files entries encountered so far have already been decompressed\r\n\t\t\t\t// and populated in the `files` object.\r\n\t\t\t\ttry {\r\n\t\t\t\t\tunzip.push(chunk, false)\r\n\t\t\t\t} 
catch (error) {\r\n\t\t\t\t\tonError(error)\r\n\t\t\t\t}\r\n\t\t\t})\r\n\t\t\t// When there's no more data in the input stream to consume,\r\n\t\t\t// finish reading the `.zip` archive.\r\n\t\t\t.on('end', () => {\r\n\t\t\t\t// If there were any errors when reading the `.zip` archive,\r\n\t\t\t\t// don't `resolve()` with anything.\r\n\t\t\t\tif (errored) {\r\n\t\t\t\t\treturn\r\n\t\t\t\t}\r\n\t\t\t\ttry {\r\n\t\t\t\t\t// Signal the end of the archive to `fflate`'s `Unzip` class instance.\r\n\t\t\t\t\t// It will flush any remaining state in it.\r\n\t\t\t\t\tunzip.push(new Uint8Array(0), true)\r\n\t\t\t\t\t// Resolve with the unzipped files.\r\n\t\t\t\t\tresolve(files)\r\n\t\t\t\t} catch (error) {\r\n\t\t\t\t\tonError(error)\r\n\t\t\t\t}\r\n\t\t\t})\r\n\t})\r\n}\r\n\r\n// Every section in a `.zip` archive is marked with 4 bytes, the first two of which\r\n// are `0x50` and `0x4B`, which reads \"PK\", referencing the initials of the inventor Phil Katz.\r\n//\r\n// It looks like `fflate`'s `Unzip` doesn't ever complain about whatever data is thrown at it.\r\n// Due to how `.zip` file format is defined, \"garbage\" data could be placed at various\r\n// places in it and it'd still be a valid `.zip` archive. 
It's likely that for this reason\r\n// `fflate` doesn't ever complain and simply emits no entries when fed any kind of invalid data.\r\n//\r\n// In order to introduce some basic validation, here we specifically demand\r\n// that a `.zip` archive must at least start with an individual file entry\r\n// because an `.xlsx` file creator softwared really shouldn't attempt doing\r\n// anything \"funny\" when writing a file, hence this adherence requirement.\r\n//\r\nfunction createZipFileValidator(onValidationResult) {\r\n\tconst firstBytesCount = 2\r\n\tconst firstBytes = []\r\n\tlet firstBytesCheckResult\r\n\treturn {\r\n\t\tvalidateChunk(chunk) {\r\n\t\t\tif (firstBytes.length < 2) {\r\n\t\t\t\tlet i = 0\r\n\t\t\t\twhile (i < chunk.length && i < firstBytesCount) {\r\n\t\t\t\t\tfirstBytes.push(chunk[i])\r\n\t\t\t\t\ti++\r\n\t\t\t\t}\r\n\t\t\t\tif (firstBytes.length === 2) {\r\n\t\t\t\t\tconst isValid = firstBytes[0] === 0x50 && firstBytes[1] === 0x4B\r\n\t\t\t\t\tonValidationResult(isValid)\r\n\t\t\t\t}\r\n\t\t\t}\r\n\t\t}\r\n\t}\r\n}"],"mappings":";;;;;;AAgDA,IAAAA,OAAA,GAAAC,OAAA;AAEA,IAAAC,WAAA,GAAAD,OAAA;AAlDA;AACA;;AAEA;AACA;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;;AAKA;AACA;AACA;AACA;AACA;AACe,SAASE,eAAeA,CAACC,MAAM,EAAmB;EAAA,IAAAC,IAAA,GAAAC,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAJ,CAAC,CAAC;IAAbG,MAAM,GAAAJ,IAAA,CAANI,MAAM;EACvD;EACA,IAAMC,KAAK,GAAG,CAAC,CAAC;EAEhB,OAAO,IAAIC,OAAO,CAAC,UAACC,OAAO,EAAEC,MAAM,EAAK;IACvC,IAAIC,OAAO,GAAG,KAAK;IAEnB,IAAMC,OAAO,GAAG,SAAVA,OAAOA,CAAIC,KAAK,EAAK;MAC1B,IAAI,CAACF,OAAO,EAAE;QACbA,OAAO,GAAG,IAAI;QACdD,MAAM,CAACG,KAAK,CAAC;MACd;IACD,CAAC;IAED,IAAAC,qBAAA,GAA0BC,sBAAsB,CAAC,UAACC,OAAO,EAAK;QAC7D,IAAI,CAACA,OAAO,EAAE;UACbJ,OAAO,CAAC,IAAIK,KAAK,CAAC,wBAAwB,CAAC,CAAC;QAC7C;MACD,CAAC,CAAC;MAJMC,aAAa,GAAAJ,qBAAA,CAAbI,aAAa;;IAMrB;IACA;IACA,IAAMC,KAA
K,GAAG,IAAIC,aAAK,CAAC,UAACC,KAAK,EAAK;MAClC;MACA;MACA,IAAIV,OAAO,EAAE;QACZ;MACD;;MAEA;MACA;MACA,IAAIU,KAAK,CAACC,IAAI,CAACC,QAAQ,CAAC,GAAG,CAAC,EAAE;QAC7B;MACD;;MAEA;MACA;MACA;MACA,IAAIjB,MAAM,IAAI,CAACA,MAAM,CAAC;QAAEkB,IAAI,EAAEH,KAAK,CAACC;MAAK,CAAC,CAAC,EAAE;QAC5C;MACD;MAEA,IAAMG,MAAM,GAAG,EAAE;;MAEjB;MACA;MACAJ,KAAK,CAACK,MAAM,GAAG,UAACb,KAAK,EAAEc,KAAK,EAAEC,MAAM,EAAK;QACxC,IAAIf,KAAK,EAAE;UACV,OAAOD,OAAO,CAACC,KAAK,CAAC;QACtB;QACAY,MAAM,CAACI,IAAI,CAACF,KAAK,CAAC;QAClB,IAAIC,MAAM,EAAE;UACXrB,KAAK,CAACc,KAAK,CAACC,IAAI,CAAC,GAAGQ,kBAAM,CAACC,MAAM,CAACN,MAAM,CAAC;QAC1C;MACD,CAAC;;MAED;MACAJ,KAAK,CAACW,KAAK,CAAC,CAAC;IACd,CAAC,CAAC;;IAEF;IACA;IACA;IACAb,KAAK,CAACc,QAAQ,CAACC,oBAAY,CAAC;IAE5BjC;IACC;IAAA,CACCkC,EAAE,CAAC,OAAO,EAAEvB,OAAO;IACpB;IAAA,CACCuB,EAAE,CAAC,MAAM,EAAE,UAACR,KAAK,EAAK;MACtB;MACA;MACA,IAAIhB,OAAO,EAAE;QACZ;MACD;MACA;MACAO,aAAa,CAACS,KAAK,CAAC;MACpB;MACA;MACA,IAAIhB,OAAO,EAAE;QACZ;MACD;MACA;MACA;MACA;MACA;MACA,IAAI;QACHQ,KAAK,CAACU,IAAI,CAACF,KAAK,EAAE,KAAK,CAAC;MACzB,CAAC,CAAC,OAAOd,KAAK,EAAE;QACfD,OAAO,CAACC,KAAK,CAAC;MACf;IACD,CAAC;IACD;IACA;IAAA,CACCsB,EAAE,CAAC,KAAK,EAAE,YAAM;MAChB;MACA;MACA,IAAIxB,OAAO,EAAE;QACZ;MACD;MACA,IAAI;QACH;QACA;QACAQ,KAAK,CAACU,IAAI,CAAC,IAAIO,UAAU,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC;QACnC;QACA3B,OAAO,CAACF,KAAK,CAAC;MACf,CAAC,CAAC,OAAOM,KAAK,EAAE;QACfD,OAAO,CAACC,KAAK,CAAC;MACf;IACD,CAAC,CAAC;EACJ,CAAC,CAAC;AACH;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAASE,sBAAsBA,CAACsB,kBAAkB,EAAE;EACnD,IAAMC,eAAe,GAAG,CAAC;EACzB,IAAMC,UAAU,GAAG,EAAE;EACrB,IAAIC,qBAAqB;EACzB,OAAO;IACNtB,aAAa,WAAAA,cAACS,KAAK,EAAE;MACpB,IAAIY,UAAU,CAACnC,MAAM,GAAG,CAAC,EAAE;QAC1B,IAAIqC,CAAC,GAAG,CAAC;QACT,OAAOA,CAAC,GAAGd,KAAK,CAACvB,MAAM,IAAIqC,CAAC,GAAGH,eAAe,EAAE;UAC/CC,UAAU,CAACV,IAAI,CAACF,KAAK,CAACc,CAAC,CAAC,CAAC;UACzBA,CAAC,EAAE;QACJ;QACA,IAAIF,UAAU,CAACnC,MAAM,KAAK,CAAC,EAAE;UAC5B,IAAMY,OAAO,GAAGuB,UAAU,CAAC,CAAC,CAAC,KAAK,IAAI,IAAIA,UAAU,CAAC,CAAC,CAAC,KAAK,IAAI;UAChEF,kBAAkB,CAACrB,OAAO,CAAC;QAC5B;MACD;I
ACD;EACD,CAAC;AACF"}
@@ -3,198 +3,12 @@
3
3
  Object.defineProperty(exports, "__esModule", {
4
4
  value: true
5
5
  });
6
- exports["default"] = unzipFromStream;
7
- var _fflate = require("fflate");
8
- var _buffer = require("buffer");
9
- // This code was originally submitted by Stian Jensen.
10
- // https://github.com/catamphetamine/read-excel-file/pull/122
11
-
12
- // A `*.zip` file consists of individual file entries with the "total" summary section
13
- // placed at the end of the file rather than at the start of it, which was originally done
14
- // to allow for easy append of data to a given `.zip` file.
15
- // https://en.wikipedia.org/wiki/ZIP_(file_format)
16
- //
17
- // But this also means that reading a `*.zip` file from a stream can't really be done
18
- // using the "officially recommended" way of first reading the "total" summary section
19
- // and only then reading the individual file entries specified in that summary section.
20
- //
21
- // So in order to be able to read a `*.zip` file from a stream, some corners have to be cut.
22
- // For example, the "total" summary section is completely ignored and instead the reader
23
- // should adopt "data recovery" software approach — it should proactively "scan" the input stream
24
- // for individual file entries and handle them one-by-one as they come.
25
- //
26
- // Such approach doesn't seem to contradict with the XLSX specification
27
- // because an `*.xlsx` files is supposed to be a normal `.zip` archive
28
- // without any "trickery" such as "deleted" files or "garbage" data
29
- // hiding under the hood.
30
- //
31
- // So when handling `*.xlsx` file, we assume that each such file must start
32
- // with an individual file entry followed by another individual file entry, etc.
33
- //
34
- // When the "summary" section is reached, we assume that the archive has ended.
35
- //
36
- // To read a `.zip` archive, the code uses `fflate`'s `Unzip` class
37
- // with `UnzipInflate` decompression implementation to decompress the data
38
- // that was previously compressed using `DEFLATE` compressing algorithm,
39
- // which is what `*.xlsx` files use.
40
- //
41
- // The `Unzip` class doesn't speak the Node.js stream interface, and `fflate`'s readme
42
- // doesn't include a clear "reading a `.zip` file from a Node.js stream" section.
43
- // https://github.com/101arrowz/fflate/issues/251
44
- // Instead, the `Unzip` class has its own `push(chunk)` / `onfile` / `entry.ondata` protocol.
45
- // This code reads the binary input stream and forwards each chunk of it to `unzip.push()`,
46
- // and then collects the decompressed file entries.
47
- //
48
-
49
- /**
50
- * Reads `*.zip` file contents.
51
- * @param {Stream} stream
52
- * @return {Promise<Record<string,Buffer>>} Resolves to an object holding `*.zip` file entries. P.S. `Buffer` is a `Uint8Array`.
53
- */
54
- function unzipFromStream(stream) {
55
- var _ref = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {},
56
- filter = _ref.filter;
57
- // The `files` object stores the files and their contents.
58
- var files = {};
59
- return new Promise(function (resolve, reject) {
60
- var errored = false;
61
- var onError = function onError(error) {
62
- if (!errored) {
63
- errored = true;
64
- reject(error);
65
- }
66
- };
67
- var _createZipFileValidat = createZipFileValidator(function (isValid) {
68
- if (!isValid) {
69
- onError(new Error('Invalid `.zip` archive'));
70
- }
71
- }),
72
- validateChunk = _createZipFileValidat.validateChunk;
73
-
74
- // `Unzip` discovers each individual file entry in the input data stream
75
- // and then calls the callback function for each such entry.
76
- var unzip = new _fflate.Unzip(function (entry) {
77
- // If there already was an error while reading this `.zip` file,
78
- // ignore any follow-up entries.
79
- if (errored) {
80
- return;
81
- }
82
-
83
- // Skip directory entries (their names end with a slash).
84
- // Only files are of any interest.
85
- if (entry.name.endsWith('/')) {
86
- return;
87
- }
88
-
89
- // See if this file should be ignored.
90
- // If it should, this entry won't be processed, i.e. `Unzip` will not try
91
- // to decompress its data, and will just discard it.
92
- if (filter && !filter({
93
- path: entry.name
94
- })) {
95
- return;
96
- }
97
- var chunks = [];
98
-
99
- // `entry.ondata` is called with each decompressed chunk of the entry,
100
- // and a final time with `isLast === true` once the entry is complete.
101
- entry.ondata = function (error, chunk, isLast) {
102
- if (error) {
103
- return onError(error);
104
- }
105
- chunks.push(chunk);
106
- if (isLast) {
107
- files[entry.name] = _buffer.Buffer.concat(chunks);
108
- }
109
- };
110
-
111
- // Start decompressing this entry.
112
- entry.start();
113
- });
114
-
115
- // Register the decompressor for the data that was compressed using
116
- // `DEFLATE` compression algorithm (compression method `8`),
117
- // which is what `.xlsx` files use.
118
- unzip.register(_fflate.UnzipInflate);
119
- stream
120
- // Catch errors emitted from the input stream (for example, a file read error).
121
- .on('error', onError)
122
- // When another chunk of data is read from the input stream.
123
- .on('data', function (chunk) {
124
- // If there already was an error while reading this `.zip` file,
125
- // ignore any follow-up data chunks.
126
- if (errored) {
127
- return;
128
- }
129
- // Validate the `.zip` archive as its data comes through.
130
- validateChunk(chunk);
131
- // If the `.zip` archive is found to be invalid, stop any further
132
- // processing of it.
133
- if (errored) {
134
- return;
135
- }
136
- // Push the next data chunk to `fflate`'s `Unzip` class instance.
137
- // The `.push()` function is synchronous, meaning that by the time it returns,
138
- // any complete files entries encountered so far have already been decompressed
139
- // and populated in the `files` object.
140
- try {
141
- unzip.push(chunk, false);
142
- } catch (error) {
143
- onError(error);
144
- }
145
- })
146
- // When there's no more data in the input stream to consume,
147
- // finish reading the `.zip` archive.
148
- .on('end', function () {
149
- // If there were any errors when reading the `.zip` archive,
150
- // don't `resolve()` with anything.
151
- if (errored) {
152
- return;
153
- }
154
- try {
155
- // Signal the end of the archive to `fflate`'s `Unzip` class instance.
156
- // It will flush any remaining state in it.
157
- unzip.push(new Uint8Array(0), true);
158
- // Resolve with the unzipped files.
159
- resolve(files);
160
- } catch (error) {
161
- onError(error);
162
- }
163
- });
164
- });
165
- }
166
-
167
- // Every section in a `.zip` archive is marked with 4 bytes, the first two of which
168
- // are `0x50` and `0x4B`, which reads "PK", referencing the initials of the inventor Phil Katz.
169
- //
170
- // It looks like `fflate`'s `Unzip` doesn't ever complain about whatever data is thrown at it.
171
- // Due to how `.zip` file format is defined, "garbage" data could be placed at various
172
- // places in it and it'd still be a valid `.zip` archive. It's likely that for this reason
173
- // `fflate` doesn't ever complain and simply emits no entries when fed any kind of invalid data.
174
- //
175
- // In order to introduce some basic validation, here we specifically demand
176
- // that a `.zip` archive must at least start with an individual file entry
177
- // because an `.xlsx` file creator softwared really shouldn't attempt doing
178
- // anything "funny" when writing a file, hence this adherence requirement.
179
- //
180
- function createZipFileValidator(onValidationResult) {
181
- var firstBytesCount = 2;
182
- var firstBytes = [];
183
- var firstBytesCheckResult;
184
- return {
185
- validateChunk: function validateChunk(chunk) {
186
- if (firstBytes.length < 2) {
187
- var i = 0;
188
- while (i < chunk.length && i < firstBytesCount) {
189
- firstBytes.push(chunk[i]);
190
- i++;
191
- }
192
- if (firstBytes.length === 2) {
193
- var isValid = firstBytes[0] === 0x50 && firstBytes[1] === 0x4B;
194
- onValidationResult(isValid);
195
- }
196
- }
197
- }
198
- };
199
- }
6
+ Object.defineProperty(exports, "default", {
7
+ enumerable: true,
8
+ get: function get() {
9
+ return _unzipFromStreamUnzipper["default"];
10
+ }
11
+ });
12
+ var _unzipFromStreamUnzipper = _interopRequireDefault(require("./unzipFromStream.unzipper.js"));
13
+ function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { "default": obj }; }
200
14
  //# sourceMappingURL=unzipFromStream.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"unzipFromStream.js","names":["_fflate","require","_buffer","unzipFromStream","stream","_ref","arguments","length","undefined","filter","files","Promise","resolve","reject","errored","onError","error","_createZipFileValidat","createZipFileValidator","isValid","Error","validateChunk","unzip","Unzip","entry","name","endsWith","path","chunks","ondata","chunk","isLast","push","Buffer","concat","start","register","UnzipInflate","on","Uint8Array","onValidationResult","firstBytesCount","firstBytes","firstBytesCheckResult","i"],"sources":["../../source/zip/unzipFromStream.js"],"sourcesContent":["// This code was originally submitted by Stian Jensen.\r\n// https://github.com/catamphetamine/read-excel-file/pull/122\r\n\r\n// A `*.zip` file consists of individual file entries with the \"total\" summary section\r\n// placed at the end of the file rather than at the start of it, which was originally done\r\n// to allow for easy append of data to a given `.zip` file.\r\n// https://en.wikipedia.org/wiki/ZIP_(file_format)\r\n//\r\n// But this also means that reading a `*.zip` file from a stream can't really be done\r\n// using the \"officially recommended\" way of first reading the \"total\" summary section\r\n// and only then reading the individual file entries specified in that summary section.\r\n//\r\n// So in order to be able to read a `*.zip` file from a stream, some corners have to be cut.\r\n// For example, the \"total\" summary section is completely ignored and instead the reader\r\n// should adopt \"data recovery\" software approach — it should proactively \"scan\" the input stream\r\n// for individual file entries and handle them one-by-one as they come.\r\n//\r\n// Such approach doesn't seem to contradict with the XLSX specification\r\n// because an `*.xlsx` files is supposed to be a normal `.zip` archive\r\n// without any \"trickery\" such as \"deleted\" files or \"garbage\" data\r\n// hiding under the hood.\r\n//\r\n// So when handling `*.xlsx` 
file, we assume that each such file must start\r\n// with an individual file entry followed by another individual file entry, etc.\r\n//\r\n// When the \"summary\" section is reached, we assume that the archive has ended.\r\n//\r\n// To read a `.zip` archive, the code uses `fflate`'s `Unzip` class\r\n// with `UnzipInflate` decompression implementation to decompress the data\r\n// that was previously compressed using `DEFLATE` compressing algorithm,\r\n// which is what `*.xlsx` files use.\r\n//\r\n// The `Unzip` class doesn't speak the Node.js stream interface, and `fflate`'s readme\r\n// doesn't include a clear \"reading a `.zip` file from a Node.js stream\" section.\r\n// https://github.com/101arrowz/fflate/issues/251\r\n// Instead, the `Unzip` class has its own `push(chunk)` / `onfile` / `entry.ondata` protocol.\r\n// This code reads the binary input stream and forwards each chunk of it to `unzip.push()`,\r\n// and then collects the decompressed file entries.\r\n//\r\nimport { Unzip, UnzipInflate } from 'fflate'\r\n\r\nimport { Buffer } from 'buffer'\r\n\r\n/**\r\n * Reads `*.zip` file contents.\r\n * @param {Stream} stream\r\n * @return {Promise<Record<string,Buffer>>} Resolves to an object holding `*.zip` file entries. P.S. 
`Buffer` is a `Uint8Array`.\r\n */\r\nexport default function unzipFromStream(stream, { filter } = {}) {\r\n\t// The `files` object stores the files and their contents.\r\n\tconst files = {}\r\n\r\n\treturn new Promise((resolve, reject) => {\r\n\t\tlet errored = false\r\n\r\n\t\tconst onError = (error) => {\r\n\t\t\tif (!errored) {\r\n\t\t\t\terrored = true\r\n\t\t\t\treject(error)\r\n\t\t\t}\r\n\t\t}\r\n\r\n\t\tconst { validateChunk } = createZipFileValidator((isValid) => {\r\n\t\t\tif (!isValid) {\r\n\t\t\t\tonError(new Error('Invalid `.zip` archive'))\r\n\t\t\t}\r\n\t\t})\r\n\r\n\t\t// `Unzip` discovers each individual file entry in the input data stream\r\n\t\t// and then calls the callback function for each such entry.\r\n\t\tconst unzip = new Unzip((entry) => {\r\n\t\t\t// If there already was an error while reading this `.zip` file,\r\n\t\t\t// ignore any follow-up entries.\r\n\t\t\tif (errored) {\r\n\t\t\t\treturn\r\n\t\t\t}\r\n\r\n\t\t\t// Skip directory entries (their names end with a slash).\r\n\t\t\t// Only files are of any interest.\r\n\t\t\tif (entry.name.endsWith('/')) {\r\n\t\t\t\treturn\r\n\t\t\t}\r\n\r\n\t\t\t// See if this file should be ignored.\r\n\t\t\t// If it should, this entry won't be processed, i.e. 
`Unzip` will not try\r\n\t\t\t// to decompress its data, and will just discard it.\r\n\t\t\tif (filter && !filter({ path: entry.name })) {\r\n\t\t\t\treturn\r\n\t\t\t}\r\n\r\n\t\t\tconst chunks = []\r\n\r\n\t\t\t// `entry.ondata` is called with each decompressed chunk of the entry,\r\n\t\t\t// and a final time with `isLast === true` once the entry is complete.\r\n\t\t\tentry.ondata = (error, chunk, isLast) => {\r\n\t\t\t\tif (error) {\r\n\t\t\t\t\treturn onError(error)\r\n\t\t\t\t}\r\n\t\t\t\tchunks.push(chunk)\r\n\t\t\t\tif (isLast) {\r\n\t\t\t\t\tfiles[entry.name] = Buffer.concat(chunks)\r\n\t\t\t\t}\r\n\t\t\t}\r\n\r\n\t\t\t// Start decompressing this entry.\r\n\t\t\tentry.start()\r\n\t\t})\r\n\r\n\t\t// Register the decompressor for the data that was compressed using\r\n\t\t// `DEFLATE` compression algorithm (compression method `8`),\r\n\t\t// which is what `.xlsx` files use.\r\n\t\tunzip.register(UnzipInflate)\r\n\r\n\t\tstream\r\n\t\t\t// Catch errors emitted from the input stream (for example, a file read error).\r\n\t\t\t.on('error', onError)\r\n\t\t\t// When another chunk of data is read from the input stream.\r\n\t\t\t.on('data', (chunk) => {\r\n\t\t\t\t// If there already was an error while reading this `.zip` file,\r\n\t\t\t\t// ignore any follow-up data chunks.\r\n\t\t\t\tif (errored) {\r\n\t\t\t\t\treturn\r\n\t\t\t\t}\r\n\t\t\t\t// Validate the `.zip` archive as its data comes through.\r\n\t\t\t\tvalidateChunk(chunk)\r\n\t\t\t\t// If the `.zip` archive is found to be invalid, stop any further\r\n\t\t\t\t// processing of it.\r\n\t\t\t\tif (errored) {\r\n\t\t\t\t\treturn\r\n\t\t\t\t}\r\n\t\t\t\t// Push the next data chunk to `fflate`'s `Unzip` class instance.\r\n\t\t\t\t// The `.push()` function is synchronous, meaning that by the time it returns,\r\n\t\t\t\t// any complete files entries encountered so far have already been decompressed\r\n\t\t\t\t// and populated in the `files` object.\r\n\t\t\t\ttry {\r\n\t\t\t\t\tunzip.push(chunk, false)\r\n\t\t\t\t} 
catch (error) {\r\n\t\t\t\t\tonError(error)\r\n\t\t\t\t}\r\n\t\t\t})\r\n\t\t\t// When there's no more data in the input stream to consume,\r\n\t\t\t// finish reading the `.zip` archive.\r\n\t\t\t.on('end', () => {\r\n\t\t\t\t// If there were any errors when reading the `.zip` archive,\r\n\t\t\t\t// don't `resolve()` with anything.\r\n\t\t\t\tif (errored) {\r\n\t\t\t\t\treturn\r\n\t\t\t\t}\r\n\t\t\t\ttry {\r\n\t\t\t\t\t// Signal the end of the archive to `fflate`'s `Unzip` class instance.\r\n\t\t\t\t\t// It will flush any remaining state in it.\r\n\t\t\t\t\tunzip.push(new Uint8Array(0), true)\r\n\t\t\t\t\t// Resolve with the unzipped files.\r\n\t\t\t\t\tresolve(files)\r\n\t\t\t\t} catch (error) {\r\n\t\t\t\t\tonError(error)\r\n\t\t\t\t}\r\n\t\t\t})\r\n\t})\r\n}\r\n\r\n// Every section in a `.zip` archive is marked with 4 bytes, the first two of which\r\n// are `0x50` and `0x4B`, which reads \"PK\", referencing the initials of the inventor Phil Katz.\r\n//\r\n// It looks like `fflate`'s `Unzip` doesn't ever complain about whatever data is thrown at it.\r\n// Due to how `.zip` file format is defined, \"garbage\" data could be placed at various\r\n// places in it and it'd still be a valid `.zip` archive. 
It's likely that for this reason\r\n// `fflate` doesn't ever complain and simply emits no entries when fed any kind of invalid data.\r\n//\r\n// In order to introduce some basic validation, here we specifically demand\r\n// that a `.zip` archive must at least start with an individual file entry\r\n// because an `.xlsx` file creator softwared really shouldn't attempt doing\r\n// anything \"funny\" when writing a file, hence this adherence requirement.\r\n//\r\nfunction createZipFileValidator(onValidationResult) {\r\n\tconst firstBytesCount = 2\r\n\tconst firstBytes = []\r\n\tlet firstBytesCheckResult\r\n\treturn {\r\n\t\tvalidateChunk(chunk) {\r\n\t\t\tif (firstBytes.length < 2) {\r\n\t\t\t\tlet i = 0\r\n\t\t\t\twhile (i < chunk.length && i < firstBytesCount) {\r\n\t\t\t\t\tfirstBytes.push(chunk[i])\r\n\t\t\t\t\ti++\r\n\t\t\t\t}\r\n\t\t\t\tif (firstBytes.length === 2) {\r\n\t\t\t\t\tconst isValid = firstBytes[0] === 0x50 && firstBytes[1] === 0x4B\r\n\t\t\t\t\tonValidationResult(isValid)\r\n\t\t\t\t}\r\n\t\t\t}\r\n\t\t}\r\n\t}\r\n}"],"mappings":";;;;;;AAuCA,IAAAA,OAAA,GAAAC,OAAA;AAEA,IAAAC,OAAA,GAAAD,OAAA;AAzCA;AACA;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;;AAKA;AACA;AACA;AACA;AACA;AACe,SAASE,eAAeA,CAACC,MAAM,EAAmB;EAAA,IAAAC,IAAA,GAAAC,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAJ,CAAC,CAAC;IAAbG,MAAM,GAAAJ,IAAA,CAANI,MAAM;EACvD;EACA,IAAMC,KAAK,GAAG,CAAC,CAAC;EAEhB,OAAO,IAAIC,OAAO,CAAC,UAACC,OAAO,EAAEC,MAAM,EAAK;IACvC,IAAIC,OAAO,GAAG,KAAK;IAEnB,IAAMC,OAAO,GAAG,SAAVA,OAAOA,CAAIC,KAAK,EAAK;MAC1B,IAAI,CAACF,OAAO,EAAE;QACbA,OAAO,GAAG,IAAI;QACdD,MAAM,CAACG,KAAK,CAAC;MACd;IACD,CAAC;IAED,IAAAC,qBAAA,GAA0BC,sBAAsB,CAAC,UAACC,OAAO,EAAK;QAC7D,IAAI,CAACA,OAAO,EAAE;UACbJ,OAAO,CAAC,IAAIK,KAAK,CAAC,wBAAwB,CAAC,CAAC;QAC7C;MACD,CAAC,CAAC;MAJMC,aAAa,GAAAJ,qBAAA,CAAbI,aAAa;;IAMrB;IACA;IACA,IAAMC,KAAK,GAAG,IAAIC,aAAK,CAAC,UAACC,KAAK,EAAK;MA
ClC;MACA;MACA,IAAIV,OAAO,EAAE;QACZ;MACD;;MAEA;MACA;MACA,IAAIU,KAAK,CAACC,IAAI,CAACC,QAAQ,CAAC,GAAG,CAAC,EAAE;QAC7B;MACD;;MAEA;MACA;MACA;MACA,IAAIjB,MAAM,IAAI,CAACA,MAAM,CAAC;QAAEkB,IAAI,EAAEH,KAAK,CAACC;MAAK,CAAC,CAAC,EAAE;QAC5C;MACD;MAEA,IAAMG,MAAM,GAAG,EAAE;;MAEjB;MACA;MACAJ,KAAK,CAACK,MAAM,GAAG,UAACb,KAAK,EAAEc,KAAK,EAAEC,MAAM,EAAK;QACxC,IAAIf,KAAK,EAAE;UACV,OAAOD,OAAO,CAACC,KAAK,CAAC;QACtB;QACAY,MAAM,CAACI,IAAI,CAACF,KAAK,CAAC;QAClB,IAAIC,MAAM,EAAE;UACXrB,KAAK,CAACc,KAAK,CAACC,IAAI,CAAC,GAAGQ,cAAM,CAACC,MAAM,CAACN,MAAM,CAAC;QAC1C;MACD,CAAC;;MAED;MACAJ,KAAK,CAACW,KAAK,CAAC,CAAC;IACd,CAAC,CAAC;;IAEF;IACA;IACA;IACAb,KAAK,CAACc,QAAQ,CAACC,oBAAY,CAAC;IAE5BjC;IACC;IAAA,CACCkC,EAAE,CAAC,OAAO,EAAEvB,OAAO;IACpB;IAAA,CACCuB,EAAE,CAAC,MAAM,EAAE,UAACR,KAAK,EAAK;MACtB;MACA;MACA,IAAIhB,OAAO,EAAE;QACZ;MACD;MACA;MACAO,aAAa,CAACS,KAAK,CAAC;MACpB;MACA;MACA,IAAIhB,OAAO,EAAE;QACZ;MACD;MACA;MACA;MACA;MACA;MACA,IAAI;QACHQ,KAAK,CAACU,IAAI,CAACF,KAAK,EAAE,KAAK,CAAC;MACzB,CAAC,CAAC,OAAOd,KAAK,EAAE;QACfD,OAAO,CAACC,KAAK,CAAC;MACf;IACD,CAAC;IACD;IACA;IAAA,CACCsB,EAAE,CAAC,KAAK,EAAE,YAAM;MAChB;MACA;MACA,IAAIxB,OAAO,EAAE;QACZ;MACD;MACA,IAAI;QACH;QACA;QACAQ,KAAK,CAACU,IAAI,CAAC,IAAIO,UAAU,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC;QACnC;QACA3B,OAAO,CAACF,KAAK,CAAC;MACf,CAAC,CAAC,OAAOM,KAAK,EAAE;QACfD,OAAO,CAACC,KAAK,CAAC;MACf;IACD,CAAC,CAAC;EACJ,CAAC,CAAC;AACH;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,SAASE,sBAAsBA,CAACsB,kBAAkB,EAAE;EACnD,IAAMC,eAAe,GAAG,CAAC;EACzB,IAAMC,UAAU,GAAG,EAAE;EACrB,IAAIC,qBAAqB;EACzB,OAAO;IACNtB,aAAa,WAAAA,cAACS,KAAK,EAAE;MACpB,IAAIY,UAAU,CAACnC,MAAM,GAAG,CAAC,EAAE;QAC1B,IAAIqC,CAAC,GAAG,CAAC;QACT,OAAOA,CAAC,GAAGd,KAAK,CAACvB,MAAM,IAAIqC,CAAC,GAAGH,eAAe,EAAE;UAC/CC,UAAU,CAACV,IAAI,CAACF,KAAK,CAACc,CAAC,CAAC,CAAC;UACzBA,CAAC,EAAE;QACJ;QACA,IAAIF,UAAU,CAACnC,MAAM,KAAK,CAAC,EAAE;UAC5B,IAAMY,OAAO,GAAGuB,UAAU,CAAC,CAAC,CAAC,KAAK,IAAI,IAAIA,UAAU,CAAC,CAAC,CAAC,KAAK,IAAI;UAChEF,kBAAkB,CAACrB,OAAO,CAAC;QAC5B;MACD;IACD;EACD,CAAC;AACF"}
1
+ {"version":3,"file":"unzipFromStream.js","names":["_unzipFromStreamUnzipper","_interopRequireDefault","require","obj","__esModule"],"sources":["../../source/zip/unzipFromStream.js"],"sourcesContent":["// Currently, there're two implementations:\r\n// * `fflate` — a pure-javascript implementation that uses `fflate` package.\r\n// * `unzipper` — a \"native\" Node.js module that uses Node's `zlib` which is written in C.\r\n//\r\n// The implementations are compared in a benchmark:\r\n//\r\n// ```\r\n// npm run test:benchmark:unzipFromStream\r\n// ```\r\n//\r\n// The benchmark tells that `unzipper` is 2x faster than `fflate`.\r\n//\r\nexport { default as default } from './unzipFromStream.unzipper.js'"],"mappings":";;;;;;;;;;;AAYA,IAAAA,wBAAA,GAAAC,sBAAA,CAAAC,OAAA;AAAkE,SAAAD,uBAAAE,GAAA,WAAAA,GAAA,IAAAA,GAAA,CAAAC,UAAA,GAAAD,GAAA,gBAAAA,GAAA"}