oxc-parser 0.72.2 → 0.73.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,283 @@
1
+ 'use strict';
2
+
3
+ const os = require('node:os');
4
+ const bindings = require('../bindings.js');
5
+
6
+ module.exports = {
7
+ rawTransferSupported,
8
+ parseSyncRawImpl,
9
+ parseAsyncRawImpl,
10
+ prepareRaw,
11
+ isJsAst,
12
+ returnBufferToCache,
13
+ };
14
+
15
+ // Import `eager.js` and `lazy.js` after the exports above, because of circular dependencies
16
+ const { parseSyncRaw, parseAsyncRaw } = require('./eager.js');
17
+ module.exports.parseSyncRaw = parseSyncRaw;
18
+ module.exports.parseAsyncRaw = parseAsyncRaw;
19
+
20
+ const { parseSyncLazy, parseAsyncLazy } = require('./lazy.js');
21
+ module.exports.parseSyncLazy = parseSyncLazy;
22
+ module.exports.parseAsyncLazy = parseAsyncLazy;
23
+
24
// Parse synchronously via raw transfer.
// Copies source into a raw-transfer buffer, invokes the native parser,
// then hands the buffer to `deserialize` to build the result object.
function parseSyncRawImpl(filename, sourceText, options, deserialize) {
  const prepared = prepareRaw(sourceText, options);
  bindings.parseSyncRaw(filename, prepared.buffer, prepared.sourceByteLen, prepared.options);
  return deserialize(prepared.buffer, sourceText, prepared.sourceByteLen);
}
// User should not schedule more async tasks than there are available CPUs, as it hurts performance,
// but it's a common mistake in async JS code to do exactly that.
//
// That anti-pattern looks like this when applied to Oxc:
//
// ```js
// const asts = await Promise.all(
//   files.map(
//     async (filename) => {
//       const sourceText = await fs.readFile(filename, 'utf8');
//       const ast = await oxc.parseAsync(filename, sourceText);
//       return ast;
//     }
//   )
// );
// ```
//
// In most cases, that'd just result in a bit of degraded performance, and higher memory use because
// of loading sources into memory prematurely.
//
// However, raw transfer uses a 6 GiB buffer for each parsing operation.
// Most of the memory pages in those buffers are never touched, so this does not consume a huge amount
// of physical memory, but it does still consume virtual memory.
//
// If we allowed creating a large number of 6 GiB buffers simultaneously, it would quickly consume
// virtual memory space and risk memory exhaustion. The code above would exhaust all of bottom half
// (heap) of 48-bit virtual memory space if `files.length >= 21_845`. This is not a number which
// is unrealistic in real world code.
//
// To guard against this possibility, we implement a simple queue.
// No more than `os.availableParallelism()` files can be parsed simultaneously, and any further calls to
// `parseAsyncRaw` will be put in a queue, to execute once other tasks complete.
//
// Fallback to `os.cpus().length` on versions of NodeJS prior to v18.14.0, which do not support
// `os.availableParallelism`.
let availableCores = os.availableParallelism ? os.availableParallelism() : os.cpus().length;
const queue = [];

// Parse asynchronously via raw transfer, limiting concurrency to the number of CPU cores.
// `deserialize` converts the raw-transfer buffer into the returned AST object.
async function parseAsyncRawImpl(filename, sourceText, options, deserialize) {
  // Wait for a free CPU core if all CPUs are currently busy.
  //
  // Note: `availableCores` is NOT decremented if have to wait in the queue first,
  // and NOT incremented when parsing completes and it runs next task in the queue.
  //
  // This is to avoid a race condition if `parseAsyncRaw` is called during the microtick in between
  // `resolve` being called below, and the promise resolving here. In that case the new task could
  // start running, and then the promise resolves, and the queued task also starts running.
  // We'd then have `availableParallelism() + 1` tasks running simultaneously. Potentially, this could
  // happen repeatedly, with the number of tasks running simultaneously ever-increasing.
  if (availableCores === 0) {
    // All CPU cores are busy. Put this task in queue and wait for capacity to become available.
    await new Promise((resolve) => {
      queue.push(resolve);
    });
  } else {
    // A CPU core is available. Mark core as busy, and run parsing now.
    availableCores--;
  }

  // Parse.
  //
  // Fix: release the core slot in `finally`. Previously, if `prepareRaw`, the native parse,
  // or `deserialize` threw, the slot was never released, so effective parallelism shrank
  // permanently with each failure and queued tasks could wait forever (deadlock).
  try {
    const { buffer, sourceByteLen, options: optionsAmended } = prepareRaw(sourceText, options);
    await bindings.parseAsyncRaw(filename, buffer, sourceByteLen, optionsAmended);
    return deserialize(buffer, sourceText, sourceByteLen);
  } finally {
    // Free the CPU core
    if (queue.length > 0) {
      // Some further tasks waiting in queue. Run the next one.
      // Do not increment `availableCores` (see above).
      const resolve = queue.shift();
      resolve();
    } else {
      // No tasks waiting in queue. This CPU is now free.
      availableCores++;
    }
  }
}
// Size constants, in bytes.
const ONE_GIB = 1 << 30;
const TWO_GIB = ONE_GIB * 2;
const SIX_GIB = ONE_GIB * 6;

// We keep a cache of buffers for raw transfer, so we can reuse them as much as possible.
//
// When processing multiple files, it's ideal if we can reuse an existing buffer, as it's more likely
// to be warm in CPU cache, it avoids allocations, and it saves work for the garbage collector.
// However, we also don't want to keep a load of large buffers around indefinitely using up memory,
// if they're not going to be used again.
//
// We have no knowledge of what pattern over time the user may process files in (could be lots in
// quick succession, or more occasionally in a long-running process), so we use a flexible caching
// strategy adaptable to many usage patterns: a 2-tier cache.
//
// * Tier 1 (`buffers`) holds strong references.
//   When parsing completes and a buffer is no longer in use, it's pushed here, and a timer is set
//   to clear the cache after 10 seconds of inactivity.
// * Tier 2 (`oldBuffers`) holds `WeakRef`s.
//   When the timer expires, all tier 1 buffers are demoted here, leaving the garbage collector
//   free to reclaim them.
//
// On the next call to `parseSync` / `parseAsync`, any tier 2 buffers not yet collected are promoted
// back into tier 1, on the assumption that parsing one file indicates parsing is an ongoing process
// and further calls are likely. The weak tier exists because V8 does not necessarily free memory as
// soon as it's able to — we don't block it from freeing, but if it hasn't yet, there's no point
// creating a new buffer when one already exists.
const CLEAR_BUFFERS_TIMEOUT = 10_000; // 10 seconds
const buffers = [];
const oldBuffers = [];

// Lazily-created `TextEncoder`, and handle of pending cache-clear timer (or `null`).
let encoder = null;
let clearBuffersTimeout = null;
// Get a buffer (from cache if possible), copy source text into it, and amend options object.
// Returns `{ buffer, sourceByteLen, options }` where `options` is a copy of the input options
// with the experimental flags removed. Throws if raw transfer is unsupported on this platform,
// or if the source text could not be fully encoded into the buffer.
function prepareRaw(sourceText, options) {
  if (!rawTransferSupported()) {
    throw new Error(
      '`experimentalRawTransfer` option is not supported on 32-bit or big-endian systems, ' +
        'versions of NodeJS prior to v22.0.0, versions of Deno prior to v2.0.0, and other runtimes',
    );
  }

  // Copy options, dropping `experimentalRawTransfer` and `experimentalLazy`
  // (destructured names are unused; only the rest object matters)
  const { experimentalRawTransfer: _raw, experimentalLazy: _lazy, ...cleanOptions } = options;

  // Cancel pending timeout for clearing buffers, if any
  if (clearBuffersTimeout !== null) {
    clearTimeout(clearBuffersTimeout);
    clearBuffersTimeout = null;
  }

  // Promote any demoted buffers which have not yet been garbage collected back into tier 1
  if (oldBuffers.length > 0) {
    const revived = [];
    for (const ref of oldBuffers) {
      const buf = ref.deref();
      if (buf !== undefined) revived.push(buf);
    }
    oldBuffers.length = 0;
    if (revived.length > 0) buffers.unshift(...revived);
  }

  // Reuse a cached buffer, or create a new one
  const buffer = buffers.pop() ?? createBuffer();

  // Get/create `TextEncoder`
  if (encoder === null) encoder = new TextEncoder();

  // Write source into start of buffer.
  // `TextEncoder` cannot write into a `Uint8Array` larger than 1 GiB,
  // so write through a 1 GiB view over the start of the buffer.
  const sourceView = new Uint8Array(buffer.buffer, buffer.byteOffset, ONE_GIB);
  const { read, written: sourceByteLen } = encoder.encodeInto(sourceText, sourceView);
  if (read !== sourceText.length) throw new Error('Failed to write source text into buffer');

  return { buffer, sourceByteLen, options: cleanOptions };
}
// Get if AST should be deserialized as JS or TS.
// Rust side writes a `bool` at this fixed position in the buffer, which is `true` (1) if TS.
function isJsAst(buffer) {
  // (2 * 1024 * 1024 * 1024) - 12 = 2147483636, i.e. 12 bytes from the end of the 2 GiB buffer
  const IS_TS_FLAG_OFFSET = 2147483636;
  return buffer[IS_TS_FLAG_OFFSET] === 0;
}
// Return buffer to tier 1 cache, to be reused.
// (Re)starts the inactivity timer which demotes cached buffers to tier 2.
function returnBufferToCache(buf) {
  buffers.push(buf);

  if (clearBuffersTimeout !== null) clearTimeout(clearBuffersTimeout);
  clearBuffersTimeout = setTimeout(clearBuffersCache, CLEAR_BUFFERS_TIMEOUT);
  // Don't let the timer keep the process alive
  clearBuffersTimeout.unref();
}
// Demote all buffers from tier 1 cache (`buffers`) to tier 2 (`oldBuffers`),
// wrapping each in a `WeakRef` so the garbage collector is free to reclaim them.
function clearBuffersCache() {
  clearBuffersTimeout = null;

  oldBuffers.push(...buffers.map((buffer) => new WeakRef(buffer)));
  buffers.length = 0;
}
// Create a `Uint8Array` which is 2 GiB in size, with its start aligned on 4 GiB.
//
// Achieve this by creating a 6 GiB `ArrayBuffer`, getting the offset within it that's aligned
// to 4 GiB, chopping off that number of bytes from the start, and shortening to 2 GiB.
// A 2 GiB slice aligned on 4 GiB always exists within a 6 GiB buffer, regardless of how
// the 6 GiB buffer itself is aligned.
//
// Note: On systems with virtual memory, this only consumes 6 GiB of *virtual* memory.
// Physical memory is consumed only for the pages actually written to.
function createBuffer() {
  const arrayBuffer = new ArrayBuffer(SIX_GIB);
  // Native side reports the 4 GiB-aligned offset within the allocation
  const offset = bindings.getBufferOffset(new Uint8Array(arrayBuffer));
  const buffer = new Uint8Array(arrayBuffer, offset, TWO_GIB);
  // Attach u32 / f64 views over the same region, for the deserializer's use
  buffer.uint32 = new Uint32Array(arrayBuffer, offset, TWO_GIB / 4);
  buffer.float64 = new Float64Array(arrayBuffer, offset, TWO_GIB / 8);
  return buffer;
}
// Memoized result of the support check (`null` until first call).
let rawTransferIsSupported = null;

// Returns `true` if the `experimentalRawTransfer` option is supported.
//
// Raw transfer is only supported on 64-bit little-endian systems,
// and NodeJS >= v22.0.0 or Deno >= v2.0.0.
//
// Versions of NodeJS prior to v22.0.0 do not support creating an `ArrayBuffer` larger than 4 GiB.
// Bun (as at v1.2.4) also does not support creating an `ArrayBuffer` larger than 4 GiB.
// Support on Deno v1 is unknown and it's EOL, so treating Deno before v2.0.0 as unsupported.
function rawTransferSupported() {
  // `??=` only evaluates the right side on first call; a cached `false` is not nullish
  rawTransferIsSupported ??= rawTransferRuntimeSupported() && bindings.rawTransferSupported();
  return rawTransferIsSupported;
}
// Detect whether the current JS runtime supports raw transfer
// (NodeJS >= v22, or Deno >= v2; Bun and unknown runtimes are unsupported).
//
// Checks copied from:
// https://github.com/unjs/std-env/blob/ab15595debec9e9115a9c1d31bc7597a8e71dbfd/src/runtimes.ts
// MIT license: https://github.com/unjs/std-env/blob/ab15595debec9e9115a9c1d31bc7597a8e71dbfd/LICENCE
function rawTransferRuntimeSupported() {
  let g;
  try {
    g = globalThis;
  } catch (e) {
    return false;
  }

  // Bun: unsupported
  if (g.Bun || g.process?.versions?.bun) return false;

  // Deno: supported from v2
  if (g.Deno) {
    const denoMatch = Deno.version?.deno?.match(/^(\d+)\./);
    return !!denoMatch && Number(denoMatch[1]) >= 2;
  }

  // Anything that isn't NodeJS: unsupported
  if (g.process?.release?.name !== 'node') return false;

  // NodeJS: supported from v22
  const nodeMatch = process.version?.match(/^v(\d+)\./);
  return !!nodeMatch && Number(nodeMatch[1]) >= 22;
}
@@ -0,0 +1,87 @@
1
'use strict';

const { parseSyncRawImpl, parseAsyncRawImpl, returnBufferToCache } = require('./index.js');

// Parse synchronously; AST nodes are deserialized lazily on property access.
function parseSyncLazy(filename, sourceText, options) {
  return parseSyncRawImpl(filename, sourceText, options, construct);
}

// Parse asynchronously; AST nodes are deserialized lazily on property access.
function parseAsyncLazy(filename, sourceText, options) {
  return parseAsyncRawImpl(filename, sourceText, options, construct);
}

module.exports = { parseSyncLazy, parseAsyncLazy };
// Registry for buffers which are held by lazily-deserialized ASTs.
// Returns the buffer to cache when the `ast` wrapper is garbage collected.
//
// Check for existence of `FinalizationRegistry`, to avoid errors on old versions of NodeJS
// which don't support it (e.g. Prettier supports NodeJS v14).
// Raw transfer is disabled on NodeJS before v22, so a `null` here is never accessed on old NodeJS.
const bufferRecycleRegistry = typeof FinalizationRegistry !== 'undefined'
  ? new FinalizationRegistry(returnBufferToCache)
  : null;

// Lazily-loaded deserializer entry point, and the token it embeds in `ast` objects.
let constructLazyData = null;
let TOKEN;
// Get an object with getters which lazily deserialize the AST from buffer.
function construct(buffer, sourceText, sourceLen) {
  // Lazy-load the deserializer on first use, and capture `TOKEN` to store in `ast` objects
  if (constructLazyData === null) {
    ({ construct: constructLazyData, TOKEN } = require('../generated/deserialize/lazy.js'));
  }

  // Create AST object.
  // Source is pure ASCII iff its UTF-8 byte length equals its UTF-16 code unit count.
  const sourceIsAscii = sourceText.length === sourceLen;
  const ast = { buffer, sourceText, sourceLen, sourceIsAscii, nodes: new Map(), token: TOKEN };

  // Register `ast` so the buffer is returned to cache when `ast` is garbage collected.
  // `ast` is also the unregister token (see `dispose`).
  bufferRecycleRegistry.register(ast, buffer, ast);

  // Root data class instance backing the getters below
  const data = constructLazyData(ast);

  return {
    get program() {
      return data.program;
    },
    get module() {
      return data.module;
    },
    get comments() {
      return data.comments;
    },
    get errors() {
      return data.errors;
    },
    dispose: dispose.bind(null, ast),
  };
}
// Dispose of this AST.
//
// After calling this method, trying to read any nodes from this AST may cause an error.
//
// The buffer is returned to the cache to be reused. That would happen anyway once all nodes
// of the AST are garbage collected, but calling `dispose` is preferable as it happens
// immediately — the garbage collector may take time to collect the `ast` object, and new
// buffers may be created in the meantime when this one could have been reused.
function dispose(ast) {
  // Return buffer to cache to be reused
  returnBufferToCache(ast.buffer);

  // Sever the link between `ast` and the buffer,
  // and clear other contents so they can be garbage collected
  ast.buffer = null;
  ast.sourceText = null;
  ast.nodes = null;

  // Remove `ast` from the recycling registry — once disposed, there's no action
  // to take when `ast` is eventually garbage collected
  bufferRecycleRegistry.unregister(ast);
}