oxc-parser 0.71.0 → 0.72.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -20,6 +20,9 @@ The only differences between Oxc's AST and ESTree / TS-ESTree are:
20
20
 
21
21
  - Support for Stage 3 ECMA features [`import defer`](https://github.com/tc39/proposal-defer-import-eval)
22
22
  and [`import source`](https://github.com/tc39/proposal-source-phase-imports).
23
+ - In TS-ESTree AST, `import.defer(...)` and `import.source(...)` are represented as an `ImportExpression`
24
+ with `'defer'` or `'source'` in the `phase` field (as in ESTree spec), where TS-ESLint represents these
25
+ as a `CallExpression` with `MetaProperty` as its `callee`.
23
26
  - Addition of a non-standard `hashbang` field to `Program`.
24
27
 
25
28
  That aside, the AST should completely align with Acorn's ESTree AST or TS-ESLint's TS-ESTree.
package/bindings.js CHANGED
@@ -390,6 +390,7 @@ module.exports.ExportLocalNameKind = nativeBinding.ExportLocalNameKind
390
390
  module.exports.getBufferOffset = nativeBinding.getBufferOffset
391
391
  module.exports.ImportNameKind = nativeBinding.ImportNameKind
392
392
  module.exports.parseAsync = nativeBinding.parseAsync
393
+ module.exports.parseAsyncRaw = nativeBinding.parseAsyncRaw
393
394
  module.exports.parseSync = nativeBinding.parseSync
394
395
  module.exports.parseSyncRaw = nativeBinding.parseSyncRaw
395
396
  module.exports.rawTransferSupported = nativeBinding.rawTransferSupported
@@ -1057,6 +1057,7 @@ function deserializeImportExpression(pos) {
1057
1057
  end: deserializeU32(pos + 4),
1058
1058
  source: deserializeExpression(pos + 8),
1059
1059
  options: deserializeOptionExpression(pos + 24),
1060
+ phase: deserializeOptionImportPhase(pos + 40),
1060
1061
  };
1061
1062
  }
1062
1063
 
package/index.d.ts CHANGED
@@ -139,6 +139,38 @@ export interface OxcError {
139
139
  */
140
140
  export declare function parseAsync(filename: string, sourceText: string, options?: ParserOptions | undefined | null): Promise<ParseResult>
141
141
 
142
+ /**
143
+ * Parse AST into provided `Uint8Array` buffer, asynchronously.
144
+ *
145
+ * Note: This function can be slower than `parseSyncRaw` due to the overhead of spawning a thread.
146
+ *
147
+ * Source text must be written into the start of the buffer, and its length (in UTF-8 bytes)
148
+ * provided as `source_len`.
149
+ *
150
+ * This function will parse the source, and write the AST into the buffer, starting at the end.
151
+ *
152
+ * It also writes to the very end of the buffer the offset of `Program` within the buffer.
153
+ *
154
+ * Caller can deserialize data from the buffer on JS side.
155
+ *
156
+ * # SAFETY
157
+ *
158
+ * Caller must ensure:
159
+ * * Source text is written into start of the buffer.
160
+ * * Source text's UTF-8 byte length is `source_len`.
161
+ * * The 1st `source_len` bytes of the buffer comprise a valid UTF-8 string.
162
+ * * Contents of buffer must not be mutated by caller until the `AsyncTask` returned by this
163
+ * function resolves.
164
+ *
165
+ * If source text is originally a JS string on JS side, and converted to a buffer with
166
+ * `Buffer.from(str)` or `new TextEncoder().encode(str)`, this guarantees it's valid UTF-8.
167
+ *
168
+ * # Panics
169
+ *
170
+ * Panics if source text is too long, or AST takes more memory than is available in the buffer.
171
+ */
172
+ export declare function parseAsyncRaw(filename: string, buffer: Uint8Array, sourceLen: number, options?: ParserOptions | undefined | null): Promise<unknown>
173
+
142
174
  export interface ParserOptions {
143
175
  /** Treat the source text as `js`, `jsx`, `ts`, or `tsx`. */
144
176
  lang?: 'js' | 'jsx' | 'ts' | 'tsx'
@@ -176,7 +208,7 @@ export interface ParserOptions {
176
208
  export declare function parseSync(filename: string, sourceText: string, options?: ParserOptions | undefined | null): ParseResult
177
209
 
178
210
  /**
179
- * Parses AST into provided `Uint8Array` buffer.
211
+ * Parse AST into provided `Uint8Array` buffer, synchronously.
180
212
  *
181
213
  * Source text must be written into the start of the buffer, and its length (in UTF-8 bytes)
182
214
  * provided as `source_len`.
package/index.js CHANGED
@@ -1,3 +1,4 @@
1
+ const { availableParallelism } = require('node:os');
1
2
  const bindings = require('./bindings.js');
2
3
  const { wrap } = require('./wrap.cjs');
3
4
 
@@ -9,20 +10,137 @@ module.exports.ImportNameKind = bindings.ImportNameKind;
9
10
  module.exports.parseWithoutReturn = bindings.parseWithoutReturn;
10
11
  module.exports.Severity = bindings.Severity;
11
12
 
12
- module.exports.parseAsync = async function parseAsync(...args) {
13
- return wrap(await bindings.parseAsync(...args));
13
+ module.exports.parseAsync = async function parseAsync(filename, sourceText, options) {
14
+ if (options?.experimentalRawTransfer) return await parseAsyncRaw(filename, sourceText, options);
15
+ return wrap(await bindings.parseAsync(filename, sourceText, options));
14
16
  };
15
17
 
16
18
  module.exports.parseSync = function parseSync(filename, sourceText, options) {
17
- if (options?.experimentalRawTransfer) {
18
- return parseSyncRaw(filename, sourceText, options);
19
- }
19
+ if (options?.experimentalRawTransfer) return parseSyncRaw(filename, sourceText, options);
20
20
  return wrap(bindings.parseSync(filename, sourceText, options));
21
21
  };
22
22
 
23
- let buffer, encoder, deserializeJS, deserializeTS;
24
-
25
23
  function parseSyncRaw(filename, sourceText, options) {
24
+ const { buffer, sourceByteLen, options: optionsAmended } = prepareRaw(sourceText, options);
25
+ bindings.parseSyncRaw(filename, buffer, sourceByteLen, optionsAmended);
26
+ return deserialize(buffer, sourceText, sourceByteLen);
27
+ }
28
+
29
+ // User should not schedule more async tasks than there are available CPUs, as it hurts performance,
30
+ // but it's a common mistake in async JS code to do exactly that.
31
+ //
32
+ // That anti-pattern looks like this when applied to Oxc:
33
+ //
34
+ // ```js
35
+ // const asts = await Promise.all(
36
+ // files.map(
37
+ // async (filename) => {
38
+ // const sourceText = await fs.readFile(filename, 'utf8');
39
+ // const ast = await oxc.parseAsync(filename, sourceText);
40
+ // return ast;
41
+ // }
42
+ // )
43
+ // );
44
+ // ```
45
+ //
46
+ // In most cases, that'd just result in a bit of degraded performance, and higher memory use because
47
+ // of loading sources into memory prematurely.
48
+ //
49
+ // However, raw transfer uses a 6 GiB buffer for each parsing operation.
50
+ // Most of the memory pages in those buffers are never touched, so this does not consume a huge amount
51
+ // of physical memory, but it does still consume virtual memory.
52
+ //
53
+ // If we allowed creating a large number of 6 GiB buffers simultaneously, it would quickly consume
54
+ // virtual memory space and risk memory exhaustion. The code above would exhaust all of the bottom half
55
+ // (heap) of 48-bit virtual memory space if `files.length >= 21_845`. This is not a number which
56
+ // is unrealistic in real world code.
57
+ //
58
+ // To guard against this possibility, we implement a simple queue.
59
+ // No more than `os.availableParallelism()` files can be parsed simultaneously, and any further calls to
60
+ // `parseAsyncRaw` will be put in a queue, to execute once other tasks complete.
61
+ let availableCores = availableParallelism();
62
+ const queue = [];
63
+
64
+ async function parseAsyncRaw(filename, sourceText, options) {
65
+ // Wait for a free CPU core if all CPUs are currently busy.
66
+ //
67
+ // Note: `availableCores` is NOT decremented if the task has to wait in the queue first,
68
+ // and NOT incremented when parsing completes and it runs next task in the queue.
69
+ //
70
+ // This is to avoid a race condition if `parseAsyncRaw` is called during the microtick in between
71
+ // `resolve` being called below, and the promise resolving here. In that case the new task could
72
+ // start running, and then the promise resolves, and the queued task also starts running.
73
+ // We'd then have `availableParallelism() + 1` tasks running simultaneously. Potentially, this could
74
+ // happen repeatedly, with the number of tasks running simultaneously ever-increasing.
75
+ if (availableCores === 0) {
76
+ // All CPU cores are busy. Put this task in queue and wait for capacity to become available.
77
+ await new Promise((resolve, _) => {
78
+ queue.push(resolve);
79
+ });
80
+ } else {
81
+ // A CPU core is available. Mark core as busy, and run parsing now.
82
+ availableCores--;
83
+ }
84
+
85
+ // Parse
86
+ const { buffer, sourceByteLen, options: optionsAmended } = prepareRaw(sourceText, options);
87
+ await bindings.parseAsyncRaw(filename, buffer, sourceByteLen, optionsAmended);
88
+ const ret = deserialize(buffer, sourceText, sourceByteLen);
89
+
90
+ // Free the CPU core
91
+ if (queue.length > 0) {
92
+ // Some further tasks waiting in queue. Run the next one.
93
+ // Do not increment `availableCores` (see above).
94
+ const resolve = queue.shift();
95
+ resolve();
96
+ } else {
97
+ // No tasks waiting in queue. This CPU is now free.
98
+ availableCores++;
99
+ }
100
+
101
+ return ret;
102
+ }
103
+
104
+ const ONE_GIB = 1 << 30,
105
+ TWO_GIB = ONE_GIB * 2,
106
+ SIX_GIB = ONE_GIB * 6;
107
+
108
+ // We keep a cache of buffers for raw transfer, so we can reuse them as much as possible.
109
+ //
110
+ // When processing multiple files, it's ideal if we can reuse an existing buffer, as it's more likely to
111
+ // be warm in CPU cache, it avoids allocations, and it saves work for the garbage collector.
112
+ //
113
+ // However, we also don't want to keep a load of large buffers around indefinitely using up memory,
114
+ // if they're not going to be used again.
115
+ //
116
+ // We have no knowledge of what pattern over time user may process files in (could be lots in quick
117
+ // succession, or more occasionally in a long-running process). So we try to use flexible caching
118
+ // strategy which is adaptable to many usage patterns.
119
+ //
120
+ // We use a 2-tier cache.
121
+ // Tier 1 uses strong references, tier 2 uses weak references.
122
+ //
123
+ // When parsing is complete and the buffer is no longer in use, push it to `buffers` (tier 1 cache).
124
+ // Set a timer to clear the cache when no activity for 10 seconds.
125
+ //
126
+ // When the timer expires, move all the buffers from tier 1 cache into `oldBuffers` (tier 2).
127
+ // They are stored there as `WeakRef`s, so the garbage collector is free to reclaim them.
128
+ //
129
+ // On the next call to `parseSync` or `parseAsync`, promote any buffers in tier 2 cache which were not
130
+ // already garbage collected back into tier 1 cache. This is on the assumption that parsing one file
131
+ // indicates parsing as a whole is an ongoing process, and there will likely be further calls to
132
+ // `parseSync` / `parseAsync` in future.
133
+ //
134
+ // The weak tier 2 cache is because V8 does not necessarily free memory as soon as it's able to be
135
+ // freed. We don't want to block it from freeing memory, but if it's not done that yet, there's no
136
+ // point creating a new buffer, when one already exists.
137
+ const CLEAR_BUFFERS_TIMEOUT = 10_000; // 10 seconds
138
+ const buffers = [], oldBuffers = [];
139
+
140
+ let encoder = null, deserializeJS = null, deserializeTS = null, clearBuffersTimeout = null;
141
+
142
+ // Get a buffer (from cache if possible), copy source text into it, and amend options object
143
+ function prepareRaw(sourceText, options) {
26
144
  if (!rawTransferSupported()) {
27
145
  throw new Error(
28
146
  '`experimentalRawTransfer` option is not supported on 32-bit or big-endian systems, ' +
@@ -31,39 +149,52 @@ function parseSyncRaw(filename, sourceText, options) {
31
149
  }
32
150
 
33
151
  // Delete `experimentalRawTransfer` option
34
- let experimentalRawTransfer;
35
- ({ experimentalRawTransfer, ...options } = options);
152
+ let _;
153
+ ({ experimentalRawTransfer: _, ...options } = options);
36
154
 
37
- // Create buffer and `TextEncoder`
38
- if (!buffer) {
39
- buffer = createBuffer();
40
- encoder = new TextEncoder();
155
+ // Cancel timeout for clearing buffers
156
+ if (clearBuffersTimeout !== null) {
157
+ clearTimeout(clearBuffersTimeout);
158
+ clearBuffersTimeout = null;
41
159
  }
42
160
 
161
+ // Revive any discarded buffers which have not yet been garbage collected
162
+ if (oldBuffers.length > 0) {
163
+ const revivedBuffers = [];
164
+ for (let oldBuffer of oldBuffers) {
165
+ oldBuffer = oldBuffer.deref();
166
+ if (oldBuffer !== undefined) revivedBuffers.push(oldBuffer);
167
+ }
168
+ oldBuffers.length = 0;
169
+ if (revivedBuffers.length > 0) buffers.unshift(...revivedBuffers);
170
+ }
171
+
172
+ // Reuse existing buffer, or create a new one
173
+ const buffer = buffers.length > 0 ? buffers.pop() : createBuffer();
174
+
175
+ // Get/create `TextEncoder`
176
+ if (encoder === null) encoder = new TextEncoder();
177
+
43
178
  // Write source into start of buffer.
44
179
  // `TextEncoder` cannot write into a `Uint8Array` larger than 1 GiB,
45
180
  // so create a view into buffer of this size to write into.
46
181
  const sourceBuffer = new Uint8Array(buffer.buffer, buffer.byteOffset, ONE_GIB);
47
182
  const { read, written: sourceByteLen } = encoder.encodeInto(sourceText, sourceBuffer);
48
- if (read !== sourceText.length) {
49
- throw new Error('Failed to write source text into buffer');
50
- }
51
-
52
- // Parse
53
- bindings.parseSyncRaw(filename, buffer, sourceByteLen, options);
183
+ if (read !== sourceText.length) throw new Error('Failed to write source text into buffer');
54
184
 
55
- // Deserialize.
56
- // We cannot lazily deserialize in the getters, because the buffer might be re-used to parse
57
- // another file before the getter is called.
185
+ return { buffer, sourceByteLen, options };
186
+ }
58
187
 
59
- // (2 * 1024 * 1024 * 1024 - 12)
60
- const astTypeFlagPos = 2147483636;
61
- let isJsAst = buffer[astTypeFlagPos] === 0;
188
+ // Deserialize AST from buffer
189
+ function deserialize(buffer, sourceText, sourceByteLen) {
190
+ // 2147483636 = (2 * 1024 * 1024 * 1024) - 12
191
+ // i.e. 12 bytes from end of 2 GiB buffer
192
+ const isJsAst = buffer[2147483636] === 0;
62
193
 
63
194
  // Lazy load deserializer, and deserialize buffer to JS objects
64
195
  let data;
65
196
  if (isJsAst) {
66
- if (!deserializeJS) deserializeJS = require('./generated/deserialize/js.js');
197
+ if (deserializeJS === null) deserializeJS = require('./generated/deserialize/js.js');
67
198
  data = deserializeJS(buffer, sourceText, sourceByteLen);
68
199
 
69
200
  // Add a line comment for hashbang
@@ -72,12 +203,22 @@ function parseSyncRaw(filename, sourceText, options) {
72
203
  data.comments.unshift({ type: 'Line', value: hashbang.value, start: hashbang.start, end: hashbang.end });
73
204
  }
74
205
  } else {
75
- if (!deserializeTS) deserializeTS = require('./generated/deserialize/ts.js');
206
+ if (deserializeTS === null) deserializeTS = require('./generated/deserialize/ts.js');
76
207
  data = deserializeTS(buffer, sourceText, sourceByteLen);
77
208
  // Note: Do not add line comment for hashbang, to match `@typescript-eslint/parser`.
78
209
  // See https://github.com/oxc-project/oxc/blob/ea784f5f082e4c53c98afde9bf983afd0b95e44e/napi/parser/src/lib.rs#L106-L130
79
210
  }
80
211
 
212
+ // Return buffer to cache, to be reused
213
+ buffers.push(buffer);
214
+
215
+ // Set timer to clear buffers
216
+ if (clearBuffersTimeout !== null) clearTimeout(clearBuffersTimeout);
217
+ clearBuffersTimeout = setTimeout(clearBuffersCache, CLEAR_BUFFERS_TIMEOUT);
218
+ clearBuffersTimeout.unref();
219
+
220
+ // We cannot lazily deserialize in the getters, because the buffer might be re-used to parse
221
+ // another file before the getter is called.
81
222
  return {
82
223
  get program() {
83
224
  return data.program;
@@ -94,9 +235,16 @@ function parseSyncRaw(filename, sourceText, options) {
94
235
  };
95
236
  }
96
237
 
97
- const ONE_GIB = 1 << 30,
98
- TWO_GIB = ONE_GIB * 2,
99
- SIX_GIB = ONE_GIB * 6;
238
+ // Downgrade buffers in tier 1 cache (`buffers`) to tier 2 (`oldBuffers`),
239
+ // so they can be garbage collected
240
+ function clearBuffersCache() {
241
+ clearBuffersTimeout = null;
242
+
243
+ for (const buffer of buffers) {
244
+ oldBuffers.push(new WeakRef(buffer));
245
+ }
246
+ buffers.length = 0;
247
+ }
100
248
 
101
249
  // Create a `Uint8Array` which is 2 GiB in size, with its start aligned on 4 GiB.
102
250
  //
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "oxc-parser",
3
- "version": "0.71.0",
3
+ "version": "0.72.1",
4
4
  "main": "index.js",
5
5
  "browser": "wasm.mjs",
6
6
  "engines": {
@@ -39,15 +39,15 @@
39
39
  "access": "public"
40
40
  },
41
41
  "dependencies": {
42
- "@oxc-project/types": "^0.71.0"
42
+ "@oxc-project/types": "^0.72.1"
43
43
  },
44
44
  "devDependencies": {
45
45
  "@codspeed/vitest-plugin": "^4.0.0",
46
46
  "@napi-rs/wasm-runtime": "^0.2.7",
47
- "@vitest/browser": "3.1.3",
47
+ "@vitest/browser": "3.1.4",
48
48
  "esbuild": "^0.25.0",
49
49
  "playwright": "^1.51.0",
50
- "vitest": "3.1.3",
50
+ "vitest": "3.1.4",
51
51
  "typescript": "5.8.3"
52
52
  },
53
53
  "napi": {
@@ -77,23 +77,23 @@
77
77
  "dtsHeaderFile": "header.js"
78
78
  },
79
79
  "optionalDependencies": {
80
- "@oxc-parser/binding-win32-x64-msvc": "0.71.0",
81
- "@oxc-parser/binding-win32-arm64-msvc": "0.71.0",
82
- "@oxc-parser/binding-linux-x64-gnu": "0.71.0",
83
- "@oxc-parser/binding-linux-x64-musl": "0.71.0",
84
- "@oxc-parser/binding-freebsd-x64": "0.71.0",
85
- "@oxc-parser/binding-linux-arm64-gnu": "0.71.0",
86
- "@oxc-parser/binding-linux-arm64-musl": "0.71.0",
87
- "@oxc-parser/binding-linux-arm-gnueabihf": "0.71.0",
88
- "@oxc-parser/binding-linux-arm-musleabihf": "0.71.0",
89
- "@oxc-parser/binding-linux-s390x-gnu": "0.71.0",
90
- "@oxc-parser/binding-linux-riscv64-gnu": "0.71.0",
91
- "@oxc-parser/binding-darwin-x64": "0.71.0",
92
- "@oxc-parser/binding-darwin-arm64": "0.71.0",
93
- "@oxc-parser/binding-wasm32-wasi": "0.71.0"
80
+ "@oxc-parser/binding-win32-x64-msvc": "0.72.1",
81
+ "@oxc-parser/binding-win32-arm64-msvc": "0.72.1",
82
+ "@oxc-parser/binding-linux-x64-gnu": "0.72.1",
83
+ "@oxc-parser/binding-linux-x64-musl": "0.72.1",
84
+ "@oxc-parser/binding-freebsd-x64": "0.72.1",
85
+ "@oxc-parser/binding-linux-arm64-gnu": "0.72.1",
86
+ "@oxc-parser/binding-linux-arm64-musl": "0.72.1",
87
+ "@oxc-parser/binding-linux-arm-gnueabihf": "0.72.1",
88
+ "@oxc-parser/binding-linux-arm-musleabihf": "0.72.1",
89
+ "@oxc-parser/binding-linux-s390x-gnu": "0.72.1",
90
+ "@oxc-parser/binding-linux-riscv64-gnu": "0.72.1",
91
+ "@oxc-parser/binding-darwin-x64": "0.72.1",
92
+ "@oxc-parser/binding-darwin-arm64": "0.72.1",
93
+ "@oxc-parser/binding-wasm32-wasi": "0.72.1"
94
94
  },
95
95
  "scripts": {
96
- "build-dev": "napi build --no-dts-cache --platform --js bindings.js",
96
+ "build-dev": "napi build --platform --js bindings.js",
97
97
  "build": "pnpm run build-dev --features allocator --release",
98
98
  "postbuild-dev": "node patch.mjs",
99
99
  "build-wasi": "pnpm run build-dev --release --target wasm32-wasip1-threads",