npm - oxc-parser - Versions diffs - 0.71.0 → 0.72.1 - Mend

oxc-parser 0.71.0 → 0.72.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/README.md +3 -0
package/bindings.js +1 -0
package/generated/deserialize/ts.js +1 -0
package/index.d.ts +33 -1
package/index.js +178 -30
package/package.json +19 -19

package/README.md CHANGED Viewed

@@ -20,6 +20,9 @@ The only differences between Oxc's AST and ESTree / TS-ESTree are:
 - Support for Stage 3 ECMA features [`import defer`](https://github.com/tc39/proposal-defer-import-eval)
   and [`import source`](https://github.com/tc39/proposal-source-phase-imports).
+- In TS-ESTree AST, `import.defer(...)` and `import.source(...)` are represented as an `ImportExpression`
+  with `'defer'` or `'source'` in `phase` field (as in ESTree spec), where TS-ESLint represents these
+  as a `CallExpression` with `MetaProperty` as its `callee`.
 - Addition of a non-standard `hashbang` field to `Program`.
 That aside, the AST should completely align with Acorn's ESTree AST or TS-ESLint's TS-ESTree.

package/bindings.js CHANGED Viewed

@@ -390,6 +390,7 @@ module.exports.ExportLocalNameKind = nativeBinding.ExportLocalNameKind
 module.exports.getBufferOffset = nativeBinding.getBufferOffset
 module.exports.ImportNameKind = nativeBinding.ImportNameKind
 module.exports.parseAsync = nativeBinding.parseAsync
+module.exports.parseAsyncRaw = nativeBinding.parseAsyncRaw
 module.exports.parseSync = nativeBinding.parseSync
 module.exports.parseSyncRaw = nativeBinding.parseSyncRaw
 module.exports.rawTransferSupported = nativeBinding.rawTransferSupported

package/generated/deserialize/ts.js CHANGED Viewed

@@ -1057,6 +1057,7 @@ function deserializeImportExpression(pos) {
     end: deserializeU32(pos + 4),
     source: deserializeExpression(pos + 8),
     options: deserializeOptionExpression(pos + 24),
+    phase: deserializeOptionImportPhase(pos + 40),
   };
 }

package/index.d.ts CHANGED Viewed

@@ -139,6 +139,38 @@ export interface OxcError {
  */
 export declare function parseAsync(filename: string, sourceText: string, options?: ParserOptions | undefined | null): Promise<ParseResult>
+/**
+ * Parse AST into provided `Uint8Array` buffer, asynchronously.
+ *
+ * Note: This function can be slower than `parseSyncRaw` due to the overhead of spawning a thread.
+ *
+ * Source text must be written into the start of the buffer, and its length (in UTF-8 bytes)
+ * provided as `source_len`.
+ *
+ * This function will parse the source, and write the AST into the buffer, starting at the end.
+ *
+ * It also writes to the very end of the buffer the offset of `Program` within the buffer.
+ *
+ * Caller can deserialize data from the buffer on JS side.
+ *
+ * # SAFETY
+ *
+ * Caller must ensure:
+ * * Source text is written into start of the buffer.
+ * * Source text's UTF-8 byte length is `source_len`.
+ * * The first `source_len` bytes of the buffer comprise a valid UTF-8 string.
+ * * Contents of buffer must not be mutated by caller until the `Promise` returned by this
+ *   function resolves.
+ *
+ * If source text is originally a JS string on JS side, and converted to a buffer with
+ * `Buffer.from(str)` or `new TextEncoder().encode(str)`, this guarantees it's valid UTF-8.
+ *
+ * # Panics
+ *
+ * Panics if source text is too long, or AST takes more memory than is available in the buffer.
+ */
+export declare function parseAsyncRaw(filename: string, buffer: Uint8Array, sourceLen: number, options?: ParserOptions | undefined | null): Promise<unknown>
 export interface ParserOptions {
   /** Treat the source text as `js`, `jsx`, `ts`, or `tsx`. */
   lang?: 'js' | 'jsx' | 'ts' | 'tsx'
@@ -176,7 +208,7 @@ export interface ParserOptions {
 export declare function parseSync(filename: string, sourceText: string, options?: ParserOptions | undefined | null): ParseResult
 /**
- * Parses AST into provided `Uint8Array` buffer.
+ * Parse AST into provided `Uint8Array` buffer, synchronously.
  *
  * Source text must be written into the start of the buffer, and its length (in UTF-8 bytes)
  * provided as `source_len`.

package/index.js CHANGED Viewed

@@ -1,3 +1,4 @@
+const { availableParallelism } = require('node:os');
 const bindings = require('./bindings.js');
 const { wrap } = require('./wrap.cjs');
@@ -9,20 +10,137 @@ module.exports.ImportNameKind = bindings.ImportNameKind;
 module.exports.parseWithoutReturn = bindings.parseWithoutReturn;
 module.exports.Severity = bindings.Severity;
-module.exports.parseAsync = async function parseAsync(...args) {
-  return wrap(await bindings.parseAsync(...args));
+// Parse `sourceText` asynchronously.
+// When `options.experimentalRawTransfer` is truthy, parsing is delegated to `parseAsyncRaw`;
+// otherwise the result of the native `bindings.parseAsync` call is passed through `wrap`.
+module.exports.parseAsync = async function parseAsync(filename, sourceText, options) {
+  if (options?.experimentalRawTransfer) return await parseAsyncRaw(filename, sourceText, options);
+  return wrap(await bindings.parseAsync(filename, sourceText, options));
 };
 module.exports.parseSync = function parseSync(filename, sourceText, options) {
-  if (options?.experimentalRawTransfer) {
-    return parseSyncRaw(filename, sourceText, options);
-  }
+  // Raw transfer is opt-in: a truthy `options.experimentalRawTransfer` routes the call to
+  // `parseSyncRaw`. Otherwise the result of the native `bindings.parseSync` call is passed
+  // through `wrap`.
+  if (options?.experimentalRawTransfer) return parseSyncRaw(filename, sourceText, options);
   return wrap(bindings.parseSync(filename, sourceText, options));
 };
-let buffer, encoder, deserializeJS, deserializeTS;
 function parseSyncRaw(filename, sourceText, options) {
+  // Stage the source text in a raw transfer buffer and get the amended options object
+  const prepared = prepareRaw(sourceText, options);
+  const { buffer, sourceByteLen } = prepared;
+  // Native parser works on the buffer which holds the source text
+  bindings.parseSyncRaw(filename, buffer, sourceByteLen, prepared.options);
+  // Convert buffer contents to JS objects
+  return deserialize(buffer, sourceText, sourceByteLen);
+}
+// User should not schedule more async tasks than there are available CPUs, as it hurts performance,
+// but it's a common mistake in async JS code to do exactly that.
+//
+// That anti-pattern looks like this when applied to Oxc:
+//
+// ```js
+// const asts = await Promise.all(
+//   files.map(
+//     async (filename) => {
+//       const sourceText = await fs.readFile(filename, 'utf8');
+//       const ast = await oxc.parseAsync(filename, sourceText);
+//       return ast;
+//     }
+//   )
+// );
+// ```
+//
+// In most cases, that'd just result in a bit of degraded performance, and higher memory use because
+// of loading sources into memory prematurely.
+//
+// However, raw transfer uses a 6 GiB buffer for each parsing operation.
+// Most of the memory pages in those buffers are never touched, so this does not consume a huge amount
+// of physical memory, but it does still consume virtual memory.
+//
+// If we allowed creating a large number of 6 GiB buffers simultaneously, it would quickly consume
+// virtual memory space and risk memory exhaustion. The code above would exhaust all of the bottom
+// half (heap) of the 48-bit virtual memory space if `files.length >= 21_845`. A file count that
+// large is not unrealistic in real-world code.
+//
+// To guard against this possibility, we implement a simple queue.
+// No more than `os.availableParallelism()` files can be parsed simultaneously, and any further calls to
+// `parseAsyncRaw` will be put in a queue, to execute once other tasks complete.
+let availableCores = availableParallelism(); // Number of parsing slots currently free
+const queue = []; // `resolve` callbacks of `parseAsyncRaw` calls waiting for a slot, oldest first
+async function parseAsyncRaw(filename, sourceText, options) {
+  // Parse `sourceText` asynchronously via raw transfer, and return the object produced by
+  // `deserialize`. If `prepareRaw`, the native `bindings.parseAsyncRaw` call, or `deserialize`
+  // throws, the returned promise rejects with that error.
+  //
+  // Wait for a free CPU core if all CPUs are currently busy.
+  //
+  // Note: `availableCores` is NOT decremented if this call has to wait in the queue first,
+  // and NOT incremented when parsing completes and the next task in the queue is started.
+  //
+  // This is to avoid a race condition if `parseAsyncRaw` is called during the microtick in between
+  // `resolve` being called below, and the promise resolving here. In that case the new task could
+  // start running, and then the promise resolves, and the queued task also starts running.
+  // We'd then have `availableParallelism() + 1` tasks running simultaneously. Potentially, this could
+  // happen repeatedly, with the number of tasks running simultaneously ever-increasing.
+  if (availableCores === 0) {
+    // All CPU cores are busy. Put this task in queue and wait for capacity to become available.
+    await new Promise((resolve) => {
+      queue.push(resolve);
+    });
+  } else {
+    // A CPU core is available. Mark core as busy, and run parsing now.
+    availableCores--;
+  }
+  try {
+    // Parse
+    const { buffer, sourceByteLen, options: optionsAmended } = prepareRaw(sourceText, options);
+    await bindings.parseAsyncRaw(filename, buffer, sourceByteLen, optionsAmended);
+    return deserialize(buffer, sourceText, sourceByteLen);
+  } finally {
+    // Free the CPU core. This must happen even when parsing fails: otherwise every failed call
+    // would permanently lose one slot, and once all slots were lost, every later call would
+    // wait in the queue forever.
+    if (queue.length > 0) {
+      // Some further tasks waiting in queue. Run the next one.
+      // Do not increment `availableCores` (see above).
+      const resolve = queue.shift();
+      resolve();
+    } else {
+      // No tasks waiting in queue. This CPU is now free.
+      availableCores++;
+    }
+  }
+}
+// Sizes in bytes. `1 << 30` still fits in a 32-bit integer; the 2 GiB and 6 GiB values do not,
+// so they are derived by ordinary number multiplication rather than by bit shifting.
+const ONE_GIB = 1 << 30,
+  TWO_GIB = ONE_GIB * 2,
+  SIX_GIB = ONE_GIB * 6;
+// We keep a cache of buffers for raw transfer, so we can reuse them as much as possible.
+//
+// When processing multiple files, it's ideal if we can reuse an existing buffer, as it's more likely
+// to be warm in CPU cache, it avoids allocations, and it saves work for the garbage collector.
+//
+// However, we also don't want to keep a load of large buffers around indefinitely using up memory,
+// if they're not going to be used again.
+//
+// We have no knowledge of the pattern in which the user will process files over time (could be lots
+// in quick succession, or more occasionally in a long-running process). So we use a flexible caching
+// strategy which is adaptable to many usage patterns.
+//
+// We use a 2-tier cache.
+// Tier 1 uses strong references, tier 2 uses weak references.
+//
+// When parsing is complete and the buffer is no longer in use, push it to `buffers` (tier 1 cache).
+// Set a timer to clear the cache when no activity for 10 seconds.
+//
+// When the timer expires, move all the buffers from tier 1 cache into `oldBuffers` (tier 2).
+// They are stored there as `WeakRef`s, so the garbage collector is free to reclaim them.
+//
+// On the next call to `parseSync` or `parseAsync`, promote any buffers in the tier 2 cache which have
+// not yet been garbage collected back into the tier 1 cache. This is on the assumption that parsing
+// one file indicates parsing as a whole is an ongoing process, and there will likely be further calls
+// to `parseSync` / `parseAsync` in future.
+//
+// The weak tier 2 cache is because V8 does not necessarily free memory as soon as it's able to be
+// freed. We don't want to block it from freeing memory, but if it's not done that yet, there's no
+// point creating a new buffer, when one already exists.
+const CLEAR_BUFFERS_TIMEOUT = 10_000; // 10 seconds
+// Tier 1 cache (`buffers`, holds the buffers themselves) and tier 2 cache (`oldBuffers`, holds
+// `WeakRef`s to buffers)
+const buffers = [], oldBuffers = [];
+// `encoder`, `deserializeJS` and `deserializeTS` are created / loaded on first use and are `null`
+// until then. `clearBuffersTimeout` is the pending cache-clearing timer, or `null` if none is set.
+let encoder = null, deserializeJS = null, deserializeTS = null, clearBuffersTimeout = null;
+// Get a buffer (from cache if possible), copy source text into it, and amend options object
+function prepareRaw(sourceText, options) {
   if (!rawTransferSupported()) {
     throw new Error(
       '`experimentalRawTransfer` option is not supported on 32-bit or big-endian systems, ' +
@@ -31,39 +149,52 @@ function parseSyncRaw(filename, sourceText, options) {
   }
   // Delete `experimentalRawTransfer` option
-  let experimentalRawTransfer;
-  ({ experimentalRawTransfer, ...options } = options);
+  let _;
+  ({ experimentalRawTransfer: _, ...options } = options);
-  // Create buffer and `TextEncoder`
-  if (!buffer) {
-    buffer = createBuffer();
-    encoder = new TextEncoder();
+  // Cancel timeout for clearing buffers
+  if (clearBuffersTimeout !== null) {
+    clearTimeout(clearBuffersTimeout);
+    clearBuffersTimeout = null;
   }
+  // Revive any discarded buffers which have not yet been garbage collected
+  if (oldBuffers.length > 0) {
+    const revivedBuffers = [];
+    for (let oldBuffer of oldBuffers) {
+      oldBuffer = oldBuffer.deref();
+      if (oldBuffer !== undefined) revivedBuffers.push(oldBuffer);
+    }
+    oldBuffers.length = 0;
+    if (revivedBuffers.length > 0) buffers.unshift(...revivedBuffers);
+  }
+  // Reuse existing buffer, or create a new one
+  const buffer = buffers.length > 0 ? buffers.pop() : createBuffer();
+  // Get/create `TextEncoder`
+  if (encoder === null) encoder = new TextEncoder();
   // Write source into start of buffer.
   // `TextEncoder` cannot write into a `Uint8Array` larger than 1 GiB,
   // so create a view into buffer of this size to write into.
   const sourceBuffer = new Uint8Array(buffer.buffer, buffer.byteOffset, ONE_GIB);
   const { read, written: sourceByteLen } = encoder.encodeInto(sourceText, sourceBuffer);
-  if (read !== sourceText.length) {
-    throw new Error('Failed to write source text into buffer');
-  }
-  // Parse
-  bindings.parseSyncRaw(filename, buffer, sourceByteLen, options);
+  if (read !== sourceText.length) throw new Error('Failed to write source text into buffer');
-  // Deserialize.
-  // We cannot lazily deserialize in the getters, because the buffer might be re-used to parse
-  // another file before the getter is called.
+  return { buffer, sourceByteLen, options };
+}
-  // (2 * 1024 * 1024 * 1024 - 12)
-  const astTypeFlagPos = 2147483636;
-  let isJsAst = buffer[astTypeFlagPos] === 0;
+// Deserialize AST from buffer
+function deserialize(buffer, sourceText, sourceByteLen) {
+  // 2147483636 = (2 * 1024 * 1024 * 1024) - 12
+  // i.e. 12 bytes from end of 2 GiB buffer
+  const isJsAst = buffer[2147483636] === 0;
   // Lazy load deserializer, and deserialize buffer to JS objects
   let data;
   if (isJsAst) {
-    if (!deserializeJS) deserializeJS = require('./generated/deserialize/js.js');
+    if (deserializeJS === null) deserializeJS = require('./generated/deserialize/js.js');
     data = deserializeJS(buffer, sourceText, sourceByteLen);
     // Add a line comment for hashbang
@@ -72,12 +203,22 @@ function parseSyncRaw(filename, sourceText, options) {
       data.comments.unshift({ type: 'Line', value: hashbang.value, start: hashbang.start, end: hashbang.end });
     }
   } else {
-    if (!deserializeTS) deserializeTS = require('./generated/deserialize/ts.js');
+    if (deserializeTS === null) deserializeTS = require('./generated/deserialize/ts.js');
     data = deserializeTS(buffer, sourceText, sourceByteLen);
     // Note: Do not add line comment for hashbang, to match `@typescript-eslint/parser`.
     // See https://github.com/oxc-project/oxc/blob/ea784f5f082e4c53c98afde9bf983afd0b95e44e/napi/parser/src/lib.rs#L106-L130
   }
+  // Return buffer to cache, to be reused
+  buffers.push(buffer);
+  // Set timer to clear buffers
+  if (clearBuffersTimeout !== null) clearTimeout(clearBuffersTimeout);
+  clearBuffersTimeout = setTimeout(clearBuffersCache, CLEAR_BUFFERS_TIMEOUT);
+  clearBuffersTimeout.unref();
+  // We cannot lazily deserialize in the getters, because the buffer might be re-used to parse
+  // another file before the getter is called.
   return {
     get program() {
       return data.program;
@@ -94,9 +235,16 @@ function parseSyncRaw(filename, sourceText, options) {
   };
 }
-const ONE_GIB = 1 << 30,
-  TWO_GIB = ONE_GIB * 2,
-  SIX_GIB = ONE_GIB * 6;
+// Demote every buffer in the tier 1 cache (`buffers`) to the tier 2 cache (`oldBuffers`).
+// Tier 2 only holds `WeakRef`s, so the garbage collector is free to reclaim the buffers.
+function clearBuffersCache() {
+  // Record that no cache-clearing timer is pending any more
+  clearBuffersTimeout = null;
+  // `splice(0)` empties `buffers` and hands back its former contents in the same order
+  for (const demoted of buffers.splice(0)) {
+    oldBuffers.push(new WeakRef(demoted));
+  }
+}
 // Create a `Uint8Array` which is 2 GiB in size, with its start aligned on 4 GiB.
 //

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "oxc-parser",
-  "version": "0.71.0",
+  "version": "0.72.1",
   "main": "index.js",
   "browser": "wasm.mjs",
   "engines": {
@@ -39,15 +39,15 @@
     "access": "public"
   },
   "dependencies": {
-    "@oxc-project/types": "^0.71.0"
+    "@oxc-project/types": "^0.72.1"
   },
   "devDependencies": {
     "@codspeed/vitest-plugin": "^4.0.0",
     "@napi-rs/wasm-runtime": "^0.2.7",
-    "@vitest/browser": "3.1.3",
+    "@vitest/browser": "3.1.4",
     "esbuild": "^0.25.0",
     "playwright": "^1.51.0",
-    "vitest": "3.1.3",
+    "vitest": "3.1.4",
     "typescript": "5.8.3"
   },
   "napi": {
@@ -77,23 +77,23 @@
     "dtsHeaderFile": "header.js"
   },
   "optionalDependencies": {
-    "@oxc-parser/binding-win32-x64-msvc": "0.71.0",
-    "@oxc-parser/binding-win32-arm64-msvc": "0.71.0",
-    "@oxc-parser/binding-linux-x64-gnu": "0.71.0",
-    "@oxc-parser/binding-linux-x64-musl": "0.71.0",
-    "@oxc-parser/binding-freebsd-x64": "0.71.0",
-    "@oxc-parser/binding-linux-arm64-gnu": "0.71.0",
-    "@oxc-parser/binding-linux-arm64-musl": "0.71.0",
-    "@oxc-parser/binding-linux-arm-gnueabihf": "0.71.0",
-    "@oxc-parser/binding-linux-arm-musleabihf": "0.71.0",
-    "@oxc-parser/binding-linux-s390x-gnu": "0.71.0",
-    "@oxc-parser/binding-linux-riscv64-gnu": "0.71.0",
-    "@oxc-parser/binding-darwin-x64": "0.71.0",
-    "@oxc-parser/binding-darwin-arm64": "0.71.0",
-    "@oxc-parser/binding-wasm32-wasi": "0.71.0"
+    "@oxc-parser/binding-win32-x64-msvc": "0.72.1",
+    "@oxc-parser/binding-win32-arm64-msvc": "0.72.1",
+    "@oxc-parser/binding-linux-x64-gnu": "0.72.1",
+    "@oxc-parser/binding-linux-x64-musl": "0.72.1",
+    "@oxc-parser/binding-freebsd-x64": "0.72.1",
+    "@oxc-parser/binding-linux-arm64-gnu": "0.72.1",
+    "@oxc-parser/binding-linux-arm64-musl": "0.72.1",
+    "@oxc-parser/binding-linux-arm-gnueabihf": "0.72.1",
+    "@oxc-parser/binding-linux-arm-musleabihf": "0.72.1",
+    "@oxc-parser/binding-linux-s390x-gnu": "0.72.1",
+    "@oxc-parser/binding-linux-riscv64-gnu": "0.72.1",
+    "@oxc-parser/binding-darwin-x64": "0.72.1",
+    "@oxc-parser/binding-darwin-arm64": "0.72.1",
+    "@oxc-parser/binding-wasm32-wasi": "0.72.1"
   },
   "scripts": {
-    "build-dev": "napi build --no-dts-cache --platform --js bindings.js",
+    "build-dev": "napi build --platform --js bindings.js",
     "build": "pnpm run build-dev --features allocator --release",
     "postbuild-dev": "node patch.mjs",
     "build-wasi": "pnpm run build-dev --release --target wasm32-wasip1-threads",