goatlint-parser 0.125.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,254 @@
1
+ import { createRequire } from "node:module";
2
+ import { TOKENS_OFFSET_POS_32, TOKENS_LEN_POS_32 } from "../generated/constants.js";
3
+ import { isJsAst, parseAsyncRawImpl, parseSyncRawImpl, returnBufferToCache } from "./common.js";
4
+
5
+ const require = createRequire(import.meta.url);
6
+
7
/**
 * Synchronously parse a JS/TS file on the calling thread, using raw transfer to speed up
 * deserialization.
 *
 * Thin wrapper: forwards its arguments to `parseSyncRawImpl`, supplying `deserialize` as the
 * function which turns the raw buffer into JS objects.
 *
 * @param {string} filename - Filename
 * @param {string} sourceText - Source text of file
 * @param {Object} options - Parsing options
 * @returns {Object} - Object with property getters for `program`, `module`, `comments`, and `errors`
 */
export function parseSyncRaw(filename, sourceText, options) {
  const result = parseSyncRawImpl(filename, sourceText, options, deserialize);
  return result;
}
18
+
19
/**
 * Asynchronous counterpart of `parseSyncRaw`: parse JS/TS source using raw transfer to speed up
 * deserialization.
 *
 * Only part of the work can be moved off the current thread. The Rust parser runs on a separate
 * thread, but turning the AST into JS objects has to happen on the current thread, and that
 * synchronous step typically outweighs the asynchronous parsing by a factor of around 3.
 * In other words, most of the workload is not parallelized by this function.
 *
 * `parseSyncRaw` is generally the better choice, as it avoids the overhead of spawning a thread.
 * To parse many files in parallel, worker threads are the recommended approach.
 *
 * Thin wrapper: forwards its arguments to `parseAsyncRawImpl` together with `deserialize`,
 * and returns whatever `parseAsyncRawImpl` returns.
 *
 * @param {string} filename - Filename
 * @param {string} sourceText - Source text of file
 * @param {Object} options - Parsing options
 * @returns {Object} - Object with property getters for `program`, `module`, `comments`, and `errors`
 */
export function parse(filename, sourceText, options) {
  const pending = parseAsyncRawImpl(filename, sourceText, options, deserialize);
  return pending;
}
40
+
41
// The deserializers are large modules, so each one is only loaded the first time it is needed.
// `deserializerNames` holds the module name of every variant, and `deserializers` caches
// the variant's `deserialize` function once it has been loaded (`null` until then).
// Both arrays share the same index: `isJs * 1 + range * 2 + experimentalParent * 4`.
const deserializerNames = [
  // Neither `range` nor `experimentalParent`
  "ts",
  "js",
  // `range` only
  "ts_range",
  "js_range",
  // `experimentalParent` only
  "ts_parent",
  "js_parent",
  // Both `range` and `experimentalParent`
  "ts_range_parent",
  "js_range_parent",
];
const deserializers = deserializerNames.map(() => null);
55
+
56
+ /**
57
+ * Deserialize whole AST from buffer.
58
+ *
59
+ * @param {Uint8Array} buffer - Buffer containing AST in raw form
60
+ * @param {string} sourceText - Source for the file
61
+ * @param {number} sourceByteLen - Length of source text in UTF-8 bytes
62
+ * @param {Object} options - Parsing options
63
+ * @returns {Object} - Object with property getters for `program`, `module`, `comments`, and `errors`
64
+ */
65
+ function deserialize(buffer, sourceText, sourceByteLen, options) {
66
+ const isJs = isJsAst(buffer),
67
+ range = !!options.range,
68
+ parent = !!options.experimentalParent;
69
+
70
+ // Lazy load deserializer, and deserialize buffer to JS objects
71
+ const deserializerIndex = +isJs | (+range << 1) | (+parent << 2);
72
+ let deserializeThis = deserializers[deserializerIndex];
73
+ if (deserializeThis === null) {
74
+ deserializeThis = deserializers[deserializerIndex] = require(
75
+ `../generated/deserialize/${deserializerNames[deserializerIndex]}.js`,
76
+ ).deserialize;
77
+ }
78
+
79
+ const data = deserializeThis(buffer, sourceText, sourceByteLen);
80
+
81
+ // Add a line comment for hashbang if JS.
82
+ // Do not add comment if TS, to match `@typescript-eslint/parser`.
83
+ // See https://github.com/oxc-project/oxc/blob/ea784f5f082e4c53c98afde9bf983afd0b95e44e/napi/parser/src/lib.rs#L106-L130
84
+ if (isJs) {
85
+ const { hashbang } = data.program;
86
+ if (hashbang !== null) {
87
+ data.comments.unshift(
88
+ range
89
+ ? {
90
+ type: "Line",
91
+ value: hashbang.value,
92
+ start: hashbang.start,
93
+ end: hashbang.end,
94
+ range: hashbang.range,
95
+ }
96
+ : { type: "Line", value: hashbang.value, start: hashbang.start, end: hashbang.end },
97
+ );
98
+ }
99
+ }
100
+
101
+ // Deserialize tokens
102
+ const tokens = options.experimentalTokens ? deserializeTokens(buffer, sourceText, isJs) : null;
103
+
104
+ // Return buffer to cache, to be reused
105
+ returnBufferToCache(buffer);
106
+
107
+ // We cannot lazily deserialize in the getters, because the buffer might be re-used to parse
108
+ // another file before the getter is called
109
+ if (tokens !== null) {
110
+ return {
111
+ get program() {
112
+ return data.program;
113
+ },
114
+ get module() {
115
+ return data.module;
116
+ },
117
+ get comments() {
118
+ return data.comments;
119
+ },
120
+ get tokens() {
121
+ return tokens;
122
+ },
123
+ get errors() {
124
+ return data.errors;
125
+ },
126
+ };
127
+ }
128
+
129
+ return {
130
+ get program() {
131
+ return data.program;
132
+ },
133
+ get module() {
134
+ return data.module;
135
+ },
136
+ get comments() {
137
+ return data.comments;
138
+ },
139
+ get errors() {
140
+ return data.errors;
141
+ },
142
+ };
143
+ }
144
+
145
// `ESTreeKind` discriminants which get special treatment when deserializing tokens.
// The values are set by the Rust side.
const PRIVATE_IDENTIFIER_KIND = 2;
const REGEXP_KIND = 8;

// Token `type` strings, indexed by `ESTreeKind` discriminant.
// Order matches the `ESTreeKind` enum in `estree_kind.rs`.
const TOKEN_TYPES = [
  "Identifier", // 0
  "Keyword", // 1
  "PrivateIdentifier", // 2
  "Punctuator", // 3
  "Numeric", // 4
  "String", // 5
  "Boolean", // 6
  "Null", // 7
  "RegularExpression", // 8
  "Template", // 9
  "JSXText", // 10
  "JSXIdentifier", // 11
];

// Mask for the bits which can be set in an `ESTreeKind` discriminant (bottom 4 bits)
const TOKEN_KIND_MASK = 0b1111;

// Size of the Rust `Token` type. Token positions advance by this amount per token.
const TOKEN_SIZE = 16;
170
+
171
/**
 * Deserialize all tokens from buffer.
 *
 * Position of the first token and the number of tokens are read from the buffer's `int32` view,
 * at indexes `TOKENS_OFFSET_POS_32` and `TOKENS_LEN_POS_32`. Tokens are laid out one after another,
 * `TOKEN_SIZE` apart.
 *
 * @param {Uint8Array} buffer - Buffer containing AST in raw form
 * @param {string} sourceText - Source for the file
 * @param {boolean} isJs - `true` if parsing in JS mode
 * @returns {Object[]} - Array of token objects
 */
function deserializeTokens(buffer, sourceText, isJs) {
  const int32 = buffer.int32;
  const startPos = int32[TOKENS_OFFSET_POS_32];
  const tokenCount = int32[TOKENS_LEN_POS_32];
  const endPos = startPos + tokenCount * TOKEN_SIZE;

  const tokens = [];
  for (let pos = startPos; pos < endPos; pos += TOKEN_SIZE) {
    tokens.push(deserializeToken(pos, int32, sourceText, isJs));
  }
  return tokens;
}
192
+
193
/**
 * Deserialize the token at position `pos` in buffer.
 *
 * Reads three consecutive 32-bit values: `start`, `end`, and a word containing the token's `Kind`
 * and flags. The token's value is the slice of `sourceText` from `start` to `end`.
 *
 * @param {number} pos - Position in buffer containing Rust `Token` type
 * @param {Int32Array} int32 - Buffer containing AST in raw form as an `Int32Array`
 * @param {string} sourceText - Source for the file
 * @param {boolean} isJs - `true` if parsing in JS mode
 * @returns {Object} - Token object with `type`, `value`, `start` and `end` properties,
 *   plus a `regex` property (`pattern` and `flags`) for regular expression tokens
 */
function deserializeToken(pos, int32, sourceText, isJs) {
  // `pos` is a byte position, whereas `int32` is indexed in units of 4 bytes
  const index32 = pos >> 2;
  const start = int32[index32];
  const end = int32[index32 + 1];
  const kindAndFlags = int32[index32 + 2];

  // `Kind` is byte at index 8 in `Token`, and has 12 variants numbered from 0 to 11.
  // The bottom byte has to be masked out anyway (`& 0xFF`), so mask off all bits which
  // can't be set in `Kind` while we're at it.
  // This may allow V8 to generate more efficient code for `TOKEN_TYPES[kind]`.
  const kind = kindAndFlags & TOKEN_KIND_MASK;

  const raw = sourceText.slice(start, end);

  // Regular expressions: split raw text into pattern and flags at the final `/`
  if (kind === REGEXP_KIND) {
    const closingSlash = raw.lastIndexOf("/");
    const regex = {
      pattern: raw.slice(1, closingSlash),
      flags: raw.slice(closingSlash + 1),
    };
    return { type: "RegularExpression", value: raw, regex, start, end };
  }

  // Private identifiers lose their leading `#`
  let value = kind === PRIVATE_IDENTIFIER_KIND ? raw.slice(1) : raw;

  // In JS mode, identifiers, keywords, and private identifiers are unescaped.
  // `is_escaped` flag is in byte 10 of `Token`, and is a `bool`.
  const isEscaped = (kindAndFlags & 0x10000) !== 0;
  if (isJs && kind <= PRIVATE_IDENTIFIER_KIND && isEscaped) {
    value = unescapeIdentifier(value);
  }

  return { type: TOKEN_TYPES[kind], value, start, end };
}
240
+
241
/**
 * Unescape an identifier.
 *
 * Replaces every `\uXXXX` (exactly 4 hex digits) and `\u{X...}` (1 or more hex digits) escape
 * in `name` with the character for that code point.
 *
 * This is done on JS side, because escaped identifiers are so extremely rare that this function
 * is never called in practice anyway.
 *
 * @param {string} name - Identifier name to unescape
 * @returns {string} - Unescaped identifier name
 */
function unescapeIdentifier(name) {
  const unicodeEscape = /\\u(?:\{([0-9a-fA-F]+)\}|([0-9a-fA-F]{4}))/g;
  return name.replace(unicodeEscape, (_match, bracedHex, fixedHex) => {
    // Exactly one of the two capture groups matches, depending on the form of the escape
    const hex = bracedHex ?? fixedHex;
    return String.fromCodePoint(parseInt(hex, 16));
  });
}
@@ -0,0 +1,11 @@
1
// Unique sentinel object, which is not exposed publicly.
// Used to prevent user code from calling class constructors.
export const TOKEN = {};
4
+
5
+ /**
6
+ * Throw error when restricted class constructor is called by user code.
7
+ * @throws {Error}
8
+ */
9
+ export function constructorError() {
10
+ throw new Error("Constructor is for internal use only");
11
+ }
@@ -0,0 +1,153 @@
1
+ import { DATA_POINTER_POS_32, PROGRAM_OFFSET } from "../generated/constants.js";
2
+ import { RawTransferData } from "../generated/lazy/constructors.js";
3
+ import { walkProgram } from "../generated/lazy/walk.js";
4
+ import { parseAsyncRawImpl, parseSyncRawImpl, returnBufferToCache } from "./common.js";
5
+ import { TOKEN } from "./lazy-common.js";
6
+ import { getVisitorsArr } from "./visitor.js";
7
+ export { Visitor } from "./visitor.js";
8
+
9
/**
 * Synchronously parse JS/TS source on the current thread, without deserializing the AST.
 *
 * The data stays in the buffer, and is only turned into JS objects lazily, when properties
 * of the returned objects are accessed.
 * e.g. `program` in the returned object is an instance of `Program` class, with getters for
 * `start`, `end`, `body` etc.
 *
 * The returned object has a `visit` function, for visiting the AST with a `Visitor`
 * (`Visitor` class can be obtained by calling `experimentalGetLazyVisitor()`).
 *
 * The returned object also has a `dispose` method. Calling it once finished with the AST is
 * advisable, as it returns the buffer to the cache so it can be reused immediately.
 * The garbage collector should do this anyway at some point, but on an unpredictable schedule.
 *
 * Thin wrapper: forwards its arguments to `parseSyncRawImpl`, supplying `construct` as the
 * function which builds the result from the raw buffer.
 *
 * @param {string} filename - Filename
 * @param {string} sourceText - Source text of file
 * @param {Object} options - Parsing options
 * @returns {Object} - Object with property getters for `program`, `module`, `comments`, and `errors`,
 *   and `dispose` and `visit` methods
 */
export function parseSyncLazy(filename, sourceText, options) {
  const result = parseSyncRawImpl(filename, sourceText, options, construct);
  return result;
}
35
+
36
/**
 * Asynchronously parse JS/TS source on a separate thread, without deserializing the AST.
 *
 * The data stays in the buffer, and is only turned into JS objects lazily, when properties
 * of the returned objects are accessed.
 * e.g. `program` in the returned object is an instance of `Program` class, with getters for
 * `start`, `end`, `body` etc.
 *
 * As the AST is not deserialized up front, very little work happens on the current thread
 * in this function. Deserialization work only occurs when properties of the objects are accessed.
 *
 * The returned object has a `visit` function, for visiting the AST with a `Visitor`
 * (`Visitor` class can be obtained by calling `experimentalGetLazyVisitor()`).
 *
 * The returned object also has a `dispose` method. Calling it once finished with the AST is
 * advisable, as it returns the buffer to the cache so it can be reused immediately.
 * The garbage collector should do this anyway at some point, but on an unpredictable schedule.
 *
 * Thin wrapper: forwards its arguments to `parseAsyncRawImpl` together with `construct`,
 * and returns whatever `parseAsyncRawImpl` returns.
 *
 * @param {string} filename - Filename
 * @param {string} sourceText - Source text of file
 * @param {Object} options - Parsing options
 * @returns {Object} - Object with property getters for `program`, `module`, `comments`, and `errors`,
 *   and `dispose` and `visit` methods
 */
export function parse(filename, sourceText, options) {
  const pending = parseAsyncRawImpl(filename, sourceText, options, construct);
  return pending;
}
66
+
67
// Registry of buffers which are held by lazily-deserialized ASTs.
// When an `ast` wrapper is garbage collected, its buffer is passed to `returnBufferToCache`.
//
// `FinalizationRegistry` does not exist on old versions of NodeJS (e.g. Prettier supports
// NodeJS v14), so check for it first, to avoid an error when this module is loaded there.
// Raw transfer is disabled on NodeJS before v22, so it doesn't matter if this is `null`
// on old NodeJS - it'll never be accessed in that case.
const bufferRecycleRegistry =
  typeof FinalizationRegistry !== "undefined"
    ? new FinalizationRegistry(returnBufferToCache)
    : null;
78
+
79
/**
 * Build the result object for a lazily-deserialized AST.
 *
 * Nothing is deserialized here. The returned object's getters read from a `RawTransferData`
 * instance created over the buffer. The object also carries `dispose` and `visit` functions.
 *
 * @param {Uint8Array} buffer - Buffer containing AST in raw form
 * @param {string} sourceText - Source for the file
 * @param {number} sourceByteLen - Length of source text in UTF-8 bytes
 * @param {Object} _options - Parsing options (unused)
 * @returns {Object} - Object with property getters for `program`, `module`, `comments`, and `errors`,
 *   and `dispose` and `visit` methods
 */
function construct(buffer, sourceText, sourceByteLen, _options) {
  // AST object, which is shared with the root data object and the walker
  const ast = {
    buffer,
    sourceText,
    sourceByteLen,
    // Source is pure ASCII if its length in UTF-16 code units equals its length in UTF-8 bytes
    sourceIsAscii: sourceText.length === sourceByteLen,
    nodes: new Map(),
    token: TOKEN,
  };

  // Have the buffer returned to the cache when `ast` is garbage collected.
  // `ast` doubles as the unregister token, so `dispose` can cancel this.
  bufferRecycleRegistry.register(ast, buffer, ast);

  // Root data class instance
  const rawDataPos = buffer.int32[DATA_POINTER_POS_32];
  const data = new RawTransferData(rawDataPos, ast);

  const disposeAst = dispose.bind(null, ast);

  return {
    get program() {
      return data.program;
    },
    get module() {
      return data.module;
    },
    get comments() {
      return data.comments;
    },
    get errors() {
      return data.errors;
    },
    dispose: disposeAst,
    visit(visitor) {
      const visitorsArr = getVisitorsArr(visitor);
      walkProgram(rawDataPos + PROGRAM_OFFSET, ast, visitorsArr);
    },
  };
}
123
+
124
/**
 * Dispose of this AST.
 *
 * After calling this method, trying to read any nodes from this AST may cause an error.
 *
 * Buffer is returned to the cache to be reused.
 *
 * The buffer would be returned to the cache anyway, once all nodes of the AST are garbage collected,
 * but calling `dispose` is preferable, as it will happen immediately.
 * Otherwise, garbage collector may take time to collect the `ast` object, and new buffers may be created
 * in the meantime, when we could have reused this one.
 *
 * Calling `dispose` more than once is safe. Only the first call has any effect.
 *
 * @param {Object} ast - AST object containing buffer etc
 * @returns {undefined}
 */
function dispose(ast) {
  // Already disposed - nothing left to release.
  // Without this check, a repeated call would pass `null` to `returnBufferToCache`
  // in place of a buffer.
  if (ast.buffer === null) return;

  // Return buffer to cache, to be reused
  returnBufferToCache(ast.buffer);

  // Remove connection between `ast` and the buffer.
  // This also marks `ast` as disposed for the check above.
  ast.buffer = null;

  // Clear other contents of `ast`, so they can be garbage collected
  ast.sourceText = null;
  ast.nodes = null;

  // Remove `ast` from recycling register.
  // When `ast` is garbage collected, there's no longer any action to be taken.
  bufferRecycleRegistry.unregister(ast);
}