@markuplint/parser-utils 4.8.10 → 5.0.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.ja.md +208 -0
- package/ARCHITECTURE.md +251 -0
- package/CHANGELOG.md +33 -2
- package/README.md +6 -0
- package/SKILL.md +126 -0
- package/docs/maintenance.ja.md +176 -0
- package/docs/maintenance.md +176 -0
- package/docs/parser-class.ja.md +655 -0
- package/docs/parser-class.md +655 -0
- package/lib/debug.js +8 -24
- package/lib/debugger.d.ts +25 -0
- package/lib/debugger.js +34 -4
- package/lib/enums.d.ts +10 -0
- package/lib/enums.js +10 -0
- package/lib/get-location.d.ts +31 -0
- package/lib/get-location.js +33 -0
- package/lib/get-namespace.d.ts +11 -0
- package/lib/get-namespace.js +38 -0
- package/lib/idl-attributes.d.ts +9 -0
- package/lib/idl-attributes.js +9 -0
- package/lib/ignore-block.js +15 -14
- package/lib/index.d.ts +2 -1
- package/lib/index.js +1 -1
- package/lib/parser-error.d.ts +16 -0
- package/lib/parser-error.js +20 -3
- package/lib/parser.d.ts +285 -7
- package/lib/parser.js +763 -551
- package/lib/script-parser.d.ts +21 -0
- package/lib/script-parser.js +17 -0
- package/lib/sort-nodes.d.ts +8 -0
- package/lib/sort-nodes.js +11 -3
- package/lib/types.d.ts +60 -3
- package/package.json +11 -10
package/lib/script-parser.d.ts
CHANGED
|
@@ -1,9 +1,30 @@
|
|
|
1
1
|
import type { CustomParser } from './types.js';
|
|
2
|
+
/**
|
|
3
|
+
* Tokenizes a JavaScript code string into an array of typed tokens using espree.
|
|
4
|
+
* Each token contains its type (e.g., Identifier, Punctuator) and raw value.
|
|
5
|
+
*
|
|
6
|
+
* @param script - The JavaScript source code to tokenize
|
|
7
|
+
* @returns An array of tokens with their type and string value
|
|
8
|
+
*/
|
|
2
9
|
export declare function scriptParser(script: string): ScriptTokenType[];
|
|
10
|
+
/**
|
|
11
|
+
* Attempts to extract the longest valid JavaScript prefix from a script string
|
|
12
|
+
* that may contain trailing non-JS content (e.g., HTML after an inline expression).
|
|
13
|
+
* Falls back to wrapping the script as an object literal or spread operator
|
|
14
|
+
* if the initial parse fails.
|
|
15
|
+
*
|
|
16
|
+
* @param script - The potentially mixed script/markup string to parse
|
|
17
|
+
* @param parse - A custom parse function to validate the script; defaults to espree with JSX support
|
|
18
|
+
* @returns An object containing the `validScript` prefix and the remaining `leftover` string
|
|
19
|
+
*/
|
|
3
20
|
export declare function safeScriptParser(script: string, parse?: CustomParser): {
|
|
4
21
|
validScript: string;
|
|
5
22
|
leftover: string;
|
|
6
23
|
};
|
|
24
|
+
/**
|
|
25
|
+
* A token produced by the script tokenizer, representing a single
|
|
26
|
+
* lexical unit of JavaScript source code.
|
|
27
|
+
*/
|
|
7
28
|
export type ScriptTokenType = {
|
|
8
29
|
type: 'Identifier' | 'Boolean' | 'Numeric' | 'String' | 'Template' | 'Punctuator';
|
|
9
30
|
value: string;
|
package/lib/script-parser.js
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
// @ts-ignore
|
|
2
2
|
import { tokenize, parse } from 'espree';
|
|
3
|
+
/**
|
|
4
|
+
* Tokenizes a JavaScript code string into an array of typed tokens using espree.
|
|
5
|
+
* Each token contains its type (e.g., Identifier, Punctuator) and raw value.
|
|
6
|
+
*
|
|
7
|
+
* @param script - The JavaScript source code to tokenize
|
|
8
|
+
* @returns An array of tokens with their type and string value
|
|
9
|
+
*/
|
|
3
10
|
export function scriptParser(script) {
|
|
4
11
|
const tokens = tokenize(script, {
|
|
5
12
|
ecmaVersion: 'latest',
|
|
@@ -10,6 +17,16 @@ export function scriptParser(script) {
|
|
|
10
17
|
value: token.value,
|
|
11
18
|
}));
|
|
12
19
|
}
|
|
20
|
+
/**
|
|
21
|
+
* Attempts to extract the longest valid JavaScript prefix from a script string
|
|
22
|
+
* that may contain trailing non-JS content (e.g., HTML after an inline expression).
|
|
23
|
+
* Falls back to wrapping the script as an object literal or spread operator
|
|
24
|
+
* if the initial parse fails.
|
|
25
|
+
*
|
|
26
|
+
* @param script - The potentially mixed script/markup string to parse
|
|
27
|
+
* @param parse - A custom parse function to validate the script; defaults to espree with JSX support
|
|
28
|
+
* @returns An object containing the `validScript` prefix and the remaining `leftover` string
|
|
29
|
+
*/
|
|
13
30
|
export function safeScriptParser(script, parse = defaultParse) {
|
|
14
31
|
let { validScript, leftover } = safeParse(script, parse);
|
|
15
32
|
// Support for object literal
|
package/lib/sort-nodes.d.ts
CHANGED
|
@@ -1,2 +1,10 @@
|
|
|
1
1
|
import type { MLASTNodeTreeItem } from '@markuplint/ml-ast';
|
|
2
|
+
/**
|
|
3
|
+
* Comparator function for sorting AST nodes by their source position.
|
|
4
|
+
* Sorts primarily by offset, then by end offset for nodes at the same position.
|
|
5
|
+
*
|
|
6
|
+
* @param a - The first node to compare
|
|
7
|
+
* @param b - The second node to compare
|
|
8
|
+
* @returns A negative, zero, or positive number for sort ordering
|
|
9
|
+
*/
|
|
2
10
|
export declare function sortNodes(a: MLASTNodeTreeItem, b: MLASTNodeTreeItem): number;
|
package/lib/sort-nodes.js
CHANGED
|
@@ -1,8 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Comparator function for sorting AST nodes by their source position.
|
|
3
|
+
* Sorts primarily by offset, then by end offset for nodes at the same position.
|
|
4
|
+
*
|
|
5
|
+
* @param a - The first node to compare
|
|
6
|
+
* @param b - The second node to compare
|
|
7
|
+
* @returns A negative, zero, or positive number for sort ordering
|
|
8
|
+
*/
|
|
1
9
|
export function sortNodes(a, b) {
|
|
2
|
-
if (a.
|
|
3
|
-
return sort(a.
|
|
10
|
+
if (a.offset === b.offset) {
|
|
11
|
+
return sort(a.offset + a.raw.length, b.offset + b.raw.length);
|
|
4
12
|
}
|
|
5
|
-
return sort(a.
|
|
13
|
+
return sort(a.offset, b.offset);
|
|
6
14
|
}
|
|
7
15
|
function sort(a, b) {
|
|
8
16
|
const diff = a - b;
|
package/lib/types.d.ts
CHANGED
|
@@ -1,4 +1,8 @@
|
|
|
1
1
|
import type { EndTagType, MLASTParentNode, ParserOptions as ConfigParserOptions } from '@markuplint/ml-ast';
|
|
2
|
+
/**
|
|
3
|
+
* Configuration options for initializing a Parser instance,
|
|
4
|
+
* controlling how the parser handles tags, attributes, and whitespace.
|
|
5
|
+
*/
|
|
2
6
|
export type ParserOptions = {
|
|
3
7
|
readonly booleanish?: boolean;
|
|
4
8
|
readonly endTagType?: EndTagType;
|
|
@@ -9,28 +13,58 @@ export type ParserOptions = {
|
|
|
9
13
|
readonly spaceChars?: readonly string[];
|
|
10
14
|
readonly rawTextElements?: readonly string[];
|
|
11
15
|
};
|
|
16
|
+
/**
|
|
17
|
+
* Options passed to a single parse invocation, extending the base config parser options
|
|
18
|
+
* with offset positioning and depth control for embedded code fragments.
|
|
19
|
+
*/
|
|
12
20
|
export type ParseOptions = ConfigParserOptions & {
|
|
13
21
|
readonly offsetOffset?: number;
|
|
14
22
|
readonly offsetLine?: number;
|
|
15
23
|
readonly offsetColumn?: number;
|
|
16
24
|
readonly depth?: number;
|
|
17
25
|
};
|
|
26
|
+
/**
|
|
27
|
+
* The result of tokenizing raw source code, containing the AST nodes
|
|
28
|
+
* and metadata about whether the parsed content is a document fragment.
|
|
29
|
+
*
|
|
30
|
+
* @template N - The AST node type produced by the tokenizer
|
|
31
|
+
* @template State - The parser state type carried through tokenization
|
|
32
|
+
*/
|
|
18
33
|
export type Tokenized<N extends {} = {}, State extends unknown = null> = {
|
|
19
34
|
readonly ast: N[];
|
|
20
35
|
readonly isFragment: boolean;
|
|
21
36
|
readonly state?: State;
|
|
22
37
|
};
|
|
38
|
+
/**
|
|
39
|
+
* A minimal source token representing a raw string fragment
|
|
40
|
+
* along with its starting position in the source code.
|
|
41
|
+
*/
|
|
23
42
|
export type Token = {
|
|
24
43
|
readonly raw: string;
|
|
25
|
-
readonly
|
|
26
|
-
readonly
|
|
27
|
-
readonly
|
|
44
|
+
readonly offset: number;
|
|
45
|
+
readonly line: number;
|
|
46
|
+
readonly col: number;
|
|
28
47
|
};
|
|
48
|
+
/**
|
|
49
|
+
* A token that belongs to a parent node in the AST, extending the base Token
|
|
50
|
+
* with nesting depth and a reference to the enclosing parent node.
|
|
51
|
+
*/
|
|
29
52
|
export type ChildToken = Token & {
|
|
30
53
|
readonly depth: number;
|
|
31
54
|
readonly parentNode: MLASTParentNode | null;
|
|
32
55
|
};
|
|
56
|
+
/**
|
|
57
|
+
* Determines how self-closing tags (e.g., `<br />`) are interpreted.
|
|
58
|
+
*
|
|
59
|
+
* - `"html"`: Only void elements are treated as self-closing (HTML spec behavior)
|
|
60
|
+
* - `"xml"`: The self-closing solidus (`/`) determines self-closing behavior
|
|
61
|
+
* - `"html+xml"`: Either void elements or the self-closing solidus cause self-closing
|
|
62
|
+
*/
|
|
33
63
|
export type SelfCloseType = 'html' | 'xml' | 'html+xml';
|
|
64
|
+
/**
|
|
65
|
+
* Represents a tagged code block (e.g., template expressions or preprocessor directives)
|
|
66
|
+
* that was extracted from the source during the ignore-block phase.
|
|
67
|
+
*/
|
|
34
68
|
export type Code = {
|
|
35
69
|
readonly type: string;
|
|
36
70
|
readonly index: number;
|
|
@@ -39,22 +73,45 @@ export type Code = {
|
|
|
39
73
|
readonly endTag: string | null;
|
|
40
74
|
resolved: boolean;
|
|
41
75
|
};
|
|
76
|
+
/**
|
|
77
|
+
* Defines a pattern for identifying blocks of code that should be masked
|
|
78
|
+
* (replaced with placeholder characters) before parsing, such as template
|
|
79
|
+
* language expressions or preprocessor directives.
|
|
80
|
+
*/
|
|
42
81
|
export type IgnoreTag = {
|
|
43
82
|
readonly type: string;
|
|
44
83
|
readonly start: Readonly<RegExp> | string;
|
|
45
84
|
readonly end: Readonly<RegExp> | string;
|
|
46
85
|
};
|
|
86
|
+
/**
|
|
87
|
+
* The result of masking ignore-tagged code blocks in the source, preserving
|
|
88
|
+
* the original source alongside the replaced version and a stack of extracted codes.
|
|
89
|
+
*/
|
|
47
90
|
export type IgnoreBlock = {
|
|
48
91
|
readonly source: string;
|
|
49
92
|
readonly replaced: string;
|
|
50
93
|
readonly stack: readonly Code[];
|
|
51
94
|
readonly maskChar: string;
|
|
52
95
|
};
|
|
96
|
+
/**
|
|
97
|
+
* Defines a pair of quote delimiters and the value type they enclose,
|
|
98
|
+
* used when parsing attribute values with non-standard quoting
|
|
99
|
+
* (e.g., JSX expression braces or template literals).
|
|
100
|
+
*/
|
|
53
101
|
export type QuoteSet = {
|
|
54
102
|
readonly start: string;
|
|
55
103
|
readonly end: string;
|
|
56
104
|
readonly type: ValueType;
|
|
57
105
|
readonly parser?: CustomParser;
|
|
58
106
|
};
|
|
107
|
+
/**
|
|
108
|
+
* A function that attempts to parse a code string, throwing a SyntaxError
|
|
109
|
+
* if the code is invalid. Used by the safe script parser to determine
|
|
110
|
+
* the boundary of valid script content.
|
|
111
|
+
*/
|
|
59
112
|
export type CustomParser = (code: string) => void;
|
|
113
|
+
/**
|
|
114
|
+
* The semantic type of an attribute value, distinguishing between
|
|
115
|
+
* plain string values and script/expression values.
|
|
116
|
+
*/
|
|
60
117
|
export type ValueType = 'string' | 'script';
|
package/package.json
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@markuplint/parser-utils",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "5.0.0-alpha.0",
|
|
4
4
|
"description": "Utility module for markuplint parser plugin",
|
|
5
5
|
"repository": "git@github.com:markuplint/markuplint.git",
|
|
6
6
|
"author": "Yusuke Hirao <yusukehirao@me.com>",
|
|
7
7
|
"license": "MIT",
|
|
8
|
+
"engines": {
|
|
9
|
+
"node": ">=22"
|
|
10
|
+
},
|
|
8
11
|
"type": "module",
|
|
9
12
|
"exports": {
|
|
10
13
|
".": {
|
|
@@ -28,17 +31,15 @@
|
|
|
28
31
|
"clean": "tsc --build --clean tsconfig.build.json"
|
|
29
32
|
},
|
|
30
33
|
"dependencies": {
|
|
31
|
-
"@markuplint/ml-ast": "
|
|
32
|
-
"@markuplint/ml-spec": "
|
|
33
|
-
"@markuplint/types": "
|
|
34
|
-
"@types/uuid": "10.0.0",
|
|
34
|
+
"@markuplint/ml-ast": "5.0.0-alpha.0",
|
|
35
|
+
"@markuplint/ml-spec": "5.0.0-alpha.0",
|
|
36
|
+
"@markuplint/types": "5.0.0-alpha.0",
|
|
35
37
|
"debug": "4.4.3",
|
|
36
|
-
"espree": "
|
|
37
|
-
"type-fest": "4.
|
|
38
|
-
"uuid": "13.0.0"
|
|
38
|
+
"espree": "11.1.0",
|
|
39
|
+
"type-fest": "5.4.4"
|
|
39
40
|
},
|
|
40
41
|
"devDependencies": {
|
|
41
|
-
"@typescript-eslint/typescript-estree": "8.
|
|
42
|
+
"@typescript-eslint/typescript-estree": "8.56.0"
|
|
42
43
|
},
|
|
43
|
-
"gitHead": "
|
|
44
|
+
"gitHead": "13dcfc84ec83d87360c720e253383b60767e1b56"
|
|
44
45
|
}
|