@markuplint/parser-utils 4.4.0 → 4.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/attr-tokenizer.d.ts +2 -2
- package/lib/attr-tokenizer.js +14 -28
- package/lib/parser.d.ts +2 -2
- package/lib/parser.js +3 -3
- package/lib/script-parser.d.ts +5 -0
- package/lib/script-parser.js +61 -1
- package/lib/types.d.ts +4 -0
- package/package.json +8 -5
package/lib/attr-tokenizer.d.ts
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
import type { QuoteSet } from './types.js';
|
|
1
|
+
import type { QuoteSet, ValueType } from './types.js';
|
|
2
2
|
import { AttrState } from './enums.js';
|
|
3
3
|
/**
|
|
4
4
|
* @see https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
|
|
5
5
|
* @see https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
|
|
6
6
|
* @see https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
|
|
7
7
|
*/
|
|
8
|
-
export declare function attrTokenizer(raw: string, quoteSet?: readonly QuoteSet[], startState?: AttrState, quoteInValueChars?: readonly QuoteSet[], endOfUnquotedValueChars?: ReadonlyArray<string>): {
|
|
8
|
+
export declare function attrTokenizer(raw: string, quoteSet?: readonly QuoteSet[], startState?: AttrState, noQuoteValueType?: ValueType, endOfUnquotedValueChars?: ReadonlyArray<string>): {
|
|
9
9
|
spacesBeforeAttrName: string;
|
|
10
10
|
attrName: string;
|
|
11
11
|
spacesBeforeEqual: string;
|
package/lib/attr-tokenizer.js
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { defaultSpaces } from './const.js';
|
|
2
2
|
import { AttrState } from './enums.js';
|
|
3
|
+
import { safeScriptParser } from './script-parser.js';
|
|
3
4
|
const defaultQuoteSet = [
|
|
4
|
-
{ start: '"', end: '"' },
|
|
5
|
-
{ start: "'", end: "'" },
|
|
5
|
+
{ start: '"', end: '"', type: 'string' },
|
|
6
|
+
{ start: "'", end: "'", type: 'string' },
|
|
6
7
|
];
|
|
7
|
-
const defaultQuoteInValueChars = [];
|
|
8
8
|
const spaces = defaultSpaces;
|
|
9
9
|
const EQUAL = '=';
|
|
10
10
|
/**
|
|
@@ -12,7 +12,7 @@ const EQUAL = '=';
|
|
|
12
12
|
* @see https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
|
|
13
13
|
* @see https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
|
|
14
14
|
*/
|
|
15
|
-
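export function attrTokenizer(raw, quoteSet = defaultQuoteSet, startState = AttrState.BeforeName, quoteInValueChars = defaultQuoteInValueChars, endOfUnquotedValueChars = [...defaultSpaces, '/', '>']) {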
|
|
15
|
+
export function attrTokenizer(raw, quoteSet = defaultQuoteSet, startState = AttrState.BeforeName, noQuoteValueType = 'string', endOfUnquotedValueChars = [...defaultSpaces, '/', '>']) {
|
|
16
16
|
let state = startState;
|
|
17
17
|
let spacesBeforeAttrName = '';
|
|
18
18
|
let attrName = '';
|
|
@@ -22,9 +22,10 @@ export function attrTokenizer(raw, quoteSet = defaultQuoteSet, startState = Attr
|
|
|
22
22
|
let quoteTypeIndex = -1;
|
|
23
23
|
let quoteStart = '';
|
|
24
24
|
let attrValue = '';
|
|
25
|
+
let valueType = noQuoteValueType;
|
|
26
|
+
let parser;
|
|
25
27
|
let quoteEnd = '';
|
|
26
28
|
const isBeforeValueStarted = startState === AttrState.BeforeValue;
|
|
27
|
-
const quoteModeStack = [];
|
|
28
29
|
const chars = [...raw];
|
|
29
30
|
while (chars.length > 0) {
|
|
30
31
|
if (state === AttrState.AfterValue) {
|
|
@@ -104,15 +105,8 @@ export function attrTokenizer(raw, quoteSet = defaultQuoteSet, startState = Attr
|
|
|
104
105
|
const quote = quoteSet[quoteTypeIndex];
|
|
105
106
|
if (quote) {
|
|
106
107
|
quoteStart = quote.start;
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
}
|
|
110
|
-
const raw = char + chars.join('');
|
|
111
|
-
const inQuote = quoteInValueChars.find(quote => raw.startsWith(quote.start));
|
|
112
|
-
if (inQuote) {
|
|
113
|
-
quoteModeStack.push(inQuote);
|
|
114
|
-
attrValue += inQuote.start;
|
|
115
|
-
chars.splice(0, inQuote.start.length - 1);
|
|
108
|
+
valueType = quote.type;
|
|
109
|
+
parser = quote.parser;
|
|
116
110
|
state = AttrState.Value;
|
|
117
111
|
break;
|
|
118
112
|
}
|
|
@@ -126,24 +120,16 @@ export function attrTokenizer(raw, quoteSet = defaultQuoteSet, startState = Attr
|
|
|
126
120
|
state = AttrState.AfterValue;
|
|
127
121
|
break;
|
|
128
122
|
}
|
|
129
|
-
if (
|
|
123
|
+
if (char === quoteSet[quoteTypeIndex]?.end) {
|
|
130
124
|
quoteEnd = char;
|
|
131
125
|
state = AttrState.AfterValue;
|
|
132
126
|
break;
|
|
133
127
|
}
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
chars.splice(0, inQuoteEnd.end.length - 1);
|
|
140
|
-
break;
|
|
141
|
-
}
|
|
142
|
-
const inQuoteStart = quoteInValueChars.find(quote => raw.startsWith(quote.start));
|
|
143
|
-
if (inQuoteStart) {
|
|
144
|
-
quoteModeStack.push(inQuoteStart);
|
|
145
|
-
attrValue += inQuoteStart.start;
|
|
146
|
-
chars.splice(0, inQuoteStart.start.length - 1);
|
|
128
|
+
if (valueType === 'script') {
|
|
129
|
+
const raw = char + chars.join('');
|
|
130
|
+
const { validScript } = safeScriptParser(raw, parser);
|
|
131
|
+
attrValue += validScript;
|
|
132
|
+
chars.splice(0, validScript.length - 1);
|
|
147
133
|
break;
|
|
148
134
|
}
|
|
149
135
|
attrValue += char;
|
package/lib/parser.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { Token, ChildToken, QuoteSet, ParseOptions, ParserOptions, Tokenized } from './types.js';
|
|
1
|
+
import type { Token, ChildToken, QuoteSet, ParseOptions, ParserOptions, Tokenized, ValueType } from './types.js';
|
|
2
2
|
import type { EndTagType, MLASTDocument, MLASTParentNode, MLParser, ParserAuthoredElementNameDistinguishing, MLASTElement, MLASTElementCloseTag, MLASTToken, MLASTNodeTreeItem, MLASTTag, MLASTText, MLASTAttr, MLASTChildNode, MLASTSpreadAttr, ElementType, Walker, MLASTHTMLAttr } from '@markuplint/ml-ast';
|
|
3
3
|
import { AttrState } from './enums.js';
|
|
4
4
|
import { ParserError } from './parser-error.js';
|
|
@@ -76,7 +76,7 @@ export declare abstract class Parser<Node extends {} = {}, State extends unknown
|
|
|
76
76
|
visitSpreadAttr(token: Token): MLASTSpreadAttr | null;
|
|
77
77
|
visitAttr(token: Token, options?: {
|
|
78
78
|
readonly quoteSet?: readonly QuoteSet[];
|
|
79
|
-
readonly quoteInValueChars?: readonly QuoteSet[];
|
|
79
|
+
readonly noQuoteValueType?: ValueType;
|
|
80
80
|
readonly endOfUnquotedValueChars?: readonly string[];
|
|
81
81
|
readonly startState?: AttrState;
|
|
82
82
|
}): MLASTAttr & {
|
package/lib/parser.js
CHANGED
|
@@ -355,14 +355,14 @@ export class Parser {
|
|
|
355
355
|
const raw = token.raw;
|
|
356
356
|
const quoteSet = options?.quoteSet;
|
|
357
357
|
const startState = options?.startState ?? AttrState.BeforeName;
|
|
358
|
-
const quoteInValueChars = options?.quoteInValueChars;
|
|
358
|
+
const noQuoteValueType = options?.noQuoteValueType;
|
|
359
359
|
const endOfUnquotedValueChars = options?.endOfUnquotedValueChars;
|
|
360
360
|
let startOffset = token.startOffset;
|
|
361
361
|
let startLine = token.startLine;
|
|
362
362
|
let startCol = token.startCol;
|
|
363
363
|
let tokens;
|
|
364
364
|
try {
|
|
365
|
-
tokens = attrTokenizer(raw, quoteSet, startState, quoteInValueChars, endOfUnquotedValueChars);
|
|
365
|
+
tokens = attrTokenizer(raw, quoteSet, startState, noQuoteValueType, endOfUnquotedValueChars);
|
|
366
366
|
}
|
|
367
367
|
catch (error) {
|
|
368
368
|
if (error instanceof SyntaxError) {
|
|
@@ -781,7 +781,7 @@ _Parser_booleanish = new WeakMap(), _Parser_defaultState = new WeakMap(), _Parse
|
|
|
781
781
|
for (let node of nodeList) {
|
|
782
782
|
if (node.type === 'endtag') {
|
|
783
783
|
const endTagUUID = node.uuid;
|
|
784
|
-
const openTag = newNodeList.findLast((n) => n.type === 'starttag' ? n.pairNode?.uuid === endTagUUID : false);
|
|
784
|
+
const openTag = newNodeList.findLast((n) => n.type === 'starttag' && !n.isGhost ? n.pairNode?.uuid === endTagUUID : false);
|
|
785
785
|
if (!openTag) {
|
|
786
786
|
node = __classPrivateFieldGet(this, _Parser_instances, "m", _Parser_convertIntoInvalidNode).call(this, node);
|
|
787
787
|
}
|
package/lib/script-parser.d.ts
CHANGED
|
@@ -1,4 +1,9 @@
|
|
|
1
|
+
import type { CustomParser } from './types.js';
|
|
1
2
|
export declare function scriptParser(script: string): ScriptTokenType[];
|
|
3
|
+
export declare function safeScriptParser(script: string, parse?: CustomParser): {
|
|
4
|
+
validScript: string;
|
|
5
|
+
leftover: string;
|
|
6
|
+
};
|
|
2
7
|
export type ScriptTokenType = {
|
|
3
8
|
type: 'Identifier' | 'Boolean' | 'Numeric' | 'String' | 'Template' | 'Punctuator';
|
|
4
9
|
value: string;
|
package/lib/script-parser.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// @ts-ignore
|
|
2
|
-
import { tokenize } from 'espree';
|
|
2
|
+
import { tokenize, parse } from 'espree';
|
|
3
3
|
export function scriptParser(script) {
|
|
4
4
|
const tokens = tokenize(script, {
|
|
5
5
|
ecmaVersion: 'latest',
|
|
@@ -10,3 +10,63 @@ export function scriptParser(script) {
|
|
|
10
10
|
value: token.value,
|
|
11
11
|
}));
|
|
12
12
|
}
|
|
13
|
+
export function safeScriptParser(script, parse = defaultParse) {
|
|
14
|
+
let { validScript, leftover } = safeParse(script, parse);
|
|
15
|
+
// Support for object literal
|
|
16
|
+
if (leftover.trim()) {
|
|
17
|
+
const assignment = '$=';
|
|
18
|
+
({ validScript } = safeParse(`${assignment}${script}`, parse));
|
|
19
|
+
validScript = validScript.length > assignment.length ? validScript.slice(assignment.length) : '';
|
|
20
|
+
}
|
|
21
|
+
// Support for spread operator
|
|
22
|
+
if (validScript.trim() === '') {
|
|
23
|
+
const coverStart = '$={';
|
|
24
|
+
const coverEnd = '}';
|
|
25
|
+
({ validScript } = safeParse(`${coverStart}${script}${coverEnd}`, parse));
|
|
26
|
+
const coverEndLastIndex = validScript.lastIndexOf(coverEnd);
|
|
27
|
+
validScript =
|
|
28
|
+
validScript.length > coverStart.length + coverEnd.length
|
|
29
|
+
? validScript.slice(coverStart.length, coverEndLastIndex)
|
|
30
|
+
: '';
|
|
31
|
+
}
|
|
32
|
+
leftover = script.slice(validScript.length);
|
|
33
|
+
return {
|
|
34
|
+
validScript,
|
|
35
|
+
leftover,
|
|
36
|
+
};
|
|
37
|
+
}
|
|
38
|
+
function safeParse(script, parse) {
|
|
39
|
+
let validScript;
|
|
40
|
+
let leftover;
|
|
41
|
+
try {
|
|
42
|
+
parse(script);
|
|
43
|
+
validScript = script;
|
|
44
|
+
leftover = '';
|
|
45
|
+
}
|
|
46
|
+
catch (error) {
|
|
47
|
+
if (error instanceof SyntaxError && 'index' in error && typeof error.index === 'number') {
|
|
48
|
+
let index = error.index;
|
|
49
|
+
const unexpectedToken = script.slice(index);
|
|
50
|
+
if (unexpectedToken.trim() === '') {
|
|
51
|
+
index = script.search(/\S\s*$/);
|
|
52
|
+
}
|
|
53
|
+
validScript = script.slice(0, index);
|
|
54
|
+
leftover = script.slice(index);
|
|
55
|
+
}
|
|
56
|
+
else {
|
|
57
|
+
throw error;
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
return {
|
|
61
|
+
validScript,
|
|
62
|
+
leftover,
|
|
63
|
+
};
|
|
64
|
+
}
|
|
65
|
+
function defaultParse(script) {
|
|
66
|
+
parse(script, {
|
|
67
|
+
ecmaVersion: 'latest',
|
|
68
|
+
ecmaFeatures: {
|
|
69
|
+
jsx: true,
|
|
70
|
+
},
|
|
71
|
+
});
|
|
72
|
+
}
|
package/lib/types.d.ts
CHANGED
|
@@ -53,4 +53,8 @@ export type IgnoreBlock = {
|
|
|
53
53
|
export type QuoteSet = {
|
|
54
54
|
readonly start: string;
|
|
55
55
|
readonly end: string;
|
|
56
|
+
readonly type: ValueType;
|
|
57
|
+
readonly parser?: CustomParser;
|
|
56
58
|
};
|
|
59
|
+
export type CustomParser = (code: string) => void;
|
|
60
|
+
export type ValueType = 'string' | 'script';
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@markuplint/parser-utils",
|
|
3
|
-
"version": "4.4.0",
|
|
3
|
+
"version": "4.5.1",
|
|
4
4
|
"description": "Utility module for markuplint parser plugin",
|
|
5
5
|
"repository": "git@github.com:markuplint/markuplint.git",
|
|
6
6
|
"author": "Yusuke Hirao <yusukehirao@me.com>",
|
|
@@ -28,13 +28,16 @@
|
|
|
28
28
|
},
|
|
29
29
|
"dependencies": {
|
|
30
30
|
"@markuplint/ml-ast": "4.2.0",
|
|
31
|
-
"@markuplint/ml-spec": "4.
|
|
32
|
-
"@markuplint/types": "4.
|
|
31
|
+
"@markuplint/ml-spec": "4.4.1",
|
|
32
|
+
"@markuplint/types": "4.3.0",
|
|
33
33
|
"@types/uuid": "9.0.8",
|
|
34
34
|
"debug": "4.3.4",
|
|
35
35
|
"espree": "10.0.1",
|
|
36
|
-
"type-fest": "4.
|
|
36
|
+
"type-fest": "4.15.0",
|
|
37
37
|
"uuid": "9.0.1"
|
|
38
38
|
},
|
|
39
|
-
"
|
|
39
|
+
"devDependencies": {
|
|
40
|
+
"@typescript-eslint/typescript-estree": "7.7.0"
|
|
41
|
+
},
|
|
42
|
+
"gitHead": "b029c86a6b3a9ea8189d2e5535e3023aaea753fd"
|
|
40
43
|
}
|