@markuplint/parser-utils 4.3.1-dev.1 → 4.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/attr-tokenizer.d.ts +2 -2
- package/lib/attr-tokenizer.js +12 -28
- package/lib/parser.d.ts +2 -2
- package/lib/parser.js +3 -3
- package/lib/script-parser.d.ts +4 -0
- package/lib/script-parser.js +58 -1
- package/lib/types.d.ts +2 -0
- package/package.json +6 -6
package/lib/attr-tokenizer.d.ts
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
import type { QuoteSet } from './types.js';
|
|
1
|
+
import type { QuoteSet, ValueType } from './types.js';
|
|
2
2
|
import { AttrState } from './enums.js';
|
|
3
3
|
/**
|
|
4
4
|
* @see https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
|
|
5
5
|
* @see https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
|
|
6
6
|
* @see https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
|
|
7
7
|
*/
|
|
8
|
-
export declare function attrTokenizer(raw: string, quoteSet?: readonly QuoteSet[], startState?: AttrState, quoteInValueChars?: ReadonlyArray<QuoteSet>, endOfUnquotedValueChars?: ReadonlyArray<string>): {
|
|
8
|
+
export declare function attrTokenizer(raw: string, quoteSet?: readonly QuoteSet[], startState?: AttrState, noQuoteValueType?: ValueType, endOfUnquotedValueChars?: ReadonlyArray<string>): {
|
|
9
9
|
spacesBeforeAttrName: string;
|
|
10
10
|
attrName: string;
|
|
11
11
|
spacesBeforeEqual: string;
|
package/lib/attr-tokenizer.js
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { defaultSpaces } from './const.js';
|
|
2
2
|
import { AttrState } from './enums.js';
|
|
3
|
+
import { safeScriptParser } from './script-parser.js';
|
|
3
4
|
const defaultQuoteSet = [
|
|
4
|
-
{ start: '"', end: '"' },
|
|
5
|
-
{ start: "'", end: "'" },
|
|
5
|
+
{ start: '"', end: '"', type: 'string' },
|
|
6
|
+
{ start: "'", end: "'", type: 'string' },
|
|
6
7
|
];
|
|
7
|
-
const defaultQuoteInValueChars = [];
|
|
8
8
|
const spaces = defaultSpaces;
|
|
9
9
|
const EQUAL = '=';
|
|
10
10
|
/**
|
|
@@ -12,7 +12,7 @@ const EQUAL = '=';
|
|
|
12
12
|
* @see https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
|
|
13
13
|
* @see https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
|
|
14
14
|
*/
|
|
15
|
-
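export function attrTokenizer(raw, quoteSet = defaultQuoteSet, startState = AttrState.BeforeName, quoteInValueChars = defaultQuoteInValueChars, endOfUnquotedValueChars = [...defaultSpaces, '/', '>']) {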
|
|
15
|
+
export function attrTokenizer(raw, quoteSet = defaultQuoteSet, startState = AttrState.BeforeName, noQuoteValueType = 'string', endOfUnquotedValueChars = [...defaultSpaces, '/', '>']) {
|
|
16
16
|
let state = startState;
|
|
17
17
|
let spacesBeforeAttrName = '';
|
|
18
18
|
let attrName = '';
|
|
@@ -22,9 +22,9 @@ export function attrTokenizer(raw, quoteSet = defaultQuoteSet, startState = Attr
|
|
|
22
22
|
let quoteTypeIndex = -1;
|
|
23
23
|
let quoteStart = '';
|
|
24
24
|
let attrValue = '';
|
|
25
|
+
let valueType = noQuoteValueType;
|
|
25
26
|
let quoteEnd = '';
|
|
26
27
|
const isBeforeValueStarted = startState === AttrState.BeforeValue;
|
|
27
|
-
const quoteModeStack = [];
|
|
28
28
|
const chars = [...raw];
|
|
29
29
|
while (chars.length > 0) {
|
|
30
30
|
if (state === AttrState.AfterValue) {
|
|
@@ -104,15 +104,7 @@ export function attrTokenizer(raw, quoteSet = defaultQuoteSet, startState = Attr
|
|
|
104
104
|
const quote = quoteSet[quoteTypeIndex];
|
|
105
105
|
if (quote) {
|
|
106
106
|
quoteStart = quote.start;
|
|
107
|
-
|
|
108
|
-
break;
|
|
109
|
-
}
|
|
110
|
-
const raw = char + chars.join('');
|
|
111
|
-
const inQuote = quoteInValueChars.find(quote => raw.startsWith(quote.start));
|
|
112
|
-
if (inQuote) {
|
|
113
|
-
quoteModeStack.push(inQuote);
|
|
114
|
-
attrValue += inQuote.start;
|
|
115
|
-
chars.splice(0, inQuote.start.length - 1);
|
|
107
|
+
valueType = quote.type;
|
|
116
108
|
state = AttrState.Value;
|
|
117
109
|
break;
|
|
118
110
|
}
|
|
@@ -126,24 +118,16 @@ export function attrTokenizer(raw, quoteSet = defaultQuoteSet, startState = Attr
|
|
|
126
118
|
state = AttrState.AfterValue;
|
|
127
119
|
break;
|
|
128
120
|
}
|
|
129
|
-
if (
|
|
121
|
+
if (char === quoteSet[quoteTypeIndex]?.end) {
|
|
130
122
|
quoteEnd = char;
|
|
131
123
|
state = AttrState.AfterValue;
|
|
132
124
|
break;
|
|
133
125
|
}
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
chars.splice(0, inQuoteEnd.end.length - 1);
|
|
140
|
-
break;
|
|
141
|
-
}
|
|
142
|
-
const inQuoteStart = quoteInValueChars.find(quote => raw.startsWith(quote.start));
|
|
143
|
-
if (inQuoteStart) {
|
|
144
|
-
quoteModeStack.push(inQuoteStart);
|
|
145
|
-
attrValue += inQuoteStart.start;
|
|
146
|
-
chars.splice(0, inQuoteStart.start.length - 1);
|
|
126
|
+
if (valueType === 'script') {
|
|
127
|
+
const raw = char + chars.join('');
|
|
128
|
+
const { validScript } = safeScriptParser(raw);
|
|
129
|
+
attrValue += validScript;
|
|
130
|
+
chars.splice(0, validScript.length - 1);
|
|
147
131
|
break;
|
|
148
132
|
}
|
|
149
133
|
attrValue += char;
|
package/lib/parser.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { Token, ChildToken, QuoteSet, ParseOptions, ParserOptions, Tokenized } from './types.js';
|
|
1
|
+
import type { Token, ChildToken, QuoteSet, ParseOptions, ParserOptions, Tokenized, ValueType } from './types.js';
|
|
2
2
|
import type { EndTagType, MLASTDocument, MLASTParentNode, MLParser, ParserAuthoredElementNameDistinguishing, MLASTElement, MLASTElementCloseTag, MLASTToken, MLASTNodeTreeItem, MLASTTag, MLASTText, MLASTAttr, MLASTChildNode, MLASTSpreadAttr, ElementType, Walker, MLASTHTMLAttr } from '@markuplint/ml-ast';
|
|
3
3
|
import { AttrState } from './enums.js';
|
|
4
4
|
import { ParserError } from './parser-error.js';
|
|
@@ -76,7 +76,7 @@ export declare abstract class Parser<Node extends {} = {}, State extends unknown
|
|
|
76
76
|
visitSpreadAttr(token: Token): MLASTSpreadAttr | null;
|
|
77
77
|
visitAttr(token: Token, options?: {
|
|
78
78
|
readonly quoteSet?: readonly QuoteSet[];
|
|
79
|
-
readonly quoteInValueChars?: readonly QuoteSet[];
|
|
79
|
+
readonly noQuoteValueType?: ValueType;
|
|
80
80
|
readonly endOfUnquotedValueChars?: readonly string[];
|
|
81
81
|
readonly startState?: AttrState;
|
|
82
82
|
}): MLASTAttr & {
|
package/lib/parser.js
CHANGED
|
@@ -355,14 +355,14 @@ export class Parser {
|
|
|
355
355
|
const raw = token.raw;
|
|
356
356
|
const quoteSet = options?.quoteSet;
|
|
357
357
|
const startState = options?.startState ?? AttrState.BeforeName;
|
|
358
|
-
const quoteInValueChars = options?.quoteInValueChars;
|
|
358
|
+
const noQuoteValueType = options?.noQuoteValueType;
|
|
359
359
|
const endOfUnquotedValueChars = options?.endOfUnquotedValueChars;
|
|
360
360
|
let startOffset = token.startOffset;
|
|
361
361
|
let startLine = token.startLine;
|
|
362
362
|
let startCol = token.startCol;
|
|
363
363
|
let tokens;
|
|
364
364
|
try {
|
|
365
|
-
tokens = attrTokenizer(raw, quoteSet, startState, quoteInValueChars, endOfUnquotedValueChars);
|
|
365
|
+
tokens = attrTokenizer(raw, quoteSet, startState, noQuoteValueType, endOfUnquotedValueChars);
|
|
366
366
|
}
|
|
367
367
|
catch (error) {
|
|
368
368
|
if (error instanceof SyntaxError) {
|
|
@@ -781,7 +781,7 @@ _Parser_booleanish = new WeakMap(), _Parser_defaultState = new WeakMap(), _Parse
|
|
|
781
781
|
for (let node of nodeList) {
|
|
782
782
|
if (node.type === 'endtag') {
|
|
783
783
|
const endTagUUID = node.uuid;
|
|
784
|
-
const openTag = newNodeList.findLast((n) => n.type === 'starttag' ? n.pairNode?.uuid === endTagUUID : false);
|
|
784
|
+
const openTag = newNodeList.findLast((n) => n.type === 'starttag' && !n.isGhost ? n.pairNode?.uuid === endTagUUID : false);
|
|
785
785
|
if (!openTag) {
|
|
786
786
|
node = __classPrivateFieldGet(this, _Parser_instances, "m", _Parser_convertIntoInvalidNode).call(this, node);
|
|
787
787
|
}
|
package/lib/script-parser.d.ts
CHANGED
|
@@ -1,4 +1,8 @@
|
|
|
1
1
|
export declare function scriptParser(script: string): ScriptTokenType[];
|
|
2
|
+
export declare function safeScriptParser(script: string): {
|
|
3
|
+
validScript: string;
|
|
4
|
+
leftover: string;
|
|
5
|
+
};
|
|
2
6
|
export type ScriptTokenType = {
|
|
3
7
|
type: 'Identifier' | 'Boolean' | 'Numeric' | 'String' | 'Template' | 'Punctuator';
|
|
4
8
|
value: string;
|
package/lib/script-parser.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// @ts-ignore
|
|
2
|
-
import { tokenize } from 'espree';
|
|
2
|
+
import { tokenize, parse } from 'espree';
|
|
3
3
|
export function scriptParser(script) {
|
|
4
4
|
const tokens = tokenize(script, {
|
|
5
5
|
ecmaVersion: 'latest',
|
|
@@ -10,3 +10,60 @@ export function scriptParser(script) {
|
|
|
10
10
|
value: token.value,
|
|
11
11
|
}));
|
|
12
12
|
}
|
|
13
|
+
export function safeScriptParser(script) {
|
|
14
|
+
let { validScript, leftover } = safeParse(script);
|
|
15
|
+
// Support for object literal
|
|
16
|
+
if (leftover.trim()) {
|
|
17
|
+
const assignment = '$=';
|
|
18
|
+
({ validScript } = safeParse(`${assignment}${script}`));
|
|
19
|
+
validScript = validScript.length > assignment.length ? validScript.slice(assignment.length) : '';
|
|
20
|
+
}
|
|
21
|
+
// Support for spread operator
|
|
22
|
+
if (validScript.trim() === '') {
|
|
23
|
+
const coverStart = '$={';
|
|
24
|
+
const coverEnd = '}';
|
|
25
|
+
({ validScript } = safeParse(`${coverStart}${script}${coverEnd}`));
|
|
26
|
+
const coverEndLastIndex = validScript.lastIndexOf(coverEnd);
|
|
27
|
+
validScript =
|
|
28
|
+
validScript.length > coverStart.length + coverEnd.length
|
|
29
|
+
? validScript.slice(coverStart.length, coverEndLastIndex)
|
|
30
|
+
: '';
|
|
31
|
+
}
|
|
32
|
+
leftover = script.slice(validScript.length);
|
|
33
|
+
return {
|
|
34
|
+
validScript,
|
|
35
|
+
leftover,
|
|
36
|
+
};
|
|
37
|
+
}
|
|
38
|
+
function safeParse(script) {
|
|
39
|
+
let validScript;
|
|
40
|
+
let leftover;
|
|
41
|
+
try {
|
|
42
|
+
parse(script, {
|
|
43
|
+
ecmaVersion: 'latest',
|
|
44
|
+
ecmaFeatures: {
|
|
45
|
+
jsx: true,
|
|
46
|
+
},
|
|
47
|
+
});
|
|
48
|
+
validScript = script;
|
|
49
|
+
leftover = '';
|
|
50
|
+
}
|
|
51
|
+
catch (error) {
|
|
52
|
+
if (error instanceof SyntaxError && 'index' in error && typeof error.index === 'number') {
|
|
53
|
+
let index = error.index;
|
|
54
|
+
const unexpectedToken = script.slice(index);
|
|
55
|
+
if (unexpectedToken.trim() === '') {
|
|
56
|
+
index = script.search(/\S\s*$/);
|
|
57
|
+
}
|
|
58
|
+
validScript = script.slice(0, index);
|
|
59
|
+
leftover = script.slice(index);
|
|
60
|
+
}
|
|
61
|
+
else {
|
|
62
|
+
throw error;
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
return {
|
|
66
|
+
validScript,
|
|
67
|
+
leftover,
|
|
68
|
+
};
|
|
69
|
+
}
|
package/lib/types.d.ts
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@markuplint/parser-utils",
|
|
3
|
-
"version": "4.3.1-dev.1",
|
|
3
|
+
"version": "4.5.0",
|
|
4
4
|
"description": "Utility module for markuplint parser plugin",
|
|
5
5
|
"repository": "git@github.com:markuplint/markuplint.git",
|
|
6
6
|
"author": "Yusuke Hirao <yusukehirao@me.com>",
|
|
@@ -27,14 +27,14 @@
|
|
|
27
27
|
"clean": "tsc --build --clean"
|
|
28
28
|
},
|
|
29
29
|
"dependencies": {
|
|
30
|
-
"@markuplint/ml-ast": "4.
|
|
31
|
-
"@markuplint/ml-spec": "4.
|
|
32
|
-
"@markuplint/types": "4.
|
|
30
|
+
"@markuplint/ml-ast": "4.2.0",
|
|
31
|
+
"@markuplint/ml-spec": "4.4.0",
|
|
32
|
+
"@markuplint/types": "4.3.0",
|
|
33
33
|
"@types/uuid": "9.0.8",
|
|
34
34
|
"debug": "4.3.4",
|
|
35
35
|
"espree": "10.0.1",
|
|
36
|
-
"type-fest": "4.
|
|
36
|
+
"type-fest": "4.15.0",
|
|
37
37
|
"uuid": "9.0.1"
|
|
38
38
|
},
|
|
39
|
-
"gitHead": "
|
|
39
|
+
"gitHead": "d5c8786b0dbbd82cdd89018dd57941d62bbe8d06"
|
|
40
40
|
}
|