@markuplint/parser-utils 4.0.0-alpha.4 → 4.0.0-alpha.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/lib/attr-parser.d.ts +25 -0
- package/lib/attr-parser.js +188 -0
- package/lib/attr-tokenizer.d.ts +6 -0
- package/lib/attr-tokenizer.js +75 -0
- package/lib/const.d.ts +7 -1
- package/lib/const.js +8 -3
- package/lib/flatten-nodes.js +1 -1
- package/lib/get-location.js +2 -2
- package/lib/ignore-block.js +12 -0
- package/lib/ignore-front-matter.js +1 -1
- package/lib/index.d.ts +4 -0
- package/lib/index.js +4 -0
- package/lib/script-parser.d.ts +6 -0
- package/lib/script-parser.js +22 -0
- package/lib/tag-parser.d.ts +10 -0
- package/lib/tag-parser.js +152 -0
- package/lib/tag-splitter.d.ts +1 -1
- package/lib/tag-splitter.js +1 -1
- package/lib/types.d.ts +6 -2
- package/package.json +7 -6
package/LICENSE
CHANGED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { QuoteSet } from './types.js';
|
|
2
|
+
/**
 * States of the attribute scanner implemented by `attrParser`.
 */
export declare enum AttrState {
    /** Skipping whitespace that precedes the attribute name. */
    BeforeName = 0,
    /** Collecting the characters of the attribute name. */
    Name = 1,
    /** Whitespace seen after the name; waiting for `=` or the end of the attribute. */
    Equal = 2,
    /** `=` has been read; skipping whitespace and looking for an opening quote. */
    BeforeValue = 3,
    /** Collecting the characters of the attribute value. */
    Value = 4,
    /** The attribute is complete; scanning stops in this state. */
    AfterValue = 5
}
|
|
10
|
+
/**
 * Reads one attribute from the beginning of `raw` and returns its lexical parts
 * together with the text that was not consumed.
 *
 * @see https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
 * @see https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
 * @see https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
 *
 * @param raw Source text that starts at (or just before) an attribute.
 * @param quoteSet Delimiter pairs that may wrap an attribute value.
 * @param startState Scanner state to begin in.
 * @param quoteInValueChars Delimiter pairs recognised inside a value.
 * @param spaces Characters treated as whitespace.
 * @returns The attribute parts; `leftover` is the unconsumed remainder of `raw`.
 */
export declare function attrParser(
    raw: string,
    quoteSet?: ReadonlyArray<QuoteSet>,
    startState?: AttrState,
    quoteInValueChars?: readonly QuoteSet[],
    spaces?: readonly string[],
): {
    spacesBeforeAttrName: string;
    attrName: string;
    spacesBeforeEqual: string;
    equal: string;
    spacesAfterEqual: string;
    quoteStart: string;
    attrValue: string;
    quoteEnd: string;
    leftover: string;
};
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
import { defaultSpaces } from './const.js';
|
|
2
|
+
/** Quote pairs accepted around an attribute value when the caller supplies none. */
const defaultQuoteSet = ['"', "'"].map(mark => ({ start: mark, end: mark }));
/** No nested delimiters are recognised inside values by default. */
const defaultQuoteInValueChars = [];
/** Character that separates an attribute name from its value. */
const EQUAL = '=';
|
|
8
|
+
/**
 * Scanner states used by `attrParser`.
 * The object maps each state name to its number and each number back to its name.
 */
export var AttrState;
(function (AttrState) {
    const names = ['BeforeName', 'Name', 'Equal', 'BeforeValue', 'Value', 'AfterValue'];
    names.forEach((name, value) => {
        AttrState[name] = value;
        AttrState[value] = name;
    });
})(AttrState || (AttrState = {}));
|
|
17
|
+
/**
 * Reads one attribute from the beginning of `raw` and splits it into its
 * lexical parts. Scanning stops as soon as the attribute is complete; whatever
 * was not consumed is returned as `leftover`.
 *
 * @see https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
 * @see https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
 * @see https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
 *
 * @param raw Source text to scan.
 * @param quoteSet Delimiter pairs that may wrap the value (defaults to `"` and `'`).
 * @param startState State the scanner starts in (defaults to `AttrState.BeforeName`).
 * @param quoteInValueChars Delimiter pairs recognised inside a value. While one
 *   is open, the closing quote of a quoted value is not recognised. An unquoted
 *   value still ends at whitespace, `/` or `>` even inside such a pair.
 * @param spaces Characters treated as whitespace.
 * @returns The attribute parts plus `leftover`, the unconsumed tail of `raw`.
 * @throws {SyntaxError} If the input ends while a value opened by a quote from
 *   `quoteSet` is still open.
 */
export function attrParser(raw, quoteSet = defaultQuoteSet, startState = AttrState.BeforeName, quoteInValueChars = defaultQuoteInValueChars, spaces = defaultSpaces) {
    let state = startState;
    let spacesBeforeAttrName = '';
    let attrName = '';
    let spacesBeforeEqual = '';
    let equal = '';
    let spacesAfterEqual = '';
    let quoteTypeIndex = -1;
    let quoteStart = '';
    let attrValue = '';
    let quoteEnd = '';
    // Nested delimiters (from `quoteInValueChars`) that are currently open.
    const quoteModeStack = [];
    // One array slot per code point.
    const chars = [...raw];
    // Number of slots a delimiter still occupies in `chars` once its first
    // code point has been shifted off. Counted in code points because `chars`
    // is split by code point; a UTF-16 `.length` would over-count astral
    // characters and swallow part of the value.
    const restLength = (delimiter) => [...delimiter].length - 1;
    while (chars.length > 0 && state !== AttrState.AfterValue) {
        const char = chars.shift();
        switch (state) {
            case AttrState.BeforeName: {
                if (char === '>' || char === '/') {
                    // The tag ends before any attribute starts; hand the character back.
                    chars.unshift(char);
                    state = AttrState.AfterValue;
                    break;
                }
                if (spaces.includes(char)) {
                    spacesBeforeAttrName += char;
                    break;
                }
                attrName += char;
                state = AttrState.Name;
                break;
            }
            case AttrState.Name: {
                if (char === '>' || char === '/') {
                    chars.unshift(char);
                    state = AttrState.AfterValue;
                    break;
                }
                if (spaces.includes(char)) {
                    spacesBeforeEqual += char;
                    state = AttrState.Equal;
                    break;
                }
                if (char === EQUAL) {
                    equal += char;
                    state = AttrState.BeforeValue;
                    break;
                }
                attrName += char;
                break;
            }
            case AttrState.Equal: {
                if (spaces.includes(char)) {
                    spacesBeforeEqual += char;
                    break;
                }
                if (char === EQUAL) {
                    equal += char;
                    state = AttrState.BeforeValue;
                    break;
                }
                // End of attribute: no `=` followed the name, so the whitespace
                // collected so far belongs to whatever comes next.
                chars.unshift(...spacesBeforeEqual, char);
                spacesBeforeEqual = '';
                state = AttrState.AfterValue;
                break;
            }
            case AttrState.BeforeValue: {
                if (spaces.includes(char)) {
                    spacesAfterEqual += char;
                    break;
                }
                quoteTypeIndex = quoteSet.findIndex(quote => quote.start === char);
                const quote = quoteSet[quoteTypeIndex];
                if (quote) {
                    quoteStart = quote.start;
                    state = AttrState.Value;
                    break;
                }
                const rest = char + chars.join('');
                const inQuote = quoteInValueChars.find(quote => rest.startsWith(quote.start));
                if (inQuote) {
                    // The unquoted value opens with a nested delimiter; consume it whole.
                    quoteModeStack.push(inQuote);
                    attrValue += inQuote.start;
                    chars.splice(0, restLength(inQuote.start));
                    state = AttrState.Value;
                    break;
                }
                // Unquoted value: re-read this character in the Value state.
                chars.unshift(char);
                state = AttrState.Value;
                break;
            }
            case AttrState.Value: {
                const activeQuote = quoteSet[quoteTypeIndex];
                if (!activeQuote && (spaces.includes(char) || char === '/' || char === '>')) {
                    // An unquoted value ends at whitespace, `/` or `>`.
                    chars.unshift(char);
                    state = AttrState.AfterValue;
                    break;
                }
                if (quoteModeStack.length === 0 && char === activeQuote?.end) {
                    quoteEnd = char;
                    state = AttrState.AfterValue;
                    break;
                }
                const rest = char + chars.join('');
                const inQuoteEnd = quoteModeStack.at(-1);
                if (inQuoteEnd && rest.startsWith(inQuoteEnd.end)) {
                    quoteModeStack.pop();
                    attrValue += inQuoteEnd.end;
                    chars.splice(0, restLength(inQuoteEnd.end));
                    break;
                }
                const inQuoteStart = quoteInValueChars.find(quote => rest.startsWith(quote.start));
                if (inQuoteStart) {
                    quoteModeStack.push(inQuoteStart);
                    attrValue += inQuoteStart.start;
                    chars.splice(0, restLength(inQuoteStart.start));
                    break;
                }
                attrValue += char;
                break;
            }
        }
    }
    if (state === AttrState.Value && quoteTypeIndex !== -1) {
        throw new SyntaxError(`Unclosed attribute value: ${raw}`);
    }
    const leftover = chars.join('');
    return {
        spacesBeforeAttrName,
        attrName,
        spacesBeforeEqual,
        equal,
        spacesAfterEqual,
        quoteStart,
        attrValue,
        quoteEnd,
        leftover,
    };
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { QuoteSet } from './types.js';
|
|
2
|
+
import type { MLASTHTMLAttr } from '@markuplint/ml-ast';
|
|
3
|
+
import { AttrState } from './attr-parser.js';
|
|
4
|
+
/**
 * Parses one attribute from the start of `raw` and returns it as an attribute
 * node whose parts are located tokens.
 *
 * @param raw Source text that starts at (or just before) an attribute.
 * @param line Line of the first character of `raw`.
 * @param col Column of the first character of `raw`.
 * @param startOffset Offset of the first character of `raw`.
 * @param quoteSet Delimiter pairs that may wrap an attribute value.
 * @param startState Scanner state to begin in.
 * @param quoteInValueChars Delimiter pairs recognised inside a value.
 * @param spaces Characters treated as whitespace.
 * @returns The attribute node; `__leftover` is set only when text remained unconsumed.
 */
export declare function attrTokenizer(
    raw: string,
    line: number,
    col: number,
    startOffset: number,
    quoteSet?: readonly QuoteSet[],
    startState?: AttrState,
    quoteInValueChars?: readonly QuoteSet[],
    spaces?: readonly string[],
): MLASTHTMLAttr & {
    __leftover?: string;
};
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import { AttrState, attrParser } from './attr-parser.js';
|
|
2
|
+
import { tokenizer, uuid } from './create-token.js';
|
|
3
|
+
/**
 * Parses one attribute from the start of `raw` with `attrParser` and turns each
 * of its parts into a located token. Each part starts where the previous one
 * ended.
 *
 * The node's own location and `raw` span from the attribute name to the closing
 * quote; whitespace before the name is reported only as `spacesBeforeName`.
 *
 * @param raw Source text that starts at (or just before) an attribute.
 * @param line Line of the first character of `raw`.
 * @param col Column of the first character of `raw`.
 * @param startOffset Offset of the first character of `raw`.
 * @param quoteSet Forwarded to `attrParser`.
 * @param startState Forwarded to `attrParser` (defaults to `AttrState.BeforeName`).
 * @param quoteInValueChars Forwarded to `attrParser`.
 * @param spaces Forwarded to `attrParser`.
 * @returns The attribute node. `__leftover` is present only when `attrParser`
 *   left a non-empty remainder.
 */
export function attrTokenizer(raw, line, col, startOffset, quoteSet, startState = AttrState.BeforeName, quoteInValueChars, spaces) {
    const parsed = attrParser(raw, quoteSet, startState, quoteInValueChars, spaces);
    // Running position: every part begins at the end of the one before it.
    const cursor = { line, col, offset: startOffset };
    const take = (text) => {
        const token = tokenizer(text, cursor.line, cursor.col, cursor.offset);
        cursor.line = token.endLine;
        cursor.col = token.endCol;
        cursor.offset = token.endOffset;
        return token;
    };
    const spacesBeforeName = take(parsed.spacesBeforeAttrName);
    const name = take(parsed.attrName);
    const spacesBeforeEqual = take(parsed.spacesBeforeEqual);
    const equal = take(parsed.equal);
    const spacesAfterEqual = take(parsed.spacesAfterEqual);
    const startQuote = take(parsed.quoteStart);
    const value = take(parsed.attrValue);
    const endQuote = take(parsed.quoteEnd);
    // The attribute as a whole starts at its name, not at the leading whitespace.
    const attrRaw = [
        parsed.attrName,
        parsed.spacesBeforeEqual,
        parsed.equal,
        parsed.spacesAfterEqual,
        parsed.quoteStart,
        parsed.attrValue,
        parsed.quoteEnd,
    ].join('');
    const attrToken = tokenizer(attrRaw, name.startLine, name.startCol, name.startOffset);
    const result = {
        type: 'html-attr',
        uuid: uuid(),
        raw: attrToken.raw,
        startOffset: attrToken.startOffset,
        endOffset: attrToken.endOffset,
        startLine: attrToken.startLine,
        endLine: attrToken.endLine,
        startCol: attrToken.startCol,
        endCol: attrToken.endCol,
        spacesBeforeName,
        name,
        spacesBeforeEqual,
        equal,
        spacesAfterEqual,
        startQuote,
        value,
        endQuote,
        isDuplicatable: false,
        nodeName: name.raw,
        parentNode: null,
        prevNode: null,
        nextNode: null,
        isFragment: false,
        isGhost: false,
    };
    return parsed.leftover ? { ...result, __leftover: parsed.leftover } : result;
}
|
package/lib/const.d.ts
CHANGED
|
@@ -5,6 +5,12 @@ export declare const MASK_CHAR = "\uE000";
|
|
|
5
5
|
* @see https://developer.mozilla.org/en-US/docs/Web/SVG/Element
|
|
6
6
|
*/
|
|
7
7
|
export declare const svgElementList: string[];
|
|
8
|
-
export declare const reTag: RegExp;
|
|
9
8
|
export declare const reTagName: RegExp;
|
|
10
9
|
export declare const reSplitterTag: RegExp;
|
|
10
|
+
/**
|
|
11
|
+
* - U+0009 CHARACTER TABULATION (tab) => `\t`
|
|
12
|
+
* - U+000A LINE FEED (LF) => `\n`
|
|
13
|
+
* - U+000C FORM FEED (FF) => `\f`
|
|
14
|
+
* - U+0020 SPACE => ` `
|
|
15
|
+
*/
|
|
16
|
+
export declare const defaultSpaces: readonly ["\t", "\n", "\f", " "];
|
package/lib/const.js
CHANGED
|
@@ -94,7 +94,12 @@ export const svgElementList = [
|
|
|
94
94
|
'tref',
|
|
95
95
|
'vkern',
|
|
96
96
|
];
|
|
97
|
-
export const
|
|
98
|
-
// eslint-disable-next-line no-control-regex
|
|
99
|
-
export const reTagName = /^[a-z][^\u0000\u0009\u000A\u000C />]*/i;
|
|
97
|
+
export const reTagName = /^[a-z][^\0\t\n\f />]*/i;
|
|
100
98
|
export const reSplitterTag = /<[^>]+>/g;
|
|
99
|
+
/**
 * Characters treated as whitespace by default:
 *
 * - U+0009 CHARACTER TABULATION (tab) => `\t`
 * - U+000A LINE FEED (LF) => `\n`
 * - U+000C FORM FEED (FF) => `\f`
 * - U+0020 SPACE => ` `
 */
export const defaultSpaces = [...'\t\n\f '];
|
package/lib/flatten-nodes.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { uuid } from './create-token.js';
|
|
2
2
|
import { getEndCol, getEndLine } from './get-location.js';
|
|
3
3
|
import { removeDeprecatedNode } from './remove-deprecated-node.js';
|
|
4
|
-
import tagSplitter from './tag-splitter.js';
|
|
4
|
+
import { tagSplitter } from './tag-splitter.js';
|
|
5
5
|
import { walk } from './walker.js';
|
|
6
6
|
export function flattenNodes(
|
|
7
7
|
// eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
|
package/lib/get-location.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
export function getLine(html, startOffset) {
|
|
2
|
-
return html.slice(0, startOffset).split(/\n/
|
|
2
|
+
return html.slice(0, startOffset).split(/\n/).length;
|
|
3
3
|
}
|
|
4
4
|
export function getCol(html, startOffset) {
|
|
5
|
-
const lines = html.slice(0, startOffset).split(/\n/
|
|
5
|
+
const lines = html.slice(0, startOffset).split(/\n/);
|
|
6
6
|
return (lines.at(-1) ?? '').length + 1;
|
|
7
7
|
}
|
|
8
8
|
export function getEndLine(html, line) {
|
package/lib/ignore-block.js
CHANGED
|
@@ -192,14 +192,26 @@ function snap(str, reg) {
|
|
|
192
192
|
return [index, above, snapPoint, below];
|
|
193
193
|
}
|
|
194
194
|
/**
 * Builds a non-global RegExp from `reg`.
 *
 * A string is matched literally (its regex metacharacters are escaped). For a
 * RegExp, only the `i` flag is carried over; every other flag is dropped.
 *
 * @param reg Pattern as a RegExp or a literal string.
 * @returns A new RegExp without the global flag.
 */
function removeGlobalOption(reg) {
    const isLiteral = typeof reg === 'string';
    const source = isLiteral ? escapeRegExpForStr(reg) : reg.source;
    const flags = !isLiteral && reg.ignoreCase ? 'i' : '';
    return new RegExp(source, flags);
}
|
|
197
200
|
/**
 * Builds a RegExp whose source is `str` followed by the pattern of `reg`.
 *
 * A string `reg` is matched literally (its regex metacharacters are escaped);
 * `str` is always used as regex source. For a RegExp, only the `i` flag is kept.
 *
 * @param reg Pattern as a RegExp or a literal string.
 * @param str Regex source to put in front of the pattern.
 * @returns The combined RegExp.
 */
function prepend(reg, str) {
    if (typeof reg !== 'string') {
        const flags = reg.ignoreCase ? 'i' : '';
        return new RegExp(`${str}${reg.source}`, flags);
    }
    return new RegExp(`${str}${escapeRegExpForStr(reg)}`);
}
|
|
200
206
|
/**
 * Builds a RegExp whose source is the pattern of `reg` followed by `str`.
 *
 * A string `reg` is matched literally (its regex metacharacters are escaped);
 * `str` is always used as regex source. For a RegExp, only the `i` flag is kept.
 *
 * @param reg Pattern as a RegExp or a literal string.
 * @param str Regex source to put after the pattern.
 * @returns The combined RegExp.
 */
function append(reg, str) {
    if (typeof reg !== 'string') {
        const flags = reg.ignoreCase ? 'i' : '';
        return new RegExp(`${reg.source}${str}`, flags);
    }
    return new RegExp(`${escapeRegExpForStr(reg)}${str}`);
}
|
|
203
212
|
/**
 * Tells whether `textContent` contains the mask character.
 *
 * @param textContent Text to inspect.
 * @param maskChar Character (or string) to look for.
 * @returns `true` when `maskChar` occurs in `textContent`.
 */
function hasIgnoreBlock(textContent, maskChar) {
    const containsMask = textContent.includes(maskChar);
    return containsMask;
}
|
|
215
|
+
/**
 * Escapes regex metacharacters in `str` so it can be embedded in a RegExp
 * source and matched literally.
 *
 * @param str Literal text.
 * @returns `str` with a backslash in front of each character in the set below.
 */
function escapeRegExpForStr(str) {
    const metaChars = /[!$()*+./:=?[\\\]^{|}]/g;
    return str.replace(metaChars, matched => `\\${matched}`);
}
|
package/lib/index.d.ts
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
export * from './attr-parser.js';
|
|
2
|
+
export * from './attr-tokenizer.js';
|
|
1
3
|
export * from './const.js';
|
|
2
4
|
export * from './create-token.js';
|
|
3
5
|
export * from './debugger.js';
|
|
@@ -12,5 +14,7 @@ export * from './ignore-front-matter.js';
|
|
|
12
14
|
export * from './parse-attr.js';
|
|
13
15
|
export * from './parser-error.js';
|
|
14
16
|
export * from './remove-deprecated-node.js';
|
|
17
|
+
export * from './script-parser.js';
|
|
18
|
+
export * from './tag-parser.js';
|
|
15
19
|
export * from './tag-splitter.js';
|
|
16
20
|
export * from './walker.js';
|
package/lib/index.js
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
export * from './attr-parser.js';
|
|
2
|
+
export * from './attr-tokenizer.js';
|
|
1
3
|
export * from './const.js';
|
|
2
4
|
export * from './create-token.js';
|
|
3
5
|
export * from './debugger.js';
|
|
@@ -12,5 +14,7 @@ export * from './ignore-front-matter.js';
|
|
|
12
14
|
export * from './parse-attr.js';
|
|
13
15
|
export * from './parser-error.js';
|
|
14
16
|
export * from './remove-deprecated-node.js';
|
|
17
|
+
export * from './script-parser.js';
|
|
18
|
+
export * from './tag-parser.js';
|
|
15
19
|
export * from './tag-splitter.js';
|
|
16
20
|
export * from './walker.js';
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
// @ts-ignore
|
|
2
|
+
import { tokenize } from 'espree';
|
|
3
|
+
/**
 * Tokenizes `script` with espree (latest ECMAScript version, no location data)
 * and keeps only the type and text of each token.
 *
 * @param script JavaScript source text.
 * @returns One `{ type, value }` object per token, in source order.
 */
export function scriptParser(script) {
    const options = {
        ecmaVersion: 'latest',
        loc: false,
    };
    return tokenize(script, options).map(({ type, value }) => ({ type, value }));
}
|
|
13
|
+
/**
 * Strips one pair of matching surrounding quotes (`"…"` or `'…'`) from `str`.
 *
 * The string is returned unchanged when it does not start with a quote, when
 * its last character differs from its first, or when it is too short to hold
 * a pair of quotes.
 *
 * @param str Possibly quoted text.
 * @returns The text between the quotes, or `str` itself if it is not quoted.
 */
export function removeQuote(str) {
    // A lone quote character is both the first and the last character of the
    // string; without this guard it would be mistaken for an empty quoted string.
    if (str.length < 2) {
        return str;
    }
    const quote = str[0];
    const startsWithQuote = quote === '"' || quote === "'";
    if (!startsWithQuote || str.at(-1) !== quote) {
        return str;
    }
    return str.slice(1, -1);
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { MLASTAttr } from '@markuplint/ml-ast';
|
|
2
|
+
/**
 * Parses a start or end tag from `raw` into its tag name, attributes and the
 * tokens that precede the closing `>`.
 *
 * @param raw Source text containing the tag.
 * @param startLine Line of the first character of `raw`.
 * @param startCol Column of the first character of `raw`.
 * @param startOffset Offset of the first character of `raw`.
 * @param offsetOffset Amount added to `startOffset`.
 * @param offsetLine Amount added to `startLine`.
 * @param offsetColumn Amount added to `startCol`.
 * @param spaces Characters treated as whitespace.
 * @returns The parsed parts. `beforeOpenTag` is the text preceding `<`,
 *   `isOpenTag` is `false` for a closing tag, and `leftover` is the text that
 *   was not consumed.
 */
export declare function tagParser(
    raw: string,
    startLine: number,
    startCol: number,
    startOffset: number,
    offsetOffset?: number,
    offsetLine?: number,
    offsetColumn?: number,
    spaces?: readonly string[],
): {
    beforeOpenTag: string;
    tagName: string;
    attrs: MLASTAttr[];
    afterAttrSpaces: import("@markuplint/ml-ast").MLToken;
    selfClosingSolidus: import("@markuplint/ml-ast").MLToken;
    isOpenTag: boolean;
    leftover: string;
};
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import { attrTokenizer } from './attr-tokenizer.js';
|
|
2
|
+
import { defaultSpaces } from './const.js';
|
|
3
|
+
import { tokenizer } from './create-token.js';
|
|
4
|
+
/**
 * Scanner states used by `tagParser`.
 * The object maps each state name to its number and each number back to its name.
 */
var TagState;
(function (TagState) {
    const names = ['BeforeOpenTag', 'FirstCharOfTagName', 'TagName', 'Attrs', 'AfterAttrs', 'AfterOpenTag'];
    names.forEach((name, value) => {
        TagState[name] = value;
        TagState[value] = name;
    });
})(TagState || (TagState = {}));
|
|
13
|
+
/**
 * Parses a start or end tag from `raw`.
 *
 * Text before the first `<` is collected as `beforeOpenTag`. A `/` right after
 * `<` marks a closing tag. Attributes are read with `attrTokenizer` until `/`
 * or `>` is next; scanning stops after the `>` that closes the tag, and the
 * rest of `raw` is returned as `leftover`.
 *
 * @param raw Source text containing the tag.
 * @param startLine Line of the first character of `raw`.
 * @param startCol Column of the first character of `raw`.
 * @param startOffset Offset of the first character of `raw`.
 * @param offsetOffset Added to `startOffset`.
 * @param offsetLine Added to `startLine`.
 * @param offsetColumn Added to `startCol`, but only when `startLine` is 1.
 * @param spaces Characters treated as whitespace; also used when reading attributes.
 * @returns The tag parts; `afterAttrSpaces` and `selfClosingSolidus` are located tokens.
 * @throws {SyntaxError} If no tag name is found, if the input ends inside the
 *   tag name, if an attribute cannot be consumed, or if an unexpected character
 *   follows the attributes.
 */
export function tagParser(raw, startLine, startCol, startOffset, offsetOffset = 0, offsetLine = 0, offsetColumn = 0, spaces = defaultSpaces) {
    let offset = startOffset + offsetOffset;
    let line = startLine + offsetLine;
    let col = startCol + (startLine === 1 ? offsetColumn : 0);
    let state = TagState.BeforeOpenTag;
    let beforeOpenTagChars = '';
    let tagName = '';
    let afterAttrsSpaceChars = '';
    let selfClosingSolidusChar = '';
    let isOpenTag = true;
    const attrs = [];
    const chars = [...raw];
    while (chars.length > 0 && state !== TagState.AfterOpenTag) {
        const char = chars.shift();
        switch (state) {
            case TagState.BeforeOpenTag: {
                if (char === '<') {
                    // Move the position past everything that preceded the tag.
                    const beforeOpenTag = tokenizer(beforeOpenTagChars, line, col, offset);
                    line = beforeOpenTag.endLine;
                    col = beforeOpenTag.endCol;
                    offset = beforeOpenTag.endOffset;
                    // Add `<` length
                    col += 1;
                    offset += 1;
                    state = TagState.FirstCharOfTagName;
                    break;
                }
                beforeOpenTagChars += char;
                break;
            }
            case TagState.FirstCharOfTagName: {
                if (/[a-z]/i.test(char)) {
                    tagName += char;
                    state = TagState.TagName;
                    break;
                }
                if (char === '/') {
                    // `</`: a closing tag. Its length is added once the name is complete.
                    isOpenTag = false;
                    break;
                }
                // Not a tag name: stop here and leave the character in the leftover.
                chars.unshift(char);
                state = TagState.AfterOpenTag;
                break;
            }
            case TagState.TagName: {
                if (spaces.includes(char)) {
                    chars.unshift(char);
                    if (!isOpenTag) {
                        // Add `/` of `</`(close tag) length
                        offset += 1;
                        col += 1;
                    }
                    offset += tagName.length;
                    col += tagName.length;
                    state = TagState.Attrs;
                    break;
                }
                // NOTE(review): unlike the whitespace branch above, the two exits
                // below do not advance `offset`/`col` past the tag name, so the
                // `afterAttrSpaces` and `selfClosingSolidus` tokens of a tag such
                // as `<br/>` are located right after `<`. Left unchanged here;
                // confirm whether callers rely on it.
                if (char === '/') {
                    chars.unshift(char);
                    state = TagState.AfterAttrs;
                    break;
                }
                if (char === '>') {
                    state = TagState.AfterOpenTag;
                    break;
                }
                tagName += char;
                break;
            }
            case TagState.Attrs: {
                let leftover = char + chars.join('');
                while (leftover.trim()) {
                    const trimmed = leftover.trim();
                    if (trimmed.startsWith('/') || trimmed.startsWith('>')) {
                        break;
                    }
                    // Pass the caller's whitespace set on so the tag and its
                    // attributes are split by the same rule.
                    const attr = attrTokenizer(leftover, line, col, offset, undefined, undefined, undefined, spaces);
                    line = attr.endLine;
                    col = attr.endCol;
                    offset = attr.endOffset;
                    if (leftover === attr.__leftover) {
                        // Nothing was consumed; another pass would never end.
                        throw new SyntaxError(`Invalid attribute syntax: ${leftover}`);
                    }
                    leftover = attr.__leftover ?? '';
                    delete attr.__leftover;
                    attrs.push(attr);
                }
                // Always resynchronise the queue with what the attribute pass
                // left over. If the input ended without `/` or `>`, the queue
                // would otherwise still hold text that has already been parsed,
                // and it would be parsed again as extra attributes.
                chars.length = 0;
                chars.push(...leftover);
                state = TagState.AfterAttrs;
                break;
            }
            case TagState.AfterAttrs: {
                if (char === '>') {
                    state = TagState.AfterOpenTag;
                    break;
                }
                if (spaces.includes(char)) {
                    afterAttrsSpaceChars += char;
                    break;
                }
                if (char === '/') {
                    selfClosingSolidusChar = char;
                    break;
                }
                throw new SyntaxError(`Invalid tag syntax: "${raw}"`);
            }
        }
    }
    const leftover = chars.join('');
    if ((!leftover && state === TagState.TagName) || tagName === '') {
        throw new SyntaxError(`Invalid tag syntax: "${raw}"`);
    }
    const afterAttrSpaces = tokenizer(afterAttrsSpaceChars, line, col, offset);
    const selfClosingSolidus = tokenizer(selfClosingSolidusChar, afterAttrSpaces.endLine, afterAttrSpaces.endCol, afterAttrSpaces.endOffset);
    return {
        beforeOpenTag: beforeOpenTagChars,
        tagName,
        attrs,
        afterAttrSpaces,
        selfClosingSolidus,
        isOpenTag,
        leftover,
    };
}
|
package/lib/tag-splitter.d.ts
CHANGED
package/lib/tag-splitter.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { reSplitterTag, reTagName } from './const.js';
|
|
2
2
|
import { getEndCol, getEndLine } from '@markuplint/parser-utils';
|
|
3
|
-
export
|
|
3
|
+
export function tagSplitter(raw, line, col) {
|
|
4
4
|
return withLocation(tagSplitterAsString(raw), line, col);
|
|
5
5
|
}
|
|
6
6
|
function tagSplitterAsString(raw) {
|
package/lib/types.d.ts
CHANGED
|
@@ -7,8 +7,8 @@ export type Code = {
|
|
|
7
7
|
};
|
|
8
8
|
export type IgnoreTag = {
|
|
9
9
|
readonly type: string;
|
|
10
|
-
readonly start: Readonly<RegExp
|
|
11
|
-
readonly end: Readonly<RegExp
|
|
10
|
+
readonly start: Readonly<RegExp> | string;
|
|
11
|
+
readonly end: Readonly<RegExp> | string;
|
|
12
12
|
};
|
|
13
13
|
export type IgnoreBlock = {
|
|
14
14
|
readonly source: string;
|
|
@@ -16,3 +16,7 @@ export type IgnoreBlock = {
|
|
|
16
16
|
readonly stack: readonly Code[];
|
|
17
17
|
readonly maskChar: string;
|
|
18
18
|
};
|
|
19
|
+
/**
 * A pair of delimiters: the text that opens a quoted span and the text that
 * closes it.
 */
export type QuoteSet = {
    /** Text that opens the span. */
    readonly start: string;
    /** Text that closes the span. */
    readonly end: string;
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@markuplint/parser-utils",
|
|
3
|
-
"version": "4.0.0-alpha.
|
|
3
|
+
"version": "4.0.0-alpha.6",
|
|
4
4
|
"description": "Utility module for markuplint parser plugin",
|
|
5
5
|
"repository": "git@github.com:markuplint/markuplint.git",
|
|
6
6
|
"author": "Yusuke Hirao <yusukehirao@me.com>",
|
|
@@ -24,11 +24,12 @@
|
|
|
24
24
|
"clean": "tsc --build --clean"
|
|
25
25
|
},
|
|
26
26
|
"dependencies": {
|
|
27
|
-
"@markuplint/ml-ast": "4.0.0-alpha.
|
|
28
|
-
"@markuplint/types": "4.0.0-alpha.
|
|
29
|
-
"@types/uuid": "^9.0.
|
|
30
|
-
"
|
|
27
|
+
"@markuplint/ml-ast": "4.0.0-alpha.6",
|
|
28
|
+
"@markuplint/types": "4.0.0-alpha.6",
|
|
29
|
+
"@types/uuid": "^9.0.7",
|
|
30
|
+
"espree": "^9.6.1",
|
|
31
|
+
"type-fest": "^4.8.2",
|
|
31
32
|
"uuid": "^9.0.1"
|
|
32
33
|
},
|
|
33
|
-
"gitHead": "
|
|
34
|
+
"gitHead": "06e1242d274c72cf08a10a572b06ac35d1b924a4"
|
|
34
35
|
}
|