@markuplint/parser-utils 4.0.0-alpha.3 → 4.0.0-alpha.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2017-2019 Yusuke Hirao
3
+ Copyright (c) 2017-2023 Yusuke Hirao
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -0,0 +1,25 @@
1
+ import type { QuoteSet } from './types.js';
2
+ export declare enum AttrState {
3
+ BeforeName = 0,
4
+ Name = 1,
5
+ Equal = 2,
6
+ BeforeValue = 3,
7
+ Value = 4,
8
+ AfterValue = 5
9
+ }
10
+ /**
11
+ * @see https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
12
+ * @see https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
13
+ * @see https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
14
+ */
15
+ export declare function attrParser(raw: string, quoteSet?: readonly QuoteSet[], startState?: AttrState, quoteInValueChars?: ReadonlyArray<QuoteSet>, spaces?: ReadonlyArray<string>): {
16
+ spacesBeforeAttrName: string;
17
+ attrName: string;
18
+ spacesBeforeEqual: string;
19
+ equal: string;
20
+ spacesAfterEqual: string;
21
+ quoteStart: string;
22
+ attrValue: string;
23
+ quoteEnd: string;
24
+ leftover: string;
25
+ };
@@ -0,0 +1,188 @@
1
+ import { defaultSpaces } from './const.js';
2
+ const defaultQuoteSet = [
3
+ { start: '"', end: '"' },
4
+ { start: "'", end: "'" },
5
+ ];
6
+ const defaultQuoteInValueChars = [];
7
+ const EQUAL = '=';
8
+ export var AttrState;
9
+ (function (AttrState) {
10
+ AttrState[AttrState["BeforeName"] = 0] = "BeforeName";
11
+ AttrState[AttrState["Name"] = 1] = "Name";
12
+ AttrState[AttrState["Equal"] = 2] = "Equal";
13
+ AttrState[AttrState["BeforeValue"] = 3] = "BeforeValue";
14
+ AttrState[AttrState["Value"] = 4] = "Value";
15
+ AttrState[AttrState["AfterValue"] = 5] = "AfterValue";
16
+ })(AttrState || (AttrState = {}));
17
+ /**
18
+ * @see https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
19
+ * @see https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
20
+ * @see https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
21
+ */
22
+ export function attrParser(raw, quoteSet = defaultQuoteSet, startState = AttrState.BeforeName, quoteInValueChars = defaultQuoteInValueChars, spaces = defaultSpaces) {
23
+ let state = startState;
24
+ let spacesBeforeAttrName = '';
25
+ let attrName = '';
26
+ let spacesBeforeEqual = '';
27
+ let equal = '';
28
+ let spacesAfterEqual = '';
29
+ let quoteTypeIndex = -1;
30
+ let quoteStart = '';
31
+ let attrValue = '';
32
+ let quoteEnd = '';
33
+ const quoteModeStack = [];
34
+ const chars = [...raw];
35
+ while (chars.length > 0) {
36
+ if (state === AttrState.AfterValue) {
37
+ break;
38
+ }
39
+ const char = chars.shift();
40
+ switch (state) {
41
+ case AttrState.BeforeName: {
42
+ if (char === '>') {
43
+ chars.unshift(char);
44
+ state = AttrState.AfterValue;
45
+ break;
46
+ }
47
+ if (char === '/') {
48
+ chars.unshift(char);
49
+ state = AttrState.AfterValue;
50
+ break;
51
+ }
52
+ if (spaces.includes(char)) {
53
+ spacesBeforeAttrName += char;
54
+ break;
55
+ }
56
+ attrName += char;
57
+ state = AttrState.Name;
58
+ break;
59
+ }
60
+ case AttrState.Name: {
61
+ if (char === '>') {
62
+ chars.unshift(char);
63
+ state = AttrState.AfterValue;
64
+ break;
65
+ }
66
+ if (char === '/') {
67
+ chars.unshift(char);
68
+ state = AttrState.AfterValue;
69
+ break;
70
+ }
71
+ if (spaces.includes(char)) {
72
+ spacesBeforeEqual += char;
73
+ state = AttrState.Equal;
74
+ break;
75
+ }
76
+ if (char === EQUAL) {
77
+ equal += char;
78
+ state = AttrState.BeforeValue;
79
+ break;
80
+ }
81
+ attrName += char;
82
+ break;
83
+ }
84
+ case AttrState.Equal: {
85
+ if (spaces.includes(char)) {
86
+ spacesBeforeEqual += char;
87
+ break;
88
+ }
89
+ if (char === EQUAL) {
90
+ equal += char;
91
+ state = AttrState.BeforeValue;
92
+ break;
93
+ }
94
+ // End of attribute
95
+ chars.unshift(spacesBeforeEqual, char);
96
+ spacesBeforeEqual = '';
97
+ state = AttrState.AfterValue;
98
+ break;
99
+ }
100
+ case AttrState.BeforeValue: {
101
+ if (spaces.includes(char)) {
102
+ spacesAfterEqual += char;
103
+ break;
104
+ }
105
+ quoteTypeIndex = quoteSet.findIndex(quote => quote.start === char);
106
+ const quote = quoteSet[quoteTypeIndex];
107
+ if (quote) {
108
+ quoteStart = quote.start;
109
+ state = AttrState.Value;
110
+ break;
111
+ }
112
+ const raw = char + chars.join('');
113
+ const inQuote = quoteInValueChars.find(quote => raw.startsWith(quote.start));
114
+ if (inQuote) {
115
+ quoteModeStack.push(inQuote);
116
+ attrValue += inQuote.start;
117
+ chars.splice(0, inQuote.start.length - 1);
118
+ state = AttrState.Value;
119
+ break;
120
+ }
121
+ chars.unshift(char);
122
+ state = AttrState.Value;
123
+ break;
124
+ }
125
+ case AttrState.Value: {
126
+ // console.log(
127
+ // char,
128
+ // quoteSet[quoteTypeIndex]?.end,
129
+ // quoteModeStack.map(q => `${q.start}${q.end}`),
130
+ // );
131
+ if (!quoteSet[quoteTypeIndex]) {
132
+ if (spaces.includes(char)) {
133
+ chars.unshift(char);
134
+ state = AttrState.AfterValue;
135
+ break;
136
+ }
137
+ if (char === '/') {
138
+ chars.unshift(char);
139
+ state = AttrState.AfterValue;
140
+ break;
141
+ }
142
+ if (char === '>') {
143
+ chars.unshift(char);
144
+ state = AttrState.AfterValue;
145
+ break;
146
+ }
147
+ }
148
+ if (quoteModeStack.length === 0 && char === quoteSet[quoteTypeIndex]?.end) {
149
+ quoteEnd = char;
150
+ state = AttrState.AfterValue;
151
+ break;
152
+ }
153
+ const raw = char + chars.join('');
154
+ const inQuoteEnd = quoteModeStack.at(-1);
155
+ if (inQuoteEnd && raw.startsWith(inQuoteEnd.end)) {
156
+ quoteModeStack.pop();
157
+ attrValue += inQuoteEnd.end;
158
+ chars.splice(0, inQuoteEnd.end.length - 1);
159
+ break;
160
+ }
161
+ const inQuoteStart = quoteInValueChars.find(quote => raw.startsWith(quote.start));
162
+ if (inQuoteStart) {
163
+ quoteModeStack.push(inQuoteStart);
164
+ attrValue += inQuoteStart.start;
165
+ chars.splice(0, inQuoteStart.start.length - 1);
166
+ break;
167
+ }
168
+ attrValue += char;
169
+ break;
170
+ }
171
+ }
172
+ }
173
+ if (state === AttrState.Value && quoteTypeIndex !== -1) {
174
+ throw new SyntaxError(`Unclosed attribute value: ${raw}`);
175
+ }
176
+ const leftover = chars.join('');
177
+ return {
178
+ spacesBeforeAttrName,
179
+ attrName,
180
+ spacesBeforeEqual,
181
+ equal,
182
+ spacesAfterEqual,
183
+ quoteStart,
184
+ attrValue,
185
+ quoteEnd,
186
+ leftover,
187
+ };
188
+ }
@@ -0,0 +1,6 @@
1
+ import type { QuoteSet } from './types.js';
2
+ import type { MLASTHTMLAttr } from '@markuplint/ml-ast';
3
+ import { AttrState } from './attr-parser.js';
4
+ export declare function attrTokenizer(raw: string, line: number, col: number, startOffset: number, quoteSet?: ReadonlyArray<QuoteSet>, startState?: AttrState, quoteInValueChars?: ReadonlyArray<QuoteSet>, spaces?: ReadonlyArray<string>): MLASTHTMLAttr & {
5
+ __leftover?: string;
6
+ };
@@ -0,0 +1,75 @@
1
+ import { AttrState, attrParser } from './attr-parser.js';
2
+ import { tokenizer, uuid } from './create-token.js';
3
+ export function attrTokenizer(raw, line, col, startOffset, quoteSet, startState = AttrState.BeforeName, quoteInValueChars, spaces) {
4
+ const parsed = attrParser(raw, quoteSet, startState, quoteInValueChars, spaces);
5
+ let offset = startOffset;
6
+ const spacesBeforeName = tokenizer(parsed.spacesBeforeAttrName, line, col, offset);
7
+ line = spacesBeforeName.endLine;
8
+ col = spacesBeforeName.endCol;
9
+ offset = spacesBeforeName.endOffset;
10
+ const name = tokenizer(parsed.attrName, line, col, offset);
11
+ line = name.endLine;
12
+ col = name.endCol;
13
+ offset = name.endOffset;
14
+ const spacesBeforeEqual = tokenizer(parsed.spacesBeforeEqual, line, col, offset);
15
+ line = spacesBeforeEqual.endLine;
16
+ col = spacesBeforeEqual.endCol;
17
+ offset = spacesBeforeEqual.endOffset;
18
+ const equal = tokenizer(parsed.equal, line, col, offset);
19
+ line = equal.endLine;
20
+ col = equal.endCol;
21
+ offset = equal.endOffset;
22
+ const spacesAfterEqual = tokenizer(parsed.spacesAfterEqual, line, col, offset);
23
+ line = spacesAfterEqual.endLine;
24
+ col = spacesAfterEqual.endCol;
25
+ offset = spacesAfterEqual.endOffset;
26
+ const startQuote = tokenizer(parsed.quoteStart, line, col, offset);
27
+ line = startQuote.endLine;
28
+ col = startQuote.endCol;
29
+ offset = startQuote.endOffset;
30
+ const value = tokenizer(parsed.attrValue, line, col, offset);
31
+ line = value.endLine;
32
+ col = value.endCol;
33
+ offset = value.endOffset;
34
+ const endQuote = tokenizer(parsed.quoteEnd, line, col, offset);
35
+ const attrToken = tokenizer(parsed.attrName +
36
+ parsed.spacesBeforeEqual +
37
+ parsed.equal +
38
+ parsed.spacesAfterEqual +
39
+ parsed.quoteStart +
40
+ parsed.attrValue +
41
+ parsed.quoteEnd, name.startLine, name.startCol, name.startOffset);
42
+ const result = {
43
+ type: 'html-attr',
44
+ uuid: uuid(),
45
+ raw: attrToken.raw,
46
+ startOffset: attrToken.startOffset,
47
+ endOffset: attrToken.endOffset,
48
+ startLine: attrToken.startLine,
49
+ endLine: attrToken.endLine,
50
+ startCol: attrToken.startCol,
51
+ endCol: attrToken.endCol,
52
+ spacesBeforeName,
53
+ name,
54
+ spacesBeforeEqual,
55
+ equal,
56
+ spacesAfterEqual,
57
+ startQuote,
58
+ value,
59
+ endQuote,
60
+ isDuplicatable: false,
61
+ nodeName: name.raw,
62
+ parentNode: null,
63
+ prevNode: null,
64
+ nextNode: null,
65
+ isFragment: false,
66
+ isGhost: false,
67
+ };
68
+ if (parsed.leftover) {
69
+ return {
70
+ ...result,
71
+ __leftover: parsed.leftover,
72
+ };
73
+ }
74
+ return result;
75
+ }
package/lib/const.d.ts CHANGED
@@ -5,6 +5,12 @@ export declare const MASK_CHAR = "\uE000";
5
5
  * @see https://developer.mozilla.org/en-US/docs/Web/SVG/Element
6
6
  */
7
7
  export declare const svgElementList: string[];
8
- export declare const reTag: RegExp;
9
8
  export declare const reTagName: RegExp;
10
9
  export declare const reSplitterTag: RegExp;
10
+ /**
11
+ * - U+0009 CHARACTER TABULATION (tab) => `\t`
12
+ * - U+000A LINE FEED (LF) => `\n`
13
+ * - U+000C FORM FEED (FF) => `\f`
14
+ * - U+0020 SPACE => ` `
15
+ */
16
+ export declare const defaultSpaces: readonly ["\t", "\n", "\f", " "];
package/lib/const.js CHANGED
@@ -94,7 +94,12 @@ export const svgElementList = [
94
94
  'tref',
95
95
  'vkern',
96
96
  ];
97
- export const reTag = /^<((?:.|\s|\n)+)>\s*$/;
98
- // eslint-disable-next-line no-control-regex
99
- export const reTagName = /^(?:[a-z][^\u0000\u0009\u000A\u000C\u0020/>]*)/i;
97
+ export const reTagName = /^[a-z][^\0\t\n\f />]*/i;
100
98
  export const reSplitterTag = /<[^>]+>/g;
99
+ /**
100
+ * - U+0009 CHARACTER TABULATION (tab) => `\t`
101
+ * - U+000A LINE FEED (LF) => `\n`
102
+ * - U+000C FORM FEED (FF) => `\f`
103
+ * - U+0020 SPACE => ` `
104
+ */
105
+ export const defaultSpaces = ['\t', '\n', '\f', ' '];
package/lib/debugger.js CHANGED
@@ -1,21 +1,19 @@
1
1
  export function nodeListToDebugMaps(
2
2
  // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
3
3
  nodeList, withAttr = false) {
4
- return nodeList
5
- .map(n => {
4
+ return nodeList.flatMap(n => {
6
5
  const r = [];
7
- if (!n.isGhost) {
6
+ if (n.isGhost) {
7
+ r.push(`[N/A]>[N/A](N/A)${n.nodeName}: ${visibleWhiteSpace(n.raw)}`);
8
+ }
9
+ else {
8
10
  r.push(tokenDebug(n));
9
11
  if (withAttr && 'attributes' in n) {
10
12
  r.push(...attributesToDebugMaps(n.attributes).flat());
11
13
  }
12
14
  }
13
- else {
14
- r.push(`[N/A]>[N/A](N/A)${n.nodeName}: ${visibleWhiteSpace(n.raw)}`);
15
- }
16
15
  return r;
17
- })
18
- .flat();
16
+ });
19
17
  }
20
18
  export function attributesToDebugMaps(
21
19
  // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
@@ -28,16 +26,7 @@ attributes) {
28
26
  }),
29
27
  ];
30
28
  if (n.type === 'html-attr') {
31
- r.push(` ${tokenDebug(n.spacesBeforeName, 'bN')}`);
32
- r.push(` ${tokenDebug(n.name, 'name')}`);
33
- r.push(` ${tokenDebug(n.spacesBeforeEqual, 'bE')}`);
34
- r.push(` ${tokenDebug(n.equal, 'equal')}`);
35
- r.push(` ${tokenDebug(n.spacesAfterEqual, 'aE')}`);
36
- r.push(` ${tokenDebug(n.startQuote, 'sQ')}`);
37
- r.push(` ${tokenDebug(n.value, 'value')}`);
38
- r.push(` ${tokenDebug(n.endQuote, 'eQ')}`);
39
- r.push(` isDirective: ${!!n.isDirective}`);
40
- r.push(` isDynamicValue: ${!!n.isDynamicValue}`);
29
+ r.push(` ${tokenDebug(n.spacesBeforeName, 'bN')}`, ` ${tokenDebug(n.name, 'name')}`, ` ${tokenDebug(n.spacesBeforeEqual, 'bE')}`, ` ${tokenDebug(n.equal, 'equal')}`, ` ${tokenDebug(n.spacesAfterEqual, 'aE')}`, ` ${tokenDebug(n.startQuote, 'sQ')}`, ` ${tokenDebug(n.value, 'value')}`, ` ${tokenDebug(n.endQuote, 'eQ')}`, ` isDirective: ${!!n.isDirective}`, ` isDynamicValue: ${!!n.isDynamicValue}`);
41
30
  }
42
31
  if (n.potentialName != null) {
43
32
  r.push(` potentialName: ${visibleWhiteSpace(n.potentialName)}`);
@@ -54,5 +43,5 @@ function tokenDebug(n, type = '') {
54
43
  n.potentialName ?? n.nodeName ?? n.name ?? n.type ?? type}: ${visibleWhiteSpace(n.raw)}`;
55
44
  }
56
45
  function visibleWhiteSpace(chars) {
57
- return chars.replace(/\n/g, '⏎').replace(/\t/g, '→').replace(/\s/g, '␣');
46
+ return chars.replaceAll('\n', '⏎').replaceAll('\t', '→').replaceAll(/\s/g, '␣');
58
47
  }
@@ -25,7 +25,7 @@ function _distinguishAuthoredName(name, patterns) {
25
25
  });
26
26
  }
27
27
  function toRegexp(pattern) {
28
- const matched = pattern.match(/^\/(.+)\/([ig]*)$/i);
28
+ const matched = pattern.match(/^\/(.+)\/([gi]*)$/i);
29
29
  if (matched && matched[1]) {
30
30
  return new RegExp(matched[1], matched[2]);
31
31
  }
@@ -1,7 +1,7 @@
1
1
  import { uuid } from './create-token.js';
2
2
  import { getEndCol, getEndLine } from './get-location.js';
3
3
  import { removeDeprecatedNode } from './remove-deprecated-node.js';
4
- import tagSplitter from './tag-splitter.js';
4
+ import { tagSplitter } from './tag-splitter.js';
5
5
  import { walk } from './walker.js';
6
6
  export function flattenNodes(
7
7
  // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
@@ -71,9 +71,9 @@ nodeTree, rawHtml, createLastText = true) {
71
71
  * create Last spaces
72
72
  */
73
73
  let lastOffset = 0;
74
- nodeOrders.forEach((node, i) => {
74
+ for (const node of nodeOrders) {
75
75
  lastOffset = Math.max(node.endOffset, lastOffset);
76
- });
76
+ }
77
77
  // console.log(lastOffset);
78
78
  const lastTextContent = rawHtml.slice(lastOffset);
79
79
  // console.log(`"${lastTextContent}"`);
@@ -110,8 +110,8 @@ nodeTree, rawHtml, createLastText = true) {
110
110
  * concat text nodes
111
111
  */
112
112
  const result = [];
113
- nodeOrders.forEach(node => {
114
- const prevNode = result[result.length - 1] ?? null;
113
+ for (const node of nodeOrders) {
114
+ const prevNode = result.at(-1) ?? null;
115
115
  if (node.type === 'text' && prevNode?.type === 'text') {
116
116
  prevNode.raw = prevNode.raw + node.raw;
117
117
  prevNode.endOffset = node.endOffset;
@@ -132,10 +132,10 @@ nodeTree, rawHtml, createLastText = true) {
132
132
  if (node.nextNode) {
133
133
  node.nextNode.prevNode = prevNode;
134
134
  }
135
- return;
135
+ continue;
136
136
  }
137
137
  result.push(node);
138
- });
138
+ }
139
139
  {
140
140
  /**
141
141
  * Correction prev/next/parent
@@ -168,10 +168,12 @@ nodeTree, rawHtml, createLastText = true) {
168
168
  // Children
169
169
  if (node.type === 'text') {
170
170
  const parent = node.parentNode;
171
- if (parent && parent.type === 'starttag' && parent.nodeName.toLowerCase() === 'html') {
172
- if (parent.childNodes && !parent.childNodes.some(n => n.uuid === node.uuid)) {
173
- parent.childNodes.push(node);
174
- }
171
+ if (parent &&
172
+ parent.type === 'starttag' &&
173
+ parent.nodeName.toLowerCase() === 'html' &&
174
+ parent.childNodes &&
175
+ !parent.childNodes.some(n => n.uuid === node.uuid)) {
176
+ parent.childNodes.push(node);
175
177
  }
176
178
  }
177
179
  prevToken = node;
@@ -241,5 +243,5 @@ nodeTree, rawHtml) {
241
243
  node.endOffset = node.endOffset ?? currentEndOffset;
242
244
  nodeOrders.push(node);
243
245
  });
244
- return nodeOrders.slice();
246
+ return [...nodeOrders];
245
247
  }
@@ -1,9 +1,9 @@
1
1
  export function getLine(html, startOffset) {
2
- return html.slice(0, startOffset).split(/\n/g).length;
2
+ return html.slice(0, startOffset).split(/\n/).length;
3
3
  }
4
4
  export function getCol(html, startOffset) {
5
- const lines = html.slice(0, startOffset).split(/\n/g);
6
- return (lines[lines.length - 1] ?? '').length + 1;
5
+ const lines = html.slice(0, startOffset).split(/\n/);
6
+ return (lines.at(-1) ?? '').length + 1;
7
7
  }
8
8
  export function getEndLine(html, line) {
9
9
  return html.split(/\r?\n/).length - 1 + line;
@@ -428,8 +428,8 @@ export function searchIDLAttribute(name) {
428
428
  };
429
429
  }
430
430
  function camelize(str) {
431
- return str.replace(/[:-][a-z]/g, $0 => $0[1]?.toUpperCase() ?? '');
431
+ return str.replaceAll(/[:-][a-z]/g, $0 => $0[1]?.toUpperCase() ?? '');
432
432
  }
433
433
  function hyphenize(str) {
434
- return str.replace(/[A-Z]/g, $0 => `-${$0.toLowerCase()}`);
434
+ return str.replaceAll(/[A-Z]/g, $0 => `-${$0.toLowerCase()}`);
435
435
  }
@@ -9,7 +9,7 @@ export function ignoreBlock(source, tags, maskChar = MASK_CHAR) {
9
9
  // Replace tags in attributes
10
10
  const attr = maskText(prepend(tag.start, '(?<=(?:"|\'))'), append(tag.end, '(?=(?:"|\'))'), replaced, (startTag, taggedCode, endTag) => {
11
11
  const mask = maskChar.repeat(startTag.length) +
12
- taggedCode.replace(/[^\n]/g, maskChar) +
12
+ taggedCode.replaceAll(/[^\n]/g, maskChar) +
13
13
  maskChar.repeat((endTag ?? '').length);
14
14
  return mask;
15
15
  });
@@ -18,7 +18,7 @@ export function ignoreBlock(source, tags, maskChar = MASK_CHAR) {
18
18
  // Replace tags in other nodes
19
19
  const text = maskText(tag.start, tag.end, replaced, (startTag, taggedCode, endTag) => {
20
20
  const mask = maskChar.repeat(startTag.length) +
21
- taggedCode.replace(/[^\n]/g, maskChar) +
21
+ taggedCode.replaceAll(/[^\n]/g, maskChar) +
22
22
  maskChar.repeat((endTag ?? '').length);
23
23
  const taggedMask = `<!${mask.slice(2).slice(0, -1)}>`;
24
24
  return taggedMask;
@@ -63,7 +63,7 @@ function maskText(start, end, replaced, masking) {
63
63
  export function restoreNode(
64
64
  // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
65
65
  nodeList, ignoreBlock) {
66
- nodeList = nodeList.slice();
66
+ nodeList = [...nodeList];
67
67
  const { source, stack, maskChar } = ignoreBlock;
68
68
  for (const node of nodeList) {
69
69
  if (node.type === 'comment' || node.type === 'text' || node.type === 'psblock') {
@@ -71,7 +71,7 @@ nodeList, ignoreBlock) {
71
71
  continue;
72
72
  }
73
73
  const parentNode = node.parentNode;
74
- const index = nodeList.findIndex(n => n === node);
74
+ const index = nodeList.indexOf(node);
75
75
  const insertList = [];
76
76
  let text = node.raw;
77
77
  let pointer = 0;
@@ -192,14 +192,26 @@ function snap(str, reg) {
192
192
  return [index, above, snapPoint, below];
193
193
  }
194
194
  function removeGlobalOption(reg) {
195
+ if (typeof reg === 'string') {
196
+ return new RegExp(escapeRegExpForStr(reg));
197
+ }
195
198
  return new RegExp(reg.source, reg.ignoreCase ? 'i' : '');
196
199
  }
197
200
  function prepend(reg, str) {
201
+ if (typeof reg === 'string') {
202
+ return new RegExp(str + escapeRegExpForStr(reg));
203
+ }
198
204
  return new RegExp(str + reg.source, reg.ignoreCase ? 'i' : '');
199
205
  }
200
206
  function append(reg, str) {
207
+ if (typeof reg === 'string') {
208
+ return new RegExp(escapeRegExpForStr(reg) + str);
209
+ }
201
210
  return new RegExp(reg.source + str, reg.ignoreCase ? 'i' : '');
202
211
  }
203
212
  function hasIgnoreBlock(textContent, maskChar) {
204
213
  return textContent.includes(maskChar);
205
214
  }
215
+ function escapeRegExpForStr(str) {
216
+ return str.replaceAll(/[!$()*+./:=?[\\\]^{|}]/g, '\\$&');
217
+ }
@@ -1,5 +1,5 @@
1
1
  export function ignoreFrontMatter(code) {
2
- const reStart = /^(?:\s*\r?\n)?---\r?\n/.exec(code);
2
+ const reStart = /^(?:\s*\n)?---\r?\n/.exec(code);
3
3
  if (!reStart) {
4
4
  return code;
5
5
  }
@@ -12,6 +12,6 @@ export function ignoreFrontMatter(code) {
12
12
  const endPoint = startPoint + reEnd.index + reEnd[0].length;
13
13
  const frontMatter = code.slice(0, endPoint);
14
14
  const afterCode = code.slice(endPoint);
15
- const masked = frontMatter.replace(/[^\r\n]/g, ' ');
15
+ const masked = frontMatter.replaceAll(/[^\n\r]/g, ' ');
16
16
  return masked + afterCode;
17
17
  }
package/lib/index.d.ts CHANGED
@@ -1,3 +1,5 @@
1
+ export * from './attr-parser.js';
2
+ export * from './attr-tokenizer.js';
1
3
  export * from './const.js';
2
4
  export * from './create-token.js';
3
5
  export * from './debugger.js';
@@ -12,5 +14,7 @@ export * from './ignore-front-matter.js';
12
14
  export * from './parse-attr.js';
13
15
  export * from './parser-error.js';
14
16
  export * from './remove-deprecated-node.js';
17
+ export * from './script-parser.js';
18
+ export * from './tag-parser.js';
15
19
  export * from './tag-splitter.js';
16
20
  export * from './walker.js';
package/lib/index.js CHANGED
@@ -1,3 +1,5 @@
1
+ export * from './attr-parser.js';
2
+ export * from './attr-tokenizer.js';
1
3
  export * from './const.js';
2
4
  export * from './create-token.js';
3
5
  export * from './debugger.js';
@@ -12,5 +14,7 @@ export * from './ignore-front-matter.js';
12
14
  export * from './parse-attr.js';
13
15
  export * from './parser-error.js';
14
16
  export * from './remove-deprecated-node.js';
17
+ export * from './script-parser.js';
18
+ export * from './tag-parser.js';
15
19
  export * from './tag-splitter.js';
16
20
  export * from './walker.js';
package/lib/parse-attr.js CHANGED
@@ -55,7 +55,7 @@ export function tokenize(raw, options) {
55
55
  const valueDelimiters = options?.valueDelimiters ?? defaultValueDelimiters;
56
56
  const equalDelimiter = options?.equal ?? defaultEqual;
57
57
  let state = 'b-name';
58
- const charactors = raw.split('');
58
+ const charactors = [...raw];
59
59
  let beforeName = '';
60
60
  let name = '';
61
61
  let afterName = '';
@@ -20,16 +20,16 @@ nodeOrders) {
20
20
  */
21
21
  const stack = {};
22
22
  const removeIndexes = [];
23
- nodeOrders.forEach((node, i) => {
23
+ for (const [i, node] of nodeOrders.entries()) {
24
24
  if (node.isGhost) {
25
- return;
25
+ continue;
26
26
  }
27
27
  const id = `${node.startLine}:${node.startCol}:${node.endLine}:${node.endCol}`;
28
28
  if (stack[id] != null) {
29
29
  removeIndexes.push(i);
30
30
  }
31
31
  stack[id] = i;
32
- });
32
+ }
33
33
  let r = nodeOrders.length;
34
34
  while (r-- > 0) {
35
35
  if (removeIndexes.includes(r)) {
@@ -0,0 +1,6 @@
1
+ export declare function scriptParser(script: string): any;
2
+ export declare function removeQuote(str: string): string;
3
+ export type ScriptTokenType = {
4
+ type: 'Identifier' | 'Boolean' | 'Numeric' | 'String' | 'Template' | 'Punctuator';
5
+ value: string;
6
+ };
@@ -0,0 +1,22 @@
1
+ // @ts-ignore
2
+ import { tokenize } from 'espree';
3
+ export function scriptParser(script) {
4
+ const tokens = tokenize(script, {
5
+ ecmaVersion: 'latest',
6
+ loc: false,
7
+ });
8
+ return tokens.map((token) => ({
9
+ type: token.type,
10
+ value: token.value,
11
+ }));
12
+ }
13
+ export function removeQuote(str) {
14
+ const quote = str[0];
15
+ if (quote !== '"' && quote !== "'") {
16
+ return str;
17
+ }
18
+ if (str.at(-1) !== quote) {
19
+ return str;
20
+ }
21
+ return str.slice(1, -1);
22
+ }
@@ -0,0 +1,10 @@
1
+ import type { MLASTAttr } from '@markuplint/ml-ast';
2
+ export declare function tagParser(raw: string, startLine: number, startCol: number, startOffset: number, offsetOffset?: number, offsetLine?: number, offsetColumn?: number, spaces?: ReadonlyArray<string>): {
3
+ beforeOpenTag: string;
4
+ tagName: string;
5
+ attrs: MLASTAttr[];
6
+ afterAttrSpaces: import("@markuplint/ml-ast").MLToken;
7
+ selfClosingSolidus: import("@markuplint/ml-ast").MLToken;
8
+ isOpenTag: boolean;
9
+ leftover: string;
10
+ };
@@ -0,0 +1,152 @@
1
+ import { attrTokenizer } from './attr-tokenizer.js';
2
+ import { defaultSpaces } from './const.js';
3
+ import { tokenizer } from './create-token.js';
4
+ var TagState;
5
+ (function (TagState) {
6
+ TagState[TagState["BeforeOpenTag"] = 0] = "BeforeOpenTag";
7
+ TagState[TagState["FirstCharOfTagName"] = 1] = "FirstCharOfTagName";
8
+ TagState[TagState["TagName"] = 2] = "TagName";
9
+ TagState[TagState["Attrs"] = 3] = "Attrs";
10
+ TagState[TagState["AfterAttrs"] = 4] = "AfterAttrs";
11
+ TagState[TagState["AfterOpenTag"] = 5] = "AfterOpenTag";
12
+ })(TagState || (TagState = {}));
13
+ export function tagParser(raw, startLine, startCol, startOffset, offsetOffset = 0, offsetLine = 0, offsetColumn = 0, spaces = defaultSpaces) {
14
+ let offset = startOffset + offsetOffset;
15
+ let line = startLine + offsetLine;
16
+ let col = startCol + (startLine === 1 ? offsetColumn : 0);
17
+ let state = TagState.BeforeOpenTag;
18
+ let beforeOpenTagChars = '';
19
+ let tagName = '';
20
+ let afterAttrsSpaceChars = '';
21
+ let selfClosingSolidusChar = '';
22
+ let isOpenTag = true;
23
+ const attrs = [];
24
+ const chars = [...raw];
25
+ while (chars.length > 0) {
26
+ if (state === TagState.AfterOpenTag) {
27
+ break;
28
+ }
29
+ const char = chars.shift();
30
+ stateSwitch: switch (state) {
31
+ case TagState.BeforeOpenTag: {
32
+ if (char === '<') {
33
+ const beforeOpenTag = tokenizer(beforeOpenTagChars, line, col, offset);
34
+ line = beforeOpenTag.endLine;
35
+ col = beforeOpenTag.endCol;
36
+ offset = beforeOpenTag.endOffset;
37
+ // Add `<` length
38
+ col += 1;
39
+ offset += 1;
40
+ state = TagState.FirstCharOfTagName;
41
+ break;
42
+ }
43
+ beforeOpenTagChars += char;
44
+ break;
45
+ }
46
+ case TagState.FirstCharOfTagName: {
47
+ if (/[a-z]/i.test(char)) {
48
+ tagName += char;
49
+ state = TagState.TagName;
50
+ break;
51
+ }
52
+ if (char === '/') {
53
+ isOpenTag = false;
54
+ break;
55
+ }
56
+ chars.unshift(char);
57
+ state = TagState.AfterOpenTag;
58
+ break;
59
+ }
60
+ case TagState.TagName: {
61
+ if (spaces.includes(char)) {
62
+ chars.unshift(char);
63
+ if (!isOpenTag) {
64
+ // Add `/` of `</`(close tag) length
65
+ offset += 1;
66
+ col += 1;
67
+ }
68
+ offset += tagName.length;
69
+ col += tagName.length;
70
+ state = TagState.Attrs;
71
+ break;
72
+ }
73
+ if (char === '/') {
74
+ chars.unshift(char);
75
+ state = TagState.AfterAttrs;
76
+ break;
77
+ }
78
+ if (char === '>') {
79
+ state = TagState.AfterOpenTag;
80
+ break;
81
+ }
82
+ tagName += char;
83
+ break;
84
+ }
85
+ case TagState.Attrs: {
86
+ let leftover = char + chars.join('');
87
+ while (leftover.trim()) {
88
+ if (leftover.trim().startsWith('/') || leftover.trim().startsWith('>')) {
89
+ chars.length = 0;
90
+ chars.push(...leftover);
91
+ state = TagState.AfterAttrs;
92
+ break stateSwitch;
93
+ }
94
+ const attr = attrTokenizer(leftover, line, col, offset);
95
+ line = attr.endLine;
96
+ col = attr.endCol;
97
+ offset = attr.endOffset;
98
+ if (leftover === attr.__leftover) {
99
+ throw new SyntaxError(`Invalid attribute syntax: ${leftover}`);
100
+ }
101
+ leftover = attr.__leftover ?? '';
102
+ delete attr.__leftover;
103
+ attrs.push(attr);
104
+ }
105
+ break;
106
+ }
107
+ case TagState.AfterAttrs: {
108
+ if (char === '>') {
109
+ state = TagState.AfterOpenTag;
110
+ break;
111
+ }
112
+ if (spaces.includes(char)) {
113
+ afterAttrsSpaceChars += char;
114
+ break;
115
+ }
116
+ if (char === '/') {
117
+ selfClosingSolidusChar = char;
118
+ break;
119
+ }
120
+ throw new SyntaxError(`Invalid tag syntax: "${raw}"`);
121
+ }
122
+ }
123
+ }
124
+ const leftover = chars.join('');
125
+ if ((!leftover && state === TagState.TagName) || tagName === '') {
126
+ throw new SyntaxError(`Invalid tag syntax: "${raw}"`);
127
+ }
128
+ // console.log({
129
+ // state,
130
+ // leftover,
131
+ // afterAttrsSpaceChars,
132
+ // selfClosingSolidusChar,
133
+ // attrs: attrs.map(a => a.raw),
134
+ // });
135
+ const afterAttrSpaces = tokenizer(afterAttrsSpaceChars, line, col, offset);
136
+ line = afterAttrSpaces.endLine;
137
+ col = afterAttrSpaces.endCol;
138
+ offset = afterAttrSpaces.endOffset;
139
+ const selfClosingSolidus = tokenizer(selfClosingSolidusChar, line, col, offset);
140
+ line = selfClosingSolidus.endLine;
141
+ col = selfClosingSolidus.endCol;
142
+ offset = selfClosingSolidus.endOffset;
143
+ return {
144
+ beforeOpenTag: beforeOpenTagChars,
145
+ tagName,
146
+ attrs,
147
+ afterAttrSpaces,
148
+ selfClosingSolidus,
149
+ isOpenTag,
150
+ leftover,
151
+ };
152
+ }
@@ -4,4 +4,4 @@ export interface N {
4
4
  line: number;
5
5
  col: number;
6
6
  }
7
- export default function tagSplitter(raw: string, line: number, col: number): N[];
7
+ export declare function tagSplitter(raw: string, line: number, col: number): N[];
@@ -1,6 +1,6 @@
1
1
  import { reSplitterTag, reTagName } from './const.js';
2
2
  import { getEndCol, getEndLine } from '@markuplint/parser-utils';
3
- export default function tagSplitter(raw, line, col) {
3
+ export function tagSplitter(raw, line, col) {
4
4
  return withLocation(tagSplitterAsString(raw), line, col);
5
5
  }
6
6
  function tagSplitterAsString(raw) {
@@ -8,7 +8,7 @@ function tagSplitterAsString(raw) {
8
8
  if (!tagMatches) {
9
9
  return [raw];
10
10
  }
11
- const tokens = Array.from(tagMatches);
11
+ const tokens = [...tagMatches];
12
12
  tokens.unshift(); // remove all match
13
13
  const nodes = [];
14
14
  let rest = raw;
@@ -31,15 +31,7 @@ function tagSplitterAsString(raw) {
31
31
  function withLocation(nodes, line, col) {
32
32
  const result = [];
33
33
  for (const node of nodes) {
34
- if (node[0] !== '<') {
35
- result.push({
36
- type: 'text',
37
- raw: node,
38
- line,
39
- col,
40
- });
41
- }
42
- else {
34
+ if (node[0] === '<') {
43
35
  const label = node.slice(1).slice(0, -1);
44
36
  if (reTagName.test(label)) {
45
37
  result.push({
@@ -49,39 +41,54 @@ function withLocation(nodes, line, col) {
49
41
  col,
50
42
  });
51
43
  }
52
- else if (label[0] === '/') {
53
- result.push({
54
- type: 'endtag',
55
- raw: node,
56
- line,
57
- col,
58
- });
59
- }
60
- else if (label[0] === '!') {
61
- result.push({
62
- type: 'comment',
63
- raw: node,
64
- line,
65
- col,
66
- });
67
- }
68
- else if (label[0] === '?') {
69
- result.push({
70
- type: 'boguscomment',
71
- raw: node,
72
- line,
73
- col,
74
- });
75
- }
76
44
  else {
77
- result.push({
78
- type: 'text',
79
- raw: node,
80
- line,
81
- col,
82
- });
45
+ switch (label[0]) {
46
+ case '/': {
47
+ result.push({
48
+ type: 'endtag',
49
+ raw: node,
50
+ line,
51
+ col,
52
+ });
53
+ break;
54
+ }
55
+ case '!': {
56
+ result.push({
57
+ type: 'comment',
58
+ raw: node,
59
+ line,
60
+ col,
61
+ });
62
+ break;
63
+ }
64
+ case '?': {
65
+ result.push({
66
+ type: 'boguscomment',
67
+ raw: node,
68
+ line,
69
+ col,
70
+ });
71
+ break;
72
+ }
73
+ default: {
74
+ result.push({
75
+ type: 'text',
76
+ raw: node,
77
+ line,
78
+ col,
79
+ });
80
+ }
81
+ }
83
82
  }
84
83
  }
84
+ else {
85
+ result.push({
86
+ type: 'text',
87
+ raw: node,
88
+ line,
89
+ col,
90
+ });
91
+ }
85
92
  line = getEndLine(node, line);
86
93
  col = getEndCol(node, col);
87
94
  }
package/lib/types.d.ts CHANGED
@@ -7,8 +7,8 @@ export type Code = {
7
7
  };
8
8
  export type IgnoreTag = {
9
9
  readonly type: string;
10
- readonly start: Readonly<RegExp>;
11
- readonly end: Readonly<RegExp>;
10
+ readonly start: Readonly<RegExp> | string;
11
+ readonly end: Readonly<RegExp> | string;
12
12
  };
13
13
  export type IgnoreBlock = {
14
14
  readonly source: string;
@@ -16,3 +16,7 @@ export type IgnoreBlock = {
16
16
  readonly stack: readonly Code[];
17
17
  readonly maskChar: string;
18
18
  };
19
+ export type QuoteSet = {
20
+ readonly start: string;
21
+ readonly end: string;
22
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@markuplint/parser-utils",
3
- "version": "4.0.0-alpha.3",
3
+ "version": "4.0.0-alpha.5",
4
4
  "description": "Utility module for markuplint parser plugin",
5
5
  "repository": "git@github.com:markuplint/markuplint.git",
6
6
  "author": "Yusuke Hirao <yusukehirao@me.com>",
@@ -24,12 +24,12 @@
24
24
  "clean": "tsc --build --clean"
25
25
  },
26
26
  "dependencies": {
27
- "@markuplint/ml-ast": "4.0.0-alpha.3",
28
- "@markuplint/types": "4.0.0-alpha.3",
29
- "@types/uuid": "^9.0.4",
30
- "tslib": "^2.6.2",
31
- "type-fest": "^4.3.1",
27
+ "@markuplint/ml-ast": "4.0.0-alpha.5",
28
+ "@markuplint/types": "4.0.0-alpha.5",
29
+ "@types/uuid": "^9.0.6",
30
+ "espree": "^9.6.1",
31
+ "type-fest": "^4.5.0",
32
32
  "uuid": "^9.0.1"
33
33
  },
34
- "gitHead": "380836f7adc1ff7e8eaf9d869e68d29eee8f3b7e"
34
+ "gitHead": "0c3e4690662edf1765bcc4b6411ec5507c1e2ea3"
35
35
  }