@markuplint/html-parser 4.0.0-alpha.3 → 4.0.0-alpha.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
package/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2017-2019 Yusuke Hirao
3
+ Copyright (c) 2017-2023 Yusuke Hirao
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1,7 +1,6 @@
1
1
  // @ts-nocheck TODO: Parse5(https://github.com/inikulin/parse5) supports to expose type definitions as submodules.
2
- import { detectElementType, getEndCol, getEndLine, sliceFragment, uuid } from '@markuplint/parser-utils';
2
+ import { detectElementType, getEndCol, getEndLine, sliceFragment, tagParser, uuid } from '@markuplint/parser-utils';
3
3
  import { parse, parseFragment } from 'parse5';
4
- import parseRawTag from './parse-raw-tag.js';
5
4
  const P5_OPTIONS = {
6
5
  scriptingEnabled: false,
7
6
  sourceCodeLocationInfo: true,
@@ -147,7 +146,7 @@ parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
147
146
  const startTagRaw = tagLoc
148
147
  ? rawHtml.slice(tagLoc.startOffset, tagLoc.endOffset)
149
148
  : rawHtml.slice(startOffset, endOffset ?? startOffset);
150
- const tagTokens = parseRawTag(startTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
149
+ const tagTokens = tagParser(startTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
151
150
  const tagName = tagTokens.tagName;
152
151
  let endTag = null;
153
152
  let endTagLoc = 'endTag' in location ? location.endTag : null;
@@ -168,7 +167,7 @@ parentNode, rawHtml, offsetOffset, offsetLine, offsetColumn) {
168
167
  if (endTagLoc) {
169
168
  const { startOffset, endOffset, startLine, endLine, startCol, endCol } = endTagLoc;
170
169
  const endTagRaw = rawHtml.slice(startOffset, endOffset);
171
- const endTagTokens = parseRawTag(endTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
170
+ const endTagTokens = tagParser(endTagRaw, startLine, startCol, startOffset, offsetOffset, offsetLine, offsetColumn);
172
171
  const endTagName = endTagTokens.tagName;
173
172
  endTag = {
174
173
  uuid: uuid(),
package/lib/index.d.ts CHANGED
@@ -1,6 +1,4 @@
1
- export { default as attrTokenizer } from './attr-tokenizer.js';
2
- export { default as isDocumentFragment } from './is-document-fragment.js';
3
- export { default as parseRawTag } from './parse-raw-tag.js';
1
+ export { isDocumentFragment } from './is-document-fragment.js';
4
2
  export { getNamespace } from './get-namespace.js';
5
3
  export { parse } from './parse.js';
6
4
  export { createTree } from './create-tree.js';
package/lib/index.js CHANGED
@@ -1,6 +1,4 @@
1
- export { default as attrTokenizer } from './attr-tokenizer.js';
2
- export { default as isDocumentFragment } from './is-document-fragment.js';
3
- export { default as parseRawTag } from './parse-raw-tag.js';
1
+ export { isDocumentFragment } from './is-document-fragment.js';
4
2
  export { getNamespace } from './get-namespace.js';
5
3
  export { parse } from './parse.js';
6
4
  export { createTree } from './create-tree.js';
@@ -1 +1 @@
1
- export default function isDocumentFragment(html: string): boolean;
1
+ export declare function isDocumentFragment(html: string): boolean;
@@ -1,3 +1,3 @@
1
- export default function isDocumentFragment(html) {
2
- return !/^\s*(<!doctype html(?:\s*.+)?>|<html(?:\s|>))/im.test(html);
1
+ export function isDocumentFragment(html) {
2
+ return !/^\s*(?:<!doctype html(?:\s*(?:\S.*|[\t\v\f \u00A0\u1680\u2000-\u200A\u202F\u205F\u3000\uFEFF]))?>|<html[\s>])/im.test(html);
3
3
  }
@@ -5,9 +5,7 @@ export function isStartsHeadTagOrBodyTag(rawCode) {
5
5
  export function optimizeStartsHeadTagOrBodyTagSetup(rawCode) {
6
6
  const heads = [];
7
7
  const bodies = [];
8
- const code = rawCode.replace(
9
- // eslint-disable-next-line no-control-regex
10
- /(?<=<\/?)(?:head|body)(?=\u0009|\u000A|\u000C|\u0020|\/|>|\u0000)/gi, tag => {
8
+ const code = rawCode.replaceAll(/(?<=<\/?)(?:head|body)(?=[\0\t\n\f />])/gi, tag => {
11
9
  const prefix = `x-${UNDUPLICATED_CHAR}`;
12
10
  let name;
13
11
  if (/^head$/i.test(tag)) {
@@ -34,18 +32,18 @@ export function optimizeStartsHeadTagOrBodyTagResume(
34
32
  nodeList,
35
33
  // eslint-disable-next-line @typescript-eslint/prefer-readonly-parameter-types
36
34
  replacements) {
37
- nodeList.forEach(node => {
35
+ for (const node of nodeList) {
38
36
  if (!node.nodeName.startsWith(`x-${UNDUPLICATED_CHAR}`)) {
39
- return;
37
+ continue;
40
38
  }
41
39
  const realName = node.nodeName === `x-${UNDUPLICATED_CHAR}h` ? replacements.heads.shift() : replacements.bodies.shift();
42
40
  if (!realName) {
43
- return;
41
+ continue;
44
42
  }
45
43
  node.raw = node.raw.replace(node.nodeName, realName);
46
44
  node.nodeName = realName;
47
45
  if (node.type === 'starttag') {
48
46
  node.elementType = 'html';
49
47
  }
50
- });
48
+ }
51
49
  }
package/lib/parse.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import { ignoreFrontMatter, flattenNodes } from '@markuplint/parser-utils';
2
2
  import { createTree } from './create-tree.js';
3
- import isDocumentFragment from './is-document-fragment.js';
3
+ import { isDocumentFragment } from './is-document-fragment.js';
4
4
  import { isStartsHeadTagOrBodyTag, optimizeStartsHeadTagOrBodyTagResume, optimizeStartsHeadTagOrBodyTagSetup, } from './optimize-starts-head-or-body.js';
5
5
  export const parse = (rawCode, options) => {
6
6
  if (options?.ignoreFrontMatter) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@markuplint/html-parser",
3
- "version": "4.0.0-alpha.3",
3
+ "version": "4.0.0-alpha.5",
4
4
  "description": "HTML parser for markuplint",
5
5
  "repository": "git@github.com:markuplint/markuplint.git",
6
6
  "author": "Yusuke Hirao <yusukehirao@me.com>",
@@ -25,11 +25,10 @@
25
25
  "clean": "tsc --build --clean"
26
26
  },
27
27
  "dependencies": {
28
- "@markuplint/ml-ast": "4.0.0-alpha.3",
29
- "@markuplint/parser-utils": "4.0.0-alpha.3",
28
+ "@markuplint/ml-ast": "4.0.0-alpha.5",
29
+ "@markuplint/parser-utils": "4.0.0-alpha.5",
30
30
  "parse5": "7.1.2",
31
- "tslib": "^2.6.2",
32
- "type-fest": "^4.3.1"
31
+ "type-fest": "^4.5.0"
33
32
  },
34
- "gitHead": "380836f7adc1ff7e8eaf9d869e68d29eee8f3b7e"
33
+ "gitHead": "0c3e4690662edf1765bcc4b6411ec5507c1e2ea3"
35
34
  }
@@ -1,2 +0,0 @@
1
- import type { MLASTHTMLAttr } from '@markuplint/ml-ast';
2
- export default function attrTokenizer(raw: string, line: number, col: number, startOffset: number): MLASTHTMLAttr;
@@ -1,80 +0,0 @@
1
- import { tokenizer, uuid } from '@markuplint/parser-utils';
2
- const reAttrsInStartTag =
3
- // eslint-disable-next-line no-control-regex
4
- /(\s*)([^\x00-\x1f\x7f-\x9f "'>/=]+)(?:(\s*)(=)(\s*)(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s]*)))?/;
5
- export default function attrTokenizer(raw, line, col, startOffset) {
6
- const attrMatchedMap = raw.match(reAttrsInStartTag);
7
- if (!attrMatchedMap) {
8
- throw new SyntaxError('Illegal attribute token');
9
- }
10
- const spacesBeforeAttrString = attrMatchedMap[1] ?? '';
11
- const nameChars = attrMatchedMap[2] ?? '';
12
- const spacesBeforeEqualChars = attrMatchedMap[3] ?? '';
13
- const equalChars = attrMatchedMap[4] ?? null;
14
- const spacesAfterEqualChars = attrMatchedMap[5] ?? '';
15
- const quoteChars = attrMatchedMap[6] != null ? '"' : attrMatchedMap[7] != null ? "'" : null;
16
- const valueChars = attrMatchedMap[6] ?? attrMatchedMap[7] ?? attrMatchedMap[8] ?? (quoteChars ? '' : null);
17
- let offset = startOffset;
18
- const spacesBeforeName = tokenizer(spacesBeforeAttrString, line, col, offset);
19
- line = spacesBeforeName.endLine;
20
- col = spacesBeforeName.endCol;
21
- offset = spacesBeforeName.endOffset;
22
- const name = tokenizer(nameChars, line, col, offset);
23
- line = name.endLine;
24
- col = name.endCol;
25
- offset = name.endOffset;
26
- const spacesBeforeEqual = tokenizer(spacesBeforeEqualChars, line, col, offset);
27
- line = spacesBeforeEqual.endLine;
28
- col = spacesBeforeEqual.endCol;
29
- offset = spacesBeforeEqual.endOffset;
30
- const equal = tokenizer(equalChars, line, col, offset);
31
- line = equal.endLine;
32
- col = equal.endCol;
33
- offset = equal.endOffset;
34
- const spacesAfterEqual = tokenizer(spacesAfterEqualChars, line, col, offset);
35
- line = spacesAfterEqual.endLine;
36
- col = spacesAfterEqual.endCol;
37
- offset = spacesAfterEqual.endOffset;
38
- const startQuote = tokenizer(quoteChars, line, col, offset);
39
- line = startQuote.endLine;
40
- col = startQuote.endCol;
41
- offset = startQuote.endOffset;
42
- const value = tokenizer(valueChars, line, col, offset);
43
- line = value.endLine;
44
- col = value.endCol;
45
- offset = value.endOffset;
46
- const endQuote = tokenizer(quoteChars, line, col, offset);
47
- const attrToken = tokenizer(nameChars +
48
- spacesBeforeEqualChars +
49
- (equalChars ?? '') +
50
- spacesAfterEqualChars +
51
- (quoteChars ?? '') +
52
- (valueChars ?? '') +
53
- (quoteChars ?? ''), name.startLine, name.startCol, name.startOffset);
54
- return {
55
- type: 'html-attr',
56
- uuid: uuid(),
57
- raw: attrToken.raw,
58
- startOffset: attrToken.startOffset,
59
- endOffset: attrToken.endOffset,
60
- startLine: attrToken.startLine,
61
- endLine: attrToken.endLine,
62
- startCol: attrToken.startCol,
63
- endCol: attrToken.endCol,
64
- spacesBeforeName,
65
- name,
66
- spacesBeforeEqual,
67
- equal,
68
- spacesAfterEqual,
69
- startQuote,
70
- value,
71
- endQuote,
72
- isDuplicatable: false,
73
- nodeName: name.raw,
74
- parentNode: null,
75
- prevNode: null,
76
- nextNode: null,
77
- isFragment: false,
78
- isGhost: false,
79
- };
80
- }
@@ -1,9 +0,0 @@
1
- import type { MLASTAttr, MLToken } from '@markuplint/ml-ast';
2
- type TagTokens = {
3
- tagName: string;
4
- attrs: MLASTAttr[];
5
- selfClosingSolidus: MLToken;
6
- endSpace: MLToken;
7
- };
8
- export default function parseRawTag(raw: string, startLine: number, startCol: number, startOffset: number, offsetOffset?: number, offsetLine?: number, offsetColumn?: number): TagTokens;
9
- export {};
@@ -1,51 +0,0 @@
1
- import { reTag, reTagName, isPotentialCustomElementName, tokenizer } from '@markuplint/parser-utils';
2
- import attrTokenizer from './attr-tokenizer.js';
3
- // eslint-disable-next-line no-control-regex
4
- const reAttrsInStartTag = /\s*[^\x00-\x1f\x7f-\x9f "'>/=]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^\s]*))?/;
5
- const reEndTokens = /(\s*\/)?(\s*)>$/;
6
- export default function parseRawTag(raw, startLine, startCol, startOffset, offsetOffset = 0, offsetLine = 0, offsetColumn = 0) {
7
- let offset = startOffset + offsetOffset;
8
- let line = startLine + offsetLine;
9
- let col = startCol + (startLine === 1 ? offsetColumn : 0);
10
- const matches = raw.match(reTag);
11
- const tagWithAttrs = matches?.[1];
12
- if (!tagWithAttrs) {
13
- throw new SyntaxError(`Invalid tag syntax: "${raw}"`);
14
- }
15
- // eslint-disable-next-line no-control-regex
16
- const tagNameSplitted = tagWithAttrs.split(/[\u0000\u0009\u000A\u000C\u0020/>]/);
17
- const tagName = tagNameSplitted[0] || tagNameSplitted[1];
18
- if (!tagName || (!reTagName.test(tagName) && !isPotentialCustomElementName(tagName))) {
19
- throw new SyntaxError(`Invalid tag name: "${tagName}" in <${tagWithAttrs}>`);
20
- }
21
- const tagStartPos = tagWithAttrs.indexOf(tagName);
22
- let rawAttrs = tagWithAttrs.substring(tagStartPos + tagName.length);
23
- // console.log({ raw, tagStartPos, tagName, rawAttrs });
24
- col += tagName.length + 1 + tagStartPos;
25
- offset += tagName.length + 1 + tagStartPos;
26
- const attrs = [];
27
- while (reAttrsInStartTag.test(rawAttrs)) {
28
- const attrMatchedMap = rawAttrs.match(reAttrsInStartTag);
29
- if (attrMatchedMap && attrMatchedMap[0]) {
30
- const rawAttr = attrMatchedMap[0];
31
- const attr = attrTokenizer(rawAttr, line, col, offset);
32
- line = attr.endLine;
33
- col = attr.endCol;
34
- offset = attr.endOffset;
35
- rawAttrs = rawAttrs.substr(rawAttr.length);
36
- attrs.push(attr);
37
- }
38
- }
39
- const endTokens = reEndTokens.exec(raw);
40
- const selfClosingSolidus = tokenizer(endTokens?.[1] ?? '', line, col, offset);
41
- line = selfClosingSolidus.endLine;
42
- col = selfClosingSolidus.endCol;
43
- offset = selfClosingSolidus.endOffset;
44
- const endSpace = tokenizer(endTokens?.[2] ?? '', line, col, offset);
45
- return {
46
- tagName,
47
- attrs,
48
- selfClosingSolidus,
49
- endSpace,
50
- };
51
- }