@tkeron/html-parser 0.1.7 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/README.md +1 -7
  2. package/bun.lock +5 -0
  3. package/index.ts +4 -0
  4. package/package.json +7 -1
  5. package/src/css-selector.ts +1 -1
  6. package/src/dom-simulator.ts +38 -16
  7. package/src/encoding.ts +39 -0
  8. package/src/index.ts +9 -0
  9. package/src/parser.ts +478 -144
  10. package/src/serializer.ts +450 -0
  11. package/src/tokenizer.ts +59 -43
  12. package/tests/advanced.test.ts +119 -106
  13. package/tests/custom-elements.test.ts +172 -162
  14. package/tests/dom-extended.test.ts +12 -12
  15. package/tests/dom-manipulation.test.ts +9 -10
  16. package/tests/dom.test.ts +32 -27
  17. package/tests/helpers/tokenizer-adapter.test.ts +70 -0
  18. package/tests/helpers/tokenizer-adapter.ts +65 -0
  19. package/tests/helpers/tree-adapter.test.ts +39 -0
  20. package/tests/helpers/tree-adapter.ts +43 -0
  21. package/tests/html5lib-data/tokenizer/namedEntities.test +42422 -0
  22. package/tests/html5lib-data/tokenizer/pendingSpecChanges.test +9 -0
  23. package/tests/html5lib-data/tree-construction/adoption01.dat +354 -0
  24. package/tests/html5lib-data/tree-construction/adoption02.dat +39 -0
  25. package/tests/html5lib-data/tree-construction/domjs-unsafe.dat +0 -0
  26. package/tests/html5lib-data/tree-construction/entities02.dat +309 -0
  27. package/tests/html5lib-data/tree-construction/html5test-com.dat +301 -0
  28. package/tests/html5lib-data/tree-construction/math.dat +104 -0
  29. package/tests/html5lib-data/tree-construction/namespace-sensitivity.dat +22 -0
  30. package/tests/html5lib-data/tree-construction/noscript01.dat +237 -0
  31. package/tests/html5lib-data/tree-construction/ruby.dat +302 -0
  32. package/tests/html5lib-data/tree-construction/scriptdata01.dat +372 -0
  33. package/tests/html5lib-data/tree-construction/svg.dat +104 -0
  34. package/tests/html5lib-data/tree-construction/template.dat +1673 -0
  35. package/tests/html5lib-data/tree-construction/tests10.dat +853 -0
  36. package/tests/html5lib-data/tree-construction/tests11.dat +523 -0
  37. package/tests/html5lib-data/tree-construction/tests20.dat +842 -0
  38. package/tests/html5lib-data/tree-construction/tests21.dat +306 -0
  39. package/tests/html5lib-data/tree-construction/tests23.dat +168 -0
  40. package/tests/html5lib-data/tree-construction/tests24.dat +79 -0
  41. package/tests/html5lib-data/tree-construction/tests5.dat +210 -0
  42. package/tests/html5lib-data/tree-construction/tests6.dat +663 -0
  43. package/tests/html5lib-data/tree-construction/tests_innerHTML_1.dat +844 -0
  44. package/tests/parser.test.ts +172 -193
  45. package/tests/serializer-core.test.ts +16 -0
  46. package/tests/serializer-data/core.test +125 -0
  47. package/tests/serializer-data/injectmeta.test +66 -0
  48. package/tests/serializer-data/optionaltags.test +965 -0
  49. package/tests/serializer-data/options.test +60 -0
  50. package/tests/serializer-data/whitespace.test +51 -0
  51. package/tests/serializer-injectmeta.test.ts +16 -0
  52. package/tests/serializer-optionaltags.test.ts +16 -0
  53. package/tests/serializer-options.test.ts +16 -0
  54. package/tests/serializer-whitespace.test.ts +16 -0
  55. package/tests/tokenizer-namedEntities.test.ts +20 -0
  56. package/tests/tokenizer-pendingSpecChanges.test.ts +20 -0
  57. package/tests/tokenizer.test.ts +3 -6
  58. package/tests/tree-construction-adoption01.test.ts +37 -0
  59. package/tests/tree-construction-adoption02.test.ts +34 -0
  60. package/tests/tree-construction-domjs-unsafe.test.ts +24 -0
  61. package/tests/tree-construction-entities02.test.ts +33 -0
  62. package/tests/tree-construction-html5test-com.test.ts +24 -0
  63. package/tests/tree-construction-math.test.ts +18 -0
  64. package/tests/tree-construction-namespace-sensitivity.test.ts +18 -0
  65. package/tests/tree-construction-noscript01.test.ts +18 -0
  66. package/tests/tree-construction-ruby.test.ts +21 -0
  67. package/tests/tree-construction-scriptdata01.test.ts +21 -0
  68. package/tests/tree-construction-svg.test.ts +21 -0
  69. package/tests/tree-construction-template.test.ts +21 -0
  70. package/tests/tree-construction-tests10.test.ts +21 -0
  71. package/tests/tree-construction-tests11.test.ts +21 -0
  72. package/tests/tree-construction-tests20.test.ts +18 -0
  73. package/tests/tree-construction-tests21.test.ts +18 -0
  74. package/tests/tree-construction-tests23.test.ts +18 -0
  75. package/tests/tree-construction-tests24.test.ts +18 -0
  76. package/tests/tree-construction-tests5.test.ts +21 -0
  77. package/tests/tree-construction-tests6.test.ts +21 -0
  78. package/tests/tree-construction-tests_innerHTML_1.test.ts +21 -0
  79. package/tests/official/README.md +0 -87
  80. package/tests/official/acid/acid-tests.test.ts +0 -309
  81. package/tests/official/final-output/final-output.test.ts +0 -361
  82. package/tests/official/html5lib/tokenizer-utils.ts +0 -192
  83. package/tests/official/html5lib/tokenizer.test.ts +0 -171
  84. package/tests/official/html5lib/tree-construction-utils.ts +0 -194
  85. package/tests/official/html5lib/tree-construction.test.ts +0 -250
  86. package/tests/official/validator/validator-tests.test.ts +0 -237
  87. package/tests/official/validator-nu/validator-nu.test.ts +0 -335
  88. package/tests/official/whatwg/whatwg-tests.test.ts +0 -205
  89. package/tests/official/wpt/wpt-tests.test.ts +0 -409
@@ -0,0 +1,16 @@
1
+ import { expect, it, describe } from 'bun:test';
2
+ import { serializeTokens } from '../src/serializer';
3
+ import { readFileSync } from 'fs';
4
+
5
+ describe('Serializer Core Tests', () => {
6
+ const content = readFileSync('tests/serializer-data/core.test', 'utf8');
7
+ const data = JSON.parse(content);
8
+ const tests = data.tests;
9
+
10
+ tests.forEach((test: any, index: number) => {
11
+ it(test.description, () => {
12
+ const result = serializeTokens(test.input);
13
+ expect(result).toBe(test.expected[0]);
14
+ });
15
+ });
16
+ });
@@ -0,0 +1,125 @@
1
+ {"tests": [
2
+
3
+ {"description": "proper attribute value escaping",
4
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test \"with\" &quot;"}]]],
5
+ "expected": ["<span title='test \"with\" &amp;quot;'>"]
6
+ },
7
+
8
+ {"description": "proper attribute value non-quoting",
9
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo"}]]],
10
+ "expected": ["<span title=foo>"],
11
+ "xhtml": ["<span title=\"foo\">"]
12
+ },
13
+
14
+ {"description": "proper attribute value non-quoting (with <)",
15
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo<bar"}]]],
16
+ "expected": ["<span title=foo<bar>"],
17
+ "xhtml": ["<span title=\"foo&lt;bar\">"]
18
+ },
19
+
20
+ {"description": "proper attribute value quoting (with =)",
21
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo=bar"}]]],
22
+ "expected": ["<span title=\"foo=bar\">"]
23
+ },
24
+
25
+ {"description": "proper attribute value quoting (with >)",
26
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo>bar"}]]],
27
+ "expected": ["<span title=\"foo>bar\">"]
28
+ },
29
+
30
+ {"description": "proper attribute value quoting (with \")",
31
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\"bar"}]]],
32
+ "expected": ["<span title='foo\"bar'>"]
33
+ },
34
+
35
+ {"description": "proper attribute value quoting (with ')",
36
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo'bar"}]]],
37
+ "expected": ["<span title=\"foo'bar\">"]
38
+ },
39
+
40
+ {"description": "proper attribute value quoting (with both \" and ')",
41
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo'bar\"baz"}]]],
42
+ "expected": ["<span title=\"foo'bar&quot;baz\">"]
43
+ },
44
+
45
+ {"description": "proper attribute value quoting (with space)",
46
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo bar"}]]],
47
+ "expected": ["<span title=\"foo bar\">"]
48
+ },
49
+
50
+ {"description": "proper attribute value quoting (with tab)",
51
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\tbar"}]]],
52
+ "expected": ["<span title=\"foo\tbar\">"]
53
+ },
54
+
55
+ {"description": "proper attribute value quoting (with LF)",
56
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\nbar"}]]],
57
+ "expected": ["<span title=\"foo\nbar\">"]
58
+ },
59
+
60
+ {"description": "proper attribute value quoting (with CR)",
61
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\rbar"}]]],
62
+ "expected": ["<span title=\"foo\rbar\">"]
63
+ },
64
+
65
+ {"description": "proper attribute value non-quoting (with linetab)",
66
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\u000Bbar"}]]],
67
+ "expected": ["<span title=foo\u000Bbar>"],
68
+ "xhtml": ["<span title=\"foo\u000Bbar\">"]
69
+ },
70
+
71
+ {"description": "proper attribute value quoting (with form feed)",
72
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\u000Cbar"}]]],
73
+ "expected": ["<span title=\"foo\u000Cbar\">"]
74
+ },
75
+
76
+ {"description": "void element (as EmptyTag token)",
77
+ "input": [["EmptyTag", "img", {}]],
78
+ "expected": ["<img>"],
79
+ "xhtml": ["<img />"]
80
+ },
81
+
82
+ {"description": "void element (as StartTag token)",
83
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "img", {}]],
84
+ "expected": ["<img>"],
85
+ "xhtml": ["<img />"]
86
+ },
87
+
88
+ {"description": "doctype in error",
89
+ "input": [["Doctype", "foo"]],
90
+ "expected": ["<!DOCTYPE foo>"]
91
+ },
92
+
93
+ {"description": "character data",
94
+ "options": {"encoding":"utf-8"},
95
+ "input": [["Characters", "a<b>c&d"]],
96
+ "expected": ["a&lt;b&gt;c&amp;d"]
97
+ },
98
+
99
+ {"description": "rcdata",
100
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
101
+ "expected": ["<script>a<b>c&d"],
102
+ "xhtml": ["<script>a&lt;b&gt;c&amp;d"]
103
+ },
104
+
105
+ {"description": "doctype",
106
+ "input": [["Doctype", "HTML"]],
107
+ "expected": ["<!DOCTYPE HTML>"]
108
+ },
109
+
110
+ {"description": "HTML 4.01 DOCTYPE",
111
+ "input": [["Doctype", "HTML", "-//W3C//DTD HTML 4.01//EN", "http://www.w3.org/TR/html4/strict.dtd"]],
112
+ "expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"]
113
+ },
114
+
115
+ {"description": "HTML 4.01 DOCTYPE without system identifier",
116
+ "input": [["Doctype", "HTML", "-//W3C//DTD HTML 4.01//EN"]],
117
+ "expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">"]
118
+ },
119
+
120
+ {"description": "IBM DOCTYPE without public identifier",
121
+ "input": [["Doctype", "html", "", "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"]],
122
+ "expected": ["<!DOCTYPE html SYSTEM \"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd\">"]
123
+ }
124
+
125
+ ]}
@@ -0,0 +1,66 @@
1
+ {"tests": [
2
+
3
+ {"description": "no encoding",
4
+ "options": {"inject_meta_charset": true},
5
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
6
+ "expected": [""],
7
+ "xhtml": ["<head></head>"]
8
+ },
9
+
10
+ {"description": "empytag head",
11
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
12
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
13
+ "expected": ["<meta charset=utf-8>"],
14
+ "xhtml": ["<head><meta charset=\"utf-8\" /></head>"]
15
+ },
16
+
17
+ {"description": "head w/title",
18
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
19
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["StartTag", "http://www.w3.org/1999/xhtml","title",{}], ["Characters", "foo"],["EndTag", "http://www.w3.org/1999/xhtml", "title"], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
20
+ "expected": ["<meta charset=utf-8><title>foo</title>"],
21
+ "xhtml": ["<head><meta charset=\"utf-8\" /><title>foo</title></head>"]
22
+ },
23
+
24
+ {"description": "head w/meta-charset",
25
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
26
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
27
+ "expected": ["<meta charset=utf-8>"],
28
+ "xhtml": ["<head><meta charset=\"utf-8\" /></head>"]
29
+ },
30
+
31
+ {"description": "head w/ two meta-charset",
32
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
33
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
34
+ "expected": ["<meta charset=utf-8><meta charset=utf-8>", "<head><meta charset=utf-8><meta charset=ascii>"],
35
+ "xhtml": ["<head><meta charset=\"utf-8\" /><meta charset=\"utf-8\" /></head>", "<head><meta charset=\"utf-8\" /><meta charset=\"ascii\" /></head>"]
36
+ },
37
+
38
+ {"description": "head w/robots",
39
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
40
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
41
+ "expected": ["<meta charset=utf-8><meta content=noindex name=robots>"],
42
+ "xhtml": ["<head><meta charset=\"utf-8\" /><meta content=\"noindex\" name=\"robots\" /></head>"]
43
+ },
44
+
45
+ {"description": "head w/robots & charset",
46
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
47
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
48
+ "expected": ["<meta content=noindex name=robots><meta charset=utf-8>"],
49
+ "xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta charset=\"utf-8\" /></head>"]
50
+ },
51
+
52
+ {"description": "head w/ charset in http-equiv content-type",
53
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
54
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "http-equiv", "value": "content-type"}, {"namespace": null, "name": "content", "value": "text/html; charset=ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
55
+ "expected": ["<meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
56
+ "xhtml": ["<head><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
57
+ },
58
+
59
+ {"description": "head w/robots & charset in http-equiv content-type",
60
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
61
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EmptyTag","meta",[{"namespace": null, "name": "http-equiv", "value": "content-type"}, {"namespace": null, "name": "content", "value": "text/html; charset=ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
62
+ "expected": ["<meta content=noindex name=robots><meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
63
+ "xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
64
+ }
65
+
66
+ ]}