@tkeron/html-parser 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# HTML Parser - Powered by Bun Native Tokenizer
|
|
2
2
|
|
|
3
|
-
> ⚠️ **Work in Progress** - This package is currently under active development
|
|
3
|
+
> ⚠️ **Work in Progress** - This package is currently under active development.
|
|
4
4
|
|
|
5
5
|
A fast and lightweight HTML parser for Bun that converts HTML strings into DOM Document objects. **Now powered by a native Bun tokenizer** for optimal performance.
|
|
6
6
|
|
package/package.json
CHANGED
package/src/dom-simulator.ts
CHANGED
|
@@ -426,6 +426,11 @@ function updateElementContent(element: any): void {
|
|
|
426
426
|
enumerable: false,
|
|
427
427
|
configurable: true,
|
|
428
428
|
});
|
|
429
|
+
|
|
430
|
+
// Propagate changes up to parent elements
|
|
431
|
+
if (element.parentElement) {
|
|
432
|
+
updateElementContent(element.parentElement);
|
|
433
|
+
}
|
|
429
434
|
}
|
|
430
435
|
|
|
431
436
|
export function getTextContent(node: any): string {
|
|
@@ -169,16 +169,3 @@ describe('HTML5lib Tokenizer Tests', () => {
|
|
|
169
169
|
runHTML5libTokenizerTestSuite(entityTests, 'Entity Handling');
|
|
170
170
|
runHTML5libTokenizerTestSuite(commentTests, 'Comment Handling');
|
|
171
171
|
});
|
|
172
|
-
|
|
173
|
-
// Test for loading external test files (when available)
|
|
174
|
-
describe('HTML5lib External Tests', () => {
|
|
175
|
-
it('should be able to load external test files', async () => {
|
|
176
|
-
// This would be used to load actual HTML5lib test files
|
|
177
|
-
// const testData = await Bun.file('/path/to/test1.test').text();
|
|
178
|
-
// await loadHTML5libTokenizerTests(testData, 'External Test');
|
|
179
|
-
|
|
180
|
-
// For now, we'll just verify our utilities work
|
|
181
|
-
const testData = JSON.stringify(basicTokenizerTests);
|
|
182
|
-
await loadHTML5libTokenizerTests(testData, 'Loaded Basic Tests');
|
|
183
|
-
});
|
|
184
|
-
});
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { describe, it, expect } from "bun:test";
|
|
2
|
+
import { parseHTML } from "../index";
|
|
3
|
+
|
|
4
|
+
describe("setAttribute and outerHTML synchronization", () => {
|
|
5
|
+
it("should update outerHTML after setAttribute on img element", () => {
|
|
6
|
+
const doc = parseHTML('<html><body><img class="test" src="old.png" alt="test"/></body></html>');
|
|
7
|
+
const img = doc.querySelector('.test');
|
|
8
|
+
|
|
9
|
+
expect(img).not.toBeNull();
|
|
10
|
+
expect(img!.getAttribute('src')).toBe('old.png');
|
|
11
|
+
expect(img!.outerHTML).toContain('src="old.png"');
|
|
12
|
+
|
|
13
|
+
img!.setAttribute('src', 'new.png');
|
|
14
|
+
|
|
15
|
+
expect(img!.getAttribute('src')).toBe('new.png');
|
|
16
|
+
expect(img!.outerHTML).toContain('src="new.png"');
|
|
17
|
+
expect(img!.outerHTML).not.toContain('src="old.png"');
|
|
18
|
+
});
|
|
19
|
+
|
|
20
|
+
it("should update outerHTML after setAttribute on any element", () => {
|
|
21
|
+
const doc = parseHTML('<div id="test" class="old-class">Content</div>');
|
|
22
|
+
const div = doc.querySelector('#test');
|
|
23
|
+
|
|
24
|
+
expect(div).not.toBeNull();
|
|
25
|
+
expect(div!.getAttribute('class')).toBe('old-class');
|
|
26
|
+
|
|
27
|
+
div!.setAttribute('class', 'new-class');
|
|
28
|
+
div!.setAttribute('data-value', '123');
|
|
29
|
+
|
|
30
|
+
expect(div!.getAttribute('class')).toBe('new-class');
|
|
31
|
+
expect(div!.getAttribute('data-value')).toBe('123');
|
|
32
|
+
expect(div!.outerHTML).toContain('class="new-class"');
|
|
33
|
+
expect(div!.outerHTML).toContain('data-value="123"');
|
|
34
|
+
expect(div!.outerHTML).not.toContain('class="old-class"');
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
it("should update parent innerHTML when child attribute changes", () => {
|
|
38
|
+
const doc = parseHTML('<html><body><img class="no-linked" src="" alt="test"/></body></html>');
|
|
39
|
+
const body = doc.querySelector('body');
|
|
40
|
+
const img = doc.querySelector('.no-linked');
|
|
41
|
+
|
|
42
|
+
expect(img).not.toBeNull();
|
|
43
|
+
expect(body).not.toBeNull();
|
|
44
|
+
|
|
45
|
+
img!.setAttribute('src', './new-image.png');
|
|
46
|
+
|
|
47
|
+
expect(body!.innerHTML).toContain('src="./new-image.png"');
|
|
48
|
+
expect(body!.innerHTML).not.toContain('src=""');
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
it("should update documentElement outerHTML after nested setAttribute", () => {
|
|
52
|
+
const doc = parseHTML(`<!DOCTYPE html>
|
|
53
|
+
<html lang="en">
|
|
54
|
+
<head>
|
|
55
|
+
<meta charset="UTF-8" />
|
|
56
|
+
<title>Test</title>
|
|
57
|
+
</head>
|
|
58
|
+
<body>
|
|
59
|
+
<img class="no-linked" src="" alt="test" />
|
|
60
|
+
</body>
|
|
61
|
+
</html>`);
|
|
62
|
+
|
|
63
|
+
const img = doc.querySelector('.no-linked');
|
|
64
|
+
expect(img).not.toBeNull();
|
|
65
|
+
|
|
66
|
+
img!.setAttribute('src', './profile.png');
|
|
67
|
+
|
|
68
|
+
const finalHTML = doc.documentElement!.outerHTML;
|
|
69
|
+
expect(finalHTML).toContain('src="./profile.png"');
|
|
70
|
+
expect(finalHTML).not.toContain('src=""');
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
it("should handle multiple setAttribute calls", () => {
|
|
74
|
+
const doc = parseHTML('<a href="#" class="link">Click</a>');
|
|
75
|
+
const link = doc.querySelector('a');
|
|
76
|
+
|
|
77
|
+
expect(link).not.toBeNull();
|
|
78
|
+
|
|
79
|
+
link!.setAttribute('href', 'https://example.com');
|
|
80
|
+
link!.setAttribute('target', '_blank');
|
|
81
|
+
link!.setAttribute('rel', 'noopener');
|
|
82
|
+
|
|
83
|
+
const html = link!.outerHTML;
|
|
84
|
+
expect(html).toContain('href="https://example.com"');
|
|
85
|
+
expect(html).toContain('target="_blank"');
|
|
86
|
+
expect(html).toContain('rel="noopener"');
|
|
87
|
+
expect(html).not.toContain('href="#"');
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
it("should update outerHTML after removeAttribute", () => {
|
|
91
|
+
const doc = parseHTML('<div id="test" class="my-class" data-value="123">Content</div>');
|
|
92
|
+
const div = doc.querySelector('#test');
|
|
93
|
+
|
|
94
|
+
expect(div).not.toBeNull();
|
|
95
|
+
expect(div!.outerHTML).toContain('data-value="123"');
|
|
96
|
+
|
|
97
|
+
div!.removeAttribute('data-value');
|
|
98
|
+
|
|
99
|
+
expect(div!.getAttribute('data-value')).toBeNull();
|
|
100
|
+
expect(div!.outerHTML).not.toContain('data-value');
|
|
101
|
+
});
|
|
102
|
+
});
|