@tkeron/html-parser 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,17 +11,17 @@ describe("cloneNode functionality", () => {
11
11
 
12
12
  const cloned = original.cloneNode(true);
13
13
 
14
- // Verificar que el nodo clonado existe
14
+
15
15
  expect(cloned).toBeTruthy();
16
16
  expect(cloned.nodeName).toBe("DIV");
17
17
 
18
- // Verificar que los atributos se copian
18
+
19
19
  expect(cloned.getAttribute("id")).toBe("original");
20
20
 
21
- // Verificar que el contenido de texto se copia
21
+
22
22
  expect(cloned.textContent).toBe("Hello World");
23
23
 
24
- // Verificar que tiene los hijos correctos
24
+
25
25
  expect(cloned.childNodes.length).toBeGreaterThan(0);
26
26
  });
27
27
 
@@ -38,20 +38,20 @@ describe("cloneNode functionality", () => {
38
38
 
39
39
  const cloned = parent.cloneNode(true);
40
40
 
41
- // Verificar estructura básica
41
+
42
42
  expect(cloned.nodeName).toBe("DIV");
43
43
  expect(cloned.getAttribute("id")).toBe("parent");
44
44
 
45
- // Verificar que los hijos se clonaron
45
+
46
46
  expect(cloned.childNodes.length).toBeGreaterThan(0);
47
47
 
48
- // Verificar que el hijo div está presente
48
+
49
49
  const childDiv = cloned.querySelector(".child");
50
50
  expect(childDiv).toBeTruthy();
51
51
  expect(childDiv?.nodeName).toBe("DIV");
52
52
  expect(childDiv?.getAttribute("class")).toBe("child");
53
53
 
54
- // Verificar el nieto span
54
+
55
55
  const span = cloned.querySelector("span");
56
56
  expect(span).toBeTruthy();
57
57
  expect(span?.textContent).toBe("Nested Text");
@@ -70,7 +70,7 @@ describe("cloneNode functionality", () => {
70
70
 
71
71
  const cloned = list.cloneNode(true);
72
72
 
73
- // Verificar que todos los items se clonaron
73
+
74
74
  const items = cloned.querySelectorAll("li");
75
75
  expect(items.length).toBe(3);
76
76
  expect(items[0]?.textContent).toBe("Item 1");
@@ -89,18 +89,18 @@ describe("cloneNode functionality", () => {
89
89
  const doc = parseHTML(html);
90
90
  const container = doc.querySelector("#container")!;
91
91
 
92
- // Guardar innerHTML original
92
+
93
93
  const originalInnerHTML = container.innerHTML;
94
94
  expect(originalInnerHTML).toBeTruthy();
95
95
  expect(originalInnerHTML.length).toBeGreaterThan(0);
96
96
 
97
97
  const cloned = container.cloneNode(true);
98
98
 
99
- // Verificar que innerHTML del clon no esté vacío
99
+
100
100
  expect(cloned.innerHTML).toBeTruthy();
101
101
  expect(cloned.innerHTML.length).toBeGreaterThan(0);
102
102
 
103
- // Verificar que el contenido es similar
103
+
104
104
  expect(cloned.innerHTML).toContain("<h1>Title</h1>");
105
105
  expect(cloned.innerHTML).toContain("<p>Paragraph 1</p>");
106
106
  expect(cloned.innerHTML).toContain("<p>Paragraph 2</p>");
@@ -111,19 +111,19 @@ describe("cloneNode functionality", () => {
111
111
  const doc = parseHTML(html);
112
112
  const mixed = doc.querySelector("#mixed")!;
113
113
 
114
- // Verificar estructura original
114
+
115
115
  const originalChildCount = mixed.childNodes.length;
116
116
  expect(originalChildCount).toBeGreaterThan(0);
117
117
 
118
118
  const cloned = mixed.cloneNode(true);
119
119
 
120
- // Verificar que tiene la misma cantidad de hijos
120
+
121
121
  expect(cloned.childNodes.length).toBe(originalChildCount);
122
122
 
123
- // Verificar contenido completo
123
+
124
124
  expect(cloned.textContent).toBe("Text beforebold textText after");
125
125
 
126
- // Verificar que el elemento strong existe
126
+
127
127
  const strong = cloned.querySelector("strong");
128
128
  expect(strong).toBeTruthy();
129
129
  expect(strong?.textContent).toBe("bold text");
@@ -136,7 +136,7 @@ describe("cloneNode functionality", () => {
136
136
 
137
137
  const cloned = element.cloneNode(true);
138
138
 
139
- // Verificar todos los atributos
139
+
140
140
  expect(cloned.getAttribute("id")).toBe("attrs");
141
141
  expect(cloned.getAttribute("class")).toBe("test");
142
142
  expect(cloned.getAttribute("data-value")).toBe("123");
@@ -161,11 +161,11 @@ describe("cloneNode functionality", () => {
161
161
 
162
162
  const cloned = article.cloneNode(true);
163
163
 
164
- // Verificar estructura
164
+
165
165
  expect(cloned.nodeName).toBe("ARTICLE");
166
166
  expect(cloned.getAttribute("id")).toBe("article");
167
167
 
168
- // Verificar elementos
168
+
169
169
  expect(cloned.querySelector("h2")?.textContent).toBe("Article Title");
170
170
 
171
171
  const paragraphs = cloned.querySelectorAll("p");
@@ -173,12 +173,12 @@ describe("cloneNode functionality", () => {
173
173
  expect(paragraphs[0]?.textContent).toBe("First paragraph");
174
174
  expect(paragraphs[1]?.textContent).toBe("Last paragraph");
175
175
 
176
- // Verificar div anidado
176
+
177
177
  const highlight = cloned.querySelector(".highlight");
178
178
  expect(highlight).toBeTruthy();
179
179
  expect(highlight?.querySelector("span")?.textContent).toBe("Highlighted");
180
180
 
181
- // Verificar que se copió el comentario
181
+
182
182
  const hasComment = Array.from(cloned.childNodes).some(
183
183
  (node: any) => node.nodeType === NodeType.COMMENT_NODE
184
184
  );
@@ -203,7 +203,7 @@ describe("cloneNode functionality", () => {
203
203
 
204
204
  const cloned = section.cloneNode(true);
205
205
 
206
- // Verificar outerHTML
206
+
207
207
  expect(cloned.outerHTML).toBeTruthy();
208
208
  expect(cloned.outerHTML).toContain("section");
209
209
  expect(cloned.outerHTML).toContain("class=\"main\"");
@@ -220,7 +220,7 @@ describe("cloneNode functionality", () => {
220
220
 
221
221
  const cloned = parent.cloneNode(false);
222
222
 
223
- // Debe copiar el elemento pero no los hijos
223
+
224
224
  expect(cloned.nodeName).toBe("DIV");
225
225
  expect(cloned.getAttribute("id")).toBe("parent");
226
226
  expect(cloned.childNodes.length).toBe(0);
@@ -234,12 +234,12 @@ describe("cloneNode functionality", () => {
234
234
 
235
235
  const cloned = element.cloneNode(false);
236
236
 
237
- // Atributos deben copiarse
237
+
238
238
  expect(cloned.getAttribute("id")).toBe("test");
239
239
  expect(cloned.getAttribute("class")).toBe("container");
240
240
  expect(cloned.getAttribute("data-value")).toBe("123");
241
241
 
242
- // Hijos NO deben copiarse
242
+
243
243
  expect(cloned.childNodes.length).toBe(0);
244
244
  expect(cloned.innerHTML).toBe("");
245
245
  });
@@ -253,7 +253,7 @@ describe("cloneNode functionality", () => {
253
253
 
254
254
  const cloned = original.cloneNode(true);
255
255
 
256
- // Modificar el clon no debe afectar el original
256
+
257
257
  cloned.setAttribute("id", "cloned");
258
258
  cloned.setAttribute("data-modified", "true");
259
259
 
@@ -273,10 +273,10 @@ describe("cloneNode functionality", () => {
273
273
 
274
274
  expect(clonedChild).toBeTruthy();
275
275
 
276
- // Modificar hijo del clon
276
+
277
277
  clonedChild?.setAttribute("data-cloned", "yes");
278
278
 
279
- // El hijo original no debe verse afectado
279
+
280
280
  const originalChild = parent.querySelector("#child");
281
281
  expect(originalChild?.hasAttribute("data-cloned")).toBe(false);
282
282
  });
@@ -324,7 +324,7 @@ describe("cloneNode functionality", () => {
324
324
 
325
325
  const cloned = level1.cloneNode(true);
326
326
 
327
- // Verificar todos los niveles
327
+
328
328
  expect(cloned.querySelector("#level2")).toBeTruthy();
329
329
  expect(cloned.querySelector("#level3")).toBeTruthy();
330
330
  expect(cloned.querySelector("#level4")).toBeTruthy();
@@ -340,22 +340,22 @@ describe("cloneNode functionality", () => {
340
340
  const doc = parseHTML(html);
341
341
  const container = doc.querySelector("#container")!;
342
342
 
343
- // Acceder a innerHTML para asegurar que _internalInnerHTML esté establecido
343
+
344
344
  const originalInnerHTML = container.innerHTML;
345
345
  expect(originalInnerHTML).toBeTruthy();
346
346
 
347
347
  const cloned = container.cloneNode(true);
348
348
 
349
- // Verificar que innerHTML funciona en el clon
349
+
350
350
  const clonedInnerHTML = cloned.innerHTML;
351
351
  expect(clonedInnerHTML).toBeTruthy();
352
352
  expect(clonedInnerHTML.length).toBeGreaterThan(0);
353
353
 
354
- // Verificar que contiene el mismo contenido
354
+
355
355
  expect(clonedInnerHTML).toContain("<p>Paragraph 1</p>");
356
356
  expect(clonedInnerHTML).toContain("<p>Paragraph 2</p>");
357
357
 
358
- // Verificar que el accessor de innerHTML funciona correctamente
358
+
359
359
  expect(typeof cloned.innerHTML).toBe("string");
360
360
  });
361
361
 
@@ -369,10 +369,10 @@ describe("cloneNode functionality", () => {
369
369
 
370
370
  const cloned = parent.cloneNode(true);
371
371
 
372
- // Verificar que childNodes tiene la misma estructura
372
+
373
373
  expect(cloned.childNodes.length).toBe(originalChildCount);
374
374
 
375
- // Verificar que podemos acceder a cada hijo
375
+
376
376
  for (let i = 0; i < cloned.childNodes.length; i++) {
377
377
  expect(cloned.childNodes[i]).toBeTruthy();
378
378
  expect(cloned.childNodes[i].nodeType).toBeDefined();
@@ -386,12 +386,12 @@ describe("cloneNode functionality", () => {
386
386
 
387
387
  const cloned = container.cloneNode(true);
388
388
 
389
- // Verificar que el array children está correctamente poblado
389
+
390
390
  expect(cloned.children).toBeTruthy();
391
391
  expect(Array.isArray(cloned.children)).toBe(true);
392
392
  expect(cloned.children.length).toBe(3);
393
393
 
394
- // Verificar que todos son elementos
394
+
395
395
  for (const child of cloned.children) {
396
396
  expect(child.nodeType).toBe(NodeType.ELEMENT_NODE);
397
397
  }
@@ -404,12 +404,12 @@ describe("cloneNode functionality", () => {
404
404
 
405
405
  const cloned = list.cloneNode(true);
406
406
 
407
- // Verificar referencias firstChild y lastChild
407
+
408
408
  expect(cloned.firstChild).toBeTruthy();
409
409
  expect(cloned.lastChild).toBeTruthy();
410
410
 
411
- // En DOM real, firstChild puede ser un nodo de texto (whitespace)
412
- // pero debemos asegurar que existen
411
+
412
+
413
413
  expect(cloned.firstElementChild).toBeTruthy();
414
414
  expect(cloned.lastElementChild).toBeTruthy();
415
415
 
@@ -427,12 +427,12 @@ describe("cloneNode functionality", () => {
427
427
  const doc = parseHTML(html);
428
428
  const dynamic = doc.querySelector("#dynamic")!;
429
429
 
430
- // Modificar innerHTML antes de clonar
430
+
431
431
  dynamic.innerHTML = "<p>Dynamic content</p><span>More content</span>";
432
432
 
433
433
  const cloned = dynamic.cloneNode(true);
434
434
 
435
- // Verificar que el contenido modificado se clonó
435
+
436
436
  expect(cloned.querySelector("p")).toBeTruthy();
437
437
  expect(cloned.querySelector("p")?.textContent).toBe("Dynamic content");
438
438
  expect(cloned.querySelector("span")).toBeTruthy();
@@ -446,17 +446,17 @@ describe("cloneNode functionality", () => {
446
446
 
447
447
  const cloned = original.cloneNode(true);
448
448
 
449
- // Verificar contenido clonado
449
+
450
450
  expect(cloned.querySelector("p")?.textContent).toBe("Original");
451
451
 
452
- // Modificar innerHTML del clon
452
+
453
453
  cloned.innerHTML = "<span>Modified</span>";
454
454
 
455
- // Original no debe cambiar
455
+
456
456
  expect(original.querySelector("p")?.textContent).toBe("Original");
457
457
  expect(original.querySelector("span")).toBeNull();
458
458
 
459
- // Clon debe tener el nuevo contenido
459
+
460
460
  expect(cloned.querySelector("span")?.textContent).toBe("Modified");
461
461
  expect(cloned.querySelector("p")).toBeNull();
462
462
  });
@@ -488,13 +488,13 @@ describe("cloneNode functionality", () => {
488
488
 
489
489
  const cloned = card.cloneNode(true);
490
490
 
491
- // Verificar estructura completa
491
+
492
492
  expect(cloned.getAttribute("data-id")).toBe("123");
493
493
  expect(cloned.querySelector(".card-header")).toBeTruthy();
494
494
  expect(cloned.querySelector(".card-body")).toBeTruthy();
495
495
  expect(cloned.querySelector(".card-footer")).toBeTruthy();
496
496
 
497
- // Verificar contenido específico
497
+
498
498
  expect(cloned.querySelector(".card-title")?.textContent).toBe("Card Title");
499
499
  expect(cloned.querySelector("strong")?.textContent).toBe("bold");
500
500
 
@@ -502,7 +502,7 @@ describe("cloneNode functionality", () => {
502
502
  expect(items.length).toBe(2);
503
503
 
504
504
  const buttons = cloned.querySelectorAll("button");
505
- expect(buttons.length).toBe(3); // close, save, cancel
505
+ expect(buttons.length).toBe(3);
506
506
  });
507
507
 
508
508
  it("should clone a form with various input types", () => {
@@ -522,7 +522,7 @@ describe("cloneNode functionality", () => {
522
522
 
523
523
  const cloned = form.cloneNode(true);
524
524
 
525
- // Verificar que todos los inputs se clonaron
525
+
526
526
  const textInput = cloned.querySelector('[name="username"]');
527
527
  expect(textInput).toBeTruthy();
528
528
  expect(textInput?.getAttribute("value")).toBe("john");
@@ -210,7 +210,7 @@ describe('Custom Elements Support', () => {
210
210
  const ast = parse(tokens);
211
211
 
212
212
  const element = ast.children![0]!;
213
- // nodeName should also be uppercase
213
+
214
214
  if (element.nodeName) {
215
215
  expect(element.nodeName.toUpperCase()).toBe('MY-COMP');
216
216
  }
@@ -382,11 +382,11 @@ describe('Custom Elements Support', () => {
382
382
  const tokens = tokenize(html);
383
383
  const ast = parse(tokens);
384
384
 
385
- // Find first element (skip whitespace text nodes)
385
+
386
386
  const userProfile = ast.children!.find(node => node.type === ASTNodeType.ELEMENT)!;
387
387
  expect(userProfile.tagName).toBe('user-profile');
388
388
 
389
- // Should have proper nesting
389
+
390
390
  expect(userProfile.children).toBeDefined();
391
391
  expect(userProfile.children!.length).toBeGreaterThan(0);
392
392
  });
@@ -412,7 +412,7 @@ describe('Custom Elements Support', () => {
412
412
  const tokens = tokenize(html);
413
413
  const ast = parse(tokens);
414
414
 
415
- // Find first element (skip whitespace text nodes)
415
+
416
416
  const appRoot = ast.children!.find(node => node.type === ASTNodeType.ELEMENT)!;
417
417
  expect(appRoot.tagName).toBe('app-root');
418
418
  });
@@ -471,12 +471,12 @@ describe('Custom Elements Support', () => {
471
471
  test('tokenizer should capture full custom element name', () => {
472
472
  const tokens = tokenize('<my-component-123></my-component-123>');
473
473
 
474
- // Find the opening tag token
474
+
475
475
  const openTag = tokens.find(t => t.type === 'TAG_OPEN');
476
476
  expect(openTag).toBeDefined();
477
477
  expect(openTag!.value).toBe('my-component-123');
478
478
 
479
- // Find the closing tag token
479
+
480
480
  const closeTag = tokens.find(t => t.type === 'TAG_CLOSE');
481
481
  expect(closeTag).toBeDefined();
482
482
  expect(closeTag!.value).toBe('my-component-123');
@@ -642,7 +642,7 @@ describe('Custom Elements Support', () => {
642
642
  const tokens = tokenize(html);
643
643
  const ast = parse(tokens);
644
644
 
645
- // Should have comment, element, comment
645
+
646
646
  const myComp = ast.children!.find(node => node.type === ASTNodeType.ELEMENT)!;
647
647
  expect(myComp.tagName).toBe('my-comp');
648
648
  });
@@ -700,7 +700,7 @@ describe('Custom Elements Support', () => {
700
700
  const tokens = tokenize('<table><tr><td><my-cell>content</my-cell></td></tr></table>');
701
701
  const ast = parse(tokens);
702
702
 
703
- // Find the custom element
703
+
704
704
  const table = ast.children![0]!;
705
705
  expect(table.tagName).toBe('table');
706
706
  });
@@ -233,7 +233,7 @@ describe('Performance Benchmarks', () => {
233
233
  const end = performance.now();
234
234
 
235
235
  expect(ast).toBeDefined();
236
- expect(end - start).toBeLessThan(10); // Should be very fast
236
+ expect(end - start).toBeLessThan(10);
237
237
  });
238
238
 
239
239
  it('should handle medium-sized HTML', () => {
@@ -245,7 +245,7 @@ describe('Performance Benchmarks', () => {
245
245
  const end = performance.now();
246
246
 
247
247
  expect(ast).toBeDefined();
248
- expect(end - start).toBeLessThan(100); // Should still be fast
248
+ expect(end - start).toBeLessThan(100);
249
249
  });
250
250
 
251
251
  it('should handle large HTML documents', () => {
@@ -257,7 +257,7 @@ describe('Performance Benchmarks', () => {
257
257
  const end = performance.now();
258
258
 
259
259
  expect(ast).toBeDefined();
260
- expect(end - start).toBeLessThan(1000); // Should complete within 1 second
260
+ expect(end - start).toBeLessThan(1000);
261
261
  });
262
262
 
263
263
  it('should handle deeply nested HTML', () => {
@@ -276,7 +276,7 @@ describe('Performance Benchmarks', () => {
276
276
  const end = performance.now();
277
277
 
278
278
  expect(ast).toBeDefined();
279
- expect(end - start).toBeLessThan(500); // Should handle deep nesting
279
+ expect(end - start).toBeLessThan(500);
280
280
  });
281
281
  });
282
282
 
@@ -284,14 +284,14 @@ describe('Memory Usage Tests', () => {
284
284
  it('should not leak memory on repeated parsing', () => {
285
285
  const testHtml = '<div><p>Memory test</p></div>';
286
286
 
287
- // Parse the same HTML multiple times
287
+
288
288
  for (let i = 0; i < 1000; i++) {
289
289
  const tokens = tokenize(testHtml);
290
290
  const ast = parse(tokens);
291
291
  expect(ast).toBeDefined();
292
292
  }
293
293
 
294
- // If we get here without crashing, memory is likely managed well
294
+
295
295
  expect(true).toBe(true);
296
296
  });
297
297
 
@@ -1,7 +1,7 @@
1
1
  import { describe, it, expect } from 'bun:test';
2
2
  import { parseHTML } from '../../../index';
3
3
 
4
- // Helper function to normalize text for comparison
4
+
5
5
  function normalizeText(text: string): string {
6
6
  return text
7
7
  .replace(/\s+/g, ' ')
@@ -16,7 +16,7 @@ describe('Final HTML Output Validation', () => {
16
16
  const document = parseHTML(html);
17
17
 
18
18
  expect(document).toBeDefined();
19
- expect(document.nodeType).toBe(9); // DOCUMENT_NODE
19
+ expect(document.nodeType).toBe(9);
20
20
 
21
21
  const div = document.querySelector('div');
22
22
  expect(div).toBeDefined();
@@ -101,9 +101,9 @@ describe('Final HTML Output Validation', () => {
101
101
  expect(p).toBeDefined();
102
102
  expect(p?.textContent).toBe('Content');
103
103
 
104
- // Check for comment node
104
+
105
105
  const commentNode = div?.childNodes[0];
106
- expect(commentNode?.nodeType).toBe(8); // COMMENT_NODE
106
+ expect(commentNode?.nodeType).toBe(8);
107
107
  });
108
108
  });
109
109
 
@@ -135,16 +135,16 @@ describe('Final HTML Output Validation', () => {
135
135
  expect(div).toBeDefined();
136
136
  expect(div?.childNodes.length).toBe(3);
137
137
 
138
- // First text node
139
- expect(div?.childNodes[0]?.nodeType).toBe(3); // TEXT_NODE
138
+
139
+ expect(div?.childNodes[0]?.nodeType).toBe(3);
140
140
  expect(div?.childNodes[0]?.textContent).toBe('Text before ');
141
141
 
142
- // Span element
143
- expect(div?.childNodes[1]?.nodeType).toBe(1); // ELEMENT_NODE
142
+
143
+ expect(div?.childNodes[1]?.nodeType).toBe(1);
144
144
  expect((div?.childNodes[1] as Element)?.tagName).toBe('SPAN');
145
145
 
146
- // Last text node
147
- expect(div?.childNodes[2]?.nodeType).toBe(3); // TEXT_NODE
146
+
147
+ expect(div?.childNodes[2]?.nodeType).toBe(3);
148
148
  expect(div?.childNodes[2]?.textContent).toBe(' text after');
149
149
  });
150
150
 
@@ -264,7 +264,7 @@ describe('Final HTML Output Validation', () => {
264
264
  const document = parseHTML(malformedHTML);
265
265
 
266
266
  expect(document).toBeDefined();
267
- expect(document.nodeType).toBe(9); // DOCUMENT_NODE
267
+ expect(document.nodeType).toBe(9);
268
268
 
269
269
  const divs = document.querySelectorAll('div');
270
270
  expect(divs.length).toBeGreaterThan(0);
@@ -296,7 +296,7 @@ describe('Final HTML Output Validation', () => {
296
296
  const p = document.querySelector('p');
297
297
  expect(p).toBeDefined();
298
298
  expect(p?.textContent).toContain('Special chars:');
299
- // The exact entity handling depends on your implementation
299
+
300
300
  });
301
301
 
302
302
  it('should handle multiple top-level elements', () => {
@@ -321,17 +321,17 @@ describe('Final HTML Output Validation', () => {
321
321
 
322
322
  const document = parseHTML(html);
323
323
 
324
- // Test getElementById
324
+
325
325
  const byId = document.getElementById('test');
326
326
  expect(byId).toBeDefined();
327
327
  expect(byId?.tagName).toBe('DIV');
328
328
 
329
- // Test querySelector
329
+
330
330
  const bySelector = document.querySelector('.container');
331
331
  expect(bySelector).toBeDefined();
332
332
  expect(bySelector?.id).toBe('test');
333
333
 
334
- // Test querySelectorAll
334
+
335
335
  const byClass = document.querySelectorAll('.text');
336
336
  expect(byClass.length).toBe(1);
337
337
  expect(byClass[0]?.textContent).toBe('Hello');
@@ -2,7 +2,7 @@ import { expect, describe, it } from 'bun:test';
2
2
  import { tokenize, TokenType } from '../../../src/tokenizer';
3
3
  import type { Token } from '../../../src/tokenizer';
4
4
 
5
- // HTML5lib tokenizer test format
5
+
6
6
  export interface HTML5libTokenizerTest {
7
7
  description: string;
8
8
  input: string;
@@ -18,12 +18,12 @@ export interface HTML5libTokenizerTestSuite {
18
18
  }
19
19
 
20
20
  export type HTML5libTokenOutput =
21
- | ['StartTag', string, Record<string, string>] // StartTag without self-closing
22
- | ['StartTag', string, Record<string, string>, boolean] // StartTag with self-closing
23
- | ['EndTag', string] // EndTag
24
- | ['Comment', string] // Comment
25
- | ['Character', string] // Character
26
- | ['DOCTYPE', string, string | null, string | null, boolean]; // DOCTYPE
21
+ | ['StartTag', string, Record<string, string>]
22
+ | ['StartTag', string, Record<string, string>, boolean]
23
+ | ['EndTag', string]
24
+ | ['Comment', string]
25
+ | ['Character', string]
26
+ | ['DOCTYPE', string, string | null, string | null, boolean];
27
27
 
28
28
  export interface HTML5libError {
29
29
  code: string;
@@ -31,9 +31,7 @@ export interface HTML5libError {
31
31
  col: number;
32
32
  }
33
33
 
34
- /**
35
- * Converts HTML5lib token format to our internal token format
36
- */
34
+
37
35
  export function convertHTML5libToken(html5libToken: HTML5libTokenOutput): Partial<Token> {
38
36
  const type = html5libToken[0];
39
37
  const nameOrData = html5libToken[1];
@@ -78,9 +76,7 @@ export function convertHTML5libToken(html5libToken: HTML5libTokenOutput): Partia
78
76
  }
79
77
  }
80
78
 
81
- /**
82
- * Converts our internal token format to HTML5lib format for comparison
83
- */
79
+
84
80
  export function convertToHTML5libToken(token: Token): HTML5libTokenOutput {
85
81
  switch (token.type) {
86
82
  case TokenType.DOCTYPE:
@@ -102,9 +98,7 @@ export function convertToHTML5libToken(token: Token): HTML5libTokenOutput {
102
98
  }
103
99
  }
104
100
 
105
- /**
106
- * Normalizes adjacent character tokens as per HTML5lib spec
107
- */
101
+
108
102
  export function normalizeCharacterTokens(tokens: Token[]): Token[] {
109
103
  const normalized: Token[] = [];
110
104
  let currentText = '';
@@ -140,13 +134,11 @@ export function normalizeCharacterTokens(tokens: Token[]): Token[] {
140
134
  return normalized;
141
135
  }
142
136
 
143
- /**
144
- * Runs a single HTML5lib tokenizer test
145
- */
137
+
146
138
  export function runHTML5libTokenizerTest(test: HTML5libTokenizerTest): void {
147
139
  const { description, input, output: expectedOutput, initialStates = ['Data state'] } = test;
148
140
 
149
- // Process double-escaped input if needed
141
+
150
142
  let processedInput = input;
151
143
  if (test.doubleEscaped) {
152
144
  processedInput = processedInput.replace(/\\u([0-9a-fA-F]{4})/g, (match, hex) => {
@@ -156,16 +148,16 @@ export function runHTML5libTokenizerTest(test: HTML5libTokenizerTest): void {
156
148
 
157
149
  for (const initialState of initialStates) {
158
150
  it(`${description} (${initialState})`, () => {
159
- // Tokenize the input
151
+
160
152
  const tokens = tokenize(processedInput);
161
153
 
162
- // Normalize character tokens
154
+
163
155
  const normalizedTokens = normalizeCharacterTokens(tokens);
164
156
 
165
- // Convert to HTML5lib format for comparison
157
+
166
158
  const actualOutput = normalizedTokens.map(convertToHTML5libToken);
167
159
 
168
- // Process expected output if double-escaped
160
+
169
161
  let processedExpectedOutput = expectedOutput;
170
162
  if (test.doubleEscaped) {
171
163
  processedExpectedOutput = expectedOutput.map(token => {
@@ -178,15 +170,13 @@ export function runHTML5libTokenizerTest(test: HTML5libTokenizerTest): void {
178
170
  });
179
171
  }
180
172
 
181
- // Compare outputs
173
+
182
174
  expect(actualOutput).toEqual(processedExpectedOutput);
183
175
  });
184
176
  }
185
177
  }
186
178
 
187
- /**
188
- * Runs all tests from an HTML5lib tokenizer test suite
189
- */
179
+
190
180
  export function runHTML5libTokenizerTestSuite(testSuite: HTML5libTokenizerTestSuite, suiteName: string): void {
191
181
  describe(`HTML5lib Tokenizer Tests: ${suiteName}`, () => {
192
182
  testSuite.tests.forEach(test => {
@@ -195,9 +185,7 @@ export function runHTML5libTokenizerTestSuite(testSuite: HTML5libTokenizerTestSu
195
185
  });
196
186
  }
197
187
 
198
- /**
199
- * Loads and runs HTML5lib tokenizer tests from JSON
200
- */
188
+
201
189
  export async function loadHTML5libTokenizerTests(testData: string, suiteName: string): Promise<void> {
202
190
  const testSuite: HTML5libTokenizerTestSuite = JSON.parse(testData);
203
191
  runHTML5libTokenizerTestSuite(testSuite, suiteName);