defuddle-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts ADDED
@@ -0,0 +1,1163 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { Command } from 'commander';
4
+ import { JSDOM, VirtualConsole, DOMWindow } from 'jsdom';
5
+ import pkg from 'defuddle';
6
+ const { Defuddle } = pkg;
7
+ import chalk from 'chalk';
8
+ import { readFile, writeFile } from 'fs/promises';
9
+ import { fileURLToPath } from 'url';
10
+ import { dirname, resolve } from 'path';
11
+ import { createMarkdownContent } from './markdown.js';
12
+
13
/**
 * Structural shape of a settable token list.
 *
 * Used in this file as the declared type of the stubbed `sandbox` and
 * `headers` fields on the element stand-ins.
 */
interface DOMSettableTokenList {
  /** Serialized form of the whole list. */
  value: string;
  /** Number of tokens currently held. */
  length: number;

  // Read access
  item(index: number): string | null;
  contains(token: string): boolean;
  supports(token: string): boolean;

  // Mutation
  add(token: string): void;
  remove(token: string): void;
  toggle(token: string, force?: boolean): boolean;
  replace(oldToken: string, newToken: string): boolean;

  // Iteration
  [Symbol.iterator](): Iterator<string>;
}
25
+
26
/** Flags accepted by the `parse` sub-command, as delivered to its action handler. */
interface ParseOptions {
  /** `-o, --output <file>`: output file path (stdout when omitted). */
  output?: string;
  /** `-p, --property <name>`: extract a single named property. */
  property?: string;
  /** `-m, --markdown`: convert content to markdown. */
  markdown?: boolean;
  /** `--md`: alias for `--markdown`. */
  md?: boolean;
  /** `-j, --json`: output JSON with metadata and content. */
  json?: boolean;
  /** `--debug`: enable debug mode. */
  debug?: boolean;
}
34
+
35
// Derive this module's own file path, and the directory containing it,
// from the module URL.
const __filename = fileURLToPath(import.meta.url),
  __dirname = dirname(__filename);
37
+
38
// Define CSS interfaces globally first
/**
 * Minimal stand-in for the CSSOM `CSSRule` interface, published on
 * `globalThis`. Instances carry a numeric `type` (default 1) plus empty
 * `cssText` / `parentRule` / `parentStyleSheet` fields.
 */
(globalThis as any).CSSRule = class {
  readonly type: number = 1;
  cssText: string;
  parentRule: any;
  parentStyleSheet: any;

  /**
   * @param type Optional rule-type code; when omitted the instance keeps
   *   the default of 1.
   */
  constructor(type?: number) {
    if (type !== undefined) {
      this.type = type;
    }
    this.cssText = '';
    this.parentRule = null;
    this.parentStyleSheet = null;
  }
};

// Add static properties
{
  // Rule-type codes exposed as read-only statics on the stub. Code 9
  // (MARGIN_RULE in the CSSOM numbering) was missing from the table and is
  // included so the sequence has no gap.
  const ruleTypeCodes: Record<string, number> = {
    STYLE_RULE: 1,
    CHARSET_RULE: 2,
    IMPORT_RULE: 3,
    MEDIA_RULE: 4,
    FONT_FACE_RULE: 5,
    PAGE_RULE: 6,
    KEYFRAMES_RULE: 7,
    KEYFRAME_RULE: 8,
    MARGIN_RULE: 9,
    NAMESPACE_RULE: 10,
    COUNTER_STYLE_RULE: 11,
    SUPPORTS_RULE: 12,
    DOCUMENT_RULE: 13,
    FONT_FEATURE_VALUES_RULE: 14,
    VIEWPORT_RULE: 15,
    REGION_STYLE_RULE: 16
  };

  const descriptors: PropertyDescriptorMap = {};
  for (const [constantName, code] of Object.entries(ruleTypeCodes)) {
    descriptors[constantName] = { value: code, writable: false };
  }
  Object.defineProperties((globalThis as any).CSSRule, descriptors);
}
73
+
74
/**
 * Minimal stand-in for `CSSMediaRule`, derived from whatever `CSSRule` is
 * on `globalThis` when this statement runs. Its `type` is forced to 4 and
 * it exposes empty `media` and `cssRules` lists.
 */
(globalThis as any).CSSMediaRule = class extends (globalThis as any).CSSRule {
  media: MediaList;
  cssRules: CSSRuleList;
  conditionText: string = '';
  deleteRule: (index: number) => void = () => {};
  insertRule: (rule: string, index?: number) => number = () => 0;

  constructor() {
    super();
    Object.defineProperty(this, 'type', { value: 4 }); // CSSRule.MEDIA_RULE
    this.media = {
      length: 0,
      mediaText: '',
      item: () => null,
      appendMedium: () => {},
      deleteMedium: () => {},
      toString: () => '',
      // Both lists report length 0, so iterating them must produce nothing
      // (previously each iterator yielded one phantom element).
      [Symbol.iterator]: function* () {}
    } as unknown as MediaList;
    this.cssRules = {
      length: 0,
      item: () => null,
      [Symbol.iterator]: function* () {}
    } as unknown as CSSRuleList;
  }
};
103
+
104
/**
 * Minimal stand-in for `CSSStyleSheet`, published on `globalThis`. Every
 * mutator is a no-op and the rule and media lists are permanently empty.
 * `rules` is the same object as `cssRules`.
 */
(globalThis as any).CSSStyleSheet = class {
  type: string = 'text/css';
  href: string | null = null;
  ownerNode: Element | ProcessingInstruction | null = null;
  parentStyleSheet: CSSStyleSheet | null = null;
  title: string | null = null;
  media: MediaList;
  disabled: boolean = false;
  cssRules: CSSRuleList;
  ownerRule: CSSRule | null = null;
  rules: CSSRuleList;
  addRule: (selector: string, style: string, index?: number) => number = () => 0;
  removeRule: (index?: number) => void = () => {};
  replace: (text: string) => Promise<CSSStyleSheet> = async () => this as unknown as CSSStyleSheet;
  replaceSync: (text: string) => void = () => {};

  constructor() {
    this.media = {
      length: 0,
      mediaText: '',
      item: () => null,
      appendMedium: () => {},
      deleteMedium: () => {},
      toString: () => '',
      // The list reports length 0, so its iterator yields nothing
      // (previously it produced one phantom '' entry).
      [Symbol.iterator]: function* () {}
    } as unknown as MediaList;
    this.cssRules = {
      length: 0,
      item: () => null,
      // Same reasoning: no phantom CSSRule for an empty rule list.
      [Symbol.iterator]: function* () {}
    } as unknown as CSSRuleList;
    this.rules = this.cssRules;
  }

  /** No-op; always reports index 0. */
  insertRule(rule: string, index?: number): number {
    return 0;
  }

  /** No-op. */
  deleteRule(index: number): void {}
};
147
+
148
// Define SVGElement globally
{
  // Shared generator for every empty collection below: a collection that
  // reports length 0 must not produce anything when iterated. (The previous
  // stubs yielded one phantom '' / null entry, unlike getClientRects(), which
  // was already empty.)
  const emptyIterator = function* (): Generator<never, void, undefined> {};

  /** Builds a fresh, permanently empty HTMLCollection-shaped object. */
  function emptyElementCollection<T>(): T {
    return {
      length: 0,
      item: () => null,
      namedItem: () => null,
      [Symbol.iterator]: emptyIterator
    } as unknown as T;
  }

  /** Builds a fresh, permanently empty NodeList-shaped object. */
  function emptyNodeList<T>(): T {
    return {
      length: 0,
      item: () => null,
      forEach: () => {},
      entries: emptyIterator,
      keys: emptyIterator,
      values: emptyIterator,
      [Symbol.iterator]: emptyIterator
    } as unknown as T;
  }

  /**
   * Inert stand-in for `SVGElement`, published on `globalThis`. All
   * properties hold empty defaults, all queries report "nothing found" and
   * all mutators are no-ops.
   */
  (globalThis as any).SVGElement = class {
    id: string = '';
    className: string = '';
    style: CSSStyleDeclaration = {
      cssText: '',
      length: 0,
      parentRule: null,
      getPropertyPriority: () => '',
      getPropertyValue: () => '',
      item: () => '',
      removeProperty: () => '',
      setProperty: () => '',
      [Symbol.iterator]: emptyIterator
    } as unknown as CSSStyleDeclaration;
    ownerSVGElement: SVGElement | null = null;
    viewportElement: SVGElement | null = null;
    tagName: string = '';
    namespaceURI: string | null = null;
    prefix: string | null = null;
    localName: string = '';
    baseURI: string = '';
    textContent: string | null = '';
    innerHTML: string = '';
    outerHTML: string = '';
    hidden: boolean = false;
    slot: string = '';
    attributes: NamedNodeMap = {
      length: 0,
      getNamedItem: () => null,
      getNamedItemNS: () => null,
      item: () => null,
      removeNamedItem: () => null,
      removeNamedItemNS: () => null,
      setNamedItem: () => null,
      setNamedItemNS: () => null,
      [Symbol.iterator]: emptyIterator
    } as unknown as NamedNodeMap;
    childNodes: NodeListOf<ChildNode> = emptyNodeList<NodeListOf<ChildNode>>();
    firstChild: ChildNode | null = null;
    lastChild: ChildNode | null = null;
    nextSibling: ChildNode | null = null;
    previousSibling: ChildNode | null = null;
    parentNode: Node & ParentNode | null = null;
    parentElement: HTMLElement | null = null;
    childElementCount: number = 0;
    firstElementChild: Element | null = null;
    lastElementChild: Element | null = null;
    nextElementSibling: Element | null = null;
    previousElementSibling: Element | null = null;
    children: HTMLCollection = emptyElementCollection<HTMLCollection>();

    constructor() {
      // Nothing to initialize beyond the field defaults above.
    }

    // --- Attribute access: there are never any attributes ---

    getAttribute(name: string): string | null {
      return null;
    }

    getAttributeNS(namespaceURI: string | null, localName: string): string | null {
      return null;
    }

    setAttribute(name: string, value: string): void {}

    setAttributeNS(namespaceURI: string | null, qualifiedName: string, value: string): void {}

    removeAttributeNS(namespaceURI: string | null, localName: string): void {}

    hasAttribute(name: string): boolean {
      return false;
    }

    hasAttributeNS(namespaceURI: string | null, localName: string): boolean {
      return false;
    }

    // --- Geometry: everything is a zero-sized box at the origin ---

    getBoundingClientRect(): DOMRect {
      return {
        top: 0,
        left: 0,
        bottom: 0,
        right: 0,
        width: 0,
        height: 0,
        x: 0,
        y: 0,
        toJSON: function () { return this; }
      };
    }

    getClientRects(): DOMRectList {
      return {
        length: 0,
        item: function () { return null; },
        [Symbol.iterator]: emptyIterator
      } as unknown as DOMRectList;
    }

    // --- Queries: always empty / not found ---

    getElementsByClassName(classNames: string): HTMLCollectionOf<Element> {
      return emptyElementCollection<HTMLCollectionOf<Element>>();
    }

    getElementsByTagName(qualifiedName: string): HTMLCollectionOf<Element> {
      return emptyElementCollection<HTMLCollectionOf<Element>>();
    }

    getElementsByTagNameNS(namespaceURI: string | null, localName: string): HTMLCollectionOf<Element> {
      return emptyElementCollection<HTMLCollectionOf<Element>>();
    }

    querySelector(selectors: string): Element | null {
      return null;
    }

    querySelectorAll(selectors: string): NodeListOf<Element> {
      return emptyNodeList<NodeListOf<Element>>();
    }

    matches(selectors: string): boolean {
      return false;
    }

    closest(selectors: string): Element | null {
      return null;
    }

    contains(other: Node | null): boolean {
      return false;
    }

    // --- Tree mutation: all no-ops ---

    append(...nodes: (Node | string)[]): void {}

    prepend(...nodes: (Node | string)[]): void {}

    after(...nodes: (Node | string)[]): void {}

    before(...nodes: (Node | string)[]): void {}

    replaceWith(...nodes: (Node | string)[]): void {}

    remove(): void {}

    insertAdjacentElement(where: InsertPosition, element: Element): Element | null {
      return null;
    }

    insertAdjacentText(where: InsertPosition, data: string): void {}

    insertAdjacentHTML(position: InsertPosition, text: string): void {}

    replaceChildren(...nodes: (Node | string)[]): void {}
  };
}
338
+
339
// Define HTMLImageElement globally
/**
 * Inert stand-in for `HTMLImageElement`, published on `globalThis`.
 * Every property starts at an empty/zero default and `decode()` resolves
 * immediately.
 */
(globalThis as any).HTMLImageElement = class {
  // Source selection
  alt = '';
  src = '';
  srcset = '';
  sizes = '';
  crossOrigin: string | null = null;
  useMap = '';
  isMap = false;

  // Dimensions and load state
  width = 0;
  height = 0;
  naturalWidth = 0;
  naturalHeight = 0;
  complete = false;

  // Legacy presentational attributes
  name = '';
  lowsrc = '';
  align = '';
  hspace = 0;
  vspace = 0;
  longDesc = '';
  border = '';
  x = 0;
  y = 0;

  // Loading hints
  currentSrc = '';
  decoding: 'sync' | 'async' | 'auto' = 'auto';
  fetchPriority: 'high' | 'low' | 'auto' = 'auto';
  loading: 'eager' | 'lazy' = 'eager';
  referrerPolicy = '';

  /** Returns an already-resolved promise. */
  decode(): Promise<void> {
    const done: Promise<void> = Promise.resolve();
    return done;
  }
};
376
+
377
// One jsdom VirtualConsole, passed to both the bootstrap JSDOM instance and
// the per-parse JSDOM instance created later in this file.
const virtualConsole = new VirtualConsole();
379
+
380
/**
 * Runs one best-effort setup step. A thrown error never propagates: it is
 * reported as `Warning: Could not set <label>:` or, when `label` is null,
 * dropped silently.
 */
function guardedSetup(label: string | null, step: () => void): void {
  try {
    step();
  } catch (error) {
    if (label !== null) {
      console.warn(`Warning: Could not set ${label}:`, error);
    }
  }
}

/** Builds a DOMRect-shaped object describing an empty box at the origin. */
function createZeroDOMRect(): DOMRect {
  return {
    top: 0,
    left: 0,
    bottom: 0,
    right: 0,
    width: 0,
    height: 0,
    x: 0,
    y: 0,
    toJSON: function () { return this; }
  };
}

/** Builds a Storage-shaped object that stores nothing. */
function createNoopStorage() {
  return {
    length: 0,
    getItem: () => null,
    setItem: () => {},
    removeItem: () => {},
    clear: () => {},
    key: () => null
  };
}

/** Wraps a fixed list of elements in an HTMLCollection-shaped, array-like object. */
function createStaticElementCollection(items: Element[]): HTMLCollectionOf<Element> {
  const collection: Record<PropertyKey, unknown> = {
    length: items.length,
    item: (index: number) => items[index] ?? null,
    namedItem: (name: string) =>
      items.find((el) => el.getAttribute('id') === name || el.getAttribute('name') === name) ?? null,
    [Symbol.iterator]: () => items[Symbol.iterator]()
  };
  items.forEach((el, index) => {
    collection[index] = el;
  });
  return collection as unknown as HTMLCollectionOf<Element>;
}

/**
 * Builds a fallback `Range` class bound to `window`. The class resolves the
 * document through `window.document` rather than an ambient global
 * `document`, because this setup runs from `beforeParse`, before any global
 * `document` has been published.
 */
function createRangeStub(window: DOMWindow) {
  return class Range {
    static readonly START_TO_START = 0;
    static readonly START_TO_END = 1;
    static readonly END_TO_END = 2;
    static readonly END_TO_START = 3;

    readonly START_TO_START = 0;
    readonly START_TO_END = 1;
    readonly END_TO_END = 2;
    readonly END_TO_START = 3;

    startContainer: Node;
    startOffset: number;
    endContainer: Node;
    endOffset: number;
    collapsed: boolean;
    commonAncestorContainer: Node;

    constructor() {
      const root = window.document.documentElement;
      this.startContainer = root;
      this.startOffset = 0;
      this.endContainer = root;
      this.endOffset = 0;
      this.collapsed = true;
      this.commonAncestorContainer = root;
    }

    createContextualFragment(fragment: string): DocumentFragment {
      return window.document.createDocumentFragment();
    }

    detach(): void {}

    cloneContents(): DocumentFragment {
      return window.document.createDocumentFragment();
    }

    cloneRange(): Range {
      return new Range();
    }

    collapse(toStart: boolean = false): void {}

    compareBoundaryPoints(how: number, sourceRange: Range): number {
      return 0;
    }

    comparePoint(node: Node, offset: number): number {
      return 0;
    }

    deleteContents(): void {}

    extractContents(): DocumentFragment {
      return window.document.createDocumentFragment();
    }

    getBoundingClientRect(): DOMRect {
      return createZeroDOMRect();
    }

    getClientRects(): DOMRectList {
      return {
        length: 0,
        item: function () { return null; },
        [Symbol.iterator]: function* () {}
      } as unknown as DOMRectList;
    }

    insertNode(node: Node): void {}

    intersectsNode(node: Node): boolean {
      return false;
    }

    isPointInRange(node: Node, offset: number): boolean {
      return false;
    }

    selectNode(node: Node): void {}

    selectNodeContents(node: Node): void {
      this.startContainer = node;
      this.startOffset = 0;
      this.endContainer = node;
      this.endOffset = node.childNodes.length;
      // A range spanning zero children starts and ends at the same point.
      this.collapsed = this.endOffset === 0;
      // Both boundaries are in `node`, so it is also the common ancestor.
      this.commonAncestorContainer = node;
    }

    setEnd(node: Node, offset: number): void {}

    setEndAfter(node: Node): void {}

    setEndBefore(node: Node): void {}

    setStart(node: Node, offset: number): void {}

    setStartAfter(node: Node): void {}

    setStartBefore(node: Node): void {}

    surroundContents(newParent: Node): void {}
  };
}

// Function to set up DOM interfaces
/**
 * Fills in browser APIs that the given jsdom window lacks. Each API is only
 * installed when the window does not already provide it, and each group is
 * attempted independently, so one failure does not prevent the others.
 *
 * This function never throws: per-group failures are logged with
 * `console.warn` (storage failures are ignored silently) and anything
 * unexpected is logged with `console.error`.
 *
 * @param window The jsdom window to patch in place.
 */
function setupDOMInterfaces(window: DOMWindow) {
  try {
    // First, set up basic window properties
    guardedSetup('basic window properties', () => {
      if (!window.innerWidth) {
        Object.defineProperty(window, 'innerWidth', { value: 1024 });
      }
      if (!window.innerHeight) {
        Object.defineProperty(window, 'innerHeight', { value: 768 });
      }
      if (!window.devicePixelRatio) {
        Object.defineProperty(window, 'devicePixelRatio', { value: 1 });
      }
    });

    // Set up CSS interfaces from the stubs published on globalThis
    guardedSetup('CSS interfaces', () => {
      if (!window.CSSRule) {
        window.CSSRule = (globalThis as any).CSSRule as any;
      }
      if (!window.CSSMediaRule) {
        window.CSSMediaRule = (globalThis as any).CSSMediaRule as any;
      }
      if (!window.CSSStyleSheet) {
        window.CSSStyleSheet = (globalThis as any).CSSStyleSheet as any;
      }
    });

    // Set up HTML and SVG interfaces
    guardedSetup('HTML/SVG interfaces', () => {
      if (!window.HTMLImageElement) {
        window.HTMLImageElement = (globalThis as any).HTMLImageElement as any;
      }
      if (!window.SVGElement) {
        window.SVGElement = (globalThis as any).SVGElement as any;
      }
    });

    // Set up screen object
    guardedSetup('screen object', () => {
      if (!window.screen) {
        Object.defineProperty(window, 'screen', {
          value: {
            width: 1024,
            height: 768,
            availWidth: 1024,
            availHeight: 768,
            colorDepth: 24,
            pixelDepth: 24,
            orientation: {
              type: 'landscape-primary',
              angle: 0
            }
          }
        });
      }
    });

    // Set up storage objects. Failures are deliberately silent. Each store
    // is handled on its own so that a failure while probing or defining
    // localStorage no longer skips sessionStorage.
    for (const storeName of ['localStorage', 'sessionStorage'] as const) {
      guardedSetup(null, () => {
        if (!window[storeName]) {
          Object.defineProperty(window, storeName, {
            value: createNoopStorage(),
            writable: false,
            configurable: false
          });
        }
      });
    }

    // Set up animation frame methods on top of timers
    guardedSetup('animation frame methods', () => {
      if (!window.requestAnimationFrame) {
        window.requestAnimationFrame = (callback: FrameRequestCallback): number => {
          // Frame callbacks are typed to receive a timestamp, so supply one.
          return setTimeout(() => callback(performance.now()), 0) as unknown as number;
        };
      }
      if (!window.cancelAnimationFrame) {
        window.cancelAnimationFrame = (handle: number): void => {
          clearTimeout(handle);
        };
      }
    });

    // Set up DOM methods
    guardedSetup('getElementsByClassName', () => {
      if (!window.Document.prototype.getElementsByClassName) {
        window.Document.prototype.getElementsByClassName = function (
          this: Document,
          classNames: string
        ): HTMLCollectionOf<Element> {
          // `classNames` may hold several whitespace-separated names and an
          // element must carry all of them. Matching on the class attribute
          // avoids building a selector from unescaped names.
          const wanted = classNames.split(/\s+/).filter(Boolean);
          if (wanted.length === 0) {
            return createStaticElementCollection([]);
          }
          const matching = Array.from(this.querySelectorAll('[class]')).filter((el) => {
            const owned = (el.getAttribute('class') ?? '').split(/\s+/);
            return wanted.every((token) => owned.includes(token));
          });
          return createStaticElementCollection(matching);
        };
      }
    });

    // Set up Node methods
    guardedSetup('Node.contains', () => {
      if (!window.Node.prototype.contains) {
        window.Node.prototype.contains = function (this: Node, node: Node | null): boolean {
          // Walk up from `node`; it is contained if the walk reaches `this`.
          for (let current: Node | null = node; current; current = current.parentNode) {
            if (current === this) return true;
          }
          return false;
        };
      }
    });

    // Set up Element methods
    guardedSetup('getBoundingClientRect', () => {
      if (!window.Element.prototype.getBoundingClientRect) {
        window.Element.prototype.getBoundingClientRect = function (): DOMRect {
          return createZeroDOMRect();
        };
      }
    });

    // Set up Document methods
    guardedSetup('getSelection', () => {
      if (!window.Document.prototype.getSelection) {
        window.Document.prototype.getSelection = function (): Selection | null {
          const selection = {
            anchorNode: null,
            anchorOffset: 0,
            direction: 'forward',
            focusNode: null,
            focusOffset: 0,
            isCollapsed: true,
            rangeCount: 0,
            type: 'None',
            // Resolved at call time, so this picks up the Range installed below.
            getRangeAt: function () { return new window.Range(); },
            removeAllRanges: function () {},
            addRange: function () {},
            collapse: function () {},
            collapseToEnd: function () {},
            collapseToStart: function () {},
            deleteFromDocument: function () {},
            empty: function () {},
            extend: function () {},
            modify: function () {},
            selectAllChildren: function () {},
            setBaseAndExtent: function () {},
            setPosition: function () {},
            toString: function () { return ''; },
            containsNode: function (node: Node, allowPartialContainment: boolean = false): boolean {
              return false;
            },
            removeRange: function (range: Range): void {}
          } as unknown as Selection;
          return selection;
        };
      }
    });

    // Set up Window methods
    guardedSetup('getComputedStyle', () => {
      if (!window.Window.prototype.getComputedStyle) {
        window.Window.prototype.getComputedStyle = function (
          elt: Element,
          pseudoElt?: string | null
        ): CSSStyleDeclaration {
          const style = {
            accentColor: '',
            alignContent: '',
            alignItems: '',
            alignSelf: '',
            getPropertyValue: function (prop: string): string { return ''; }
          } as unknown as CSSStyleDeclaration;
          return style;
        };
      }
    });

    // Set up Range constructor last
    guardedSetup('Range constructor', () => {
      if (!window.Range) {
        window.Range = createRangeStub(window) as unknown as typeof window.Range;
      }
    });
  } catch (error) {
    console.error('Error in setupDOMInterfaces:', error);
    // Don't throw the error, just log it
  }
}
743
+
744
// Create a virtual DOM
// Bootstrap jsdom instance over an empty document. Its window supplies the
// DOM globals published below.
const dom = new JSDOM('<!DOCTYPE html><html><body></body></html>', {
  virtualConsole,
  runScripts: 'dangerously',
  resources: 'usable',
  pretendToBeVisual: true,
  beforeParse(window: DOMWindow) {
    setupDOMInterfaces(window);
  }
});

// Get the window object
const window = dom.window;

// Publish the window, its document and the required DOM interfaces on the
// global scope, in the same order as before.
//
// Object.defineProperty is used instead of plain assignment because a
// global may already exist as an accessor without a setter; assigning to
// such a property throws in strict-mode module code, whereas redefining a
// configurable property does not.
// NOTE(review): this is aimed at `navigator`, which newer Node.js releases
// define themselves. Confirm against the Node versions this CLI supports.
for (const [globalName, value] of [
  ['window', window],
  ['document', window.document],
  ['Element', window.Element],
  ['Node', window.Node],
  ['NodeFilter', window.NodeFilter],
  ['Range', window.Range],
  ['DOMParser', window.DOMParser],
  ['XMLSerializer', window.XMLSerializer],
  ['navigator', window.navigator],
  ['HTMLElement', window.HTMLElement]
] as const) {
  Object.defineProperty(globalThis, globalName, {
    value,
    writable: true,
    enumerable: true,
    configurable: true
  });
}
773
+
774
// Define DOMSettableTokenList
/**
 * Inert token-list stand-in published on `globalThis`: always empty, every
 * query answers "no", every mutator is a no-op.
 */
(globalThis as any).DOMSettableTokenList = class {
  length: number = 0;
  value: string = '';

  add(token: string): void {}

  contains(token: string): boolean {
    return false;
  }

  item(index: number): string | null {
    return null;
  }

  remove(token: string): void {}

  replace(oldToken: string, newToken: string): boolean {
    return false;
  }

  supports(token: string): boolean {
    return false;
  }

  toggle(token: string, force?: boolean): boolean {
    return false;
  }

  /**
   * Iterates the (always empty) list. Yields nothing, matching `length`
   * being 0; previously a single phantom '' token was produced.
   */
  *[Symbol.iterator](): Iterator<string> {}
};
789
+
790
// Define HTML element types
/**
 * Stand-in for `HTMLIFrameElement`, derived from whatever `HTMLElement` is
 * on `globalThis` when this statement runs. All properties hold empty
 * defaults.
 */
(globalThis as any).HTMLIFrameElement = class extends (globalThis as any).HTMLElement {
  constructor() {
    super();
  }
  align: string = '';
  allow: string = '';
  allowFullscreen: boolean = false;
  contentDocument: Document | null = null;
  contentWindow: Window | null = null;
  frameBorder: string = '';
  height: string = '';
  longDesc: string = '';
  marginHeight: string = '';
  marginWidth: string = '';
  name: string = '';
  referrerPolicy: string = '';
  sandbox: DOMSettableTokenList = {
    length: 0,
    value: '',
    add: () => {},
    contains: () => false,
    item: () => null,
    remove: () => {},
    replace: () => false,
    supports: () => false,
    toggle: () => false,
    // The list reports length 0, so iterating it yields nothing
    // (previously one phantom '' token was produced).
    [Symbol.iterator]: function* () {}
  } as unknown as DOMSettableTokenList;
  scrolling: string = '';
  src: string = '';
  srcdoc: string = '';
  width: string = '';
};
824
+
825
/**
 * Stand-in for `HTMLOListElement`, derived from the `HTMLElement` currently
 * on `globalThis`; carries only default-valued list properties.
 */
(globalThis as any).HTMLOListElement = class extends (globalThis as any).HTMLElement {
  type = '';
  compact = false;
  reversed = false;
  start = 0;

  constructor() {
    super();
  }
};
834
+
835
/**
 * Stand-in for `HTMLUListElement`, derived from the `HTMLElement` currently
 * on `globalThis`; carries only default-valued list properties.
 */
(globalThis as any).HTMLUListElement = class extends (globalThis as any).HTMLElement {
  type = '';
  compact = false;

  constructor() {
    super();
  }
};
842
+
843
/**
 * Stand-in for `HTMLTableElement`, derived from the `HTMLElement` currently
 * on `globalThis`. Collections are permanently empty, delete operations are
 * no-ops, and create/insert operations construct the matching stand-in class
 * looked up on `globalThis` at call time.
 */
(globalThis as any).HTMLTableElement = class extends (globalThis as any).HTMLElement {
  constructor() {
    super();
  }
  caption: HTMLTableCaptionElement | null = null;
  tHead: HTMLTableSectionElement | null = null;
  tFoot: HTMLTableSectionElement | null = null;
  tBodies: HTMLCollectionOf<HTMLTableSectionElement> = {
    length: 0,
    item: () => null,
    namedItem: () => null,
    // Length is 0, so iteration yields nothing (was: one phantom null).
    [Symbol.iterator]: function* () {}
  } as unknown as HTMLCollectionOf<HTMLTableSectionElement>;
  rows: HTMLCollectionOf<HTMLTableRowElement> = {
    length: 0,
    item: () => null,
    namedItem: () => null,
    // Length is 0, so iteration yields nothing (was: one phantom null).
    [Symbol.iterator]: function* () {}
  } as unknown as HTMLCollectionOf<HTMLTableRowElement>;
  align: string = '';
  bgColor: string = '';
  border: string = '';
  cellPadding: string = '';
  cellSpacing: string = '';
  frame: string = '';
  rules: string = '';
  summary: string = '';
  width: string = '';
  createCaption(): HTMLTableCaptionElement {
    return new (globalThis as any).HTMLTableCaptionElement();
  }
  deleteCaption(): void {}
  createTHead(): HTMLTableSectionElement {
    return new (globalThis as any).HTMLTableSectionElement();
  }
  deleteTHead(): void {}
  createTFoot(): HTMLTableSectionElement {
    return new (globalThis as any).HTMLTableSectionElement();
  }
  deleteTFoot(): void {}
  createTBody(): HTMLTableSectionElement {
    return new (globalThis as any).HTMLTableSectionElement();
  }
  insertRow(index?: number): HTMLTableRowElement {
    return new (globalThis as any).HTMLTableRowElement();
  }
  deleteRow(index: number): void {}
};
891
+
892
/**
 * Stand-in for `HTMLTableRowElement`, derived from the `HTMLElement`
 * currently on `globalThis`. `cells` is permanently empty; `insertCell`
 * constructs the `HTMLTableCellElement` found on `globalThis` at call time.
 */
(globalThis as any).HTMLTableRowElement = class extends (globalThis as any).HTMLElement {
  constructor() {
    super();
  }
  rowIndex: number = 0;
  sectionRowIndex: number = 0;
  cells: HTMLCollectionOf<HTMLTableCellElement> = {
    length: 0,
    item: () => null,
    namedItem: () => null,
    // Length is 0, so iteration yields nothing (was: one phantom null).
    [Symbol.iterator]: function* () {}
  } as unknown as HTMLCollectionOf<HTMLTableCellElement>;
  align: string = '';
  bgColor: string = '';
  ch: string = '';
  chOff: string = '';
  vAlign: string = '';
  insertCell(index?: number): HTMLTableCellElement {
    return new (globalThis as any).HTMLTableCellElement();
  }
  deleteCell(index: number): void {}
};
914
+
915
/**
 * Stand-in for `HTMLTableCellElement`, derived from the `HTMLElement`
 * currently on `globalThis`. Spans default to 1; everything else is empty.
 */
(globalThis as any).HTMLTableCellElement = class extends (globalThis as any).HTMLElement {
  constructor() {
    super();
  }
  colSpan: number = 1;
  rowSpan: number = 1;
  headers: DOMSettableTokenList = {
    length: 0,
    value: '',
    add: () => {},
    contains: () => false,
    item: () => null,
    remove: () => {},
    replace: () => false,
    supports: () => false,
    toggle: () => false,
    // Length is 0, so iteration yields nothing (was: one phantom '' token).
    [Symbol.iterator]: function* () {}
  } as unknown as DOMSettableTokenList;
  cellIndex: number = 0;
  scope: string = '';
  abbr: string = '';
  align: string = '';
  axis: string = '';
  bgColor: string = '';
  ch: string = '';
  chOff: string = '';
  height: string = '';
  noWrap: boolean = false;
  vAlign: string = '';
  width: string = '';
};
946
+
947
/**
 * Stand-in for `HTMLTableSectionElement`, derived from the `HTMLElement`
 * currently on `globalThis`. `rows` is permanently empty; `insertRow`
 * constructs the `HTMLTableRowElement` found on `globalThis` at call time.
 */
(globalThis as any).HTMLTableSectionElement = class extends (globalThis as any).HTMLElement {
  constructor() {
    super();
  }
  rows: HTMLCollectionOf<HTMLTableRowElement> = {
    length: 0,
    item: () => null,
    namedItem: () => null,
    // Length is 0, so iteration yields nothing (was: one phantom null).
    [Symbol.iterator]: function* () {}
  } as unknown as HTMLCollectionOf<HTMLTableRowElement>;
  align: string = '';
  ch: string = '';
  chOff: string = '';
  vAlign: string = '';
  insertRow(index?: number): HTMLTableRowElement {
    return new (globalThis as any).HTMLTableRowElement();
  }
  deleteRow(index: number): void {}
};
966
+
967
/**
 * Stand-in for `HTMLTableCaptionElement`, derived from the `HTMLElement`
 * currently on `globalThis`; its only own property is an empty `align`.
 */
(globalThis as any).HTMLTableCaptionElement = class extends (globalThis as any).HTMLElement {
  align = '';

  constructor() {
    super();
  }
};
973
+
974
// Root CLI command; sub-commands are registered on it further down.
const program = new Command();

// Identity shown in help and version output.
program.name('defuddle');
program.description('Extract article content from web pages');
program.version('0.1.0');
980
+
981
+ program
982
+ .command('parse')
983
+ .description('Parse HTML content from a file or URL')
984
+ .argument('<source>', 'HTML file path or URL to parse')
985
+ .option('-o, --output <file>', 'Output file path (default: stdout)')
986
+ .option('-m, --markdown', 'Convert content to markdown format')
987
+ .option('--md', 'Alias for --markdown')
988
+ .option('-j, --json', 'Output as JSON with metadata and content')
989
+ .option('-p, --property <name>', 'Extract a specific property (e.g., title, description, domain)')
990
+ .option('--debug', 'Enable debug mode')
991
/**
 * Command action: load HTML from a URL or a local file, build a JSDOM
 * document from it, run Defuddle over that document and emit the result.
 *
 * @param source  An `http(s)://` URL, or a file path resolved against the
 *                current working directory.
 * @param options Parsed CLI flags (see `ParseOptions`).
 *
 * Output selection, in priority order:
 *   1. `--property <name>`  a single field of the Defuddle result;
 *   2. `--json`             a JSON object with metadata and content;
 *   3. otherwise            the extracted content (markdown with `--markdown`/`--md`).
 *
 * Any failure is reported on stderr and the process exits with status 1.
 */
.action(async (source: string, options: ParseOptions) => {
  // `--md` is an alias for `--markdown`; fold both into a single flag.
  const wantsMarkdown = Boolean(options.markdown || options.md);

  // One predicate decides both how the source is loaded and whether it is
  // handed to JSDOM as the document URL, so the two can never disagree
  // (a local file called "http-notes.html" is a file, not a URL).
  const isRemote = /^https?:\/\//i.test(source);

  /** Run an optional DOM tweak; a failure is only worth a warning. */
  const bestEffort = (what: string, tweak: () => void): void => {
    try {
      tweak();
    } catch (error) {
      console.warn(`Warning: Could not ${what}:`, error);
    }
  };

  /** Add terminal colours to pretty-printed JSON (for display only). */
  const colorizeJson = (json: string): string =>
    json
      .replace(/"([^"]+)":/g, chalk.cyan('"$1":'))
      .replace(/: "([^"]+)"/g, chalk.yellow(': "$1"'))
      .replace(/: (\d+)/g, chalk.yellow(': $1'))
      .replace(/: (true|false|null)/g, chalk.magenta(': $1'));

  let contentDom: JSDOM | undefined;

  try {
    // --- Load the raw HTML -------------------------------------------------
    let html: string;
    if (isRemote) {
      const response = await fetch(source);
      // fetch() only rejects on network failure; an HTTP error status must be
      // checked explicitly or the error page would be parsed as the content.
      if (!response.ok) {
        throw new Error(`Request failed with status ${response.status} ${response.statusText}`.trim());
      }
      html = await response.text();
    } else {
      html = await readFile(resolve(process.cwd(), source), 'utf-8');
    }

    // --- Build the DOM -----------------------------------------------------
    // SECURITY NOTE(review): `runScripts: 'dangerously'` together with
    // `resources: 'usable'` executes scripts from the loaded page inside this
    // Node process. jsdom's sandbox is not a security boundary; confirm this
    // is really required before pointing the tool at untrusted pages.
    contentDom = new JSDOM(html, {
      virtualConsole,
      runScripts: 'dangerously',
      resources: 'usable',
      pretendToBeVisual: true,
      url: isRemote ? source : undefined,
      beforeParse(window: DOMWindow) {
        try {
          setupDOMInterfaces(window);
        } catch (error) {
          console.error('Error setting up DOM interfaces:', error);
        }
      }
    });

    const doc = contentDom.window.document;
    if (!doc.documentElement) {
      throw new Error('Document has no root element');
    }

    // --- Normalise the document before parsing -----------------------------
    bestEffort('set document element properties', () => {
      doc.documentElement.style.cssText = '';
      doc.documentElement.className = '';
    });

    if (!doc.body) {
      doc.documentElement.appendChild(doc.createElement('body'));
    }
    bestEffort('set body properties', () => {
      doc.body.style.cssText = '';
      doc.body.className = '';
    });

    if (!doc.head) {
      doc.documentElement.insertBefore(doc.createElement('head'), doc.body);
    }

    bestEffort('add viewport meta tag', () => {
      const viewport = doc.createElement('meta');
      viewport.setAttribute('name', 'viewport');
      viewport.setAttribute('content', 'width=device-width, initial-scale=1');
      doc.head.appendChild(viewport);
    });

    // Base style element for mobile styles.
    bestEffort('add style element', () => {
      const style = doc.createElement('style');
      style.textContent = `
        @media (max-width: 768px) {
          body { width: 100%; }
        }
      `;
      doc.head.appendChild(style);
    });

    // --- Parse and emit ----------------------------------------------------
    try {
      const defuddle = new Defuddle(doc, {
        debug: options.debug
      });
      const result = await defuddle.parse();

      let output: string;

      if (options.property) {
        // Prefer an exact key, then fall back to a case-insensitive match so
        // both `wordCount` and `wordcount` select the same field.
        const requested = options.property;
        const entries = Object.entries(result);
        const match =
          entries.find(([key]) => key === requested) ??
          entries.find(([key]) => key.toLowerCase() === requested.toLowerCase());

        if (!match) {
          console.error(chalk.red(`Error: Property "${requested}" not found in response`));
          process.exit(1);
        }

        const value: unknown = match[1];
        if (value == null) {
          output = '';
        } else if (typeof value === 'object') {
          // Structured values would otherwise print as "[object Object]".
          output = JSON.stringify(value, null, 2);
        } else {
          output = String(value);
        }
      } else if (options.json) {
        const jsonObj = {
          content: result.content,
          title: result.title,
          description: result.description,
          domain: result.domain,
          favicon: result.favicon,
          image: result.image,
          parseTime: result.parseTime,
          published: result.published,
          author: result.author,
          site: result.site,
          schemaOrgData: result.schemaOrgData,
          wordCount: result.wordCount,
          // Markdown is only converted (and included) when explicitly requested.
          ...(wantsMarkdown
            ? { contentMarkdown: createMarkdownContent(result.content, source) }
            : {})
        };

        const json = JSON.stringify(jsonObj, null, 2);
        // Colour escapes would make a saved file invalid JSON, so they are
        // only applied when printing to the terminal.
        output = options.output ? json : colorizeJson(json);
      } else {
        output = wantsMarkdown
          ? createMarkdownContent(result.content, source)
          : result.content;
      }

      if (options.output) {
        const outputPath = resolve(process.cwd(), options.output);
        await writeFile(outputPath, output, 'utf-8');
        console.log(chalk.green(`Output written to ${options.output}`));
      } else {
        console.log(output);
      }
    } catch (error) {
      console.error(chalk.red('Error during parsing:'), error);
      process.exit(1);
    }
  } catch (error) {
    console.error(chalk.red('Error loading content:'), error instanceof Error ? error.message : 'Unknown error occurred');
    process.exit(1);
  } finally {
    // Shut the jsdom window down so timers or pending resource loads started
    // by the page cannot keep the process alive after the output is written.
    contentDom?.window.close();
  }
});
1162
+
1163
// The command's action handler is async, so use parseAsync(): it awaits the
// handler's promise, whereas parse() would leave any rejection unhandled.
program.parseAsync().catch((error: unknown) => {
  console.error(chalk.red('Error:'), error instanceof Error ? error.message : 'Unknown error occurred');
  process.exit(1);
});