defuddle-cli 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -8,6 +8,9 @@ import { readFile, writeFile } from 'fs/promises';
8
8
  import { fileURLToPath } from 'url';
9
9
  import { dirname, resolve } from 'path';
10
10
  import { createMarkdownContent } from './markdown.js';
11
+ import { setupDOMInterfaces } from './dom/interfaces/setup.js';
12
+ import { setupRange } from './dom/interfaces/range.js';
13
+ import { setupDocumentMethods, setupWindowMethods } from './dom/interfaces/document.js';
11
14
  const __filename = fileURLToPath(import.meta.url);
12
15
  const __dirname = dirname(__filename);
13
16
  // Define CSS interfaces globally first
@@ -22,7 +25,7 @@ globalThis.CSSRule = class {
22
25
  this.parentStyleSheet = null;
23
26
  }
24
27
  };
25
- // Add static properties
28
+ // Static properties
26
29
  Object.defineProperties(globalThis.CSSRule, {
27
30
  STYLE_RULE: { value: 1, writable: false },
28
31
  CHARSET_RULE: { value: 2, writable: false },
@@ -302,343 +305,6 @@ globalThis.HTMLImageElement = class {
302
305
  };
303
306
  // Create a virtual console
304
307
  const virtualConsole = new VirtualConsole();
305
- // Function to set up DOM interfaces
306
- function setupDOMInterfaces(window) {
307
- var _a;
308
- try {
309
- // First, set up basic window properties
310
- try {
311
- if (!window.innerWidth) {
312
- Object.defineProperty(window, 'innerWidth', { value: 1024 });
313
- }
314
- if (!window.innerHeight) {
315
- Object.defineProperty(window, 'innerHeight', { value: 768 });
316
- }
317
- if (!window.devicePixelRatio) {
318
- Object.defineProperty(window, 'devicePixelRatio', { value: 1 });
319
- }
320
- }
321
- catch (error) {
322
- console.warn('Warning: Could not set basic window properties:', error);
323
- }
324
- // Set up CSS interfaces
325
- try {
326
- if (!window.CSSRule) {
327
- window.CSSRule = globalThis.CSSRule;
328
- }
329
- if (!window.CSSMediaRule) {
330
- window.CSSMediaRule = globalThis.CSSMediaRule;
331
- }
332
- if (!window.CSSStyleSheet) {
333
- window.CSSStyleSheet = globalThis.CSSStyleSheet;
334
- }
335
- }
336
- catch (error) {
337
- console.warn('Warning: Could not set CSS interfaces:', error);
338
- }
339
- // Set up HTML and SVG interfaces
340
- try {
341
- if (!window.HTMLImageElement) {
342
- window.HTMLImageElement = globalThis.HTMLImageElement;
343
- }
344
- if (!window.SVGElement) {
345
- window.SVGElement = globalThis.SVGElement;
346
- }
347
- }
348
- catch (error) {
349
- console.warn('Warning: Could not set HTML/SVG interfaces:', error);
350
- }
351
- // Set up screen object
352
- try {
353
- if (!window.screen) {
354
- Object.defineProperty(window, 'screen', {
355
- value: {
356
- width: 1024,
357
- height: 768,
358
- availWidth: 1024,
359
- availHeight: 768,
360
- colorDepth: 24,
361
- pixelDepth: 24,
362
- orientation: {
363
- type: 'landscape-primary',
364
- angle: 0
365
- }
366
- }
367
- });
368
- }
369
- }
370
- catch (error) {
371
- console.warn('Warning: Could not set screen object:', error);
372
- }
373
- // Set up storage objects
374
- try {
375
- if (!window.localStorage) {
376
- const storage = {
377
- length: 0,
378
- getItem: () => null,
379
- setItem: () => { },
380
- removeItem: () => { },
381
- clear: () => { },
382
- key: () => null
383
- };
384
- try {
385
- Object.defineProperty(window, 'localStorage', {
386
- value: storage,
387
- writable: false,
388
- configurable: false
389
- });
390
- }
391
- catch (error) {
392
- // Silently ignore storage setup failures
393
- }
394
- }
395
- if (!window.sessionStorage) {
396
- const storage = {
397
- length: 0,
398
- getItem: () => null,
399
- setItem: () => { },
400
- removeItem: () => { },
401
- clear: () => { },
402
- key: () => null
403
- };
404
- try {
405
- Object.defineProperty(window, 'sessionStorage', {
406
- value: storage,
407
- writable: false,
408
- configurable: false
409
- });
410
- }
411
- catch (error) {
412
- // Silently ignore storage setup failures
413
- }
414
- }
415
- }
416
- catch (error) {
417
- // Silently ignore storage setup failures
418
- }
419
- // Set up animation frame methods
420
- try {
421
- if (!window.requestAnimationFrame) {
422
- window.requestAnimationFrame = (callback) => {
423
- return setTimeout(callback, 0);
424
- };
425
- }
426
- if (!window.cancelAnimationFrame) {
427
- window.cancelAnimationFrame = (handle) => {
428
- clearTimeout(handle);
429
- };
430
- }
431
- }
432
- catch (error) {
433
- console.warn('Warning: Could not set animation frame methods:', error);
434
- }
435
- // Set up DOM methods
436
- try {
437
- if (!window.Document.prototype.getElementsByClassName) {
438
- window.Document.prototype.getElementsByClassName = function (classNames) {
439
- const elements = this.querySelectorAll('.' + classNames);
440
- const collection = new HTMLCollection();
441
- elements.forEach((el, i) => {
442
- collection[i] = el;
443
- });
444
- return collection;
445
- };
446
- }
447
- }
448
- catch (error) {
449
- console.warn('Warning: Could not set getElementsByClassName:', error);
450
- }
451
- // Set up Node methods
452
- try {
453
- if (!window.Node.prototype.contains) {
454
- window.Node.prototype.contains = function (node) {
455
- let current = node;
456
- while (current) {
457
- if (current === this)
458
- return true;
459
- current = current.parentNode;
460
- }
461
- return false;
462
- };
463
- }
464
- }
465
- catch (error) {
466
- console.warn('Warning: Could not set Node.contains:', error);
467
- }
468
- // Set up Element methods
469
- try {
470
- if (!window.Element.prototype.getBoundingClientRect) {
471
- window.Element.prototype.getBoundingClientRect = function () {
472
- return {
473
- top: 0,
474
- left: 0,
475
- bottom: 0,
476
- right: 0,
477
- width: 0,
478
- height: 0,
479
- x: 0,
480
- y: 0,
481
- toJSON: function () { return this; }
482
- };
483
- };
484
- }
485
- }
486
- catch (error) {
487
- console.warn('Warning: Could not set getBoundingClientRect:', error);
488
- }
489
- // Set up Document methods
490
- try {
491
- if (!window.Document.prototype.getSelection) {
492
- window.Document.prototype.getSelection = function () {
493
- const selection = {
494
- anchorNode: null,
495
- anchorOffset: 0,
496
- direction: 'forward',
497
- focusNode: null,
498
- focusOffset: 0,
499
- isCollapsed: true,
500
- rangeCount: 0,
501
- type: 'None',
502
- getRangeAt: function () { return new window.Range(); },
503
- removeAllRanges: function () { },
504
- addRange: function () { },
505
- collapse: function () { },
506
- collapseToEnd: function () { },
507
- collapseToStart: function () { },
508
- deleteFromDocument: function () { },
509
- empty: function () { },
510
- extend: function () { },
511
- modify: function () { },
512
- selectAllChildren: function () { },
513
- setBaseAndExtent: function () { },
514
- setPosition: function () { },
515
- toString: function () { return ''; },
516
- containsNode: function (node, allowPartialContainment = false) {
517
- return false;
518
- },
519
- removeRange: function (range) { }
520
- };
521
- return selection;
522
- };
523
- }
524
- }
525
- catch (error) {
526
- console.warn('Warning: Could not set getSelection:', error);
527
- }
528
- // Set up Window methods
529
- try {
530
- if (!window.Window.prototype.getComputedStyle) {
531
- window.Window.prototype.getComputedStyle = function (elt, pseudoElt) {
532
- const style = {
533
- accentColor: '',
534
- alignContent: '',
535
- alignItems: '',
536
- alignSelf: '',
537
- getPropertyValue: function (prop) { return ''; }
538
- };
539
- return style;
540
- };
541
- }
542
- }
543
- catch (error) {
544
- console.warn('Warning: Could not set getComputedStyle:', error);
545
- }
546
- // Set up Range constructor last
547
- try {
548
- if (!window.Range) {
549
- window.Range = (_a = class Range {
550
- constructor() {
551
- this.START_TO_START = 0;
552
- this.START_TO_END = 1;
553
- this.END_TO_END = 2;
554
- this.END_TO_START = 3;
555
- this.startContainer = document.documentElement;
556
- this.startOffset = 0;
557
- this.endContainer = document.documentElement;
558
- this.endOffset = 0;
559
- this.collapsed = true;
560
- this.commonAncestorContainer = document.documentElement;
561
- }
562
- createContextualFragment(fragment) {
563
- return document.createDocumentFragment();
564
- }
565
- detach() { }
566
- cloneContents() {
567
- return document.createDocumentFragment();
568
- }
569
- cloneRange() {
570
- return new _a();
571
- }
572
- collapse(toStart = false) { }
573
- compareBoundaryPoints(how, sourceRange) {
574
- return 0;
575
- }
576
- comparePoint(node, offset) {
577
- return 0;
578
- }
579
- deleteContents() { }
580
- extractContents() {
581
- return document.createDocumentFragment();
582
- }
583
- getBoundingClientRect() {
584
- return {
585
- top: 0,
586
- left: 0,
587
- bottom: 0,
588
- right: 0,
589
- width: 0,
590
- height: 0,
591
- x: 0,
592
- y: 0,
593
- toJSON: function () { return this; }
594
- };
595
- }
596
- getClientRects() {
597
- return {
598
- length: 0,
599
- item: function () { return null; },
600
- [Symbol.iterator]: function* () { }
601
- };
602
- }
603
- insertNode(node) { }
604
- intersectsNode(node) {
605
- return false;
606
- }
607
- isPointInRange(node, offset) {
608
- return false;
609
- }
610
- selectNode(node) { }
611
- selectNodeContents(node) {
612
- this.startContainer = node;
613
- this.startOffset = 0;
614
- this.endContainer = node;
615
- this.endOffset = node.childNodes.length;
616
- this.collapsed = false;
617
- }
618
- setEnd(node, offset) { }
619
- setEndAfter(node) { }
620
- setEndBefore(node) { }
621
- setStart(node, offset) { }
622
- setStartAfter(node) { }
623
- setStartBefore(node) { }
624
- surroundContents(newParent) { }
625
- },
626
- _a.START_TO_START = 0,
627
- _a.START_TO_END = 1,
628
- _a.END_TO_END = 2,
629
- _a.END_TO_START = 3,
630
- _a);
631
- }
632
- }
633
- catch (error) {
634
- console.warn('Warning: Could not set Range constructor:', error);
635
- }
636
- }
637
- catch (error) {
638
- console.error('Error in setupDOMInterfaces:', error);
639
- // Don't throw the error, just log it
640
- }
641
- }
642
308
  // Create a virtual DOM
643
309
  const dom = new JSDOM('<!DOCTYPE html><html><body></body></html>', {
644
310
  virtualConsole,
@@ -647,6 +313,9 @@ const dom = new JSDOM('<!DOCTYPE html><html><body></body></html>', {
647
313
  pretendToBeVisual: true,
648
314
  beforeParse(window) {
649
315
  setupDOMInterfaces(window);
316
+ setupRange(window);
317
+ setupDocumentMethods(window);
318
+ setupWindowMethods(window);
650
319
  }
651
320
  });
652
321
  // Get the window object
@@ -662,7 +331,14 @@ globalThis.NodeFilter = window.NodeFilter;
662
331
  globalThis.Range = window.Range;
663
332
  globalThis.DOMParser = window.DOMParser;
664
333
  globalThis.XMLSerializer = window.XMLSerializer;
665
- globalThis.navigator = window.navigator;
334
+ // Handle navigator property
335
+ if (!globalThis.navigator || Object.getOwnPropertyDescriptor(globalThis, 'navigator')?.configurable) {
336
+ Object.defineProperty(globalThis, 'navigator', {
337
+ value: window.navigator,
338
+ writable: false,
339
+ configurable: true
340
+ });
341
+ }
666
342
  globalThis.HTMLElement = window.HTMLElement;
667
343
  // Define DOMSettableTokenList
668
344
  globalThis.DOMSettableTokenList = class {
@@ -857,6 +533,71 @@ globalThis.HTMLTableCaptionElement = class extends globalThis.HTMLElement {
857
533
  this.align = '';
858
534
  }
859
535
  };
536
+ globalThis.HTMLButtonElement = class extends globalThis.HTMLElement {
537
+ constructor() {
538
+ super();
539
+ this.disabled = false;
540
+ this.form = null;
541
+ this.formAction = '';
542
+ this.formEnctype = '';
543
+ this.formMethod = '';
544
+ this.formNoValidate = false;
545
+ this.formTarget = '';
546
+ this.name = '';
547
+ this.type = 'submit';
548
+ this.value = '';
549
+ this.menu = null;
550
+ }
551
+ };
552
+ // Add HTMLSpanElement interface
553
+ globalThis.HTMLSpanElement = class extends globalThis.HTMLElement {
554
+ constructor() {
555
+ super();
556
+ }
557
+ };
558
+ // Add HTMLDivElement interface
559
+ globalThis.HTMLDivElement = class extends globalThis.HTMLElement {
560
+ constructor() {
561
+ super();
562
+ this.align = '';
563
+ }
564
+ };
565
+ globalThis.HTMLAnchorElement = class extends globalThis.HTMLElement {
566
+ constructor() {
567
+ super();
568
+ this.href = '';
569
+ this.target = '';
570
+ this.download = '';
571
+ this.ping = '';
572
+ this.rel = '';
573
+ this.relList = {
574
+ length: 0,
575
+ value: '',
576
+ add: () => { },
577
+ contains: () => false,
578
+ item: () => null,
579
+ remove: () => { },
580
+ replace: () => false,
581
+ supports: () => false,
582
+ toggle: () => false,
583
+ [Symbol.iterator]: function* () { yield ''; return undefined; }
584
+ };
585
+ this.hreflang = '';
586
+ this.type = '';
587
+ this.text = '';
588
+ this.referrerPolicy = '';
589
+ this.origin = '';
590
+ this.protocol = '';
591
+ this.username = '';
592
+ this.password = '';
593
+ this.host = '';
594
+ this.hostname = '';
595
+ this.port = '';
596
+ this.pathname = '';
597
+ this.search = '';
598
+ this.hash = '';
599
+ }
600
+ };
860
601
  const program = new Command();
861
602
  program
862
603
  .name('defuddle')
@@ -965,6 +706,10 @@ program
965
706
  debug: options.debug
966
707
  });
967
708
  const result = await defuddle.parse();
709
+ // If in debug mode, don't show content output
710
+ if (options.debug) {
711
+ process.exit(0);
712
+ }
968
713
  // Format output
969
714
  let output;
970
715
  let content;
package/dist/markdown.js CHANGED
@@ -233,14 +233,6 @@ export function createMarkdownContent(content, url) {
233
233
  return '\n\n' + items.join('\n\n') + '\n\n';
234
234
  }
235
235
  });
236
- turndownService.addRule('removeHiddenElements', {
237
- filter: function (node) {
238
- return (node.style.display === 'none');
239
- },
240
- replacement: function () {
241
- return '';
242
- }
243
- });
244
236
  turndownService.addRule('citations', {
245
237
  filter: (node) => {
246
238
  if (node instanceof Element) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "defuddle-cli",
3
- "version": "0.1.0",
3
+ "version": "0.1.2",
4
4
  "description": "Command line interface for Defuddle - extract article content from web pages",
5
5
  "main": "dist/index.js",
6
6
  "bin": {
@@ -0,0 +1,53 @@
1
+ import { DOMWindow } from 'jsdom';
2
+ import { SetupFunction } from './setup.js';
3
+
4
+ export const setupDocumentMethods: SetupFunction = (window: DOMWindow) => {
5
+ if (!window.Document.prototype.getSelection) {
6
+ window.Document.prototype.getSelection = function(): Selection | null {
7
+ const selection = {
8
+ anchorNode: null,
9
+ anchorOffset: 0,
10
+ direction: 'forward',
11
+ focusNode: null,
12
+ focusOffset: 0,
13
+ isCollapsed: true,
14
+ rangeCount: 0,
15
+ type: 'None',
16
+ getRangeAt: function() { return new window.Range(); },
17
+ removeAllRanges: function() {},
18
+ addRange: function() {},
19
+ collapse: function() {},
20
+ collapseToEnd: function() {},
21
+ collapseToStart: function() {},
22
+ deleteFromDocument: function() {},
23
+ empty: function() {},
24
+ extend: function() {},
25
+ modify: function() {},
26
+ selectAllChildren: function() {},
27
+ setBaseAndExtent: function() {},
28
+ setPosition: function() {},
29
+ toString: function() { return ''; },
30
+ containsNode: function(node: Node, allowPartialContainment: boolean = false): boolean {
31
+ return false;
32
+ },
33
+ removeRange: function(range: Range): void {}
34
+ } as unknown as Selection;
35
+ return selection;
36
+ };
37
+ }
38
+ };
39
+
40
+ export const setupWindowMethods: SetupFunction = (window: DOMWindow) => {
41
+ if (!window.Window.prototype.getComputedStyle) {
42
+ window.Window.prototype.getComputedStyle = function(elt: Element, pseudoElt?: string | null): CSSStyleDeclaration {
43
+ const style = {
44
+ accentColor: '',
45
+ alignContent: '',
46
+ alignItems: '',
47
+ alignSelf: '',
48
+ getPropertyValue: function(prop: string): string { return ''; }
49
+ } as CSSStyleDeclaration;
50
+ return style;
51
+ };
52
+ }
53
+ };
@@ -0,0 +1,120 @@
1
+ import { DOMWindow } from 'jsdom';
2
+ import { SetupFunction } from './setup.js';
3
+
4
+ export const setupRange: SetupFunction = (window: DOMWindow) => {
5
+ if (!window.Range) {
6
+ window.Range = class Range {
7
+ static readonly START_TO_START = 0;
8
+ static readonly START_TO_END = 1;
9
+ static readonly END_TO_END = 2;
10
+ static readonly END_TO_START = 3;
11
+
12
+ readonly START_TO_START = 0;
13
+ readonly START_TO_END = 1;
14
+ readonly END_TO_END = 2;
15
+ readonly END_TO_START = 3;
16
+
17
+ startContainer: Node;
18
+ startOffset: number;
19
+ endContainer: Node;
20
+ endOffset: number;
21
+ collapsed: boolean;
22
+ commonAncestorContainer: Node;
23
+
24
+ constructor() {
25
+ this.startContainer = document.documentElement;
26
+ this.startOffset = 0;
27
+ this.endContainer = document.documentElement;
28
+ this.endOffset = 0;
29
+ this.collapsed = true;
30
+ this.commonAncestorContainer = document.documentElement;
31
+ }
32
+
33
+ createContextualFragment(fragment: string): DocumentFragment {
34
+ return document.createDocumentFragment();
35
+ }
36
+
37
+ detach(): void {}
38
+
39
+ cloneContents(): DocumentFragment {
40
+ return document.createDocumentFragment();
41
+ }
42
+
43
+ cloneRange(): Range {
44
+ return new Range();
45
+ }
46
+
47
+ collapse(toStart: boolean = false): void {}
48
+
49
+ compareBoundaryPoints(how: number, sourceRange: Range): number {
50
+ return 0;
51
+ }
52
+
53
+ comparePoint(node: Node, offset: number): number {
54
+ return 0;
55
+ }
56
+
57
+ deleteContents(): void {}
58
+
59
+ extractContents(): DocumentFragment {
60
+ return document.createDocumentFragment();
61
+ }
62
+
63
+ getBoundingClientRect(): DOMRect {
64
+ return {
65
+ top: 0,
66
+ left: 0,
67
+ bottom: 0,
68
+ right: 0,
69
+ width: 0,
70
+ height: 0,
71
+ x: 0,
72
+ y: 0,
73
+ toJSON: function() { return this; }
74
+ };
75
+ }
76
+
77
+ getClientRects(): DOMRectList {
78
+ return {
79
+ length: 0,
80
+ item: function() { return null; },
81
+ [Symbol.iterator]: function*() {}
82
+ } as DOMRectList;
83
+ }
84
+
85
+ insertNode(node: Node): void {}
86
+
87
+ intersectsNode(node: Node): boolean {
88
+ return false;
89
+ }
90
+
91
+ isPointInRange(node: Node, offset: number): boolean {
92
+ return false;
93
+ }
94
+
95
+ selectNode(node: Node): void {}
96
+
97
+ selectNodeContents(node: Node): void {
98
+ this.startContainer = node;
99
+ this.startOffset = 0;
100
+ this.endContainer = node;
101
+ this.endOffset = node.childNodes.length;
102
+ this.collapsed = false;
103
+ }
104
+
105
+ setEnd(node: Node, offset: number): void {}
106
+
107
+ setEndAfter(node: Node): void {}
108
+
109
+ setEndBefore(node: Node): void {}
110
+
111
+ setStart(node: Node, offset: number): void {}
112
+
113
+ setStartAfter(node: Node): void {}
114
+
115
+ setStartBefore(node: Node): void {}
116
+
117
+ surroundContents(newParent: Node): void {}
118
+ };
119
+ }
120
+ };