defuddle-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +75 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +1041 -0
- package/dist/markdown.d.ts +1 -0
- package/dist/markdown.js +511 -0
- package/package.json +34 -0
- package/src/index.ts +1163 -0
- package/src/markdown.ts +603 -0
- package/tsconfig.json +16 -0
package/src/index.ts
ADDED
|
@@ -0,0 +1,1163 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import { Command } from 'commander';
|
|
4
|
+
import { JSDOM, VirtualConsole, DOMWindow } from 'jsdom';
|
|
5
|
+
import pkg from 'defuddle';
|
|
6
|
+
const { Defuddle } = pkg;
|
|
7
|
+
import chalk from 'chalk';
|
|
8
|
+
import { readFile, writeFile } from 'fs/promises';
|
|
9
|
+
import { fileURLToPath } from 'url';
|
|
10
|
+
import { dirname, resolve } from 'path';
|
|
11
|
+
import { createMarkdownContent } from './markdown.js';
|
|
12
|
+
|
|
13
|
+
/**
 * Shape of a settable token list (a set of space-separated tokens with a
 * string `value`). Used in this file as the type of the iframe stub's
 * `sandbox` property.
 */
interface DOMSettableTokenList {
	/** Number of tokens in the list. */
	length: number;
	/** The whole list serialized as a single string. */
	value: string;

	/** Returns the token at `index`, or `null` when there is none. */
	item(index: number): string | null;
	/** Reports whether `token` is present. */
	contains(token: string): boolean;
	/** Reports whether `token` is a supported value. */
	supports(token: string): boolean;

	/** Adds `token` to the list. */
	add(token: string): void;
	/** Removes `token` from the list. */
	remove(token: string): void;
	/** Swaps `oldToken` for `newToken`; the boolean reports the outcome. */
	replace(oldToken: string, newToken: string): boolean;
	/** Toggles `token`, optionally forcing it on or off; the boolean reports the outcome. */
	toggle(token: string, force?: boolean): boolean;

	/** Iterates over the tokens. */
	[Symbol.iterator](): Iterator<string>;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Option bag for the CLI. Every field is optional.
 * NOTE(review): the meaning of each flag is inferred from its name only; the
 * code that consumes these options is outside this part of the file.
 */
interface ParseOptions {
	/** Presumably the destination path for the result — confirm against the command handler. */
	output?: string;
	/** Presumably requests Markdown output. */
	markdown?: boolean;
	/** Presumably a short alias of `markdown`. */
	md?: boolean;
	/** Presumably requests JSON output. */
	json?: boolean;
	/** Presumably enables debug mode. */
	debug?: boolean;
	/** Presumably the name of a single property to extract. */
	property?: string;
}
|
|
34
|
+
|
|
35
|
+
// Absolute path of this module file, derived from its module URL.
const __filename = fileURLToPath(import.meta.url);
// Directory that contains this module file.
const __dirname = dirname(__filename);
|
|
37
|
+
|
|
38
|
+
// Define CSS interfaces globally first
/**
 * Minimal stand-in for the CSSOM `CSSRule` interface, installed on `globalThis`.
 *
 * Instances carry an empty `cssText`, no parent rule and no parent style sheet.
 * `type` defaults to 1 (STYLE_RULE) and can be overridden through the
 * optional constructor argument.
 */
(globalThis as any).CSSRule = class {
	readonly type: number = 1;
	cssText: string;
	parentRule: any;
	parentStyleSheet: any;

	/**
	 * @param type Optional numeric rule type; when omitted the default of 1 is kept.
	 */
	constructor(type?: number) {
		if (type !== undefined) {
			// `type` is declared readonly, so it is overridden via defineProperty.
			Object.defineProperty(this, 'type', { value: type });
		}
		this.cssText = '';
		this.parentRule = null;
		this.parentStyleSheet = null;
	}
};

// Add static properties: the numeric rule-type constants, read-only on the constructor.
Object.defineProperties((globalThis as any).CSSRule, {
	STYLE_RULE: { value: 1, writable: false },
	CHARSET_RULE: { value: 2, writable: false },
	IMPORT_RULE: { value: 3, writable: false },
	MEDIA_RULE: { value: 4, writable: false },
	FONT_FACE_RULE: { value: 5, writable: false },
	PAGE_RULE: { value: 6, writable: false },
	KEYFRAMES_RULE: { value: 7, writable: false },
	KEYFRAME_RULE: { value: 8, writable: false },
	// MARGIN_RULE (9) was absent from the table, leaving a gap in the constants.
	MARGIN_RULE: { value: 9, writable: false },
	NAMESPACE_RULE: { value: 10, writable: false },
	COUNTER_STYLE_RULE: { value: 11, writable: false },
	SUPPORTS_RULE: { value: 12, writable: false },
	DOCUMENT_RULE: { value: 13, writable: false },
	FONT_FEATURE_VALUES_RULE: { value: 14, writable: false },
	VIEWPORT_RULE: { value: 15, writable: false },
	REGION_STYLE_RULE: { value: 16, writable: false }
});
|
|
73
|
+
|
|
74
|
+
/**
 * Minimal stand-in for `CSSMediaRule`, built on the global `CSSRule` stub.
 *
 * The rule has type 4 (MEDIA_RULE), an empty media list and an empty rule
 * list. `insertRule` and `deleteRule` are no-ops (`insertRule` returns 0).
 */
(globalThis as any).CSSMediaRule = class extends (globalThis as any).CSSRule {
	media: MediaList;
	cssRules: CSSRuleList;
	conditionText: string = '';
	deleteRule: (index: number) => void = () => {};
	insertRule: (rule: string, index?: number) => number = () => 0;

	constructor() {
		super();
		Object.defineProperty(this, 'type', { value: 4 }); // CSSRule.MEDIA_RULE
		// Both lists report length 0, so their iterators must produce nothing;
		// previously each yielded one phantom entry.
		this.media = {
			length: 0,
			mediaText: '',
			item: () => null,
			appendMedium: () => {},
			deleteMedium: () => {},
			toString: () => '',
			[Symbol.iterator]: () => ([] as string[])[Symbol.iterator]()
		};
		this.cssRules = {
			length: 0,
			item: () => null,
			[Symbol.iterator]: () => ([] as CSSRule[])[Symbol.iterator]()
		};
	}
};
|
|
103
|
+
|
|
104
|
+
/**
 * Minimal stand-in for `CSSStyleSheet`, installed on `globalThis`.
 *
 * The sheet is always empty: `media`, `cssRules` and `rules` have length 0,
 * and every mutating method is a no-op. `rules` is the same object as
 * `cssRules`.
 */
(globalThis as any).CSSStyleSheet = class {
	type: string = 'text/css';
	href: string | null = null;
	ownerNode: Element | ProcessingInstruction | null = null;
	parentStyleSheet: CSSStyleSheet | null = null;
	title: string | null = null;
	media: MediaList;
	disabled: boolean = false;
	cssRules: CSSRuleList;
	ownerRule: CSSRule | null = null;
	rules: CSSRuleList;
	addRule: (selector: string, style: string, index?: number) => number = () => 0;
	removeRule: (index?: number) => void = () => {};
	/** Resolves to this sheet without applying `text`. */
	replace: (text: string) => Promise<CSSStyleSheet> = async () => this as unknown as CSSStyleSheet;
	replaceSync: (text: string) => void = () => {};

	constructor() {
		// Both lists report length 0, so their iterators must produce nothing;
		// previously each yielded one phantom entry.
		this.media = {
			length: 0,
			mediaText: '',
			item: () => null,
			appendMedium: () => {},
			deleteMedium: () => {},
			toString: () => '',
			[Symbol.iterator]: () => ([] as string[])[Symbol.iterator]()
		};
		this.cssRules = {
			length: 0,
			item: () => null,
			[Symbol.iterator]: () => ([] as CSSRule[])[Symbol.iterator]()
		};
		this.rules = this.cssRules;
	}

	/** No-op; always reports index 0. */
	insertRule(rule: string, index?: number): number {
		return 0;
	}

	/** No-op. */
	deleteRule(index: number): void {}
};
|
|
147
|
+
|
|
148
|
+
// Define SVGElement globally
/**
 * Inert stand-in for `SVGElement`, installed on `globalThis`.
 *
 * Every property holds an empty/neutral value, every query returns
 * `null`, `false` or an empty collection, and every mutator is a no-op.
 * Collections that report `length: 0` iterate over nothing (they previously
 * yielded one phantom entry).
 */
(globalThis as any).SVGElement = (() => {
	// Fresh iterator over zero items, typed for the collection it belongs to.
	const emptyIterator = <T>() => ([] as T[])[Symbol.iterator]();

	// Fresh empty HTMLCollection-shaped object.
	const emptyCollection = () => ({
		length: 0,
		item: () => null,
		namedItem: () => null,
		[Symbol.iterator]: () => emptyIterator<Element>()
	}) as unknown as HTMLCollectionOf<Element>;

	// Fresh empty NodeList-shaped object.
	const emptyNodeList = <T extends Node>() => ({
		length: 0,
		item: () => null,
		forEach: () => {},
		entries: () => emptyIterator<[number, T]>(),
		keys: () => emptyIterator<number>(),
		values: () => emptyIterator<T>(),
		[Symbol.iterator]: () => emptyIterator<T>()
	}) as unknown as NodeListOf<T>;

	return class {
		id: string = '';
		className: string = '';
		style: CSSStyleDeclaration = {
			cssText: '',
			length: 0,
			parentRule: null,
			getPropertyPriority: () => '',
			getPropertyValue: () => '',
			item: () => '',
			removeProperty: () => '',
			setProperty: () => '',
			[Symbol.iterator]: () => emptyIterator<string>()
		} as unknown as CSSStyleDeclaration;
		ownerSVGElement: SVGElement | null = null;
		viewportElement: SVGElement | null = null;
		tagName: string = '';
		namespaceURI: string | null = null;
		prefix: string | null = null;
		localName: string = '';
		baseURI: string = '';
		textContent: string | null = '';
		innerHTML: string = '';
		outerHTML: string = '';
		hidden: boolean = false;
		slot: string = '';
		attributes: NamedNodeMap = {
			length: 0,
			getNamedItem: () => null,
			getNamedItemNS: () => null,
			item: () => null,
			removeNamedItem: () => null,
			removeNamedItemNS: () => null,
			setNamedItem: () => null,
			setNamedItemNS: () => null,
			[Symbol.iterator]: () => emptyIterator<Attr>()
		} as unknown as NamedNodeMap;
		childNodes: NodeListOf<ChildNode> = emptyNodeList<ChildNode>();
		firstChild: ChildNode | null = null;
		lastChild: ChildNode | null = null;
		nextSibling: ChildNode | null = null;
		previousSibling: ChildNode | null = null;
		parentNode: Node & ParentNode | null = null;
		parentElement: HTMLElement | null = null;
		childElementCount: number = 0;
		firstElementChild: Element | null = null;
		lastElementChild: Element | null = null;
		nextElementSibling: Element | null = null;
		previousElementSibling: Element | null = null;
		children: HTMLCollection = emptyCollection();

		constructor() {
			// Initialize any required properties
		}

		/** Always `null`: the stub stores no attributes. */
		getAttribute(name: string): string | null {
			return null;
		}

		/** Always `null`: the stub stores no attributes. */
		getAttributeNS(namespaceURI: string | null, localName: string): string | null {
			return null;
		}

		/** No-op. */
		setAttribute(name: string, value: string): void {}

		/** No-op. */
		setAttributeNS(namespaceURI: string | null, qualifiedName: string, value: string): void {}

		/** No-op; added so the plain remover exists next to `removeAttributeNS`. */
		removeAttribute(name: string): void {}

		/** No-op. */
		removeAttributeNS(namespaceURI: string | null, localName: string): void {}

		/** Always `false`. */
		hasAttribute(name: string): boolean {
			return false;
		}

		/** Always `false`. */
		hasAttributeNS(namespaceURI: string | null, localName: string): boolean {
			return false;
		}

		/** Returns an all-zero rectangle. */
		getBoundingClientRect(): DOMRect {
			return {
				top: 0,
				left: 0,
				bottom: 0,
				right: 0,
				width: 0,
				height: 0,
				x: 0,
				y: 0,
				toJSON: function() { return this; }
			};
		}

		/** Returns an empty rectangle list. */
		getClientRects(): DOMRectList {
			return {
				length: 0,
				item: function() { return null; },
				[Symbol.iterator]: () => emptyIterator<DOMRect>()
			} as unknown as DOMRectList;
		}

		/** Always an empty collection. */
		getElementsByClassName(classNames: string): HTMLCollectionOf<Element> {
			return emptyCollection();
		}

		/** Always an empty collection. */
		getElementsByTagName(qualifiedName: string): HTMLCollectionOf<Element> {
			return emptyCollection();
		}

		/** Always an empty collection. */
		getElementsByTagNameNS(namespaceURI: string | null, localName: string): HTMLCollectionOf<Element> {
			return emptyCollection();
		}

		/** Always `null`. */
		querySelector(selectors: string): Element | null {
			return null;
		}

		/** Always an empty list. */
		querySelectorAll(selectors: string): NodeListOf<Element> {
			return emptyNodeList<Element>();
		}

		/** Always `false`. */
		matches(selectors: string): boolean {
			return false;
		}

		/** Always `null`. */
		closest(selectors: string): Element | null {
			return null;
		}

		/** Always `false`. */
		contains(other: Node | null): boolean {
			return false;
		}

		// Tree mutators: all no-ops.
		append(...nodes: (Node | string)[]): void {}

		prepend(...nodes: (Node | string)[]): void {}

		after(...nodes: (Node | string)[]): void {}

		before(...nodes: (Node | string)[]): void {}

		replaceWith(...nodes: (Node | string)[]): void {}

		remove(): void {}

		/** No-op; always `null`. */
		insertAdjacentElement(where: InsertPosition, element: Element): Element | null {
			return null;
		}

		insertAdjacentText(where: InsertPosition, data: string): void {}

		insertAdjacentHTML(position: InsertPosition, text: string): void {}

		replaceChildren(...nodes: (Node | string)[]): void {}
	};
})();
|
|
338
|
+
|
|
339
|
+
// Define HTMLImageElement globally
/**
 * Inert stand-in for `HTMLImageElement`, installed on `globalThis`.
 * All string properties start empty, all numeric ones at 0, and the image
 * is never reported as `complete`.
 */
(globalThis as any).HTMLImageElement = class {
	alt = '';
	src = '';
	srcset = '';
	sizes = '';
	crossOrigin: string | null = null;
	useMap = '';
	isMap = false;
	width = 0;
	height = 0;
	naturalWidth = 0;
	naturalHeight = 0;
	complete = false;
	name = '';
	lowsrc = '';
	align = '';
	hspace = 0;
	vspace = 0;
	longDesc = '';
	border = '';
	x = 0;
	y = 0;
	currentSrc = '';
	decoding: 'sync' | 'async' | 'auto' = 'auto';
	fetchPriority: 'high' | 'low' | 'auto' = 'auto';
	loading: 'eager' | 'lazy' = 'eager';
	referrerPolicy = '';

	/** Resolves immediately; nothing is actually decoded. */
	async decode(): Promise<void> {}
};
|
|
376
|
+
|
|
377
|
+
// Virtual console that is handed to the JSDOM instance created further down.
// NOTE(review): no listeners are attached in this part of the file, so jsdom's
// console traffic is presumably discarded rather than forwarded — confirm.
const virtualConsole = new VirtualConsole();
|
|
379
|
+
|
|
380
|
+
/** Builds a Storage-shaped object that never holds data; every method is a no-op. */
function createNoopStorage() {
	return {
		length: 0,
		getItem: () => null,
		setItem: () => {},
		removeItem: () => {},
		clear: () => {},
		key: () => null
	};
}

/** Builds an all-zero rectangle, used wherever no real layout information exists. */
function createZeroRect(): DOMRect {
	return {
		top: 0,
		left: 0,
		bottom: 0,
		right: 0,
		width: 0,
		height: 0,
		x: 0,
		y: 0,
		toJSON: function() { return this; }
	};
}

/** Wraps `elements` in a static, array-like object with the HTMLCollection methods. */
function createStaticCollection(elements: Element[]): HTMLCollectionOf<Element> {
	const collection: Record<string | symbol, unknown> = {
		length: elements.length,
		item: (index: number) => elements[index] ?? null,
		namedItem: (name: string) =>
			elements.find(el => el.id === name || el.getAttribute('name') === name) ?? null,
		[Symbol.iterator]: () => elements[Symbol.iterator]()
	};
	elements.forEach((el, i) => {
		collection[i] = el;
	});
	return collection as unknown as HTMLCollectionOf<Element>;
}

// Function to set up DOM interfaces
/**
 * Fills in browser APIs that are missing on `window`, leaving anything that
 * already exists untouched.
 *
 * Each group is installed inside its own try/catch, so a failure in one
 * group is reported with `console.warn` and the remaining groups still run.
 * Storage failures are deliberately silent. The function never throws.
 *
 * @param window The jsdom window to patch in place.
 */
function setupDOMInterfaces(window: DOMWindow) {
	try {
		// First, set up basic window properties
		try {
			if (!window.innerWidth) {
				Object.defineProperty(window, 'innerWidth', { value: 1024 });
			}
			if (!window.innerHeight) {
				Object.defineProperty(window, 'innerHeight', { value: 768 });
			}
			if (!window.devicePixelRatio) {
				Object.defineProperty(window, 'devicePixelRatio', { value: 1 });
			}
		} catch (error) {
			console.warn('Warning: Could not set basic window properties:', error);
		}

		// Set up CSS interfaces (falls back to the stubs installed on globalThis)
		try {
			if (!window.CSSRule) {
				window.CSSRule = (globalThis as any).CSSRule as any;
			}
			if (!window.CSSMediaRule) {
				window.CSSMediaRule = (globalThis as any).CSSMediaRule as any;
			}
			if (!window.CSSStyleSheet) {
				window.CSSStyleSheet = (globalThis as any).CSSStyleSheet as any;
			}
		} catch (error) {
			console.warn('Warning: Could not set CSS interfaces:', error);
		}

		// Set up HTML and SVG interfaces
		try {
			if (!window.HTMLImageElement) {
				window.HTMLImageElement = (globalThis as any).HTMLImageElement as any;
			}
			if (!window.SVGElement) {
				window.SVGElement = (globalThis as any).SVGElement as any;
			}
		} catch (error) {
			console.warn('Warning: Could not set HTML/SVG interfaces:', error);
		}

		// Set up screen object
		try {
			if (!window.screen) {
				Object.defineProperty(window, 'screen', {
					value: {
						width: 1024,
						height: 768,
						availWidth: 1024,
						availHeight: 768,
						colorDepth: 24,
						pixelDepth: 24,
						orientation: {
							type: 'landscape-primary',
							angle: 0
						}
					}
				});
			}
		} catch (error) {
			console.warn('Warning: Could not set screen object:', error);
		}

		// Set up storage objects
		try {
			for (const name of ['localStorage', 'sessionStorage'] as const) {
				if (!window[name]) {
					try {
						Object.defineProperty(window, name, {
							value: createNoopStorage(),
							writable: false,
							configurable: false
						});
					} catch (error) {
						// Silently ignore storage setup failures
					}
				}
			}
		} catch (error) {
			// Silently ignore storage setup failures (best-effort by design;
			// presumably reading the storage getter itself can throw — TODO confirm)
		}

		// Set up animation frame methods
		try {
			if (!window.requestAnimationFrame) {
				window.requestAnimationFrame = (callback: FrameRequestCallback): number => {
					// Frame callbacks expect a timestamp argument; supply one.
					return setTimeout(() => callback(performance.now()), 0) as unknown as number;
				};
			}
			if (!window.cancelAnimationFrame) {
				window.cancelAnimationFrame = (handle: number): void => {
					clearTimeout(handle as unknown as number);
				};
			}
		} catch (error) {
			console.warn('Warning: Could not set animation frame methods:', error);
		}

		// Set up DOM methods
		try {
			if (!window.Document.prototype.getElementsByClassName) {
				window.Document.prototype.getElementsByClassName = function(classNames: string): HTMLCollectionOf<Element> {
					// `classNames` is a whitespace-separated list and an element must
					// carry every token. Matching through classList avoids building a
					// selector, so no CSS escaping of the tokens is needed.
					const tokens = classNames.split(/[\t\n\f\r ]+/).filter(Boolean);
					if (tokens.length === 0) {
						return createStaticCollection([]);
					}
					const matches = Array.from(this.querySelectorAll('*'))
						.filter(el => tokens.every(token => el.classList.contains(token)));
					return createStaticCollection(matches);
				};
			}
		} catch (error) {
			console.warn('Warning: Could not set getElementsByClassName:', error);
		}

		// Set up Node methods
		try {
			if (!window.Node.prototype.contains) {
				window.Node.prototype.contains = function(node: Node): boolean {
					// Walk up from `node`; a node contains itself.
					let current: Node | null = node;
					while (current) {
						if (current === this) return true;
						current = current.parentNode;
					}
					return false;
				};
			}
		} catch (error) {
			console.warn('Warning: Could not set Node.contains:', error);
		}

		// Set up Element methods
		try {
			if (!window.Element.prototype.getBoundingClientRect) {
				window.Element.prototype.getBoundingClientRect = function(): DOMRect {
					return createZeroRect();
				};
			}
		} catch (error) {
			console.warn('Warning: Could not set getBoundingClientRect:', error);
		}

		// Set up Document methods
		try {
			if (!window.Document.prototype.getSelection) {
				window.Document.prototype.getSelection = function(): Selection | null {
					// An always-empty, collapsed selection whose mutators do nothing.
					const selection = {
						anchorNode: null,
						anchorOffset: 0,
						direction: 'forward',
						focusNode: null,
						focusOffset: 0,
						isCollapsed: true,
						rangeCount: 0,
						type: 'None',
						getRangeAt: function() { return new window.Range(); },
						removeAllRanges: function() {},
						addRange: function() {},
						collapse: function() {},
						collapseToEnd: function() {},
						collapseToStart: function() {},
						deleteFromDocument: function() {},
						empty: function() {},
						extend: function() {},
						modify: function() {},
						selectAllChildren: function() {},
						setBaseAndExtent: function() {},
						setPosition: function() {},
						toString: function() { return ''; },
						containsNode: function(node: Node, allowPartialContainment: boolean = false): boolean {
							return false;
						},
						removeRange: function(range: Range): void {}
					} as unknown as Selection;
					return selection;
				};
			}
		} catch (error) {
			console.warn('Warning: Could not set getSelection:', error);
		}

		// Set up Window methods
		try {
			if (!window.Window.prototype.getComputedStyle) {
				window.Window.prototype.getComputedStyle = function(elt: Element, pseudoElt?: string | null): CSSStyleDeclaration {
					// Only a handful of properties exist; every lookup yields ''.
					const style = {
						accentColor: '',
						alignContent: '',
						alignItems: '',
						alignSelf: '',
						getPropertyValue: function(prop: string): string { return ''; }
					} as CSSStyleDeclaration;
					return style;
				};
			}
		} catch (error) {
			console.warn('Warning: Could not set getComputedStyle:', error);
		}

		// Set up Range constructor last
		try {
			if (!window.Range) {
				// Bind to the document of the window being patched. The ambient
				// global `document` may be undefined or belong to another window.
				const ownerDocument = () => window.document;

				window.Range = class Range {
					static readonly START_TO_START = 0;
					static readonly START_TO_END = 1;
					static readonly END_TO_END = 2;
					static readonly END_TO_START = 3;

					readonly START_TO_START = 0;
					readonly START_TO_END = 1;
					readonly END_TO_END = 2;
					readonly END_TO_START = 3;

					startContainer: Node;
					startOffset: number;
					endContainer: Node;
					endOffset: number;
					collapsed: boolean;
					commonAncestorContainer: Node;

					/** Starts collapsed at offset 0 of the document element. */
					constructor() {
						const root = ownerDocument().documentElement;
						this.startContainer = root;
						this.startOffset = 0;
						this.endContainer = root;
						this.endOffset = 0;
						this.collapsed = true;
						this.commonAncestorContainer = root;
					}

					createContextualFragment(fragment: string): DocumentFragment {
						return ownerDocument().createDocumentFragment();
					}

					detach(): void {}

					cloneContents(): DocumentFragment {
						return ownerDocument().createDocumentFragment();
					}

					cloneRange(): Range {
						return new Range();
					}

					collapse(toStart: boolean = false): void {}

					compareBoundaryPoints(how: number, sourceRange: Range): number {
						return 0;
					}

					comparePoint(node: Node, offset: number): number {
						return 0;
					}

					deleteContents(): void {}

					extractContents(): DocumentFragment {
						return ownerDocument().createDocumentFragment();
					}

					getBoundingClientRect(): DOMRect {
						return createZeroRect();
					}

					getClientRects(): DOMRectList {
						return {
							length: 0,
							item: function() { return null; },
							[Symbol.iterator]: function*() {}
						} as DOMRectList;
					}

					insertNode(node: Node): void {}

					intersectsNode(node: Node): boolean {
						return false;
					}

					isPointInRange(node: Node, offset: number): boolean {
						return false;
					}

					selectNode(node: Node): void {}

					/** Makes the range span all children of `node`. */
					selectNodeContents(node: Node): void {
						this.startContainer = node;
						this.startOffset = 0;
						this.endContainer = node;
						this.endOffset = node.childNodes.length;
						// Start and end coincide when the node has no children.
						this.collapsed = this.endOffset === 0;
						this.commonAncestorContainer = node;
					}

					setEnd(node: Node, offset: number): void {}

					setEndAfter(node: Node): void {}

					setEndBefore(node: Node): void {}

					setStart(node: Node, offset: number): void {}

					setStartAfter(node: Node): void {}

					setStartBefore(node: Node): void {}

					surroundContents(newParent: Node): void {}
				};
			}
		} catch (error) {
			console.warn('Warning: Could not set Range constructor:', error);
		}

	} catch (error) {
		console.error('Error in setupDOMInterfaces:', error);
		// Don't throw the error, just log it
	}
}
|
|
743
|
+
|
|
744
|
+
// Shared virtual DOM: an empty HTML document whose window is patched by
// setupDOMInterfaces before parsing starts.
const dom = new JSDOM('<!DOCTYPE html><html><body></body></html>', {
	virtualConsole,
	runScripts: 'dangerously',
	resources: 'usable',
	pretendToBeVisual: true,
	beforeParse: (window: DOMWindow) => {
		setupDOMInterfaces(window);
	}
});

// Window of the shared virtual DOM
const window = dom.window;

// Publish the window, its document and the DOM interfaces needed elsewhere on
// the global scope, in the same order as before.
Object.assign(globalThis as any, {
	window,
	document: window.document,
	Element: window.Element,
	Node: window.Node,
	NodeFilter: window.NodeFilter,
	Range: window.Range,
	DOMParser: window.DOMParser,
	XMLSerializer: window.XMLSerializer,
	navigator: window.navigator,
	HTMLElement: window.HTMLElement
});
|
|
773
|
+
|
|
774
|
+
// Define DOMSettableTokenList
/**
 * Inert stand-in for `DOMSettableTokenList`, installed on `globalThis`.
 * The list is always empty: queries return `false`/`null`, mutators do
 * nothing, and iteration produces no tokens.
 */
(globalThis as any).DOMSettableTokenList = class {
	length: number = 0;
	value: string = '';
	add(token: string): void {}
	contains(token: string): boolean { return false; }
	item(index: number): string | null { return null; }
	remove(token: string): void {}
	replace(oldToken: string, newToken: string): boolean { return false; }
	supports(token: string): boolean { return false; }
	toggle(token: string, force?: boolean): boolean { return false; }
	[Symbol.iterator](): Iterator<string> {
		// `length` is 0, so iteration must be empty (it used to yield one '').
		return ([] as string[])[Symbol.iterator]();
	}
};
|
|
789
|
+
|
|
790
|
+
// Define HTML element types
/**
 * Global HTMLIFrameElement stand-in built on the global HTMLElement.
 *
 * It only declares the iframe properties with empty defaults. `sandbox` is an
 * inert, empty token list; its iterator yields nothing so that it agrees with
 * `length: 0` (previously it produced a single empty string).
 */
(globalThis as any).HTMLIFrameElement = class extends (globalThis as any).HTMLElement {
	constructor() {
		super();
	}
	align: string = '';
	allow: string = '';
	allowFullscreen: boolean = false;
	contentDocument: Document | null = null;
	contentWindow: Window | null = null;
	frameBorder: string = '';
	height: string = '';
	longDesc: string = '';
	marginHeight: string = '';
	marginWidth: string = '';
	name: string = '';
	referrerPolicy: string = '';
	sandbox: DOMSettableTokenList = {
		length: 0,
		value: '',
		add: () => {},
		contains: () => false,
		item: () => null,
		remove: () => {},
		replace: () => false,
		supports: () => false,
		toggle: () => false,
		// Empty list: iterate over zero tokens.
		[Symbol.iterator]: function*() {}
	} as unknown as DOMSettableTokenList;
	scrolling: string = '';
	src: string = '';
	srcdoc: string = '';
	width: string = '';
};
|
|
824
|
+
|
|
825
|
+
/**
 * Global HTMLOListElement stand-in built on the global HTMLElement.
 * Declares the ordered-list properties with fixed default values only.
 */
(globalThis as any).HTMLOListElement = class extends (globalThis as any).HTMLElement {
	type = '';
	compact = false;
	reversed = false;
	start = 0;

	constructor() {
		super();
	}
};
|
|
834
|
+
|
|
835
|
+
/**
 * Global HTMLUListElement stand-in built on the global HTMLElement.
 * Declares the unordered-list properties with fixed default values only.
 */
(globalThis as any).HTMLUListElement = class extends (globalThis as any).HTMLElement {
	type = '';
	compact = false;

	constructor() {
		super();
	}
};
|
|
842
|
+
|
|
843
|
+
/**
 * Global HTMLTableElement stand-in built on the global HTMLElement.
 *
 * Properties hold empty defaults. The `create*` / `insertRow` methods return
 * a new instance of the matching global stub class without attaching it
 * anywhere, and the `delete*` methods do nothing. `tBodies` and `rows` are
 * empty collections; their iterators yield nothing so that they agree with
 * `length: 0` (previously each produced a single `null`).
 */
(globalThis as any).HTMLTableElement = class extends (globalThis as any).HTMLElement {
	constructor() {
		super();
	}
	caption: HTMLTableCaptionElement | null = null;
	tHead: HTMLTableSectionElement | null = null;
	tFoot: HTMLTableSectionElement | null = null;
	tBodies: HTMLCollectionOf<HTMLTableSectionElement> = {
		length: 0,
		item: () => null,
		namedItem: () => null,
		// Empty collection: iterate over zero elements.
		[Symbol.iterator]: function*() {}
	} as HTMLCollectionOf<HTMLTableSectionElement>;
	rows: HTMLCollectionOf<HTMLTableRowElement> = {
		length: 0,
		item: () => null,
		namedItem: () => null,
		// Empty collection: iterate over zero elements.
		[Symbol.iterator]: function*() {}
	} as HTMLCollectionOf<HTMLTableRowElement>;
	align: string = '';
	bgColor: string = '';
	border: string = '';
	cellPadding: string = '';
	cellSpacing: string = '';
	frame: string = '';
	rules: string = '';
	summary: string = '';
	width: string = '';
	/** Returns a new caption stub; `caption` is left unchanged. */
	createCaption(): HTMLTableCaptionElement {
		return new (globalThis as any).HTMLTableCaptionElement();
	}
	deleteCaption(): void {}
	/** Returns a new section stub; `tHead` is left unchanged. */
	createTHead(): HTMLTableSectionElement {
		return new (globalThis as any).HTMLTableSectionElement();
	}
	deleteTHead(): void {}
	/** Returns a new section stub; `tFoot` is left unchanged. */
	createTFoot(): HTMLTableSectionElement {
		return new (globalThis as any).HTMLTableSectionElement();
	}
	deleteTFoot(): void {}
	/** Returns a new section stub; `tBodies` is left unchanged. */
	createTBody(): HTMLTableSectionElement {
		return new (globalThis as any).HTMLTableSectionElement();
	}
	/** Returns a new row stub; `index` is ignored and `rows` is left unchanged. */
	insertRow(index?: number): HTMLTableRowElement {
		return new (globalThis as any).HTMLTableRowElement();
	}
	deleteRow(index: number): void {}
};
|
|
891
|
+
|
|
892
|
+
/**
 * Global HTMLTableRowElement stand-in built on the global HTMLElement.
 *
 * Properties hold empty defaults. `cells` is an empty collection whose
 * iterator yields nothing, matching `length: 0` (previously it produced a
 * single `null`). `insertCell` returns a new cell stub without storing it;
 * `deleteCell` does nothing.
 */
(globalThis as any).HTMLTableRowElement = class extends (globalThis as any).HTMLElement {
	constructor() {
		super();
	}
	rowIndex: number = 0;
	sectionRowIndex: number = 0;
	cells: HTMLCollectionOf<HTMLTableCellElement> = {
		length: 0,
		item: () => null,
		namedItem: () => null,
		// Empty collection: iterate over zero elements.
		[Symbol.iterator]: function*() {}
	} as HTMLCollectionOf<HTMLTableCellElement>;
	align: string = '';
	bgColor: string = '';
	ch: string = '';
	chOff: string = '';
	vAlign: string = '';
	/** Returns a new cell stub; `index` is ignored and `cells` is left unchanged. */
	insertCell(index?: number): HTMLTableCellElement {
		return new (globalThis as any).HTMLTableCellElement();
	}
	deleteCell(index: number): void {}
};
|
|
914
|
+
|
|
915
|
+
/**
 * Global HTMLTableCellElement stand-in built on the global HTMLElement.
 *
 * Properties hold fixed defaults (`colSpan` and `rowSpan` start at 1).
 * `headers` is an inert, empty token list; its iterator yields nothing so
 * that it agrees with `length: 0` (previously it produced a single empty
 * string).
 */
(globalThis as any).HTMLTableCellElement = class extends (globalThis as any).HTMLElement {
	constructor() {
		super();
	}
	colSpan: number = 1;
	rowSpan: number = 1;
	headers: DOMSettableTokenList = {
		length: 0,
		value: '',
		add: () => {},
		contains: () => false,
		item: () => null,
		remove: () => {},
		replace: () => false,
		supports: () => false,
		toggle: () => false,
		// Empty list: iterate over zero tokens.
		[Symbol.iterator]: function*() {}
	} as unknown as DOMSettableTokenList;
	cellIndex: number = 0;
	scope: string = '';
	abbr: string = '';
	align: string = '';
	axis: string = '';
	bgColor: string = '';
	ch: string = '';
	chOff: string = '';
	height: string = '';
	noWrap: boolean = false;
	vAlign: string = '';
	width: string = '';
};
|
|
946
|
+
|
|
947
|
+
/**
 * Global HTMLTableSectionElement stand-in built on the global HTMLElement.
 *
 * Properties hold empty defaults. `rows` is an empty collection whose
 * iterator yields nothing, matching `length: 0` (previously it produced a
 * single `null`). `insertRow` returns a new row stub without storing it;
 * `deleteRow` does nothing.
 */
(globalThis as any).HTMLTableSectionElement = class extends (globalThis as any).HTMLElement {
	constructor() {
		super();
	}
	rows: HTMLCollectionOf<HTMLTableRowElement> = {
		length: 0,
		item: () => null,
		namedItem: () => null,
		// Empty collection: iterate over zero elements.
		[Symbol.iterator]: function*() {}
	} as HTMLCollectionOf<HTMLTableRowElement>;
	align: string = '';
	ch: string = '';
	chOff: string = '';
	vAlign: string = '';
	/** Returns a new row stub; `index` is ignored and `rows` is left unchanged. */
	insertRow(index?: number): HTMLTableRowElement {
		return new (globalThis as any).HTMLTableRowElement();
	}
	deleteRow(index: number): void {}
};
|
|
966
|
+
|
|
967
|
+
/**
 * Global HTMLTableCaptionElement stand-in built on the global HTMLElement.
 * Declares only the `align` property, defaulting to an empty string.
 */
(globalThis as any).HTMLTableCaptionElement = class extends (globalThis as any).HTMLElement {
	align = '';

	constructor() {
		super();
	}
};
|
|
973
|
+
|
|
974
|
+
// Root CLI command: name, help description and reported version.
const program = new Command()
	.name('defuddle')
	.description('Extract article content from web pages')
	.version('0.1.0');
|
|
980
|
+
|
|
981
|
+
/**
 * `parse <source>` command.
 *
 * Loads HTML from an http(s) URL or a local file, builds a JSDOM document
 * from it, runs Defuddle over that document and prints the result (or writes
 * it to `--output`). Output is, in priority order: a single property
 * (`--property`), a JSON object (`--json`), markdown (`--markdown` / `--md`),
 * or the extracted HTML content.
 *
 * Any failure is reported on stderr and ends the process with exit code 1.
 */
program
	.command('parse')
	.description('Parse HTML content from a file or URL')
	.argument('<source>', 'HTML file path or URL to parse')
	.option('-o, --output <file>', 'Output file path (default: stdout)')
	.option('-m, --markdown', 'Convert content to markdown format')
	.option('--md', 'Alias for --markdown')
	.option('-j, --json', 'Output as JSON with metadata and content')
	.option('-p, --property <name>', 'Extract a specific property (e.g., title, description, domain)')
	.option('--debug', 'Enable debug mode')
	.action(async (source: string, options: ParseOptions) => {
		try {
			// Handle --md alias
			if (options.md) {
				options.markdown = true;
			}
			let html: string;

			try {
				// Determine if source is a URL or file path. The same test also decides
				// whether JSDOM is given a document URL, so a local file whose name
				// merely starts with "http" is not treated as a URL.
				const isUrl = source.startsWith('http://') || source.startsWith('https://');
				if (isUrl) {
					const response = await fetch(source);
					// fetch() only rejects on network failure; without this check an HTTP
					// error page (404, 500, ...) would be parsed as if it were the article.
					if (!response.ok) {
						throw new Error(`Failed to fetch ${source}: HTTP ${response.status} ${response.statusText}`);
					}
					html = await response.text();
				} else {
					const filePath = resolve(process.cwd(), source);
					html = await readFile(filePath, 'utf-8');
				}

				// Create a new JSDOM instance with the HTML content
				const contentDom = new JSDOM(html, {
					virtualConsole,
					// NOTE(review): 'dangerously' together with resources: 'usable' lets
					// scripts from the loaded page (including remote ones) execute inside
					// this Node process. Confirm this is required for extraction before
					// using the CLI on untrusted input.
					runScripts: 'dangerously',
					resources: 'usable',
					pretendToBeVisual: true,
					url: isUrl ? source : undefined,
					beforeParse(window: DOMWindow) {
						try {
							setupDOMInterfaces(window);
						} catch (error) {
							console.error('Error setting up DOM interfaces:', error);
						}
					}
				});

				// Initialize document properties
				const doc = contentDom.window.document;

				// Ensure document has required properties
				if (!doc.documentElement) {
					throw new Error('Document has no root element');
				}

				// Reset inline style and class on the root element
				try {
					doc.documentElement.style.cssText = '';
					doc.documentElement.className = '';
				} catch (error) {
					console.warn('Warning: Could not set document element properties:', error);
				}

				// Ensure body exists and is properly set up
				if (!doc.body) {
					const body = doc.createElement('body');
					doc.documentElement.appendChild(body);
				}
				try {
					doc.body.style.cssText = '';
					doc.body.className = '';
				} catch (error) {
					console.warn('Warning: Could not set body properties:', error);
				}

				// Set up viewport and ensure head exists
				if (!doc.head) {
					const head = doc.createElement('head');
					doc.documentElement.insertBefore(head, doc.body);
				}

				// Add viewport meta tag
				try {
					const viewport = doc.createElement('meta');
					viewport.setAttribute('name', 'viewport');
					viewport.setAttribute('content', 'width=device-width, initial-scale=1');
					doc.head.appendChild(viewport);
				} catch (error) {
					console.warn('Warning: Could not add viewport meta tag:', error);
				}

				// Add a base style element for mobile styles
				try {
					const style = doc.createElement('style');
					style.textContent = `
						@media (max-width: 768px) {
							body { width: 100%; }
						}
					`;
					doc.head.appendChild(style);
				} catch (error) {
					console.warn('Warning: Could not add style element:', error);
				}

				// Parse content with debug mode if enabled
				try {
					const defuddle = new Defuddle(doc, {
						debug: options.debug
					});

					const result = await defuddle.parse();

					// Format output
					let output: string;
					let contentMarkdown: string | undefined;

					// Markdown is only ever emitted when --markdown/--md is set, so skip
					// the conversion otherwise (including plain --json).
					if (options.markdown) {
						contentMarkdown = createMarkdownContent(result.content, source);
					}

					// Format the response based on options
					if (options.property) {
						// Extract specific property. Matching is case-insensitive against the
						// result's own keys so camelCase names such as wordCount, parseTime
						// and schemaOrgData can be requested.
						const property = options.property.toLowerCase();
						const key = Object.keys(result).find(name => name.toLowerCase() === property);
						if (key === undefined) {
							console.error(chalk.red(`Error: Property "${property}" not found in response`));
							process.exit(1);
						}
						const value: unknown = result[key as keyof typeof result];
						if (value === null || value === undefined) {
							output = '';
						} else if (typeof value === 'object') {
							// Objects would otherwise print as "[object Object]".
							output = JSON.stringify(value, null, 2);
						} else {
							output = String(value);
						}
					} else if (options.json) {
						const jsonObj: any = {
							content: result.content,
							title: result.title,
							description: result.description,
							domain: result.domain,
							favicon: result.favicon,
							image: result.image,
							parseTime: result.parseTime,
							published: result.published,
							author: result.author,
							site: result.site,
							schemaOrgData: result.schemaOrgData,
							wordCount: result.wordCount
						};

						// Only include markdown content if markdown flag is set
						if (options.markdown) {
							jsonObj.contentMarkdown = contentMarkdown;
						}

						const json = JSON.stringify(jsonObj, null, 2);
						// Colour is for terminal display only: a file written via --output
						// must stay valid JSON, free of ANSI escape sequences.
						output = options.output
							? json
							: json
								.replace(/"([^"]+)":/g, chalk.cyan('"$1":'))
								.replace(/: "([^"]+)"/g, chalk.yellow(': "$1"'))
								.replace(/: (\d+)/g, chalk.yellow(': $1'))
								.replace(/: (true|false|null)/g, chalk.magenta(': $1'));
					} else {
						// contentMarkdown is defined exactly when --markdown/--md was given.
						output = contentMarkdown ?? result.content;
					}

					// Handle output
					if (options.output) {
						const outputPath = resolve(process.cwd(), options.output);
						await writeFile(outputPath, output, 'utf-8');
						console.log(chalk.green(`Output written to ${options.output}`));
					} else {
						console.log(output);
					}
				} catch (error) {
					console.error(chalk.red('Error during parsing:'), error);
					process.exit(1);
				}
			} catch (error) {
				console.error(chalk.red('Error loading content:'), error instanceof Error ? error.message : 'Unknown error occurred');
				process.exit(1);
			}

		} catch (error) {
			console.error(chalk.red('Error:'), error instanceof Error ? error.message : 'Unknown error occurred');
			process.exit(1);
		}
	});
|
|
1162
|
+
|
|
1163
|
+
// The `parse` action handler is async, so use parseAsync(): it returns a
// promise that settles once the handler has finished. A rejection that
// escapes the handler is reported here and ends the process with exit code 1.
program.parseAsync().catch((error: unknown) => {
	console.error(chalk.red('Error:'), error instanceof Error ? error.message : 'Unknown error occurred');
	process.exit(1);
});
|